@apideck/agent-analytics
Advanced tools
+45
-2
@@ -93,2 +93,29 @@ 'use strict'; | ||
| } | ||
/**
 * Score how many "real browser" request headers are absent or suspicious on a
 * request whose User-Agent looks like a browser.
 *
 * Requests whose UA does not mention mozilla/chrome/safari/firefox are not
 * scored at all and yield `{ score: 0, signals: [], likely: false }`.
 *
 * Signals (each adds 1 to the score):
 * - `missing-accept-language` — no Accept-Language header.
 * - `missing-sec-fetch-mode`  — no Sec-Fetch-Mode header.
 * - `missing-sec-ch-ua`       — no Sec-CH-UA header although the UA claims to
 *                               be Chromium-based.
 * - `headless-chrome-hint`    — Sec-CH-UA is present and names HeadlessChrome.
 * - `missing-or-bare-accept`  — Accept is absent, empty, or exactly "*" + "/*".
 * - `connection-close`        — Connection header equals "close".
 *
 * @param req Object exposing `headers.get(name)` (e.g. a Fetch API Request).
 * @returns `{ score, signals, likely }`, where `score` is `signals.length`
 *          and `likely` is true when at least two signals fired.
 */
function detectHeadless(req) {
  const ua = (req.headers.get("user-agent") || "").toLowerCase();
  const isBrowserUA =
    ua.includes("mozilla") ||
    ua.includes("chrome") ||
    ua.includes("safari") ||
    ua.includes("firefox");
  if (!isBrowserUA) return { score: 0, signals: [], likely: false };

  const signals = [];
  if (!req.headers.get("accept-language")) {
    signals.push("missing-accept-language");
  }
  if (!req.headers.get("sec-fetch-mode")) {
    signals.push("missing-sec-fetch-mode");
  }

  // Sec-CH-UA is a Chromium-only client hint. Firefox and Safari never send
  // it, so its absence is only meaningful when the UA claims a Chromium
  // engine. Counting it unconditionally flagged every plain fetch() from a
  // real Firefox/Safari (default Accept: */*) as headless.
  const claimsChromium = ua.includes("chrome/") || ua.includes("chromium/");
  const secChUa = req.headers.get("sec-ch-ua");
  if (secChUa) {
    if (secChUa.toLowerCase().includes("headlesschrome")) {
      signals.push("headless-chrome-hint");
    }
  } else if (claimsChromium) {
    signals.push("missing-sec-ch-ua");
  }

  const accept = req.headers.get("accept") || "";
  if (!accept || accept === "*/*") {
    signals.push("missing-or-bare-accept");
  }
  if ((req.headers.get("connection") || "").toLowerCase() === "close") {
    signals.push("connection-close");
  }

  const score = signals.length;
  // A single anomaly is too common among real browsers; require two.
  return { score, signals, likely: score >= 2 };
}
| function classifyAgent(userAgent) { | ||
@@ -105,2 +132,12 @@ const label = parseBotName(userAgent); | ||
| } | ||
/**
 * Classify a whole request rather than just its User-Agent string.
 *
 * Runs `classifyAgent` on the User-Agent header (empty string when absent)
 * and `detectHeadless` on the request, then returns the agent classification
 * with the headless result attached under `headless`. When the UA-based kind
 * is "browser" and the headless check reports `likely`, the returned `kind`
 * is "headless-likely" instead.
 *
 * @param req Object exposing `headers.get(name)` (e.g. a Fetch API Request).
 * @returns The `classifyAgent` fields, a possibly overridden `kind`, and `headless`.
 */
function classifyRequest(req) {
  const agent = classifyAgent(req.headers.get("user-agent") || "");
  const headless = detectHeadless(req);
  // Only browser-looking traffic is downgraded; other kinds keep their label.
  const looksHeadless = agent.kind === "browser" && headless.likely;
  return {
    ...agent,
    kind: looksHeadless ? "headless-likely" : agent.kind,
    headless,
  };
}
@@ -122,3 +159,5 @@ // src/hash.ts | ||
| if (onlyBots && !isAiBot(userAgent)) return; | ||
| if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) return; | ||
| if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) { | ||
| if (!detectHeadless(req).likely) return; | ||
| } | ||
| let pathname = "/"; | ||
@@ -137,3 +176,3 @@ let originFromUrl = ""; | ||
| const referer = req.headers.get("referer"); | ||
| const classification = classifyAgent(userAgent); | ||
| const classification = classifyRequest(req); | ||
| const event = { | ||
@@ -152,2 +191,4 @@ event: opts.eventName ?? "agent_visit", | ||
| coding_agent_hint: classification.codingAgentHint, | ||
| headless_score: classification.headless?.score ?? 0, | ||
| headless_likely: classification.headless?.likely ?? false, | ||
| referer, | ||
@@ -217,3 +258,5 @@ source: opts.source ?? null, | ||
| exports.classifyAgent = classifyAgent; | ||
| exports.classifyRequest = classifyRequest; | ||
| exports.customAnalytics = customAnalytics; | ||
| exports.detectHeadless = detectHeadless; | ||
| exports.firstUserAgentProduct = firstUserAgentProduct; | ||
@@ -220,0 +263,0 @@ exports.hashId = hashId; |
@@ -1,1 +0,1 @@ | ||
| {"version":3,"sources":["../src/bots.ts","../src/hash.ts","../src/track.ts","../src/adapters/posthog.ts","../src/adapters/webhook.ts","../src/adapters/custom.ts"],"names":[],"mappings":";;;AAgBO,IAAM,cAAA,GACX;AAmBK,IAAM,mBAAA,GACX;AAEK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;AAEO,SAAS,aAAa,SAAA,EAA+C;AAC1E,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,mBAAA,CAAoB,KAAK,SAAS,CAAA;AAC3C;AAeO,SAAS,aAAa,SAAA,EAA8C;AACzE,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,CAAA,GAAI,UAAU,WAAA,EAAY;AAGhC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,IAAK,CAAA,CAAE,SAAS,QAAQ,CAAA;AAC1G,IAAA,OAAO,SAAA;AACT,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AAC5F,EAAA,IAAI,CAAA,CAAE,SAAS,eAAe,CAAA,IAAK,EAAE,QAAA,CAAS,iBAAiB,GAAG,OAAO,YAAA;AACzE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,cAAA;AAChC,EAAA,IAAI,CAAA,CAAE,SAAS,iBAAiB,CAAA,IAAK,EAAE,QAAA,CAAS,WAAW,GAAG,OAAO,QAAA;AACrE,EAAA,IAAI,CAAA,CAAE,SAAS,mBAAmB,CAAA,IAAK,EAAE,QAAA,CAAS,UAAU,GAAG,OAAO,OAAA;AACtE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,MAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,SAAS,oBAAoB,CAAA,IAAK,EAAE,QAAA,CAAS,aAAa,GAAG,OAAO,MAAA;AAC1E,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,SAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,EAAG,OAAO,YAAA;AACxC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA
;AAGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,UAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,KAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,gBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,WAAA;AACzC,EAAA,IAAI,CAAA,CAAE,SAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,SAAS,GAAG,OAAO,SAAA;AAChE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,OAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AAGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,OAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,qBAAqB,CAAA,EAAG,OAAO,UAAA;AAC9C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,kBAAkB,CAAA,EAAG,OAAO,mBAAA;AAG3C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AAGjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,iBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,WAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,MAAM,CAAA,EAAG,OAAO,MAAA;AAC/B,EAAA,IAA
I,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,QAAA;AAIrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,UAAA;AACpC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAC7B,EAAA,IAAI,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAC9B,EAAA,IAAI,4BAAA,CAA6B,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,KAAA;AACjD,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,cAAA,CAAe,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,YAAA;AACnC,EAAA,IAAI,mBAAA,CAAoB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,iBAAA;AACxC,EAAA,IAAI,kBAAA,CAAmB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,gBAAA;AACvC,EAAA,IAAI,UAAA,CAAW,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,QAAA;AAC/B,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,SAAA;AAChC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAG7B,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,SAAS,SAAS,CAAA;AAC/F,IAAA,OAAO,SAAA;AAET,EAAA,OAAO,OAAA;AACT;AAOO,SAAS,sBAAsB,SAAA,EAA8C;AAClF,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,eAAA,GAAkB,SAAA,CAAU,KAAA,CAAM,yCAAyC,CAAA;AACjF,EAAA,IAAI,eAAA,IAAmB,gBAAgB,CAAC,CAAA,SAAU,eAAA,CAAgB,CAAC,EAAE,IAAA,EAAK;AAC1E,EAAA,MAAM,QAAQ,SAAA,CAAU,IAAA,EAAK,CAAE,KAAA,CAAM,GAAG,CAAA,CAAE,CAAC,CAAA,EAAG,IAAA,GAAO,KAAA,CAAM,KAAK,CAAA,CAAE,CAAC,GAAG,IAAA,EAAK;AAC3E,EAAA,OAAO,KAAA,IAAS,OAAA;AAClB;AAmCO,SAAS,cAAc,SAAA,EAA2D;AACvF,EAAA,MAAM,KAAA,GAAQ,aAAa,SAAS,CAAA;AACpC,EAAA,MAAM,KAAA,GAAQ,QAAQ,SAAS,CAAA;AAC/B,EAAA,MAAM,UAAA,GAAa,aAAa,SAAS,CAAA;AAEzC,EAAA,IAAI,IAAA;AACJ,EAAA,IAAI,OAAO,IAAA,GAAO,kBAAA;AAAA,OAAA,IACT,YAAY,IAAA,GAAO,mBAAA;AAAA,OAAA,IACnB,KAAA,KAAU,WAAW,IAAA,GAAO,SAAA;AAAA,OAChC,IAAA,GAAO,OAAA;AAEZ,EAAA,OAAO,EAAE,IAAA,EAAM,KAAA,EAAO,OAAA,EAAS,KAAA,EAAO,iBAAiB,UAAA,EAAW;AACpE;;;AC/MO,SAAS,OAAO,KAAA,EAAuB;AAC5C,EAAA,IAAI,CAAA,GAAI,IAAA;AACR,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACrC,IAAA,CAAA,GAAA,CAAM,KAAK,CAAA,IAAK,CAAA,GAAI,KAAA,CAAM,UAAA,CAAW,CAAC,CAAA,G
AAK,UAAA;AAAA,EAC7C;AACA,EAAA,OAAO,OAAA,GAAA,CAAW,CAAA,KAAM,CAAA,EAAG,QAAA,CAAS,EAAE,CAAA;AACxC;;;ACCA,eAAsB,UAAA,CACpB,KACA,IAAA,EACe;AACf,EAAA,MAAM,SAAA,GAAY,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAEnD,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,KAAA;AAClC,EAAA,MAAM,YAAA,GAAe,KAAK,YAAA,IAAgB,KAAA;AAC1C,EAAA,IAAI,QAAA,IAAY,CAAC,OAAA,CAAQ,SAAS,CAAA,EAAG;AACrC,EAAA,IAAI,YAAA,IAAgB,CAAC,OAAA,CAAQ,SAAS,KAAK,CAAC,YAAA,CAAa,SAAS,CAAA,EAAG;AAErE,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI,aAAA,GAAgB,EAAA;AACpB,EAAA,IAAI;AACF,IAAA,MAAM,GAAA,GAAM,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA;AAC3B,IAAA,QAAA,GAAW,GAAA,CAAI,QAAA;AACf,IAAA,aAAA,GAAgB,GAAA,CAAI,MAAA;AAAA,EACtB,CAAA,CAAA,MAAQ;AAEN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AACA,EAAA,MAAM,MAAA,GAAS,KAAK,MAAA,IAAU,aAAA;AAE9B,EAAA,MAAM,YAAA,GAAe,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,iBAAiB,CAAA,IAAK,EAAA;AAC3D,EAAA,MAAM,EAAA,GAAK,aAAa,KAAA,CAAM,GAAG,EAAE,CAAC,CAAA,EAAG,MAAK,IAAK,EAAA;AACjD,EAAA,MAAM,OAAA,GAAU,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,SAAS,CAAA;AACzC,EAAA,MAAM,cAAA,GAAiB,cAAc,SAAS,CAAA;AAE9C,EAAA,MAAM,KAAA,GAAQ;AAAA,IACZ,KAAA,EAAO,KAAK,SAAA,IAAa,aAAA;AAAA,IACzB,YAAY,MAAA,CAAO,CAAA,EAAG,EAAE,CAAA,CAAA,EAAI,SAAS,CAAA,CAAE,CAAA;AAAA,IACvC,SAAA,EAAA,iBAAW,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,IAClC,UAAA,EAAY;AAAA,MACV,uBAAA,EAAyB,KAAA;AAAA,MACzB,cAAc,MAAA,GAAS,CAAA,EAAG,MAAM,CAAA,EAAG,QAAQ,CAAA,CAAA,GAAK,QAAA;AAAA,MAChD,IAAA,EAAM,QAAA;AAAA,MACN,UAAA,EAAY,SAAA;AAAA,MACZ,WAAW,cAAA,CAAe,OAAA;AAAA,MAC1B,UAAU,cAAA,CAAe,KAAA;AAAA,MACzB,aAAa,cAAA,CAAe,IAAA;AAAA,MAC5B,mBAAmB,cAAA,CAAe,eAAA;AAAA,MAClC,OAAA;AAAA,MACA,MAAA,EAAQ,KAAK,MAAA,IAAU,IAAA;AAAA,MACvB,GAAG,IAAA,CAAK;AAAA;AACV,GACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,IAAA,CAAK,SAAA,CAAU,OAAA,CAAQ,KAAK,CAAA;AAAA,EACpC,CAAA,CAAA,MAAQ;AAAA,EAER;AACF;;;ACpCO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,IAAQ,0BAAA;AAC/B,EAAA,MAAM,IAAA,GAAA,CAAQ,cAAA,CAAe,IAAA,CAAK,OAAO,CAAA,GAAI,OAAA,GAAU,CAAA,QAAA,EAAW,OAAO,CAAA,CAAA,EAAI,OAAA,CAAQ,KAAA,EAAO,EAAE,CAAA;AAC9F,EAAA,MAAM,QAAQ,MAAA,CAAO,IAA
A,IAAQ,UAAA,EAAY,OAAA,CAAQ,WAAW,GAAG,CAAA;AAC/D,EAAA,MAAM,QAAA,GAAW,CAAA,EAAG,IAAI,CAAA,EAAG,IAAI,CAAA,CAAA;AAC/B,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AAEtC,EAAA,OAAO;AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,SAAS,MAAA,CAAO,MAAA;AAAA,QAChB,OAAO,KAAA,CAAM,KAAA;AAAA,QACb,aAAa,KAAA,CAAM,UAAA;AAAA,QACnB,WAAW,KAAA,CAAM,SAAA;AAAA,QACjB,YAAY,KAAA,CAAM;AAAA,OACpB;AACA,MAAA,MAAM,UAAU,QAAA,EAAU;AAAA,QACxB,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS,EAAE,cAAA,EAAgB,kBAAA,EAAmB;AAAA,QAC9C,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO,CAAA;AAAA,QAC5B,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC/BO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AACtC,EAAA,MAAM,SAAA,GAAY,MAAA,CAAO,SAAA,KAAc,CAAC,CAAA,KAA6B,CAAA,CAAA;AAErE,EAAA,OAAO;AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,SAAA,CAAU,OAAO,GAAA,EAAK;AAAA,QAC1B,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS;AAAA,UACP,cAAA,EAAgB,kBAAA;AAAA,UAChB,GAAI,MAAA,CAAO,OAAA,IAAW;AAAC,SACzB;AAAA,QACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,SAAA,CAAU,KAAK,CAAC,CAAA;AAAA,QACrC,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC3BO,SAAS,gBACd,OAAA,EACkB;AAClB,EAAA,OAAO,EAAE,OAAA,EAAQ;AACnB","file":"index.cjs","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. 
See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. 
Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring 
bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron 
UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the UA:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'browser'` — looks like a real browser. 
Could be a genuine user or\n * a Playwright-based agent (Aider, OpenCode) that can't be distinguished\n * at the UA layer.\n * - `'other'` — unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. */\n codingAgentHint: boolean\n}\n\n/**\n * One-stop classification of a user-agent. Combines {@link isAiBot},\n * {@link isHttpClient}, and {@link parseBotName} into a single structured\n * result. Used internally by `trackVisit` to populate event properties;\n * useful in consumer code when you need all signals at once.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n","/**\n * djb2 hash returning an 8-char hex string prefixed with `anon_`. Used to\n * build stable anonymous distinct-ids from `ip:ua:...` tuples without\n * collecting identifying data. Not cryptographic — collisions are fine for\n * analytics segmentation.\n */\nexport function hashId(input: string): string {\n let h = 5381\n for (let i = 0; i < input.length; i++) {\n h = ((h << 5) + h + input.charCodeAt(i)) & 0xffffffff\n }\n return 'anon_' + (h >>> 0).toString(16)\n}\n","import { classifyAgent, isAiBot, isHttpClient } from './bots.js'\nimport { hashId } from './hash.js'\nimport type { TrackVisitOptions } from './types.js'\n\n/**\n * Capture an event describing the incoming request. 
Fire-and-forget: awaits\n * the adapter but swallows errors so a downed analytics backend never breaks\n * the response path. Callers typically don't await the returned promise.\n *\n * By default, captures every request so coding-agent traffic (axios, curl,\n * Electron, …) shows up alongside branded crawlers. Set `onlyBots: true` to\n * restrict capture to UAs matching {@link AI_BOT_PATTERN}.\n */\nexport async function trackVisit(\n req: Request,\n opts: TrackVisitOptions\n): Promise<void> {\n const userAgent = req.headers.get('user-agent') || ''\n\n const onlyBots = opts.onlyBots ?? false\n const skipBrowsers = opts.skipBrowsers ?? false\n if (onlyBots && !isAiBot(userAgent)) return\n if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) return\n\n let pathname = '/'\n let originFromUrl = ''\n try {\n const url = new URL(req.url)\n pathname = url.pathname\n originFromUrl = url.origin\n } catch {\n // Some runtimes hand us a relative URL; fall back to the raw string.\n pathname = req.url || '/'\n }\n const origin = opts.origin ?? originFromUrl\n\n const forwardedFor = req.headers.get('x-forwarded-for') || ''\n const ip = forwardedFor.split(',')[0]?.trim() ?? ''\n const referer = req.headers.get('referer')\n const classification = classifyAgent(userAgent)\n\n const event = {\n event: opts.eventName ?? 'agent_visit',\n distinctId: hashId(`${ip}:${userAgent}`),\n timestamp: new Date().toISOString(),\n properties: {\n $process_person_profile: false,\n $current_url: origin ? `${origin}${pathname}` : pathname,\n path: pathname,\n user_agent: userAgent,\n is_ai_bot: classification.isAiBot,\n bot_name: classification.label,\n ua_category: classification.kind,\n coding_agent_hint: classification.codingAgentHint,\n referer,\n source: opts.source ?? 
null,\n ...opts.properties\n }\n }\n\n try {\n await opts.analytics.capture(event)\n } catch {\n // Intentional swallow — analytics failures must not affect the response.\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface PostHogAdapterConfig {\n /** PostHog project API key (the public one used by the JS SDK). */\n apiKey: string\n /**\n * PostHog host, with or without scheme. Defaults to `https://us.i.posthog.com`.\n * Use `https://eu.i.posthog.com` for EU cloud, or your own reverse-proxy\n * domain (e.g. `https://svc.example.com`).\n */\n host?: string\n /**\n * Path on the host that accepts single-event captures. Defaults to\n * `/i/v0/e/` which is PostHog's current endpoint for this.\n */\n path?: string\n /**\n * Override the `fetch` implementation (useful for tests or custom runtimes\n * that need a pinned fetch).\n */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that posts each event to the PostHog capture endpoint. Uses\n * `keepalive: true` so the request survives after a serverless response\n * returns — events aren't guaranteed (fire-and-forget), but that's the\n * trade we want to keep the hot path fast.\n */\nexport function posthogAnalytics(config: PostHogAdapterConfig): AnalyticsAdapter {\n const hostRaw = config.host ?? 'https://us.i.posthog.com'\n const base = (/^https?:\\/\\//.test(hostRaw) ? hostRaw : `https://${hostRaw}`).replace(/\\/$/, '')\n const path = (config.path ?? '/i/v0/e/').replace(/^(?!\\/)/, '/')\n const endpoint = `${base}${path}`\n const fetchImpl = config.fetchImpl ?? 
fetch\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n const payload = {\n api_key: config.apiKey,\n event: event.event,\n distinct_id: event.distinctId,\n timestamp: event.timestamp,\n properties: event.properties\n }\n await fetchImpl(endpoint, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(payload),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface WebhookAdapterConfig {\n /** Destination URL that receives a POST for each event. */\n url: string\n /** Extra headers merged onto the POST (useful for shared-secret auth). */\n headers?: Record<string, string>\n /**\n * Transform the event into the exact JSON body the destination expects.\n * Defaults to sending the {@link CaptureEvent} as-is.\n */\n transform?: (event: CaptureEvent) => unknown\n /** Override the `fetch` implementation. */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that POSTs each event to an arbitrary webhook URL. Keeps the\n * library analytics-backend-agnostic — use this when PostHog isn't your\n * analytics of record, or when you want to multiplex events through your\n * own ingestion layer.\n */\nexport function webhookAnalytics(config: WebhookAdapterConfig): AnalyticsAdapter {\n const fetchImpl = config.fetchImpl ?? fetch\n const transform = config.transform ?? ((e: CaptureEvent): unknown => e)\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n await fetchImpl(config.url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n ...(config.headers ?? 
{})\n },\n body: JSON.stringify(transform(event)),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\n/**\n * Escape hatch for wiring a callback directly as an analytics adapter.\n * Useful when you want to log events, pipe them through your own SDK, or\n * compose multiple adapters.\n *\n * @example\n * ```ts\n * const devAnalytics = customAnalytics((e) => console.log('[doc_view]', e))\n * ```\n */\nexport function customAnalytics(\n capture: (event: CaptureEvent) => Promise<void> | void\n): AnalyticsAdapter {\n return { capture }\n}\n"]} | ||
| {"version":3,"sources":["../src/bots.ts","../src/hash.ts","../src/track.ts","../src/adapters/posthog.ts","../src/adapters/webhook.ts","../src/adapters/custom.ts"],"names":[],"mappings":";;;AAgBO,IAAM,cAAA,GACX;AAmBK,IAAM,mBAAA,GACX;AAEK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;AAEO,SAAS,aAAa,SAAA,EAA+C;AAC1E,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,mBAAA,CAAoB,KAAK,SAAS,CAAA;AAC3C;AAeO,SAAS,aAAa,SAAA,EAA8C;AACzE,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,CAAA,GAAI,UAAU,WAAA,EAAY;AAGhC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,IAAK,CAAA,CAAE,SAAS,QAAQ,CAAA;AAC1G,IAAA,OAAO,SAAA;AACT,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AAC5F,EAAA,IAAI,CAAA,CAAE,SAAS,eAAe,CAAA,IAAK,EAAE,QAAA,CAAS,iBAAiB,GAAG,OAAO,YAAA;AACzE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,cAAA;AAChC,EAAA,IAAI,CAAA,CAAE,SAAS,iBAAiB,CAAA,IAAK,EAAE,QAAA,CAAS,WAAW,GAAG,OAAO,QAAA;AACrE,EAAA,IAAI,CAAA,CAAE,SAAS,mBAAmB,CAAA,IAAK,EAAE,QAAA,CAAS,UAAU,GAAG,OAAO,OAAA;AACtE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,MAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,SAAS,oBAAoB,CAAA,IAAK,EAAE,QAAA,CAAS,aAAa,GAAG,OAAO,MAAA;AAC1E,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,SAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,EAAG,OAAO,YAAA;AACxC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA
;AAGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,UAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,KAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,gBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,WAAA;AACzC,EAAA,IAAI,CAAA,CAAE,SAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,SAAS,GAAG,OAAO,SAAA;AAChE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,OAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AAGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,OAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,qBAAqB,CAAA,EAAG,OAAO,UAAA;AAC9C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,kBAAkB,CAAA,EAAG,OAAO,mBAAA;AAG3C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AAGjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,iBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,WAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,MAAM,CAAA,EAAG,OAAO,MAAA;AAC/B,EAAA,IAA
I,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,QAAA;AAIrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,UAAA;AACpC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAC7B,EAAA,IAAI,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAC9B,EAAA,IAAI,4BAAA,CAA6B,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,KAAA;AACjD,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,cAAA,CAAe,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,YAAA;AACnC,EAAA,IAAI,mBAAA,CAAoB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,iBAAA;AACxC,EAAA,IAAI,kBAAA,CAAmB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,gBAAA;AACvC,EAAA,IAAI,UAAA,CAAW,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,QAAA;AAC/B,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,SAAA;AAChC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAG7B,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,SAAS,SAAS,CAAA;AAC/F,IAAA,OAAO,SAAA;AAET,EAAA,OAAO,OAAA;AACT;AAOO,SAAS,sBAAsB,SAAA,EAA8C;AAClF,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,eAAA,GAAkB,SAAA,CAAU,KAAA,CAAM,yCAAyC,CAAA;AACjF,EAAA,IAAI,eAAA,IAAmB,gBAAgB,CAAC,CAAA,SAAU,eAAA,CAAgB,CAAC,EAAE,IAAA,EAAK;AAC1E,EAAA,MAAM,QAAQ,SAAA,CAAU,IAAA,EAAK,CAAE,KAAA,CAAM,GAAG,CAAA,CAAE,CAAC,CAAA,EAAG,IAAA,GAAO,KAAA,CAAM,KAAK,CAAA,CAAE,CAAC,GAAG,IAAA,EAAK;AAC3E,EAAA,OAAO,KAAA,IAAS,OAAA;AAClB;AAkBO,SAAS,eAAe,GAAA,EAAiC;AAC9D,EAAA,MAAM,UAAoB,EAAC;AAC3B,EAAA,MAAM,MAAM,GAAA,CAAI,OAAA,CAAQ,IAAI,YAAY,CAAA,IAAK,IAAI,WAAA,EAAY;AAC7D,EAAA,MAAM,WAAA,GACJ,EAAA,CAAG,QAAA,CAAS,SAAS,KAAK,EAAA,CAAG,QAAA,CAAS,QAAQ,CAAA,IAAK,GAAG,QAAA,CAAS,QAAQ,CAAA,IAAK,EAAA,CAAG,SAAS,SAAS,CAAA;AAEnG,EAAA,IAAI,CAAC,WAAA,EAAa,OAAO,EAAE,KAAA,EAAO,GAAG,OAAA,EAAS,EAAC,EAAG,MAAA,EAAQ,KAAA,EAAM;AAEhE,EAAA,IAAI,CAAC,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,iBAAiB,CAAA,EAAG;AACvC,IAAA,OAAA,CAAQ,KAAK,yBAAyB,CAAA;AAAA,EACxC;AACA,EAAA,IAAI,CAAC,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,EAAG;AACtC,IAAA,OAAA,CAAQ,KAAK,wBAAwB,CAAA;AAAA,EACvC;AACA,EAAA,MAAM,
OAAA,GAAU,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,WAAW,CAAA;AAC3C,EAAA,IAAI,CAAC,OAAA,EAAS;AACZ,IAAA,OAAA,CAAQ,KAAK,mBAAmB,CAAA;AAAA,EAClC,WAAW,OAAA,CAAQ,WAAA,EAAY,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG;AAC3D,IAAA,OAAA,CAAQ,KAAK,sBAAsB,CAAA;AAAA,EACrC;AACA,EAAA,MAAM,MAAA,GAAS,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,QAAQ,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,CAAC,MAAA,IAAU,MAAA,KAAW,KAAA,EAAO;AAC/B,IAAA,OAAA,CAAQ,KAAK,wBAAwB,CAAA;AAAA,EACvC;AACA,EAAA,IAAA,CAAK,GAAA,CAAI,QAAQ,GAAA,CAAI,YAAY,KAAK,EAAA,EAAI,WAAA,OAAkB,OAAA,EAAS;AACnE,IAAA,OAAA,CAAQ,KAAK,kBAAkB,CAAA;AAAA,EACjC;AAEA,EAAA,MAAM,QAAQ,OAAA,CAAQ,MAAA;AACtB,EAAA,OAAO,EAAE,KAAA,EAAO,OAAA,EAAS,MAAA,EAAQ,SAAS,CAAA,EAAE;AAC9C;AA6CO,SAAS,cAAc,SAAA,EAA2D;AACvF,EAAA,MAAM,KAAA,GAAQ,aAAa,SAAS,CAAA;AACpC,EAAA,MAAM,KAAA,GAAQ,QAAQ,SAAS,CAAA;AAC/B,EAAA,MAAM,UAAA,GAAa,aAAa,SAAS,CAAA;AAEzC,EAAA,IAAI,IAAA;AACJ,EAAA,IAAI,OAAO,IAAA,GAAO,kBAAA;AAAA,OAAA,IACT,YAAY,IAAA,GAAO,mBAAA;AAAA,OAAA,IACnB,KAAA,KAAU,WAAW,IAAA,GAAO,SAAA;AAAA,OAChC,IAAA,GAAO,OAAA;AAEZ,EAAA,OAAO,EAAE,IAAA,EAAM,KAAA,EAAO,OAAA,EAAS,KAAA,EAAO,iBAAiB,UAAA,EAAW;AACpE;AAOO,SAAS,gBAAgB,GAAA,EAAmC;AACjE,EAAA,MAAM,SAAA,GAAY,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AACnD,EAAA,MAAM,IAAA,GAAO,cAAc,SAAS,CAAA;AACpC,EAAA,MAAM,QAAA,GAAW,eAAe,GAAG,CAAA;AAEnC,EAAA,IAAI,OAAO,IAAA,CAAK,IAAA;AAChB,EAAA,IAAI,IAAA,KAAS,SAAA,IAAa,QAAA,CAAS,MAAA,EAAQ;AACzC,IAAA,IAAA,GAAO,iBAAA;AAAA,EACT;AAEA,EAAA,OAAO,EAAE,GAAG,IAAA,EAAM,IAAA,EAAM,QAAA,EAAS;AACnC;;;AC3RO,SAAS,OAAO,KAAA,EAAuB;AAC5C,EAAA,IAAI,CAAA,GAAI,IAAA;AACR,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACrC,IAAA,CAAA,GAAA,CAAM,KAAK,CAAA,IAAK,CAAA,GAAI,KAAA,CAAM,UAAA,CAAW,CAAC,CAAA,GAAK,UAAA;AAAA,EAC7C;AACA,EAAA,OAAO,OAAA,GAAA,CAAW,CAAA,KAAM,CAAA,EAAG,QAAA,CAAS,EAAE,CAAA;AACxC;;;ACCA,eAAsB,UAAA,CACpB,KACA,IAAA,EACe;AACf,EAAA,MAAM,SAAA,GAAY,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAEnD,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,KAAA;AAClC,EAAA,MAAM,YAAA,GAAe,KAAK,YAAA,IAAgB,KAAA;AAC1C,EAAA,IAAI,QAAA,IAAY,CAAC,OAAA,CAAQ,SAAS,CAA
A,EAAG;AACrC,EAAA,IAAI,YAAA,IAAgB,CAAC,OAAA,CAAQ,SAAS,KAAK,CAAC,YAAA,CAAa,SAAS,CAAA,EAAG;AAInE,IAAA,IAAI,CAAC,cAAA,CAAe,GAAG,CAAA,CAAE,MAAA,EAAQ;AAAA,EACnC;AAEA,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI,aAAA,GAAgB,EAAA;AACpB,EAAA,IAAI;AACF,IAAA,MAAM,GAAA,GAAM,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA;AAC3B,IAAA,QAAA,GAAW,GAAA,CAAI,QAAA;AACf,IAAA,aAAA,GAAgB,GAAA,CAAI,MAAA;AAAA,EACtB,CAAA,CAAA,MAAQ;AAEN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AACA,EAAA,MAAM,MAAA,GAAS,KAAK,MAAA,IAAU,aAAA;AAE9B,EAAA,MAAM,YAAA,GAAe,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,iBAAiB,CAAA,IAAK,EAAA;AAC3D,EAAA,MAAM,EAAA,GAAK,aAAa,KAAA,CAAM,GAAG,EAAE,CAAC,CAAA,EAAG,MAAK,IAAK,EAAA;AACjD,EAAA,MAAM,OAAA,GAAU,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,SAAS,CAAA;AACzC,EAAA,MAAM,cAAA,GAAiB,gBAAgB,GAAG,CAAA;AAE1C,EAAA,MAAM,KAAA,GAAQ;AAAA,IACZ,KAAA,EAAO,KAAK,SAAA,IAAa,aAAA;AAAA,IACzB,YAAY,MAAA,CAAO,CAAA,EAAG,EAAE,CAAA,CAAA,EAAI,SAAS,CAAA,CAAE,CAAA;AAAA,IACvC,SAAA,EAAA,iBAAW,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,IAClC,UAAA,EAAY;AAAA,MACV,uBAAA,EAAyB,KAAA;AAAA,MACzB,cAAc,MAAA,GAAS,CAAA,EAAG,MAAM,CAAA,EAAG,QAAQ,CAAA,CAAA,GAAK,QAAA;AAAA,MAChD,IAAA,EAAM,QAAA;AAAA,MACN,UAAA,EAAY,SAAA;AAAA,MACZ,WAAW,cAAA,CAAe,OAAA;AAAA,MAC1B,UAAU,cAAA,CAAe,KAAA;AAAA,MACzB,aAAa,cAAA,CAAe,IAAA;AAAA,MAC5B,mBAAmB,cAAA,CAAe,eAAA;AAAA,MAClC,cAAA,EAAgB,cAAA,CAAe,QAAA,EAAU,KAAA,IAAS,CAAA;AAAA,MAClD,eAAA,EAAiB,cAAA,CAAe,QAAA,EAAU,MAAA,IAAU,KAAA;AAAA,MACpD,OAAA;AAAA,MACA,MAAA,EAAQ,KAAK,MAAA,IAAU,IAAA;AAAA,MACvB,GAAG,IAAA,CAAK;AAAA;AACV,GACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,IAAA,CAAK,SAAA,CAAU,OAAA,CAAQ,KAAK,CAAA;AAAA,EACpC,CAAA,CAAA,MAAQ;AAAA,EAER;AACF;;;AC3CO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,IAAQ,0BAAA;AAC/B,EAAA,MAAM,IAAA,GAAA,CAAQ,cAAA,CAAe,IAAA,CAAK,OAAO,CAAA,GAAI,OAAA,GAAU,CAAA,QAAA,EAAW,OAAO,CAAA,CAAA,EAAI,OAAA,CAAQ,KAAA,EAAO,EAAE,CAAA;AAC9F,EAAA,MAAM,QAAQ,MAAA,CAAO,IAAA,IAAQ,UAAA,EAAY,OAAA,CAAQ,WAAW,GAAG,CAAA;AAC/D,EAAA,MAAM,QAAA,GAAW,CAAA,EAAG,IAAI,CAAA,EAAG,IAAI,CAAA,CAAA;AAC/B,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AAEtC,EAAA,OAAO;
AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,SAAS,MAAA,CAAO,MAAA;AAAA,QAChB,OAAO,KAAA,CAAM,KAAA;AAAA,QACb,aAAa,KAAA,CAAM,UAAA;AAAA,QACnB,WAAW,KAAA,CAAM,SAAA;AAAA,QACjB,YAAY,KAAA,CAAM;AAAA,OACpB;AACA,MAAA,MAAM,UAAU,QAAA,EAAU;AAAA,QACxB,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS,EAAE,cAAA,EAAgB,kBAAA,EAAmB;AAAA,QAC9C,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO,CAAA;AAAA,QAC5B,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC/BO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AACtC,EAAA,MAAM,SAAA,GAAY,MAAA,CAAO,SAAA,KAAc,CAAC,CAAA,KAA6B,CAAA,CAAA;AAErE,EAAA,OAAO;AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,SAAA,CAAU,OAAO,GAAA,EAAK;AAAA,QAC1B,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS;AAAA,UACP,cAAA,EAAgB,kBAAA;AAAA,UAChB,GAAI,MAAA,CAAO,OAAA,IAAW;AAAC,SACzB;AAAA,QACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,SAAA,CAAU,KAAK,CAAC,CAAA;AAAA,QACrC,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC3BO,SAAS,gBACd,OAAA,EACkB;AAClB,EAAA,OAAO,EAAE,OAAA,EAAQ;AACnB","file":"index.cjs","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. 
See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. 
Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring 
bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron 
UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\n/**\n * Detect likely headless/automated browsers by checking for missing headers\n * that real browsers always send. 
Playwright, Puppeteer, and similar tools\n * spoof the UA but often omit standard browser headers.\n *\n * Signals checked (each scores 1 point):\n * - Missing `Accept-Language` — every real browser sends this\n * - Missing `Sec-Fetch-Mode` — sent by all modern browsers\n * - Missing `Sec-CH-UA` — Client Hints, Chromium 89+\n * - `Sec-CH-UA` contains \"HeadlessChrome\"\n * - Missing or bare Accept header — browsers send detailed accept lists\n * - `Connection: close` with browser UA — browsers use keep-alive\n *\n * Returns a score (0-6), the signals that fired, and a boolean `likely`\n * flag (score >= 2 with a browser-like UA).\n */\nexport function detectHeadless(req: Request): HeadlessDetection {\n const signals: string[] = []\n const ua = (req.headers.get('user-agent') || '').toLowerCase()\n const isBrowserUA =\n ua.includes('mozilla') || ua.includes('chrome') || ua.includes('safari') || ua.includes('firefox')\n\n if (!isBrowserUA) return { score: 0, signals: [], likely: false }\n\n if (!req.headers.get('accept-language')) {\n signals.push('missing-accept-language')\n }\n if (!req.headers.get('sec-fetch-mode')) {\n signals.push('missing-sec-fetch-mode')\n }\n const secChUa = req.headers.get('sec-ch-ua')\n if (!secChUa) {\n signals.push('missing-sec-ch-ua')\n } else if (secChUa.toLowerCase().includes('headlesschrome')) {\n signals.push('headless-chrome-hint')\n }\n const accept = req.headers.get('accept') || ''\n if (!accept || accept === '*/*') {\n signals.push('missing-or-bare-accept')\n }\n if ((req.headers.get('connection') || '').toLowerCase() === 'close') {\n signals.push('connection-close')\n }\n\n const score = signals.length\n return { score, signals, likely: score >= 2 }\n}\n\nexport interface HeadlessDetection {\n /** Number of suspicious signals found (0-6). */\n score: number\n /** Names of the specific signals that fired. */\n signals: string[]\n /** True when score >= 2 — strong headless indication. 
*/\n likely: boolean\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'headless-likely'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the request:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'headless-likely'` — Browser-like UA but missing standard headers.\n * Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.).\n * - `'browser'` — Looks like a real browser with expected headers present.\n * - `'other'` — Unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. */\n codingAgentHint: boolean\n /** Headless browser detection result. Only populated when `req` is passed. */\n headless?: HeadlessDetection\n}\n\n/**\n * UA-only classification. Use {@link classifyRequest} for full detection\n * including headless browser heuristics.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n\n/**\n * Full request classification — combines UA parsing with header-based\n * headless detection. 
When a browser-like UA is missing standard headers,\n * the kind is promoted from `'browser'` to `'headless-likely'`.\n */\nexport function classifyRequest(req: Request): AgentClassification {\n const userAgent = req.headers.get('user-agent') || ''\n const base = classifyAgent(userAgent)\n const headless = detectHeadless(req)\n\n let kind = base.kind\n if (kind === 'browser' && headless.likely) {\n kind = 'headless-likely'\n }\n\n return { ...base, kind, headless }\n}\n","/**\n * djb2 hash returning an 8-char hex string prefixed with `anon_`. Used to\n * build stable anonymous distinct-ids from `ip:ua:...` tuples without\n * collecting identifying data. Not cryptographic — collisions are fine for\n * analytics segmentation.\n */\nexport function hashId(input: string): string {\n let h = 5381\n for (let i = 0; i < input.length; i++) {\n h = ((h << 5) + h + input.charCodeAt(i)) & 0xffffffff\n }\n return 'anon_' + (h >>> 0).toString(16)\n}\n","import { classifyRequest, detectHeadless, isAiBot, isHttpClient } from './bots.js'\nimport { hashId } from './hash.js'\nimport type { TrackVisitOptions } from './types.js'\n\n/**\n * Capture an event describing the incoming request. Fire-and-forget: awaits\n * the adapter but swallows errors so a downed analytics backend never breaks\n * the response path. Callers typically don't await the returned promise.\n *\n * By default, captures every request so coding-agent traffic (axios, curl,\n * Electron, …) shows up alongside branded crawlers. Set `onlyBots: true` to\n * restrict capture to UAs matching {@link AI_BOT_PATTERN}.\n */\nexport async function trackVisit(\n req: Request,\n opts: TrackVisitOptions\n): Promise<void> {\n const userAgent = req.headers.get('user-agent') || ''\n\n const onlyBots = opts.onlyBots ?? false\n const skipBrowsers = opts.skipBrowsers ?? 
false\n if (onlyBots && !isAiBot(userAgent)) return\n if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) {\n // Not a declared bot or HTTP client — check headless heuristics.\n // Playwright-based agents (Aider, OpenCode) will pass if they're missing\n // standard browser headers. Real browsers get skipped.\n if (!detectHeadless(req).likely) return\n }\n\n let pathname = '/'\n let originFromUrl = ''\n try {\n const url = new URL(req.url)\n pathname = url.pathname\n originFromUrl = url.origin\n } catch {\n // Some runtimes hand us a relative URL; fall back to the raw string.\n pathname = req.url || '/'\n }\n const origin = opts.origin ?? originFromUrl\n\n const forwardedFor = req.headers.get('x-forwarded-for') || ''\n const ip = forwardedFor.split(',')[0]?.trim() ?? ''\n const referer = req.headers.get('referer')\n const classification = classifyRequest(req)\n\n const event = {\n event: opts.eventName ?? 'agent_visit',\n distinctId: hashId(`${ip}:${userAgent}`),\n timestamp: new Date().toISOString(),\n properties: {\n $process_person_profile: false,\n $current_url: origin ? `${origin}${pathname}` : pathname,\n path: pathname,\n user_agent: userAgent,\n is_ai_bot: classification.isAiBot,\n bot_name: classification.label,\n ua_category: classification.kind,\n coding_agent_hint: classification.codingAgentHint,\n headless_score: classification.headless?.score ?? 0,\n headless_likely: classification.headless?.likely ?? false,\n referer,\n source: opts.source ?? null,\n ...opts.properties\n }\n }\n\n try {\n await opts.analytics.capture(event)\n } catch {\n // Intentional swallow — analytics failures must not affect the response.\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface PostHogAdapterConfig {\n /** PostHog project API key (the public one used by the JS SDK). */\n apiKey: string\n /**\n * PostHog host, with or without scheme. 
Defaults to `https://us.i.posthog.com`.\n * Use `https://eu.i.posthog.com` for EU cloud, or your own reverse-proxy\n * domain (e.g. `https://svc.example.com`).\n */\n host?: string\n /**\n * Path on the host that accepts single-event captures. Defaults to\n * `/i/v0/e/` which is PostHog's current endpoint for this.\n */\n path?: string\n /**\n * Override the `fetch` implementation (useful for tests or custom runtimes\n * that need a pinned fetch).\n */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that posts each event to the PostHog capture endpoint. Uses\n * `keepalive: true` so the request survives after a serverless response\n * returns — events aren't guaranteed (fire-and-forget), but that's the\n * trade we want to keep the hot path fast.\n */\nexport function posthogAnalytics(config: PostHogAdapterConfig): AnalyticsAdapter {\n const hostRaw = config.host ?? 'https://us.i.posthog.com'\n const base = (/^https?:\\/\\//.test(hostRaw) ? hostRaw : `https://${hostRaw}`).replace(/\\/$/, '')\n const path = (config.path ?? '/i/v0/e/').replace(/^(?!\\/)/, '/')\n const endpoint = `${base}${path}`\n const fetchImpl = config.fetchImpl ?? fetch\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n const payload = {\n api_key: config.apiKey,\n event: event.event,\n distinct_id: event.distinctId,\n timestamp: event.timestamp,\n properties: event.properties\n }\n await fetchImpl(endpoint, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(payload),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface WebhookAdapterConfig {\n /** Destination URL that receives a POST for each event. */\n url: string\n /** Extra headers merged onto the POST (useful for shared-secret auth). 
*/\n headers?: Record<string, string>\n /**\n * Transform the event into the exact JSON body the destination expects.\n * Defaults to sending the {@link CaptureEvent} as-is.\n */\n transform?: (event: CaptureEvent) => unknown\n /** Override the `fetch` implementation. */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that POSTs each event to an arbitrary webhook URL. Keeps the\n * library analytics-backend-agnostic — use this when PostHog isn't your\n * analytics of record, or when you want to multiplex events through your\n * own ingestion layer.\n */\nexport function webhookAnalytics(config: WebhookAdapterConfig): AnalyticsAdapter {\n const fetchImpl = config.fetchImpl ?? fetch\n const transform = config.transform ?? ((e: CaptureEvent): unknown => e)\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n await fetchImpl(config.url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n ...(config.headers ?? {})\n },\n body: JSON.stringify(transform(event)),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\n/**\n * Escape hatch for wiring a callback directly as an analytics adapter.\n * Useful when you want to log events, pipe them through your own SDK, or\n * compose multiple adapters.\n *\n * @example\n * ```ts\n * const devAnalytics = customAnalytics((e) => console.log('[doc_view]', e))\n * ```\n */\nexport function customAnalytics(\n capture: (event: CaptureEvent) => Promise<void> | void\n): AnalyticsAdapter {\n return { capture }\n}\n"]} |
+42
-11
@@ -73,6 +73,31 @@ import { T as TrackVisitOptions, C as CaptureEvent, A as AnalyticsAdapter } from './types--odwdYFT.cjs'; | ||
| declare function firstUserAgentProduct(userAgent: string | null | undefined): string; | ||
| type AgentKind = 'declared-crawler' | 'coding-agent-hint' | 'browser' | 'other'; | ||
| /** | ||
| * Detect likely headless/automated browsers by checking for missing headers | ||
| * that real browsers always send. Playwright, Puppeteer, and similar tools | ||
| * spoof the UA but often omit standard browser headers. | ||
| * | ||
| * Signals checked (each scores 1 point): | ||
| * - Missing `Accept-Language` — every real browser sends this | ||
| * - Missing `Sec-Fetch-Mode` — sent by all modern browsers | ||
| * - Missing `Sec-CH-UA` — Client Hints, Chromium 89+ | ||
| * - `Sec-CH-UA` contains "HeadlessChrome" | ||
| * - Missing or bare Accept header — browsers send detailed accept lists | ||
| * - `Connection: close` with browser UA — browsers use keep-alive | ||
| * | ||
| * Returns a score (0-6), the signals that fired, and a boolean `likely` | ||
| * flag (score >= 2 with a browser-like UA). | ||
| */ | ||
| declare function detectHeadless(req: Request): HeadlessDetection; | ||
| interface HeadlessDetection { | ||
| /** Number of suspicious signals found (0-6). */ | ||
| score: number; | ||
| /** Names of the specific signals that fired. */ | ||
| signals: string[]; | ||
| /** True when score >= 2 — strong headless indication. */ | ||
| likely: boolean; | ||
| } | ||
| type AgentKind = 'declared-crawler' | 'coding-agent-hint' | 'headless-likely' | 'browser' | 'other'; | ||
| interface AgentClassification { | ||
| /** | ||
| * Categorical tag for the UA: | ||
| * Categorical tag for the request: | ||
| * | ||
@@ -82,6 +107,6 @@ * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence. | ||
| * signal; could be a coding agent, a curl script, or any automation. | ||
| * - `'browser'` — looks like a real browser. Could be a genuine user or | ||
| * a Playwright-based agent (Aider, OpenCode) that can't be distinguished | ||
| * at the UA layer. | ||
| * - `'other'` — unrecognised or empty. | ||
| * - `'headless-likely'` — Browser-like UA but missing standard headers. | ||
| * Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.). | ||
| * - `'browser'` — Looks like a real browser with expected headers present. | ||
| * - `'other'` — Unrecognised or empty. | ||
| */ | ||
@@ -95,10 +120,16 @@ kind: AgentKind; | ||
| codingAgentHint: boolean; | ||
| /** Headless browser detection result. Only populated when `req` is passed. */ | ||
| headless?: HeadlessDetection; | ||
| } | ||
| /** | ||
| * One-stop classification of a user-agent. Combines {@link isAiBot}, | ||
| * {@link isHttpClient}, and {@link parseBotName} into a single structured | ||
| * result. Used internally by `trackVisit` to populate event properties; | ||
| * useful in consumer code when you need all signals at once. | ||
| * UA-only classification. Use {@link classifyRequest} for full detection | ||
| * including headless browser heuristics. | ||
| */ | ||
| declare function classifyAgent(userAgent: string | null | undefined): AgentClassification; | ||
| /** | ||
| * Full request classification — combines UA parsing with header-based | ||
| * headless detection. When a browser-like UA is missing standard headers, | ||
| * the kind is promoted from `'browser'` to `'headless-likely'`. | ||
| */ | ||
| declare function classifyRequest(req: Request): AgentClassification; | ||
@@ -125,2 +156,2 @@ /** | ||
| export { AI_BOT_PATTERN, type AgentClassification, type AgentKind, AnalyticsAdapter, CaptureEvent, HTTP_CLIENT_PATTERN, TrackVisitOptions, classifyAgent, customAnalytics, firstUserAgentProduct, hashId, isAiBot, isHttpClient, parseBotName, trackVisit }; | ||
| export { AI_BOT_PATTERN, type AgentClassification, type AgentKind, AnalyticsAdapter, CaptureEvent, HTTP_CLIENT_PATTERN, type HeadlessDetection, TrackVisitOptions, classifyAgent, classifyRequest, customAnalytics, detectHeadless, firstUserAgentProduct, hashId, isAiBot, isHttpClient, parseBotName, trackVisit }; |
+42
-11
@@ -73,6 +73,31 @@ import { T as TrackVisitOptions, C as CaptureEvent, A as AnalyticsAdapter } from './types--odwdYFT.js'; | ||
| declare function firstUserAgentProduct(userAgent: string | null | undefined): string; | ||
| type AgentKind = 'declared-crawler' | 'coding-agent-hint' | 'browser' | 'other'; | ||
| /** | ||
| * Detect likely headless/automated browsers by checking for missing headers | ||
| * that real browsers always send. Playwright, Puppeteer, and similar tools | ||
| * spoof the UA but often omit standard browser headers. | ||
| * | ||
| * Signals checked (each scores 1 point): | ||
| * - Missing `Accept-Language` — every real browser sends this | ||
| * - Missing `Sec-Fetch-Mode` — sent by all modern browsers | ||
| * - Missing `Sec-CH-UA` — Client Hints, Chromium 89+ | ||
| * - `Sec-CH-UA` contains "HeadlessChrome" | ||
| * - Missing or bare Accept header — browsers send detailed accept lists | ||
| * - `Connection: close` with browser UA — browsers use keep-alive | ||
| * | ||
| * Returns a score (0-6), the signals that fired, and a boolean `likely` | ||
| * flag (score >= 2 with a browser-like UA). | ||
| */ | ||
| declare function detectHeadless(req: Request): HeadlessDetection; | ||
| interface HeadlessDetection { | ||
| /** Number of suspicious signals found (0-6). */ | ||
| score: number; | ||
| /** Names of the specific signals that fired. */ | ||
| signals: string[]; | ||
| /** True when score >= 2 — strong headless indication. */ | ||
| likely: boolean; | ||
| } | ||
| type AgentKind = 'declared-crawler' | 'coding-agent-hint' | 'headless-likely' | 'browser' | 'other'; | ||
| interface AgentClassification { | ||
| /** | ||
| * Categorical tag for the UA: | ||
| * Categorical tag for the request: | ||
| * | ||
@@ -82,6 +107,6 @@ * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence. | ||
| * signal; could be a coding agent, a curl script, or any automation. | ||
| * - `'browser'` — looks like a real browser. Could be a genuine user or | ||
| * a Playwright-based agent (Aider, OpenCode) that can't be distinguished | ||
| * at the UA layer. | ||
| * - `'other'` — unrecognised or empty. | ||
| * - `'headless-likely'` — Browser-like UA but missing standard headers. | ||
| * Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.). | ||
| * - `'browser'` — Looks like a real browser with expected headers present. | ||
| * - `'other'` — Unrecognised or empty. | ||
| */ | ||
@@ -95,10 +120,16 @@ kind: AgentKind; | ||
| codingAgentHint: boolean; | ||
| /** Headless browser detection result. Only populated when `req` is passed. */ | ||
| headless?: HeadlessDetection; | ||
| } | ||
| /** | ||
| * One-stop classification of a user-agent. Combines {@link isAiBot}, | ||
| * {@link isHttpClient}, and {@link parseBotName} into a single structured | ||
| * result. Used internally by `trackVisit` to populate event properties; | ||
| * useful in consumer code when you need all signals at once. | ||
| * UA-only classification. Use {@link classifyRequest} for full detection | ||
| * including headless browser heuristics. | ||
| */ | ||
| declare function classifyAgent(userAgent: string | null | undefined): AgentClassification; | ||
| /** | ||
| * Full request classification — combines UA parsing with header-based | ||
| * headless detection. When a browser-like UA is missing standard headers, | ||
| * the kind is promoted from `'browser'` to `'headless-likely'`. | ||
| */ | ||
| declare function classifyRequest(req: Request): AgentClassification; | ||
@@ -125,2 +156,2 @@ /** | ||
| export { AI_BOT_PATTERN, type AgentClassification, type AgentKind, AnalyticsAdapter, CaptureEvent, HTTP_CLIENT_PATTERN, TrackVisitOptions, classifyAgent, customAnalytics, firstUserAgentProduct, hashId, isAiBot, isHttpClient, parseBotName, trackVisit }; | ||
| export { AI_BOT_PATTERN, type AgentClassification, type AgentKind, AnalyticsAdapter, CaptureEvent, HTTP_CLIENT_PATTERN, type HeadlessDetection, TrackVisitOptions, classifyAgent, classifyRequest, customAnalytics, detectHeadless, firstUserAgentProduct, hashId, isAiBot, isHttpClient, parseBotName, trackVisit }; |
+44
-3
@@ -91,2 +91,29 @@ // src/bots.ts | ||
| } | ||
/**
 * Score how likely a browser-looking request comes from an automated
 * (headless) browser, using only its request headers.
 *
 * Requests whose user-agent does not look like a browser are not scored at
 * all. Otherwise every missing or suspicious header contributes one signal,
 * and the request is flagged as likely headless once two or more fire.
 *
 * @param req Request-like object; only `req.headers.get(name)` is used.
 * @returns `{ score, signals, likely }`, where `score` equals
 *   `signals.length` and `likely` is `score >= 2`.
 */
function detectHeadless(req) {
  const ua = (req.headers.get("user-agent") || "").toLowerCase();
  const isBrowserUA = ["mozilla", "chrome", "safari", "firefox"].some((token) => ua.includes(token));
  if (!isBrowserUA) return { score: 0, signals: [], likely: false };

  const signals = [];

  if (!req.headers.get("accept-language")) {
    signals.push("missing-accept-language");
  }
  if (!req.headers.get("sec-fetch-mode")) {
    signals.push("missing-sec-fetch-mode");
  }

  // Sec-CH-UA is a Chromium-only client hint: Firefox and Safari never send
  // it. Counting its absence for those browsers would hand every genuine
  // Firefox/Safari visitor a free signal, so only a UA that claims to be
  // Chrome/Chromium is expected to carry it.
  const claimsChromium = ua.includes("chrome/") || ua.includes("chromium/");
  const secChUa = req.headers.get("sec-ch-ua");
  if (secChUa) {
    if (secChUa.toLowerCase().includes("headlesschrome")) {
      signals.push("headless-chrome-hint");
    }
  } else if (claimsChromium) {
    signals.push("missing-sec-ch-ua");
  }

  // A navigating browser sends a detailed Accept list; a bare wildcard (or
  // nothing at all) is typical of scripted HTTP clients.
  const accept = req.headers.get("accept") || "";
  if (!accept || accept === "*/*") {
    signals.push("missing-or-bare-accept");
  }

  if ((req.headers.get("connection") || "").toLowerCase() === "close") {
    signals.push("connection-close");
  }

  const score = signals.length;
  return { score, signals, likely: score >= 2 };
}
| function classifyAgent(userAgent) { | ||
@@ -103,2 +130,12 @@ const label = parseBotName(userAgent); | ||
| } | ||
/**
 * Classify a whole request: the user-agent classification plus the
 * header-based headless detection result.
 *
 * A request whose UA classifies as `"browser"` is re-tagged as
 * `"headless-likely"` when the headless detector flags it; every other kind
 * is passed through unchanged.
 *
 * @param req Request whose headers are inspected.
 * @returns The UA classification with `kind` possibly promoted and the
 *   `headless` detection result attached.
 */
function classifyRequest(req) {
  const uaOnly = classifyAgent(req.headers.get("user-agent") || "");
  const headless = detectHeadless(req);
  const promote = uaOnly.kind === "browser" && headless.likely;
  return {
    ...uaOnly,
    kind: promote ? "headless-likely" : uaOnly.kind,
    headless
  };
}
@@ -120,3 +157,5 @@ // src/hash.ts | ||
| if (onlyBots && !isAiBot(userAgent)) return; | ||
| if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) return; | ||
| if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) { | ||
| if (!detectHeadless(req).likely) return; | ||
| } | ||
| let pathname = "/"; | ||
@@ -135,3 +174,3 @@ let originFromUrl = ""; | ||
| const referer = req.headers.get("referer"); | ||
| const classification = classifyAgent(userAgent); | ||
| const classification = classifyRequest(req); | ||
| const event = { | ||
@@ -150,2 +189,4 @@ event: opts.eventName ?? "agent_visit", | ||
| coding_agent_hint: classification.codingAgentHint, | ||
| headless_score: classification.headless?.score ?? 0, | ||
| headless_likely: classification.headless?.likely ?? false, | ||
| referer, | ||
@@ -212,4 +253,4 @@ source: opts.source ?? null, | ||
| export { AI_BOT_PATTERN, HTTP_CLIENT_PATTERN, classifyAgent, customAnalytics, firstUserAgentProduct, hashId, isAiBot, isHttpClient, parseBotName, posthogAnalytics, trackVisit, webhookAnalytics }; | ||
| export { AI_BOT_PATTERN, HTTP_CLIENT_PATTERN, classifyAgent, classifyRequest, customAnalytics, detectHeadless, firstUserAgentProduct, hashId, isAiBot, isHttpClient, parseBotName, posthogAnalytics, trackVisit, webhookAnalytics }; | ||
| //# sourceMappingURL=index.js.map | ||
| //# sourceMappingURL=index.js.map |
@@ -1,1 +0,1 @@ | ||
| {"version":3,"sources":["../src/bots.ts","../src/hash.ts","../src/track.ts","../src/adapters/posthog.ts","../src/adapters/webhook.ts","../src/adapters/custom.ts"],"names":[],"mappings":";AAgBO,IAAM,cAAA,GACX;AAmBK,IAAM,mBAAA,GACX;AAEK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;AAEO,SAAS,aAAa,SAAA,EAA+C;AAC1E,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,mBAAA,CAAoB,KAAK,SAAS,CAAA;AAC3C;AAeO,SAAS,aAAa,SAAA,EAA8C;AACzE,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,CAAA,GAAI,UAAU,WAAA,EAAY;AAGhC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,IAAK,CAAA,CAAE,SAAS,QAAQ,CAAA;AAC1G,IAAA,OAAO,SAAA;AACT,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AAC5F,EAAA,IAAI,CAAA,CAAE,SAAS,eAAe,CAAA,IAAK,EAAE,QAAA,CAAS,iBAAiB,GAAG,OAAO,YAAA;AACzE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,cAAA;AAChC,EAAA,IAAI,CAAA,CAAE,SAAS,iBAAiB,CAAA,IAAK,EAAE,QAAA,CAAS,WAAW,GAAG,OAAO,QAAA;AACrE,EAAA,IAAI,CAAA,CAAE,SAAS,mBAAmB,CAAA,IAAK,EAAE,QAAA,CAAS,UAAU,GAAG,OAAO,OAAA;AACtE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,MAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,SAAS,oBAAoB,CAAA,IAAK,EAAE,QAAA,CAAS,aAAa,GAAG,OAAO,MAAA;AAC1E,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,SAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,EAAG,OAAO,YAAA;AACxC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;A
AGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,UAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,KAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,gBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,WAAA;AACzC,EAAA,IAAI,CAAA,CAAE,SAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,SAAS,GAAG,OAAO,SAAA;AAChE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,OAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AAGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,OAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,qBAAqB,CAAA,EAAG,OAAO,UAAA;AAC9C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,kBAAkB,CAAA,EAAG,OAAO,mBAAA;AAG3C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AAGjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,iBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,WAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,MAAM,CAAA,EAAG,OAAO,MAAA;AAC/B,EAAA,IAAI,
CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,QAAA;AAIrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,UAAA;AACpC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAC7B,EAAA,IAAI,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAC9B,EAAA,IAAI,4BAAA,CAA6B,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,KAAA;AACjD,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,cAAA,CAAe,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,YAAA;AACnC,EAAA,IAAI,mBAAA,CAAoB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,iBAAA;AACxC,EAAA,IAAI,kBAAA,CAAmB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,gBAAA;AACvC,EAAA,IAAI,UAAA,CAAW,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,QAAA;AAC/B,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,SAAA;AAChC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAG7B,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,SAAS,SAAS,CAAA;AAC/F,IAAA,OAAO,SAAA;AAET,EAAA,OAAO,OAAA;AACT;AAOO,SAAS,sBAAsB,SAAA,EAA8C;AAClF,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,eAAA,GAAkB,SAAA,CAAU,KAAA,CAAM,yCAAyC,CAAA;AACjF,EAAA,IAAI,eAAA,IAAmB,gBAAgB,CAAC,CAAA,SAAU,eAAA,CAAgB,CAAC,EAAE,IAAA,EAAK;AAC1E,EAAA,MAAM,QAAQ,SAAA,CAAU,IAAA,EAAK,CAAE,KAAA,CAAM,GAAG,CAAA,CAAE,CAAC,CAAA,EAAG,IAAA,GAAO,KAAA,CAAM,KAAK,CAAA,CAAE,CAAC,GAAG,IAAA,EAAK;AAC3E,EAAA,OAAO,KAAA,IAAS,OAAA;AAClB;AAmCO,SAAS,cAAc,SAAA,EAA2D;AACvF,EAAA,MAAM,KAAA,GAAQ,aAAa,SAAS,CAAA;AACpC,EAAA,MAAM,KAAA,GAAQ,QAAQ,SAAS,CAAA;AAC/B,EAAA,MAAM,UAAA,GAAa,aAAa,SAAS,CAAA;AAEzC,EAAA,IAAI,IAAA;AACJ,EAAA,IAAI,OAAO,IAAA,GAAO,kBAAA;AAAA,OAAA,IACT,YAAY,IAAA,GAAO,mBAAA;AAAA,OAAA,IACnB,KAAA,KAAU,WAAW,IAAA,GAAO,SAAA;AAAA,OAChC,IAAA,GAAO,OAAA;AAEZ,EAAA,OAAO,EAAE,IAAA,EAAM,KAAA,EAAO,OAAA,EAAS,KAAA,EAAO,iBAAiB,UAAA,EAAW;AACpE;;;AC/MO,SAAS,OAAO,KAAA,EAAuB;AAC5C,EAAA,IAAI,CAAA,GAAI,IAAA;AACR,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACrC,IAAA,CAAA,GAAA,CAAM,KAAK,CAAA,IAAK,CAAA,GAAI,KAAA,CAAM,UAAA,CAAW,CAAC,CAAA,GAA
K,UAAA;AAAA,EAC7C;AACA,EAAA,OAAO,OAAA,GAAA,CAAW,CAAA,KAAM,CAAA,EAAG,QAAA,CAAS,EAAE,CAAA;AACxC;;;ACCA,eAAsB,UAAA,CACpB,KACA,IAAA,EACe;AACf,EAAA,MAAM,SAAA,GAAY,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAEnD,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,KAAA;AAClC,EAAA,MAAM,YAAA,GAAe,KAAK,YAAA,IAAgB,KAAA;AAC1C,EAAA,IAAI,QAAA,IAAY,CAAC,OAAA,CAAQ,SAAS,CAAA,EAAG;AACrC,EAAA,IAAI,YAAA,IAAgB,CAAC,OAAA,CAAQ,SAAS,KAAK,CAAC,YAAA,CAAa,SAAS,CAAA,EAAG;AAErE,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI,aAAA,GAAgB,EAAA;AACpB,EAAA,IAAI;AACF,IAAA,MAAM,GAAA,GAAM,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA;AAC3B,IAAA,QAAA,GAAW,GAAA,CAAI,QAAA;AACf,IAAA,aAAA,GAAgB,GAAA,CAAI,MAAA;AAAA,EACtB,CAAA,CAAA,MAAQ;AAEN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AACA,EAAA,MAAM,MAAA,GAAS,KAAK,MAAA,IAAU,aAAA;AAE9B,EAAA,MAAM,YAAA,GAAe,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,iBAAiB,CAAA,IAAK,EAAA;AAC3D,EAAA,MAAM,EAAA,GAAK,aAAa,KAAA,CAAM,GAAG,EAAE,CAAC,CAAA,EAAG,MAAK,IAAK,EAAA;AACjD,EAAA,MAAM,OAAA,GAAU,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,SAAS,CAAA;AACzC,EAAA,MAAM,cAAA,GAAiB,cAAc,SAAS,CAAA;AAE9C,EAAA,MAAM,KAAA,GAAQ;AAAA,IACZ,KAAA,EAAO,KAAK,SAAA,IAAa,aAAA;AAAA,IACzB,YAAY,MAAA,CAAO,CAAA,EAAG,EAAE,CAAA,CAAA,EAAI,SAAS,CAAA,CAAE,CAAA;AAAA,IACvC,SAAA,EAAA,iBAAW,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,IAClC,UAAA,EAAY;AAAA,MACV,uBAAA,EAAyB,KAAA;AAAA,MACzB,cAAc,MAAA,GAAS,CAAA,EAAG,MAAM,CAAA,EAAG,QAAQ,CAAA,CAAA,GAAK,QAAA;AAAA,MAChD,IAAA,EAAM,QAAA;AAAA,MACN,UAAA,EAAY,SAAA;AAAA,MACZ,WAAW,cAAA,CAAe,OAAA;AAAA,MAC1B,UAAU,cAAA,CAAe,KAAA;AAAA,MACzB,aAAa,cAAA,CAAe,IAAA;AAAA,MAC5B,mBAAmB,cAAA,CAAe,eAAA;AAAA,MAClC,OAAA;AAAA,MACA,MAAA,EAAQ,KAAK,MAAA,IAAU,IAAA;AAAA,MACvB,GAAG,IAAA,CAAK;AAAA;AACV,GACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,IAAA,CAAK,SAAA,CAAU,OAAA,CAAQ,KAAK,CAAA;AAAA,EACpC,CAAA,CAAA,MAAQ;AAAA,EAER;AACF;;;ACpCO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,IAAQ,0BAAA;AAC/B,EAAA,MAAM,IAAA,GAAA,CAAQ,cAAA,CAAe,IAAA,CAAK,OAAO,CAAA,GAAI,OAAA,GAAU,CAAA,QAAA,EAAW,OAAO,CAAA,CAAA,EAAI,OAAA,CAAQ,KAAA,EAAO,EAAE,CAAA;AAC9F,EAAA,MAAM,QAAQ,MAAA,CAAO,IAAA,
IAAQ,UAAA,EAAY,OAAA,CAAQ,WAAW,GAAG,CAAA;AAC/D,EAAA,MAAM,QAAA,GAAW,CAAA,EAAG,IAAI,CAAA,EAAG,IAAI,CAAA,CAAA;AAC/B,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AAEtC,EAAA,OAAO;AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,SAAS,MAAA,CAAO,MAAA;AAAA,QAChB,OAAO,KAAA,CAAM,KAAA;AAAA,QACb,aAAa,KAAA,CAAM,UAAA;AAAA,QACnB,WAAW,KAAA,CAAM,SAAA;AAAA,QACjB,YAAY,KAAA,CAAM;AAAA,OACpB;AACA,MAAA,MAAM,UAAU,QAAA,EAAU;AAAA,QACxB,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS,EAAE,cAAA,EAAgB,kBAAA,EAAmB;AAAA,QAC9C,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO,CAAA;AAAA,QAC5B,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC/BO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AACtC,EAAA,MAAM,SAAA,GAAY,MAAA,CAAO,SAAA,KAAc,CAAC,CAAA,KAA6B,CAAA,CAAA;AAErE,EAAA,OAAO;AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,SAAA,CAAU,OAAO,GAAA,EAAK;AAAA,QAC1B,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS;AAAA,UACP,cAAA,EAAgB,kBAAA;AAAA,UAChB,GAAI,MAAA,CAAO,OAAA,IAAW;AAAC,SACzB;AAAA,QACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,SAAA,CAAU,KAAK,CAAC,CAAA;AAAA,QACrC,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC3BO,SAAS,gBACd,OAAA,EACkB;AAClB,EAAA,OAAO,EAAE,OAAA,EAAQ;AACnB","file":"index.js","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. 
See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. 
Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring 
bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron 
UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the UA:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'browser'` — looks like a real browser. 
Could be a genuine user or\n * a Playwright-based agent (Aider, OpenCode) that can't be distinguished\n * at the UA layer.\n * - `'other'` — unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. */\n codingAgentHint: boolean\n}\n\n/**\n * One-stop classification of a user-agent. Combines {@link isAiBot},\n * {@link isHttpClient}, and {@link parseBotName} into a single structured\n * result. Used internally by `trackVisit` to populate event properties;\n * useful in consumer code when you need all signals at once.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n","/**\n * djb2 hash returning an 8-char hex string prefixed with `anon_`. Used to\n * build stable anonymous distinct-ids from `ip:ua:...` tuples without\n * collecting identifying data. Not cryptographic — collisions are fine for\n * analytics segmentation.\n */\nexport function hashId(input: string): string {\n let h = 5381\n for (let i = 0; i < input.length; i++) {\n h = ((h << 5) + h + input.charCodeAt(i)) & 0xffffffff\n }\n return 'anon_' + (h >>> 0).toString(16)\n}\n","import { classifyAgent, isAiBot, isHttpClient } from './bots.js'\nimport { hashId } from './hash.js'\nimport type { TrackVisitOptions } from './types.js'\n\n/**\n * Capture an event describing the incoming request. 
Fire-and-forget: awaits\n * the adapter but swallows errors so a downed analytics backend never breaks\n * the response path. Callers typically don't await the returned promise.\n *\n * By default, captures every request so coding-agent traffic (axios, curl,\n * Electron, …) shows up alongside branded crawlers. Set `onlyBots: true` to\n * restrict capture to UAs matching {@link AI_BOT_PATTERN}.\n */\nexport async function trackVisit(\n req: Request,\n opts: TrackVisitOptions\n): Promise<void> {\n const userAgent = req.headers.get('user-agent') || ''\n\n const onlyBots = opts.onlyBots ?? false\n const skipBrowsers = opts.skipBrowsers ?? false\n if (onlyBots && !isAiBot(userAgent)) return\n if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) return\n\n let pathname = '/'\n let originFromUrl = ''\n try {\n const url = new URL(req.url)\n pathname = url.pathname\n originFromUrl = url.origin\n } catch {\n // Some runtimes hand us a relative URL; fall back to the raw string.\n pathname = req.url || '/'\n }\n const origin = opts.origin ?? originFromUrl\n\n const forwardedFor = req.headers.get('x-forwarded-for') || ''\n const ip = forwardedFor.split(',')[0]?.trim() ?? ''\n const referer = req.headers.get('referer')\n const classification = classifyAgent(userAgent)\n\n const event = {\n event: opts.eventName ?? 'agent_visit',\n distinctId: hashId(`${ip}:${userAgent}`),\n timestamp: new Date().toISOString(),\n properties: {\n $process_person_profile: false,\n $current_url: origin ? `${origin}${pathname}` : pathname,\n path: pathname,\n user_agent: userAgent,\n is_ai_bot: classification.isAiBot,\n bot_name: classification.label,\n ua_category: classification.kind,\n coding_agent_hint: classification.codingAgentHint,\n referer,\n source: opts.source ?? 
null,\n ...opts.properties\n }\n }\n\n try {\n await opts.analytics.capture(event)\n } catch {\n // Intentional swallow — analytics failures must not affect the response.\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface PostHogAdapterConfig {\n /** PostHog project API key (the public one used by the JS SDK). */\n apiKey: string\n /**\n * PostHog host, with or without scheme. Defaults to `https://us.i.posthog.com`.\n * Use `https://eu.i.posthog.com` for EU cloud, or your own reverse-proxy\n * domain (e.g. `https://svc.example.com`).\n */\n host?: string\n /**\n * Path on the host that accepts single-event captures. Defaults to\n * `/i/v0/e/` which is PostHog's current endpoint for this.\n */\n path?: string\n /**\n * Override the `fetch` implementation (useful for tests or custom runtimes\n * that need a pinned fetch).\n */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that posts each event to the PostHog capture endpoint. Uses\n * `keepalive: true` so the request survives after a serverless response\n * returns — events aren't guaranteed (fire-and-forget), but that's the\n * trade we want to keep the hot path fast.\n */\nexport function posthogAnalytics(config: PostHogAdapterConfig): AnalyticsAdapter {\n const hostRaw = config.host ?? 'https://us.i.posthog.com'\n const base = (/^https?:\\/\\//.test(hostRaw) ? hostRaw : `https://${hostRaw}`).replace(/\\/$/, '')\n const path = (config.path ?? '/i/v0/e/').replace(/^(?!\\/)/, '/')\n const endpoint = `${base}${path}`\n const fetchImpl = config.fetchImpl ?? 
fetch\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n const payload = {\n api_key: config.apiKey,\n event: event.event,\n distinct_id: event.distinctId,\n timestamp: event.timestamp,\n properties: event.properties\n }\n await fetchImpl(endpoint, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(payload),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface WebhookAdapterConfig {\n /** Destination URL that receives a POST for each event. */\n url: string\n /** Extra headers merged onto the POST (useful for shared-secret auth). */\n headers?: Record<string, string>\n /**\n * Transform the event into the exact JSON body the destination expects.\n * Defaults to sending the {@link CaptureEvent} as-is.\n */\n transform?: (event: CaptureEvent) => unknown\n /** Override the `fetch` implementation. */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that POSTs each event to an arbitrary webhook URL. Keeps the\n * library analytics-backend-agnostic — use this when PostHog isn't your\n * analytics of record, or when you want to multiplex events through your\n * own ingestion layer.\n */\nexport function webhookAnalytics(config: WebhookAdapterConfig): AnalyticsAdapter {\n const fetchImpl = config.fetchImpl ?? fetch\n const transform = config.transform ?? ((e: CaptureEvent): unknown => e)\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n await fetchImpl(config.url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n ...(config.headers ?? 
{})\n },\n body: JSON.stringify(transform(event)),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\n/**\n * Escape hatch for wiring a callback directly as an analytics adapter.\n * Useful when you want to log events, pipe them through your own SDK, or\n * compose multiple adapters.\n *\n * @example\n * ```ts\n * const devAnalytics = customAnalytics((e) => console.log('[doc_view]', e))\n * ```\n */\nexport function customAnalytics(\n capture: (event: CaptureEvent) => Promise<void> | void\n): AnalyticsAdapter {\n return { capture }\n}\n"]} | ||
| {"version":3,"sources":["../src/bots.ts","../src/hash.ts","../src/track.ts","../src/adapters/posthog.ts","../src/adapters/webhook.ts","../src/adapters/custom.ts"],"names":[],"mappings":";AAgBO,IAAM,cAAA,GACX;AAmBK,IAAM,mBAAA,GACX;AAEK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;AAEO,SAAS,aAAa,SAAA,EAA+C;AAC1E,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,mBAAA,CAAoB,KAAK,SAAS,CAAA;AAC3C;AAeO,SAAS,aAAa,SAAA,EAA8C;AACzE,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,CAAA,GAAI,UAAU,WAAA,EAAY;AAGhC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,IAAK,CAAA,CAAE,SAAS,QAAQ,CAAA;AAC1G,IAAA,OAAO,SAAA;AACT,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AAC5F,EAAA,IAAI,CAAA,CAAE,SAAS,eAAe,CAAA,IAAK,EAAE,QAAA,CAAS,iBAAiB,GAAG,OAAO,YAAA;AACzE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,cAAA;AAChC,EAAA,IAAI,CAAA,CAAE,SAAS,iBAAiB,CAAA,IAAK,EAAE,QAAA,CAAS,WAAW,GAAG,OAAO,QAAA;AACrE,EAAA,IAAI,CAAA,CAAE,SAAS,mBAAmB,CAAA,IAAK,EAAE,QAAA,CAAS,UAAU,GAAG,OAAO,OAAA;AACtE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,MAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,SAAS,oBAAoB,CAAA,IAAK,EAAE,QAAA,CAAS,aAAa,GAAG,OAAO,MAAA;AAC1E,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,SAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,eAAe,CAAA,EAAG,OAAO,YAAA;AACxC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;A
AGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,UAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,KAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,KAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,gBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,aAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,EAAG,OAAO,SAAA;AAClC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,YAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,WAAA;AACzC,EAAA,IAAI,CAAA,CAAE,SAAS,cAAc,CAAA,IAAK,EAAE,QAAA,CAAS,SAAS,GAAG,OAAO,SAAA;AAChE,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,OAAA;AACnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,SAAA;AACrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,UAAA;AACtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,UAAU,CAAA,EAAG,OAAO,UAAA;AAGnC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,OAAO,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,QAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,aAAa,CAAA,EAAG,OAAO,OAAA;AAGtC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,qBAAqB,CAAA,EAAG,OAAO,UAAA;AAC9C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,kBAAkB,CAAA,EAAG,OAAO,mBAAA;AAG3C,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,SAAA;AAGjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG,OAAO,iBAAA;AACzC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,WAAA;AACpC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,MAAM,CAAA,EAAG,OAAO,MAAA;AAC/B,EAAA,IAAI,
CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,EAAG,OAAO,QAAA;AACjC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,YAAY,CAAA,EAAG,OAAO,QAAA;AAIrC,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,WAAW,CAAA,EAAG,OAAO,UAAA;AACpC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAC7B,EAAA,IAAI,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAC9B,EAAA,IAAI,4BAAA,CAA6B,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,KAAA;AACjD,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,OAAA;AAChC,EAAA,IAAI,cAAA,CAAe,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,YAAA;AACnC,EAAA,IAAI,mBAAA,CAAoB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,iBAAA;AACxC,EAAA,IAAI,kBAAA,CAAmB,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,gBAAA;AACvC,EAAA,IAAI,UAAA,CAAW,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,QAAA;AAC/B,EAAA,IAAI,WAAA,CAAY,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,SAAA;AAChC,EAAA,IAAI,QAAA,CAAS,IAAA,CAAK,CAAC,CAAA,EAAG,OAAO,MAAA;AAG7B,EAAA,IAAI,CAAA,CAAE,QAAA,CAAS,SAAS,CAAA,IAAK,EAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,QAAA,CAAS,QAAQ,CAAA,IAAK,CAAA,CAAE,SAAS,SAAS,CAAA;AAC/F,IAAA,OAAO,SAAA;AAET,EAAA,OAAO,OAAA;AACT;AAOO,SAAS,sBAAsB,SAAA,EAA8C;AAClF,EAAA,IAAI,CAAC,SAAA,IAAa,OAAO,SAAA,KAAc,UAAU,OAAO,OAAA;AACxD,EAAA,MAAM,eAAA,GAAkB,SAAA,CAAU,KAAA,CAAM,yCAAyC,CAAA;AACjF,EAAA,IAAI,eAAA,IAAmB,gBAAgB,CAAC,CAAA,SAAU,eAAA,CAAgB,CAAC,EAAE,IAAA,EAAK;AAC1E,EAAA,MAAM,QAAQ,SAAA,CAAU,IAAA,EAAK,CAAE,KAAA,CAAM,GAAG,CAAA,CAAE,CAAC,CAAA,EAAG,IAAA,GAAO,KAAA,CAAM,KAAK,CAAA,CAAE,CAAC,GAAG,IAAA,EAAK;AAC3E,EAAA,OAAO,KAAA,IAAS,OAAA;AAClB;AAkBO,SAAS,eAAe,GAAA,EAAiC;AAC9D,EAAA,MAAM,UAAoB,EAAC;AAC3B,EAAA,MAAM,MAAM,GAAA,CAAI,OAAA,CAAQ,IAAI,YAAY,CAAA,IAAK,IAAI,WAAA,EAAY;AAC7D,EAAA,MAAM,WAAA,GACJ,EAAA,CAAG,QAAA,CAAS,SAAS,KAAK,EAAA,CAAG,QAAA,CAAS,QAAQ,CAAA,IAAK,GAAG,QAAA,CAAS,QAAQ,CAAA,IAAK,EAAA,CAAG,SAAS,SAAS,CAAA;AAEnG,EAAA,IAAI,CAAC,WAAA,EAAa,OAAO,EAAE,KAAA,EAAO,GAAG,OAAA,EAAS,EAAC,EAAG,MAAA,EAAQ,KAAA,EAAM;AAEhE,EAAA,IAAI,CAAC,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,iBAAiB,CAAA,EAAG;AACvC,IAAA,OAAA,CAAQ,KAAK,yBAAyB,CAAA;AAAA,EACxC;AACA,EAAA,IAAI,CAAC,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,gBAAgB,CAAA,EAAG;AACtC,IAAA,OAAA,CAAQ,KAAK,wBAAwB,CAAA;AAAA,EACvC;AACA,EAAA,MAAM,OA
AA,GAAU,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,WAAW,CAAA;AAC3C,EAAA,IAAI,CAAC,OAAA,EAAS;AACZ,IAAA,OAAA,CAAQ,KAAK,mBAAmB,CAAA;AAAA,EAClC,WAAW,OAAA,CAAQ,WAAA,EAAY,CAAE,QAAA,CAAS,gBAAgB,CAAA,EAAG;AAC3D,IAAA,OAAA,CAAQ,KAAK,sBAAsB,CAAA;AAAA,EACrC;AACA,EAAA,MAAM,MAAA,GAAS,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,QAAQ,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,CAAC,MAAA,IAAU,MAAA,KAAW,KAAA,EAAO;AAC/B,IAAA,OAAA,CAAQ,KAAK,wBAAwB,CAAA;AAAA,EACvC;AACA,EAAA,IAAA,CAAK,GAAA,CAAI,QAAQ,GAAA,CAAI,YAAY,KAAK,EAAA,EAAI,WAAA,OAAkB,OAAA,EAAS;AACnE,IAAA,OAAA,CAAQ,KAAK,kBAAkB,CAAA;AAAA,EACjC;AAEA,EAAA,MAAM,QAAQ,OAAA,CAAQ,MAAA;AACtB,EAAA,OAAO,EAAE,KAAA,EAAO,OAAA,EAAS,MAAA,EAAQ,SAAS,CAAA,EAAE;AAC9C;AA6CO,SAAS,cAAc,SAAA,EAA2D;AACvF,EAAA,MAAM,KAAA,GAAQ,aAAa,SAAS,CAAA;AACpC,EAAA,MAAM,KAAA,GAAQ,QAAQ,SAAS,CAAA;AAC/B,EAAA,MAAM,UAAA,GAAa,aAAa,SAAS,CAAA;AAEzC,EAAA,IAAI,IAAA;AACJ,EAAA,IAAI,OAAO,IAAA,GAAO,kBAAA;AAAA,OAAA,IACT,YAAY,IAAA,GAAO,mBAAA;AAAA,OAAA,IACnB,KAAA,KAAU,WAAW,IAAA,GAAO,SAAA;AAAA,OAChC,IAAA,GAAO,OAAA;AAEZ,EAAA,OAAO,EAAE,IAAA,EAAM,KAAA,EAAO,OAAA,EAAS,KAAA,EAAO,iBAAiB,UAAA,EAAW;AACpE;AAOO,SAAS,gBAAgB,GAAA,EAAmC;AACjE,EAAA,MAAM,SAAA,GAAY,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AACnD,EAAA,MAAM,IAAA,GAAO,cAAc,SAAS,CAAA;AACpC,EAAA,MAAM,QAAA,GAAW,eAAe,GAAG,CAAA;AAEnC,EAAA,IAAI,OAAO,IAAA,CAAK,IAAA;AAChB,EAAA,IAAI,IAAA,KAAS,SAAA,IAAa,QAAA,CAAS,MAAA,EAAQ;AACzC,IAAA,IAAA,GAAO,iBAAA;AAAA,EACT;AAEA,EAAA,OAAO,EAAE,GAAG,IAAA,EAAM,IAAA,EAAM,QAAA,EAAS;AACnC;;;AC3RO,SAAS,OAAO,KAAA,EAAuB;AAC5C,EAAA,IAAI,CAAA,GAAI,IAAA;AACR,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,KAAA,CAAM,QAAQ,CAAA,EAAA,EAAK;AACrC,IAAA,CAAA,GAAA,CAAM,KAAK,CAAA,IAAK,CAAA,GAAI,KAAA,CAAM,UAAA,CAAW,CAAC,CAAA,GAAK,UAAA;AAAA,EAC7C;AACA,EAAA,OAAO,OAAA,GAAA,CAAW,CAAA,KAAM,CAAA,EAAG,QAAA,CAAS,EAAE,CAAA;AACxC;;;ACCA,eAAsB,UAAA,CACpB,KACA,IAAA,EACe;AACf,EAAA,MAAM,SAAA,GAAY,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAEnD,EAAA,MAAM,QAAA,GAAW,KAAK,QAAA,IAAY,KAAA;AAClC,EAAA,MAAM,YAAA,GAAe,KAAK,YAAA,IAAgB,KAAA;AAC1C,EAAA,IAAI,QAAA,IAAY,CAAC,OAAA,CAAQ,SAAS,CAAA,
EAAG;AACrC,EAAA,IAAI,YAAA,IAAgB,CAAC,OAAA,CAAQ,SAAS,KAAK,CAAC,YAAA,CAAa,SAAS,CAAA,EAAG;AAInE,IAAA,IAAI,CAAC,cAAA,CAAe,GAAG,CAAA,CAAE,MAAA,EAAQ;AAAA,EACnC;AAEA,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI,aAAA,GAAgB,EAAA;AACpB,EAAA,IAAI;AACF,IAAA,MAAM,GAAA,GAAM,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA;AAC3B,IAAA,QAAA,GAAW,GAAA,CAAI,QAAA;AACf,IAAA,aAAA,GAAgB,GAAA,CAAI,MAAA;AAAA,EACtB,CAAA,CAAA,MAAQ;AAEN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AACA,EAAA,MAAM,MAAA,GAAS,KAAK,MAAA,IAAU,aAAA;AAE9B,EAAA,MAAM,YAAA,GAAe,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,iBAAiB,CAAA,IAAK,EAAA;AAC3D,EAAA,MAAM,EAAA,GAAK,aAAa,KAAA,CAAM,GAAG,EAAE,CAAC,CAAA,EAAG,MAAK,IAAK,EAAA;AACjD,EAAA,MAAM,OAAA,GAAU,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,SAAS,CAAA;AACzC,EAAA,MAAM,cAAA,GAAiB,gBAAgB,GAAG,CAAA;AAE1C,EAAA,MAAM,KAAA,GAAQ;AAAA,IACZ,KAAA,EAAO,KAAK,SAAA,IAAa,aAAA;AAAA,IACzB,YAAY,MAAA,CAAO,CAAA,EAAG,EAAE,CAAA,CAAA,EAAI,SAAS,CAAA,CAAE,CAAA;AAAA,IACvC,SAAA,EAAA,iBAAW,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,IAClC,UAAA,EAAY;AAAA,MACV,uBAAA,EAAyB,KAAA;AAAA,MACzB,cAAc,MAAA,GAAS,CAAA,EAAG,MAAM,CAAA,EAAG,QAAQ,CAAA,CAAA,GAAK,QAAA;AAAA,MAChD,IAAA,EAAM,QAAA;AAAA,MACN,UAAA,EAAY,SAAA;AAAA,MACZ,WAAW,cAAA,CAAe,OAAA;AAAA,MAC1B,UAAU,cAAA,CAAe,KAAA;AAAA,MACzB,aAAa,cAAA,CAAe,IAAA;AAAA,MAC5B,mBAAmB,cAAA,CAAe,eAAA;AAAA,MAClC,cAAA,EAAgB,cAAA,CAAe,QAAA,EAAU,KAAA,IAAS,CAAA;AAAA,MAClD,eAAA,EAAiB,cAAA,CAAe,QAAA,EAAU,MAAA,IAAU,KAAA;AAAA,MACpD,OAAA;AAAA,MACA,MAAA,EAAQ,KAAK,MAAA,IAAU,IAAA;AAAA,MACvB,GAAG,IAAA,CAAK;AAAA;AACV,GACF;AAEA,EAAA,IAAI;AACF,IAAA,MAAM,IAAA,CAAK,SAAA,CAAU,OAAA,CAAQ,KAAK,CAAA;AAAA,EACpC,CAAA,CAAA,MAAQ;AAAA,EAER;AACF;;;AC3CO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,OAAA,GAAU,OAAO,IAAA,IAAQ,0BAAA;AAC/B,EAAA,MAAM,IAAA,GAAA,CAAQ,cAAA,CAAe,IAAA,CAAK,OAAO,CAAA,GAAI,OAAA,GAAU,CAAA,QAAA,EAAW,OAAO,CAAA,CAAA,EAAI,OAAA,CAAQ,KAAA,EAAO,EAAE,CAAA;AAC9F,EAAA,MAAM,QAAQ,MAAA,CAAO,IAAA,IAAQ,UAAA,EAAY,OAAA,CAAQ,WAAW,GAAG,CAAA;AAC/D,EAAA,MAAM,QAAA,GAAW,CAAA,EAAG,IAAI,CAAA,EAAG,IAAI,CAAA,CAAA;AAC/B,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AAEtC,EAAA,OAAO;AA
AA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,OAAA,GAAU;AAAA,QACd,SAAS,MAAA,CAAO,MAAA;AAAA,QAChB,OAAO,KAAA,CAAM,KAAA;AAAA,QACb,aAAa,KAAA,CAAM,UAAA;AAAA,QACnB,WAAW,KAAA,CAAM,SAAA;AAAA,QACjB,YAAY,KAAA,CAAM;AAAA,OACpB;AACA,MAAA,MAAM,UAAU,QAAA,EAAU;AAAA,QACxB,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS,EAAE,cAAA,EAAgB,kBAAA,EAAmB;AAAA,QAC9C,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,OAAO,CAAA;AAAA,QAC5B,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC/BO,SAAS,iBAAiB,MAAA,EAAgD;AAC/E,EAAA,MAAM,SAAA,GAAY,OAAO,SAAA,IAAa,KAAA;AACtC,EAAA,MAAM,SAAA,GAAY,MAAA,CAAO,SAAA,KAAc,CAAC,CAAA,KAA6B,CAAA,CAAA;AAErE,EAAA,OAAO;AAAA,IACL,MAAM,QAAQ,KAAA,EAAoC;AAChD,MAAA,MAAM,SAAA,CAAU,OAAO,GAAA,EAAK;AAAA,QAC1B,MAAA,EAAQ,MAAA;AAAA,QACR,OAAA,EAAS;AAAA,UACP,cAAA,EAAgB,kBAAA;AAAA,UAChB,GAAI,MAAA,CAAO,OAAA,IAAW;AAAC,SACzB;AAAA,QACA,IAAA,EAAM,IAAA,CAAK,SAAA,CAAU,SAAA,CAAU,KAAK,CAAC,CAAA;AAAA,QACrC,SAAA,EAAW;AAAA,OACZ,CAAA;AAAA,IACH;AAAA,GACF;AACF;;;AC3BO,SAAS,gBACd,OAAA,EACkB;AAClB,EAAA,OAAO,EAAE,OAAA,EAAQ;AACnB","file":"index.js","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. 
See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. 
Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring 
bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron 
UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\n/**\n * Detect likely headless/automated browsers by checking for missing headers\n * that real browsers always send. 
Playwright, Puppeteer, and similar tools\n * spoof the UA but often omit standard browser headers.\n *\n * Signals checked (each scores 1 point):\n * - Missing `Accept-Language` — every real browser sends this\n * - Missing `Sec-Fetch-Mode` — sent by all modern browsers\n * - Missing `Sec-CH-UA` — Client Hints, Chromium 89+\n * - `Sec-CH-UA` contains \"HeadlessChrome\"\n * - Missing or bare Accept header — browsers send detailed accept lists\n * - `Connection: close` with browser UA — browsers use keep-alive\n *\n * Returns a score (0-6), the signals that fired, and a boolean `likely`\n * flag (score >= 2 with a browser-like UA).\n */\nexport function detectHeadless(req: Request): HeadlessDetection {\n const signals: string[] = []\n const ua = (req.headers.get('user-agent') || '').toLowerCase()\n const isBrowserUA =\n ua.includes('mozilla') || ua.includes('chrome') || ua.includes('safari') || ua.includes('firefox')\n\n if (!isBrowserUA) return { score: 0, signals: [], likely: false }\n\n if (!req.headers.get('accept-language')) {\n signals.push('missing-accept-language')\n }\n if (!req.headers.get('sec-fetch-mode')) {\n signals.push('missing-sec-fetch-mode')\n }\n const secChUa = req.headers.get('sec-ch-ua')\n if (!secChUa) {\n signals.push('missing-sec-ch-ua')\n } else if (secChUa.toLowerCase().includes('headlesschrome')) {\n signals.push('headless-chrome-hint')\n }\n const accept = req.headers.get('accept') || ''\n if (!accept || accept === '*/*') {\n signals.push('missing-or-bare-accept')\n }\n if ((req.headers.get('connection') || '').toLowerCase() === 'close') {\n signals.push('connection-close')\n }\n\n const score = signals.length\n return { score, signals, likely: score >= 2 }\n}\n\nexport interface HeadlessDetection {\n /** Number of suspicious signals found (0-6). */\n score: number\n /** Names of the specific signals that fired. */\n signals: string[]\n /** True when score >= 2 — strong headless indication. 
*/\n likely: boolean\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'headless-likely'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the request:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'headless-likely'` — Browser-like UA but missing standard headers.\n * Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.).\n * - `'browser'` — Looks like a real browser with expected headers present.\n * - `'other'` — Unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. */\n codingAgentHint: boolean\n /** Headless browser detection result. Only populated when `req` is passed. */\n headless?: HeadlessDetection\n}\n\n/**\n * UA-only classification. Use {@link classifyRequest} for full detection\n * including headless browser heuristics.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n\n/**\n * Full request classification — combines UA parsing with header-based\n * headless detection. 
When a browser-like UA is missing standard headers,\n * the kind is promoted from `'browser'` to `'headless-likely'`.\n */\nexport function classifyRequest(req: Request): AgentClassification {\n const userAgent = req.headers.get('user-agent') || ''\n const base = classifyAgent(userAgent)\n const headless = detectHeadless(req)\n\n let kind = base.kind\n if (kind === 'browser' && headless.likely) {\n kind = 'headless-likely'\n }\n\n return { ...base, kind, headless }\n}\n","/**\n * djb2 hash returning an 8-char hex string prefixed with `anon_`. Used to\n * build stable anonymous distinct-ids from `ip:ua:...` tuples without\n * collecting identifying data. Not cryptographic — collisions are fine for\n * analytics segmentation.\n */\nexport function hashId(input: string): string {\n let h = 5381\n for (let i = 0; i < input.length; i++) {\n h = ((h << 5) + h + input.charCodeAt(i)) & 0xffffffff\n }\n return 'anon_' + (h >>> 0).toString(16)\n}\n","import { classifyRequest, detectHeadless, isAiBot, isHttpClient } from './bots.js'\nimport { hashId } from './hash.js'\nimport type { TrackVisitOptions } from './types.js'\n\n/**\n * Capture an event describing the incoming request. Fire-and-forget: awaits\n * the adapter but swallows errors so a downed analytics backend never breaks\n * the response path. Callers typically don't await the returned promise.\n *\n * By default, captures every request so coding-agent traffic (axios, curl,\n * Electron, …) shows up alongside branded crawlers. Set `onlyBots: true` to\n * restrict capture to UAs matching {@link AI_BOT_PATTERN}.\n */\nexport async function trackVisit(\n req: Request,\n opts: TrackVisitOptions\n): Promise<void> {\n const userAgent = req.headers.get('user-agent') || ''\n\n const onlyBots = opts.onlyBots ?? false\n const skipBrowsers = opts.skipBrowsers ?? 
false\n if (onlyBots && !isAiBot(userAgent)) return\n if (skipBrowsers && !isAiBot(userAgent) && !isHttpClient(userAgent)) {\n // Not a declared bot or HTTP client — check headless heuristics.\n // Playwright-based agents (Aider, OpenCode) will pass if they're missing\n // standard browser headers. Real browsers get skipped.\n if (!detectHeadless(req).likely) return\n }\n\n let pathname = '/'\n let originFromUrl = ''\n try {\n const url = new URL(req.url)\n pathname = url.pathname\n originFromUrl = url.origin\n } catch {\n // Some runtimes hand us a relative URL; fall back to the raw string.\n pathname = req.url || '/'\n }\n const origin = opts.origin ?? originFromUrl\n\n const forwardedFor = req.headers.get('x-forwarded-for') || ''\n const ip = forwardedFor.split(',')[0]?.trim() ?? ''\n const referer = req.headers.get('referer')\n const classification = classifyRequest(req)\n\n const event = {\n event: opts.eventName ?? 'agent_visit',\n distinctId: hashId(`${ip}:${userAgent}`),\n timestamp: new Date().toISOString(),\n properties: {\n $process_person_profile: false,\n $current_url: origin ? `${origin}${pathname}` : pathname,\n path: pathname,\n user_agent: userAgent,\n is_ai_bot: classification.isAiBot,\n bot_name: classification.label,\n ua_category: classification.kind,\n coding_agent_hint: classification.codingAgentHint,\n headless_score: classification.headless?.score ?? 0,\n headless_likely: classification.headless?.likely ?? false,\n referer,\n source: opts.source ?? null,\n ...opts.properties\n }\n }\n\n try {\n await opts.analytics.capture(event)\n } catch {\n // Intentional swallow — analytics failures must not affect the response.\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface PostHogAdapterConfig {\n /** PostHog project API key (the public one used by the JS SDK). */\n apiKey: string\n /**\n * PostHog host, with or without scheme. 
Defaults to `https://us.i.posthog.com`.\n * Use `https://eu.i.posthog.com` for EU cloud, or your own reverse-proxy\n * domain (e.g. `https://svc.example.com`).\n */\n host?: string\n /**\n * Path on the host that accepts single-event captures. Defaults to\n * `/i/v0/e/` which is PostHog's current endpoint for this.\n */\n path?: string\n /**\n * Override the `fetch` implementation (useful for tests or custom runtimes\n * that need a pinned fetch).\n */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that posts each event to the PostHog capture endpoint. Uses\n * `keepalive: true` so the request survives after a serverless response\n * returns — events aren't guaranteed (fire-and-forget), but that's the\n * trade we want to keep the hot path fast.\n */\nexport function posthogAnalytics(config: PostHogAdapterConfig): AnalyticsAdapter {\n const hostRaw = config.host ?? 'https://us.i.posthog.com'\n const base = (/^https?:\\/\\//.test(hostRaw) ? hostRaw : `https://${hostRaw}`).replace(/\\/$/, '')\n const path = (config.path ?? '/i/v0/e/').replace(/^(?!\\/)/, '/')\n const endpoint = `${base}${path}`\n const fetchImpl = config.fetchImpl ?? fetch\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n const payload = {\n api_key: config.apiKey,\n event: event.event,\n distinct_id: event.distinctId,\n timestamp: event.timestamp,\n properties: event.properties\n }\n await fetchImpl(endpoint, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(payload),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\nexport interface WebhookAdapterConfig {\n /** Destination URL that receives a POST for each event. */\n url: string\n /** Extra headers merged onto the POST (useful for shared-secret auth). 
*/\n headers?: Record<string, string>\n /**\n * Transform the event into the exact JSON body the destination expects.\n * Defaults to sending the {@link CaptureEvent} as-is.\n */\n transform?: (event: CaptureEvent) => unknown\n /** Override the `fetch` implementation. */\n fetchImpl?: typeof fetch\n}\n\n/**\n * Adapter that POSTs each event to an arbitrary webhook URL. Keeps the\n * library analytics-backend-agnostic — use this when PostHog isn't your\n * analytics of record, or when you want to multiplex events through your\n * own ingestion layer.\n */\nexport function webhookAnalytics(config: WebhookAdapterConfig): AnalyticsAdapter {\n const fetchImpl = config.fetchImpl ?? fetch\n const transform = config.transform ?? ((e: CaptureEvent): unknown => e)\n\n return {\n async capture(event: CaptureEvent): Promise<void> {\n await fetchImpl(config.url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n ...(config.headers ?? {})\n },\n body: JSON.stringify(transform(event)),\n keepalive: true\n })\n }\n }\n}\n","import type { AnalyticsAdapter, CaptureEvent } from '../types.js'\n\n/**\n * Escape hatch for wiring a callback directly as an analytics adapter.\n * Useful when you want to log events, pipe them through your own SDK, or\n * compose multiple adapters.\n *\n * @example\n * ```ts\n * const devAnalytics = customAnalytics((e) => console.log('[doc_view]', e))\n * ```\n */\nexport function customAnalytics(\n capture: (event: CaptureEvent) => Promise<void> | void\n): AnalyticsAdapter {\n return { capture }\n}\n"]} |
@@ -1,1 +0,1 @@ | ||
| {"version":3,"sources":["../src/bots.ts","../src/markdown.ts"],"names":[],"mappings":";;;AAgBO,IAAM,cAAA,GACX,2QAAA;AAsBK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;;;ACZO,SAAS,sBAAsB,GAAA,EAAuC;AAC3E,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA,CAAE,QAAA;AAAA,EAC9B,CAAA,CAAA,MAAQ;AACN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AAEA,EAAA,MAAM,EAAA,GAAK,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,OAAA,CAAQ,EAAE,CAAA,EAAG;AACf,IAAA,OAAO,EAAE,MAAA,EAAQ,YAAA,EAAc,YAAA,EAAc,QAAA,EAAS;AAAA,EACxD;AAEA,EAAA,IAAI,QAAA,CAAS,QAAA,CAAS,KAAK,CAAA,EAAG;AAC5B,IAAA,OAAO,EAAE,QAAQ,WAAA,EAAa,YAAA,EAAc,SAAS,OAAA,CAAQ,OAAA,EAAS,EAAE,CAAA,EAAE;AAAA,EAC5E;AAEA,EAAA,MAAM,MAAA,GAAS,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,QAAQ,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,MAAA,CAAO,QAAA,CAAS,eAAe,CAAA,EAAG;AACpC,IAAA,OAAO,EAAE,MAAA,EAAQ,eAAA,EAAiB,YAAA,EAAc,QAAA,EAAS;AAAA,EAC3D;AAEA,EAAA,OAAO,IAAA;AACT;AAqBO,SAAS,eAAA,CAAgB,KAAA,GAA8B,EAAC,EAA2B;AACxF,EAAA,MAAM,OAAA,GAAkC;AAAA,IACtC,cAAA,EAAgB,8BAAA;AAAA,IAChB,gBAAA,EAAkB,MAAM,aAAA,IAAiB,uCAAA;AAAA,IACzC,IAAA,EAAM;AAAA,GACR;AACA,EAAA,IAAI,OAAO,KAAA,CAAM,MAAA,KAAW,QAAA,IAAY,KAAA,CAAM,SAAS,CAAA,EAAG;AACxD,IAAA,OAAA,CAAQ,mBAAmB,CAAA,GAAI,IAAA,CAAK,GAAA,CAAI,CAAA,EAAG,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,MAAM,CAAC,CAAA,CAAE,QAAA,EAAS;AAAA,EAC/E;AACA,EAAA,OAAO,OAAA;AACT;AAoBO,SAAS,0BAA0B,KAAA,EAAuC;AAC/E,EAAA,MAAM,IAAA,GACJ,KAAA,CAAM,QAAA,IAAA,CACL,MAAM;AACL,IAAA,IAAI;AACF,MAAA,OAAO,IAAI,GAAA,CAAI,KAAA,CAAM,MAAM,CAAA,CAAE,QAAA;AAAA,IAC/B,CAAA,CAAA,MAAQ;AACN,MAAA,OAAO,KAAA,CAAM,MAAA;AAAA,IACf;AAAA,EACF,CAAA,GAAG;AACL,EAAA,MAAM,MAAM,CAAA,EAAG,KAAA,CAAM,MAAM,CAAA,EAAG,MAAM,QAAQ,CAAA,CAAA;AAC5C,EAAA,MAAM,KAAA,GAAkB,CAAC,CAAA,EAAA,EAAK,IAAI,IAAI,EAAA,EAAI,CAAA,WAAA,EAAc,GAAG,CAAA,gDAAA,CAAA,EAAoD,EAAE,CAAA;AACjH,EAAA,MAAM,QAAkB,EAAC;AACzB,EAAA,IAAI,KAAA,CAAM,UAAA,EAAY,KAAA,CAAM,IAAA,CAAK,CAAA,GAAA,EAAM,MAAM,UAAU,CAAA,EAAA,EAAK,KAAA,CAAM,UAAU,CAAA
,8BAAA,CAA2B,CAAA;AACvG,EAAA,IAAI,KAAA,CAAM,cAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,cAAc,CAAA,EAAA,EAAK,KAAA,CAAM,cAAc,CAAA,8BAAA,CAA2B,CAAA;AAC3F,EAAA,IAAI,KAAA,CAAM,gBAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,gBAAgB,CAAA,EAAA,EAAK,KAAA,CAAM,gBAAgB,CAAA,yCAAA,CAAsC,CAAA;AAC1G,EAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,IAAA,KAAA,CAAM,IAAA,CAAK,0CAAA,EAA4C,EAAA,EAAI,GAAG,OAAO,EAAE,CAAA;AAAA,EACzE;AACA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB","file":"markdown.cjs","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. 
Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. 
Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n 
if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 
'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the UA:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'browser'` — looks like a real browser. Could be a genuine user or\n * a Playwright-based agent (Aider, OpenCode) that can't be distinguished\n * at the UA layer.\n * - `'other'` — unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. 
*/\n codingAgentHint: boolean\n}\n\n/**\n * One-stop classification of a user-agent. Combines {@link isAiBot},\n * {@link isHttpClient}, and {@link parseBotName} into a single structured\n * result. Used internally by `trackVisit` to populate event properties;\n * useful in consumer code when you need all signals at once.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n","import { isAiBot } from './bots.js'\n\nexport type MarkdownServeReason =\n | 'ua-rewrite'\n | 'md-suffix'\n | 'accept-header'\n\nexport interface MarkdownDecision {\n /** Why this request should be served Markdown. */\n reason: MarkdownServeReason\n /**\n * The request's original logical path, with any trailing `.md` stripped.\n * Use this when mapping to a mirror file.\n */\n strippedPath: string\n}\n\n/**\n * Decide whether the request should be served Markdown instead of HTML.\n * Returns `null` when the request should go through your normal handler.\n *\n * Covers three triggers:\n * - Known AI-bot UA on any URL (`ua-rewrite`)\n * - Explicit `.md` suffix on the URL (`md-suffix`)\n * - `Accept: text/markdown` header (`accept-header`)\n *\n * This helper intentionally does not perform the rewrite itself — routing is\n * framework-specific (NextResponse.rewrite for Next.js, ctx.rewrite for\n * Hono, etc.). 
Use the returned decision to build the appropriate response.\n */\nexport function markdownServeDecision(req: Request): MarkdownDecision | null {\n let pathname = '/'\n try {\n pathname = new URL(req.url).pathname\n } catch {\n pathname = req.url || '/'\n }\n\n const ua = req.headers.get('user-agent') || ''\n if (isAiBot(ua)) {\n return { reason: 'ua-rewrite', strippedPath: pathname }\n }\n\n if (pathname.endsWith('.md')) {\n return { reason: 'md-suffix', strippedPath: pathname.replace(/\\.md$/, '') }\n }\n\n const accept = req.headers.get('accept') || ''\n if (accept.includes('text/markdown')) {\n return { reason: 'accept-header', strippedPath: pathname }\n }\n\n return null\n}\n\nexport interface MarkdownHeadersInput {\n /**\n * If provided, rendered as `x-markdown-tokens` so agents can budget context\n * before parsing the body. Typically `Math.ceil(body.length / 4)`.\n */\n tokens?: number\n /**\n * Content-Signal directive (see contentsignals.org). Defaults to\n * `'search=yes, ai-input=yes, ai-train=no'` — change if you want to permit\n * training or restrict indexing.\n */\n contentSignal?: string\n}\n\n/**\n * Build the set of response headers to attach to a Markdown response. Safe\n * defaults: UTF-8 text/markdown, Vary: accept, and a Content-Signal directive\n * that permits search + agent input but denies training.\n */\nexport function markdownHeaders(input: MarkdownHeadersInput = {}): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'text/markdown; charset=utf-8',\n 'Content-Signal': input.contentSignal ?? 'search=yes, ai-input=yes, ai-train=no',\n Vary: 'accept'\n }\n if (typeof input.tokens === 'number' && input.tokens > 0) {\n headers['x-markdown-tokens'] = Math.max(1, Math.ceil(input.tokens)).toString()\n }\n return headers\n}\n\nexport interface SynthesizePointerInput {\n origin: string\n pathname: string\n /** URL of the site's curated index, usually `/llms.txt`. 
*/\n llmsTxtUrl?: string\n /** URL of the full enumerated index, usually `/llms-full.txt`. */\n llmsFullTxtUrl?: string\n /** URL of the machine-readable path manifest, usually `/md/index.json`. */\n markdownIndexUrl?: string\n /** Site name to title the pointer document. Defaults to the origin hostname. */\n siteName?: string\n}\n\n/**\n * Generate a minimal pointer Markdown document for URLs that don't have a\n * pre-built mirror. Keeps the `Accept: text/markdown` contract intact\n * site-wide — agents always get *something* parseable, not a 404.\n */\nexport function synthesizeMarkdownPointer(input: SynthesizePointerInput): string {\n const site =\n input.siteName ??\n (() => {\n try {\n return new URL(input.origin).hostname\n } catch {\n return input.origin\n }\n })()\n const url = `${input.origin}${input.pathname}`\n const lines: string[] = [`# ${site}`, '', `This page (${url}) does not have a dedicated Markdown mirror yet.`, '']\n const links: string[] = []\n if (input.llmsTxtUrl) links.push(`- [${input.llmsTxtUrl}](${input.llmsTxtUrl}) — curated index of docs`)\n if (input.llmsFullTxtUrl)\n links.push(`- [${input.llmsFullTxtUrl}](${input.llmsFullTxtUrl}) — full enumerated index`)\n if (input.markdownIndexUrl)\n links.push(`- [${input.markdownIndexUrl}](${input.markdownIndexUrl}) — JSON index of all Markdown paths`)\n if (links.length) {\n lines.push('For machine-readable documentation, see:', '', ...links, '')\n }\n return lines.join('\\n')\n}\n"]} | ||
| {"version":3,"sources":["../src/bots.ts","../src/markdown.ts"],"names":[],"mappings":";;;AAgBO,IAAM,cAAA,GACX,2QAAA;AAsBK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;;;ACZO,SAAS,sBAAsB,GAAA,EAAuC;AAC3E,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA,CAAE,QAAA;AAAA,EAC9B,CAAA,CAAA,MAAQ;AACN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AAEA,EAAA,MAAM,EAAA,GAAK,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,OAAA,CAAQ,EAAE,CAAA,EAAG;AACf,IAAA,OAAO,EAAE,MAAA,EAAQ,YAAA,EAAc,YAAA,EAAc,QAAA,EAAS;AAAA,EACxD;AAEA,EAAA,IAAI,QAAA,CAAS,QAAA,CAAS,KAAK,CAAA,EAAG;AAC5B,IAAA,OAAO,EAAE,QAAQ,WAAA,EAAa,YAAA,EAAc,SAAS,OAAA,CAAQ,OAAA,EAAS,EAAE,CAAA,EAAE;AAAA,EAC5E;AAEA,EAAA,MAAM,MAAA,GAAS,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,QAAQ,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,MAAA,CAAO,QAAA,CAAS,eAAe,CAAA,EAAG;AACpC,IAAA,OAAO,EAAE,MAAA,EAAQ,eAAA,EAAiB,YAAA,EAAc,QAAA,EAAS;AAAA,EAC3D;AAEA,EAAA,OAAO,IAAA;AACT;AAqBO,SAAS,eAAA,CAAgB,KAAA,GAA8B,EAAC,EAA2B;AACxF,EAAA,MAAM,OAAA,GAAkC;AAAA,IACtC,cAAA,EAAgB,8BAAA;AAAA,IAChB,gBAAA,EAAkB,MAAM,aAAA,IAAiB,uCAAA;AAAA,IACzC,IAAA,EAAM;AAAA,GACR;AACA,EAAA,IAAI,OAAO,KAAA,CAAM,MAAA,KAAW,QAAA,IAAY,KAAA,CAAM,SAAS,CAAA,EAAG;AACxD,IAAA,OAAA,CAAQ,mBAAmB,CAAA,GAAI,IAAA,CAAK,GAAA,CAAI,CAAA,EAAG,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,MAAM,CAAC,CAAA,CAAE,QAAA,EAAS;AAAA,EAC/E;AACA,EAAA,OAAO,OAAA;AACT;AAoBO,SAAS,0BAA0B,KAAA,EAAuC;AAC/E,EAAA,MAAM,IAAA,GACJ,KAAA,CAAM,QAAA,IAAA,CACL,MAAM;AACL,IAAA,IAAI;AACF,MAAA,OAAO,IAAI,GAAA,CAAI,KAAA,CAAM,MAAM,CAAA,CAAE,QAAA;AAAA,IAC/B,CAAA,CAAA,MAAQ;AACN,MAAA,OAAO,KAAA,CAAM,MAAA;AAAA,IACf;AAAA,EACF,CAAA,GAAG;AACL,EAAA,MAAM,MAAM,CAAA,EAAG,KAAA,CAAM,MAAM,CAAA,EAAG,MAAM,QAAQ,CAAA,CAAA;AAC5C,EAAA,MAAM,KAAA,GAAkB,CAAC,CAAA,EAAA,EAAK,IAAI,IAAI,EAAA,EAAI,CAAA,WAAA,EAAc,GAAG,CAAA,gDAAA,CAAA,EAAoD,EAAE,CAAA;AACjH,EAAA,MAAM,QAAkB,EAAC;AACzB,EAAA,IAAI,KAAA,CAAM,UAAA,EAAY,KAAA,CAAM,IAAA,CAAK,CAAA,GAAA,EAAM,MAAM,UAAU,CAAA,EAAA,EAAK,KAAA,CAAM,UAAU,CAAA
,8BAAA,CAA2B,CAAA;AACvG,EAAA,IAAI,KAAA,CAAM,cAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,cAAc,CAAA,EAAA,EAAK,KAAA,CAAM,cAAc,CAAA,8BAAA,CAA2B,CAAA;AAC3F,EAAA,IAAI,KAAA,CAAM,gBAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,gBAAgB,CAAA,EAAA,EAAK,KAAA,CAAM,gBAAgB,CAAA,yCAAA,CAAsC,CAAA;AAC1G,EAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,IAAA,KAAA,CAAM,IAAA,CAAK,0CAAA,EAA4C,EAAA,EAAI,GAAG,OAAO,EAAE,CAAA;AAAA,EACzE;AACA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB","file":"markdown.cjs","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. 
Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. 
Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n 
if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 
'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\n/**\n * Detect likely headless/automated browsers by checking for missing headers\n * that real browsers always send. 
Playwright, Puppeteer, and similar tools\n * spoof the UA but often omit standard browser headers.\n *\n * Signals checked (each scores 1 point):\n * - Missing `Accept-Language` — every real browser sends this\n * - Missing `Sec-Fetch-Mode` — sent by all modern browsers\n * - Missing `Sec-CH-UA` — Client Hints, Chromium 89+\n * - `Sec-CH-UA` contains \"HeadlessChrome\"\n * - Missing or bare Accept header — browsers send detailed accept lists\n * - `Connection: close` with browser UA — browsers use keep-alive\n *\n * Returns a score (0-6), the signals that fired, and a boolean `likely`\n * flag (score >= 2 with a browser-like UA).\n */\nexport function detectHeadless(req: Request): HeadlessDetection {\n const signals: string[] = []\n const ua = (req.headers.get('user-agent') || '').toLowerCase()\n const isBrowserUA =\n ua.includes('mozilla') || ua.includes('chrome') || ua.includes('safari') || ua.includes('firefox')\n\n if (!isBrowserUA) return { score: 0, signals: [], likely: false }\n\n if (!req.headers.get('accept-language')) {\n signals.push('missing-accept-language')\n }\n if (!req.headers.get('sec-fetch-mode')) {\n signals.push('missing-sec-fetch-mode')\n }\n const secChUa = req.headers.get('sec-ch-ua')\n if (!secChUa) {\n signals.push('missing-sec-ch-ua')\n } else if (secChUa.toLowerCase().includes('headlesschrome')) {\n signals.push('headless-chrome-hint')\n }\n const accept = req.headers.get('accept') || ''\n if (!accept || accept === '*/*') {\n signals.push('missing-or-bare-accept')\n }\n if ((req.headers.get('connection') || '').toLowerCase() === 'close') {\n signals.push('connection-close')\n }\n\n const score = signals.length\n return { score, signals, likely: score >= 2 }\n}\n\nexport interface HeadlessDetection {\n /** Number of suspicious signals found (0-6). */\n score: number\n /** Names of the specific signals that fired. */\n signals: string[]\n /** True when score >= 2 — strong headless indication. 
*/\n likely: boolean\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'headless-likely'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the request:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'headless-likely'` — Browser-like UA but missing standard headers.\n * Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.).\n * - `'browser'` — Looks like a real browser with expected headers present.\n * - `'other'` — Unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. */\n codingAgentHint: boolean\n /** Headless browser detection result. Only populated when `req` is passed. */\n headless?: HeadlessDetection\n}\n\n/**\n * UA-only classification. Use {@link classifyRequest} for full detection\n * including headless browser heuristics.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n\n/**\n * Full request classification — combines UA parsing with header-based\n * headless detection. 
When a browser-like UA is missing standard headers,\n * the kind is promoted from `'browser'` to `'headless-likely'`.\n */\nexport function classifyRequest(req: Request): AgentClassification {\n const userAgent = req.headers.get('user-agent') || ''\n const base = classifyAgent(userAgent)\n const headless = detectHeadless(req)\n\n let kind = base.kind\n if (kind === 'browser' && headless.likely) {\n kind = 'headless-likely'\n }\n\n return { ...base, kind, headless }\n}\n","import { isAiBot } from './bots.js'\n\nexport type MarkdownServeReason =\n | 'ua-rewrite'\n | 'md-suffix'\n | 'accept-header'\n\nexport interface MarkdownDecision {\n /** Why this request should be served Markdown. */\n reason: MarkdownServeReason\n /**\n * The request's original logical path, with any trailing `.md` stripped.\n * Use this when mapping to a mirror file.\n */\n strippedPath: string\n}\n\n/**\n * Decide whether the request should be served Markdown instead of HTML.\n * Returns `null` when the request should go through your normal handler.\n *\n * Covers three triggers:\n * - Known AI-bot UA on any URL (`ua-rewrite`)\n * - Explicit `.md` suffix on the URL (`md-suffix`)\n * - `Accept: text/markdown` header (`accept-header`)\n *\n * This helper intentionally does not perform the rewrite itself — routing is\n * framework-specific (NextResponse.rewrite for Next.js, ctx.rewrite for\n * Hono, etc.). 
Use the returned decision to build the appropriate response.\n */\nexport function markdownServeDecision(req: Request): MarkdownDecision | null {\n let pathname = '/'\n try {\n pathname = new URL(req.url).pathname\n } catch {\n pathname = req.url || '/'\n }\n\n const ua = req.headers.get('user-agent') || ''\n if (isAiBot(ua)) {\n return { reason: 'ua-rewrite', strippedPath: pathname }\n }\n\n if (pathname.endsWith('.md')) {\n return { reason: 'md-suffix', strippedPath: pathname.replace(/\\.md$/, '') }\n }\n\n const accept = req.headers.get('accept') || ''\n if (accept.includes('text/markdown')) {\n return { reason: 'accept-header', strippedPath: pathname }\n }\n\n return null\n}\n\nexport interface MarkdownHeadersInput {\n /**\n * If provided, rendered as `x-markdown-tokens` so agents can budget context\n * before parsing the body. Typically `Math.ceil(body.length / 4)`.\n */\n tokens?: number\n /**\n * Content-Signal directive (see contentsignals.org). Defaults to\n * `'search=yes, ai-input=yes, ai-train=no'` — change if you want to permit\n * training or restrict indexing.\n */\n contentSignal?: string\n}\n\n/**\n * Build the set of response headers to attach to a Markdown response. Safe\n * defaults: UTF-8 text/markdown, Vary: accept, and a Content-Signal directive\n * that permits search + agent input but denies training.\n */\nexport function markdownHeaders(input: MarkdownHeadersInput = {}): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'text/markdown; charset=utf-8',\n 'Content-Signal': input.contentSignal ?? 'search=yes, ai-input=yes, ai-train=no',\n Vary: 'accept'\n }\n if (typeof input.tokens === 'number' && input.tokens > 0) {\n headers['x-markdown-tokens'] = Math.max(1, Math.ceil(input.tokens)).toString()\n }\n return headers\n}\n\nexport interface SynthesizePointerInput {\n origin: string\n pathname: string\n /** URL of the site's curated index, usually `/llms.txt`. 
*/\n llmsTxtUrl?: string\n /** URL of the full enumerated index, usually `/llms-full.txt`. */\n llmsFullTxtUrl?: string\n /** URL of the machine-readable path manifest, usually `/md/index.json`. */\n markdownIndexUrl?: string\n /** Site name to title the pointer document. Defaults to the origin hostname. */\n siteName?: string\n}\n\n/**\n * Generate a minimal pointer Markdown document for URLs that don't have a\n * pre-built mirror. Keeps the `Accept: text/markdown` contract intact\n * site-wide — agents always get *something* parseable, not a 404.\n */\nexport function synthesizeMarkdownPointer(input: SynthesizePointerInput): string {\n const site =\n input.siteName ??\n (() => {\n try {\n return new URL(input.origin).hostname\n } catch {\n return input.origin\n }\n })()\n const url = `${input.origin}${input.pathname}`\n const lines: string[] = [`# ${site}`, '', `This page (${url}) does not have a dedicated Markdown mirror yet.`, '']\n const links: string[] = []\n if (input.llmsTxtUrl) links.push(`- [${input.llmsTxtUrl}](${input.llmsTxtUrl}) — curated index of docs`)\n if (input.llmsFullTxtUrl)\n links.push(`- [${input.llmsFullTxtUrl}](${input.llmsFullTxtUrl}) — full enumerated index`)\n if (input.markdownIndexUrl)\n links.push(`- [${input.markdownIndexUrl}](${input.markdownIndexUrl}) — JSON index of all Markdown paths`)\n if (links.length) {\n lines.push('For machine-readable documentation, see:', '', ...links, '')\n }\n return lines.join('\\n')\n}\n"]} |
@@ -1,1 +0,1 @@ | ||
| {"version":3,"sources":["../src/bots.ts","../src/markdown.ts"],"names":[],"mappings":";AAgBO,IAAM,cAAA,GACX,2QAAA;AAsBK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;;;ACZO,SAAS,sBAAsB,GAAA,EAAuC;AAC3E,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA,CAAE,QAAA;AAAA,EAC9B,CAAA,CAAA,MAAQ;AACN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AAEA,EAAA,MAAM,EAAA,GAAK,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,OAAA,CAAQ,EAAE,CAAA,EAAG;AACf,IAAA,OAAO,EAAE,MAAA,EAAQ,YAAA,EAAc,YAAA,EAAc,QAAA,EAAS;AAAA,EACxD;AAEA,EAAA,IAAI,QAAA,CAAS,QAAA,CAAS,KAAK,CAAA,EAAG;AAC5B,IAAA,OAAO,EAAE,QAAQ,WAAA,EAAa,YAAA,EAAc,SAAS,OAAA,CAAQ,OAAA,EAAS,EAAE,CAAA,EAAE;AAAA,EAC5E;AAEA,EAAA,MAAM,MAAA,GAAS,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,QAAQ,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,MAAA,CAAO,QAAA,CAAS,eAAe,CAAA,EAAG;AACpC,IAAA,OAAO,EAAE,MAAA,EAAQ,eAAA,EAAiB,YAAA,EAAc,QAAA,EAAS;AAAA,EAC3D;AAEA,EAAA,OAAO,IAAA;AACT;AAqBO,SAAS,eAAA,CAAgB,KAAA,GAA8B,EAAC,EAA2B;AACxF,EAAA,MAAM,OAAA,GAAkC;AAAA,IACtC,cAAA,EAAgB,8BAAA;AAAA,IAChB,gBAAA,EAAkB,MAAM,aAAA,IAAiB,uCAAA;AAAA,IACzC,IAAA,EAAM;AAAA,GACR;AACA,EAAA,IAAI,OAAO,KAAA,CAAM,MAAA,KAAW,QAAA,IAAY,KAAA,CAAM,SAAS,CAAA,EAAG;AACxD,IAAA,OAAA,CAAQ,mBAAmB,CAAA,GAAI,IAAA,CAAK,GAAA,CAAI,CAAA,EAAG,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,MAAM,CAAC,CAAA,CAAE,QAAA,EAAS;AAAA,EAC/E;AACA,EAAA,OAAO,OAAA;AACT;AAoBO,SAAS,0BAA0B,KAAA,EAAuC;AAC/E,EAAA,MAAM,IAAA,GACJ,KAAA,CAAM,QAAA,IAAA,CACL,MAAM;AACL,IAAA,IAAI;AACF,MAAA,OAAO,IAAI,GAAA,CAAI,KAAA,CAAM,MAAM,CAAA,CAAE,QAAA;AAAA,IAC/B,CAAA,CAAA,MAAQ;AACN,MAAA,OAAO,KAAA,CAAM,MAAA;AAAA,IACf;AAAA,EACF,CAAA,GAAG;AACL,EAAA,MAAM,MAAM,CAAA,EAAG,KAAA,CAAM,MAAM,CAAA,EAAG,MAAM,QAAQ,CAAA,CAAA;AAC5C,EAAA,MAAM,KAAA,GAAkB,CAAC,CAAA,EAAA,EAAK,IAAI,IAAI,EAAA,EAAI,CAAA,WAAA,EAAc,GAAG,CAAA,gDAAA,CAAA,EAAoD,EAAE,CAAA;AACjH,EAAA,MAAM,QAAkB,EAAC;AACzB,EAAA,IAAI,KAAA,CAAM,UAAA,EAAY,KAAA,CAAM,IAAA,CAAK,CAAA,GAAA,EAAM,MAAM,UAAU,CAAA,EAAA,EAAK,KAAA,CAAM,UAAU,CAAA,8
BAAA,CAA2B,CAAA;AACvG,EAAA,IAAI,KAAA,CAAM,cAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,cAAc,CAAA,EAAA,EAAK,KAAA,CAAM,cAAc,CAAA,8BAAA,CAA2B,CAAA;AAC3F,EAAA,IAAI,KAAA,CAAM,gBAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,gBAAgB,CAAA,EAAA,EAAK,KAAA,CAAM,gBAAgB,CAAA,yCAAA,CAAsC,CAAA;AAC1G,EAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,IAAA,KAAA,CAAM,IAAA,CAAK,0CAAA,EAA4C,EAAA,EAAI,GAAG,OAAO,EAAE,CAAA;AAAA,EACzE;AACA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB","file":"markdown.js","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. 
Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. 
Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n 
if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 
'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the UA:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'browser'` — looks like a real browser. Could be a genuine user or\n * a Playwright-based agent (Aider, OpenCode) that can't be distinguished\n * at the UA layer.\n * - `'other'` — unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. 
*/\n codingAgentHint: boolean\n}\n\n/**\n * One-stop classification of a user-agent. Combines {@link isAiBot},\n * {@link isHttpClient}, and {@link parseBotName} into a single structured\n * result. Used internally by `trackVisit` to populate event properties;\n * useful in consumer code when you need all signals at once.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n","import { isAiBot } from './bots.js'\n\nexport type MarkdownServeReason =\n | 'ua-rewrite'\n | 'md-suffix'\n | 'accept-header'\n\nexport interface MarkdownDecision {\n /** Why this request should be served Markdown. */\n reason: MarkdownServeReason\n /**\n * The request's original logical path, with any trailing `.md` stripped.\n * Use this when mapping to a mirror file.\n */\n strippedPath: string\n}\n\n/**\n * Decide whether the request should be served Markdown instead of HTML.\n * Returns `null` when the request should go through your normal handler.\n *\n * Covers three triggers:\n * - Known AI-bot UA on any URL (`ua-rewrite`)\n * - Explicit `.md` suffix on the URL (`md-suffix`)\n * - `Accept: text/markdown` header (`accept-header`)\n *\n * This helper intentionally does not perform the rewrite itself — routing is\n * framework-specific (NextResponse.rewrite for Next.js, ctx.rewrite for\n * Hono, etc.). 
Use the returned decision to build the appropriate response.\n */\nexport function markdownServeDecision(req: Request): MarkdownDecision | null {\n let pathname = '/'\n try {\n pathname = new URL(req.url).pathname\n } catch {\n pathname = req.url || '/'\n }\n\n const ua = req.headers.get('user-agent') || ''\n if (isAiBot(ua)) {\n return { reason: 'ua-rewrite', strippedPath: pathname }\n }\n\n if (pathname.endsWith('.md')) {\n return { reason: 'md-suffix', strippedPath: pathname.replace(/\\.md$/, '') }\n }\n\n const accept = req.headers.get('accept') || ''\n if (accept.includes('text/markdown')) {\n return { reason: 'accept-header', strippedPath: pathname }\n }\n\n return null\n}\n\nexport interface MarkdownHeadersInput {\n /**\n * If provided, rendered as `x-markdown-tokens` so agents can budget context\n * before parsing the body. Typically `Math.ceil(body.length / 4)`.\n */\n tokens?: number\n /**\n * Content-Signal directive (see contentsignals.org). Defaults to\n * `'search=yes, ai-input=yes, ai-train=no'` — change if you want to permit\n * training or restrict indexing.\n */\n contentSignal?: string\n}\n\n/**\n * Build the set of response headers to attach to a Markdown response. Safe\n * defaults: UTF-8 text/markdown, Vary: accept, and a Content-Signal directive\n * that permits search + agent input but denies training.\n */\nexport function markdownHeaders(input: MarkdownHeadersInput = {}): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'text/markdown; charset=utf-8',\n 'Content-Signal': input.contentSignal ?? 'search=yes, ai-input=yes, ai-train=no',\n Vary: 'accept'\n }\n if (typeof input.tokens === 'number' && input.tokens > 0) {\n headers['x-markdown-tokens'] = Math.max(1, Math.ceil(input.tokens)).toString()\n }\n return headers\n}\n\nexport interface SynthesizePointerInput {\n origin: string\n pathname: string\n /** URL of the site's curated index, usually `/llms.txt`. 
*/\n llmsTxtUrl?: string\n /** URL of the full enumerated index, usually `/llms-full.txt`. */\n llmsFullTxtUrl?: string\n /** URL of the machine-readable path manifest, usually `/md/index.json`. */\n markdownIndexUrl?: string\n /** Site name to title the pointer document. Defaults to the origin hostname. */\n siteName?: string\n}\n\n/**\n * Generate a minimal pointer Markdown document for URLs that don't have a\n * pre-built mirror. Keeps the `Accept: text/markdown` contract intact\n * site-wide — agents always get *something* parseable, not a 404.\n */\nexport function synthesizeMarkdownPointer(input: SynthesizePointerInput): string {\n const site =\n input.siteName ??\n (() => {\n try {\n return new URL(input.origin).hostname\n } catch {\n return input.origin\n }\n })()\n const url = `${input.origin}${input.pathname}`\n const lines: string[] = [`# ${site}`, '', `This page (${url}) does not have a dedicated Markdown mirror yet.`, '']\n const links: string[] = []\n if (input.llmsTxtUrl) links.push(`- [${input.llmsTxtUrl}](${input.llmsTxtUrl}) — curated index of docs`)\n if (input.llmsFullTxtUrl)\n links.push(`- [${input.llmsFullTxtUrl}](${input.llmsFullTxtUrl}) — full enumerated index`)\n if (input.markdownIndexUrl)\n links.push(`- [${input.markdownIndexUrl}](${input.markdownIndexUrl}) — JSON index of all Markdown paths`)\n if (links.length) {\n lines.push('For machine-readable documentation, see:', '', ...links, '')\n }\n return lines.join('\\n')\n}\n"]} | ||
| {"version":3,"sources":["../src/bots.ts","../src/markdown.ts"],"names":[],"mappings":";AAgBO,IAAM,cAAA,GACX,2QAAA;AAsBK,SAAS,QAAQ,SAAA,EAA+C;AACrE,EAAA,IAAI,CAAC,WAAW,OAAO,KAAA;AACvB,EAAA,OAAO,cAAA,CAAe,KAAK,SAAS,CAAA;AACtC;;;ACZO,SAAS,sBAAsB,GAAA,EAAuC;AAC3E,EAAA,IAAI,QAAA,GAAW,GAAA;AACf,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,IAAI,GAAA,CAAI,GAAA,CAAI,GAAG,CAAA,CAAE,QAAA;AAAA,EAC9B,CAAA,CAAA,MAAQ;AACN,IAAA,QAAA,GAAW,IAAI,GAAA,IAAO,GAAA;AAAA,EACxB;AAEA,EAAA,MAAM,EAAA,GAAK,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,YAAY,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,OAAA,CAAQ,EAAE,CAAA,EAAG;AACf,IAAA,OAAO,EAAE,MAAA,EAAQ,YAAA,EAAc,YAAA,EAAc,QAAA,EAAS;AAAA,EACxD;AAEA,EAAA,IAAI,QAAA,CAAS,QAAA,CAAS,KAAK,CAAA,EAAG;AAC5B,IAAA,OAAO,EAAE,QAAQ,WAAA,EAAa,YAAA,EAAc,SAAS,OAAA,CAAQ,OAAA,EAAS,EAAE,CAAA,EAAE;AAAA,EAC5E;AAEA,EAAA,MAAM,MAAA,GAAS,GAAA,CAAI,OAAA,CAAQ,GAAA,CAAI,QAAQ,CAAA,IAAK,EAAA;AAC5C,EAAA,IAAI,MAAA,CAAO,QAAA,CAAS,eAAe,CAAA,EAAG;AACpC,IAAA,OAAO,EAAE,MAAA,EAAQ,eAAA,EAAiB,YAAA,EAAc,QAAA,EAAS;AAAA,EAC3D;AAEA,EAAA,OAAO,IAAA;AACT;AAqBO,SAAS,eAAA,CAAgB,KAAA,GAA8B,EAAC,EAA2B;AACxF,EAAA,MAAM,OAAA,GAAkC;AAAA,IACtC,cAAA,EAAgB,8BAAA;AAAA,IAChB,gBAAA,EAAkB,MAAM,aAAA,IAAiB,uCAAA;AAAA,IACzC,IAAA,EAAM;AAAA,GACR;AACA,EAAA,IAAI,OAAO,KAAA,CAAM,MAAA,KAAW,QAAA,IAAY,KAAA,CAAM,SAAS,CAAA,EAAG;AACxD,IAAA,OAAA,CAAQ,mBAAmB,CAAA,GAAI,IAAA,CAAK,GAAA,CAAI,CAAA,EAAG,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,MAAM,CAAC,CAAA,CAAE,QAAA,EAAS;AAAA,EAC/E;AACA,EAAA,OAAO,OAAA;AACT;AAoBO,SAAS,0BAA0B,KAAA,EAAuC;AAC/E,EAAA,MAAM,IAAA,GACJ,KAAA,CAAM,QAAA,IAAA,CACL,MAAM;AACL,IAAA,IAAI;AACF,MAAA,OAAO,IAAI,GAAA,CAAI,KAAA,CAAM,MAAM,CAAA,CAAE,QAAA;AAAA,IAC/B,CAAA,CAAA,MAAQ;AACN,MAAA,OAAO,KAAA,CAAM,MAAA;AAAA,IACf;AAAA,EACF,CAAA,GAAG;AACL,EAAA,MAAM,MAAM,CAAA,EAAG,KAAA,CAAM,MAAM,CAAA,EAAG,MAAM,QAAQ,CAAA,CAAA;AAC5C,EAAA,MAAM,KAAA,GAAkB,CAAC,CAAA,EAAA,EAAK,IAAI,IAAI,EAAA,EAAI,CAAA,WAAA,EAAc,GAAG,CAAA,gDAAA,CAAA,EAAoD,EAAE,CAAA;AACjH,EAAA,MAAM,QAAkB,EAAC;AACzB,EAAA,IAAI,KAAA,CAAM,UAAA,EAAY,KAAA,CAAM,IAAA,CAAK,CAAA,GAAA,EAAM,MAAM,UAAU,CAAA,EAAA,EAAK,KAAA,CAAM,UAAU,CAAA,8
BAAA,CAA2B,CAAA;AACvG,EAAA,IAAI,KAAA,CAAM,cAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,cAAc,CAAA,EAAA,EAAK,KAAA,CAAM,cAAc,CAAA,8BAAA,CAA2B,CAAA;AAC3F,EAAA,IAAI,KAAA,CAAM,gBAAA;AACR,IAAA,KAAA,CAAM,KAAK,CAAA,GAAA,EAAM,KAAA,CAAM,gBAAgB,CAAA,EAAA,EAAK,KAAA,CAAM,gBAAgB,CAAA,yCAAA,CAAsC,CAAA;AAC1G,EAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,IAAA,KAAA,CAAM,IAAA,CAAK,0CAAA,EAA4C,EAAA,EAAI,GAAG,OAAO,EAAE,CAAA;AAAA,EACzE;AACA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB","file":"markdown.js","sourcesContent":["/**\n * User-agent substrings that identify **publicly declared** AI crawlers — the\n * branded bots that identify themselves by name (OpenAI's GPTBot, Anthropic's\n * ClaudeBot, Perplexity-User, Google-Extended, etc.). High-confidence: when\n * this matches, the request almost certainly comes from that vendor's crawler\n * fleet.\n *\n * Does NOT include **coding-agent traffic** (Claude Code, Cline, Cursor,\n * Windsurf, Aider, OpenCode, VS Code). Those tools use generic HTTP library\n * UAs (axios, curl, got, colly, Electron) or spoof full browser UAs — they\n * can't be distinguished from non-AI traffic by UA alone. See\n * {@link HTTP_CLIENT_PATTERN} for the loose heuristic layer.\n *\n * Sources consulted when updating: darkvisitors.com, vendor docs from OpenAI,\n * Anthropic, Google, Perplexity, Cohere, Apple, Bytedance.\n */\nexport const AI_BOT_PATTERN =\n /ClaudeBot|Claude-User|Anthropic|ChatGPT-User|GPTBot|OAI-SearchBot|PerplexityBot|Perplexity-User|Google-Extended|Applebot-Extended|cohere-ai|Bytespider|CCBot|Amazonbot|Meta-ExternalAgent|FacebookBot|DuckAssistBot|MistralAI-User|YouBot|AI2Bot|Diffbot|Cursor|Windsurf/i\n\n/**\n * HTTP library / runtime signatures frequently used by coding agents. Matching\n * any of these is a **loose** signal — legitimate curl scripts, CI jobs, and\n * server-to-server traffic use the same libraries. 
Use this for the wider\n * net (`coding_agent_hint: true`) and pair with other signals (request\n * shape, JA4 fingerprint, path patterns) for higher confidence.\n *\n * Based on behavioural signatures observed by Addy Osmani:\n * Claude Code → axios/1.8.4\n * Cline, Junie → curl/8.4.0\n * Cursor → got (sindresorhus/got)\n * Windsurf → colly\n * VS Code → Electron / Chromium\n *\n * Aider and OpenCode use Playwright-driven full Mozilla/Safari UAs and are\n * indistinguishable from real browsers at the UA layer.\n */\nexport const HTTP_CLIENT_PATTERN =\n /axios\\/|curl\\/|(?:^|[\\s(])got(?:\\/|[\\s(])|\\bcolly\\b|Electron\\/|node-fetch\\/|python-requests\\/|Go-http-client\\/|okhttp\\/|aiohttp\\/|Deno\\//i\n\nexport function isAiBot(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return AI_BOT_PATTERN.test(userAgent)\n}\n\nexport function isHttpClient(userAgent: string | null | undefined): boolean {\n if (!userAgent) return false\n return HTTP_CLIENT_PATTERN.test(userAgent)\n}\n\n/**\n * Map a user-agent string to a coarse, human-readable label. Returns one of:\n *\n * - A branded-crawler name (`'Claude'`, `'ChatGPT'`, …) — pair with\n * {@link isAiBot} for `is_ai_bot: true` segmentation.\n * - An HTTP-library name (`'curl'`, `'axios'`, `'got'`, `'colly'`,\n * `'Electron'`, …) — hint of a coding agent or automation; not\n * conclusive. 
Pair with {@link isHttpClient}.\n * - `'Browser'` for typical desktop browsers (possibly spoofed by\n * Playwright-based agents like Aider/OpenCode — this label alone can't\n * tell you).\n * - `'Other'` for anything unrecognised or empty input.\n */\nexport function parseBotName(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const s = userAgent.toLowerCase()\n\n // Publicly declared AI crawlers (high confidence).\n if (s.includes('chatgpt-user') || s.includes('gptbot') || s.includes('oai-searchbot') || s.includes('openai'))\n return 'ChatGPT'\n if (s.includes('claudebot') || s.includes('claude-user') || s.includes('anthropic')) return 'Claude'\n if (s.includes('perplexitybot') || s.includes('perplexity-user')) return 'Perplexity'\n if (s.includes('ccbot')) return 'Common Crawl'\n if (s.includes('google-extended') || s.includes('googlebot')) return 'Google'\n if (s.includes('applebot-extended') || s.includes('applebot')) return 'Apple'\n if (s.includes('bingbot')) return 'Bing'\n if (s.includes('bytespider')) return 'Bytespider'\n if (s.includes('amazonbot')) return 'Amazon'\n if (s.includes('meta-externalagent') || s.includes('facebookbot')) return 'Meta'\n if (s.includes('mistralai-user')) return 'Mistral'\n if (s.includes('duckassistbot')) return 'DuckDuckGo'\n if (s.includes('youbot')) return 'You.com'\n if (s.includes('diffbot')) return 'Diffbot'\n if (s.includes('ai2bot')) return 'AI2'\n if (s.includes('cohere')) return 'Cohere'\n if (s.includes('cursor')) return 'Cursor'\n if (s.includes('windsurf')) return 'Windsurf'\n if (s.includes('petalbot')) return 'PetalBot'\n\n // SEO crawlers and monitoring bots.\n if (s.includes('ahrefsbot')) return 'Ahrefs'\n if (s.includes('semrushbot')) return 'Semrush'\n if (s.includes('mj12bot')) return 'Majestic'\n if (s.includes('dotbot')) return 'Moz'\n if (s.includes('rogerbot')) return 'Moz'\n if (s.includes('screaming frog')) return 'Screaming Frog'\n 
if (s.includes('sitebulb')) return 'Sitebulb'\n if (s.includes('linkfluence')) return 'Linkfluence'\n if (s.includes('dataforseo')) return 'DataForSEO'\n if (s.includes('serpstatbot')) return 'Serpstat'\n\n // Monitoring and feed bots.\n if (s.includes('uptimerobot')) return 'UptimeRobot'\n if (s.includes('pingdom')) return 'Pingdom'\n if (s.includes('statuscake')) return 'StatusCake'\n if (s.includes('newrelicpinger')) return 'New Relic'\n if (s.includes('datadogagent') || s.includes('datadog')) return 'Datadog'\n if (s.includes('slackbot')) return 'Slack'\n if (s.includes('twitterbot')) return 'Twitter'\n if (s.includes('linkedinbot')) return 'LinkedIn'\n if (s.includes('discordbot')) return 'Discord'\n if (s.includes('telegrambot')) return 'Telegram'\n if (s.includes('whatsapp')) return 'WhatsApp'\n\n // AI search and indexing bots.\n if (s.includes('linkupbot')) return 'Linkup'\n if (s.includes('sogou')) return 'Sogou'\n if (s.includes('yandexbot')) return 'Yandex'\n if (s.includes('baiduspider')) return 'Baidu'\n\n // Link preview fetchers.\n if (s.includes('facebookexternalhit')) return 'Facebook'\n if (s.includes('com.apple.webkit')) return 'Apple URL Preview'\n\n // Uptime and monitoring.\n if (s.includes('ohdear')) return 'Oh Dear'\n\n // Generic scrapers.\n if (s.includes('scrapy')) return 'Scrapy'\n if (s.includes('headlesschrome')) return 'Headless Chrome'\n if (s.includes('phantomjs')) return 'PhantomJS'\n if (s.includes('wget')) return 'wget'\n if (s.includes('httpie')) return 'HTTPie'\n if (s.includes('guzzlehttp')) return 'Guzzle'\n\n // HTTP library / runtime signatures (loose — coding agent or automation).\n // Check Electron before Browser since Electron UAs contain Chrome/Safari.\n if (s.includes('electron/')) return 'Electron'\n if (/curl\\//.test(s)) return 'curl'\n if (/axios\\//.test(s)) return 'axios'\n if (/(?:^|[\\s(])got(?:\\/|[\\s(])/.test(s)) return 'got'\n if (/\\bcolly\\b/.test(s)) return 'colly'\n if (/node-fetch\\//.test(s)) return 
'node-fetch'\n if (/python-requests\\//.test(s)) return 'python-requests'\n if (/go-http-client\\//.test(s)) return 'Go http client'\n if (/okhttp\\//.test(s)) return 'OkHttp'\n if (/aiohttp\\//.test(s)) return 'aiohttp'\n if (/deno\\//.test(s)) return 'Deno'\n\n // Real browsers (or UAs spoofed to look like them — see Aider/OpenCode note).\n if (s.includes('mozilla') || s.includes('chrome') || s.includes('safari') || s.includes('firefox'))\n return 'Browser'\n\n return 'Other'\n}\n\n/**\n * Return the first product token from a UA header, useful for segmenting by\n * client without hard-coding every bot name. Falls back to `'Other'` for empty\n * input.\n */\nexport function firstUserAgentProduct(userAgent: string | null | undefined): string {\n if (!userAgent || typeof userAgent !== 'string') return 'Other'\n const compatibleMatch = userAgent.match(/compatible;\\s*([^/;\\s]+)(?:\\/[^\\s;]*)?/i)\n if (compatibleMatch && compatibleMatch[1]) return compatibleMatch[1].trim()\n const first = userAgent.trim().split('/')[0]?.trim().split(/\\s+/)[0]?.trim()\n return first || 'Other'\n}\n\n/**\n * Detect likely headless/automated browsers by checking for missing headers\n * that real browsers always send. 
Playwright, Puppeteer, and similar tools\n * spoof the UA but often omit standard browser headers.\n *\n * Signals checked (each scores 1 point):\n * - Missing `Accept-Language` — every real browser sends this\n * - Missing `Sec-Fetch-Mode` — sent by all modern browsers\n * - Missing `Sec-CH-UA` — Client Hints, Chromium 89+\n * - `Sec-CH-UA` contains \"HeadlessChrome\"\n * - Missing or bare Accept header — browsers send detailed accept lists\n * - `Connection: close` with browser UA — browsers use keep-alive\n *\n * Returns a score (0-6), the signals that fired, and a boolean `likely`\n * flag (score >= 2 with a browser-like UA).\n */\nexport function detectHeadless(req: Request): HeadlessDetection {\n const signals: string[] = []\n const ua = (req.headers.get('user-agent') || '').toLowerCase()\n const isBrowserUA =\n ua.includes('mozilla') || ua.includes('chrome') || ua.includes('safari') || ua.includes('firefox')\n\n if (!isBrowserUA) return { score: 0, signals: [], likely: false }\n\n if (!req.headers.get('accept-language')) {\n signals.push('missing-accept-language')\n }\n if (!req.headers.get('sec-fetch-mode')) {\n signals.push('missing-sec-fetch-mode')\n }\n const secChUa = req.headers.get('sec-ch-ua')\n if (!secChUa) {\n signals.push('missing-sec-ch-ua')\n } else if (secChUa.toLowerCase().includes('headlesschrome')) {\n signals.push('headless-chrome-hint')\n }\n const accept = req.headers.get('accept') || ''\n if (!accept || accept === '*/*') {\n signals.push('missing-or-bare-accept')\n }\n if ((req.headers.get('connection') || '').toLowerCase() === 'close') {\n signals.push('connection-close')\n }\n\n const score = signals.length\n return { score, signals, likely: score >= 2 }\n}\n\nexport interface HeadlessDetection {\n /** Number of suspicious signals found (0-6). */\n score: number\n /** Names of the specific signals that fired. */\n signals: string[]\n /** True when score >= 2 — strong headless indication. 
*/\n likely: boolean\n}\n\nexport type AgentKind =\n | 'declared-crawler'\n | 'coding-agent-hint'\n | 'headless-likely'\n | 'browser'\n | 'other'\n\nexport interface AgentClassification {\n /**\n * Categorical tag for the request:\n *\n * - `'declared-crawler'` — {@link AI_BOT_PATTERN} matched. High confidence.\n * - `'coding-agent-hint'` — {@link HTTP_CLIENT_PATTERN} matched. Loose\n * signal; could be a coding agent, a curl script, or any automation.\n * - `'headless-likely'` — Browser-like UA but missing standard headers.\n * Strong signal of Playwright/Puppeteer automation (Aider, OpenCode, etc.).\n * - `'browser'` — Looks like a real browser with expected headers present.\n * - `'other'` — Unrecognised or empty.\n */\n kind: AgentKind\n /** Human-readable label, same string {@link parseBotName} returns. */\n label: string\n /** Strict: `true` only when the UA matches a branded AI crawler. */\n isAiBot: boolean\n /** Loose: `true` for known HTTP-library / automation UAs. */\n codingAgentHint: boolean\n /** Headless browser detection result. Only populated when `req` is passed. */\n headless?: HeadlessDetection\n}\n\n/**\n * UA-only classification. Use {@link classifyRequest} for full detection\n * including headless browser heuristics.\n */\nexport function classifyAgent(userAgent: string | null | undefined): AgentClassification {\n const label = parseBotName(userAgent)\n const aiBot = isAiBot(userAgent)\n const httpClient = isHttpClient(userAgent)\n\n let kind: AgentKind\n if (aiBot) kind = 'declared-crawler'\n else if (httpClient) kind = 'coding-agent-hint'\n else if (label === 'Browser') kind = 'browser'\n else kind = 'other'\n\n return { kind, label, isAiBot: aiBot, codingAgentHint: httpClient }\n}\n\n/**\n * Full request classification — combines UA parsing with header-based\n * headless detection. 
When a browser-like UA is missing standard headers,\n * the kind is promoted from `'browser'` to `'headless-likely'`.\n */\nexport function classifyRequest(req: Request): AgentClassification {\n const userAgent = req.headers.get('user-agent') || ''\n const base = classifyAgent(userAgent)\n const headless = detectHeadless(req)\n\n let kind = base.kind\n if (kind === 'browser' && headless.likely) {\n kind = 'headless-likely'\n }\n\n return { ...base, kind, headless }\n}\n","import { isAiBot } from './bots.js'\n\nexport type MarkdownServeReason =\n | 'ua-rewrite'\n | 'md-suffix'\n | 'accept-header'\n\nexport interface MarkdownDecision {\n /** Why this request should be served Markdown. */\n reason: MarkdownServeReason\n /**\n * The request's original logical path, with any trailing `.md` stripped.\n * Use this when mapping to a mirror file.\n */\n strippedPath: string\n}\n\n/**\n * Decide whether the request should be served Markdown instead of HTML.\n * Returns `null` when the request should go through your normal handler.\n *\n * Covers three triggers:\n * - Known AI-bot UA on any URL (`ua-rewrite`)\n * - Explicit `.md` suffix on the URL (`md-suffix`)\n * - `Accept: text/markdown` header (`accept-header`)\n *\n * This helper intentionally does not perform the rewrite itself — routing is\n * framework-specific (NextResponse.rewrite for Next.js, ctx.rewrite for\n * Hono, etc.). 
Use the returned decision to build the appropriate response.\n */\nexport function markdownServeDecision(req: Request): MarkdownDecision | null {\n let pathname = '/'\n try {\n pathname = new URL(req.url).pathname\n } catch {\n pathname = req.url || '/'\n }\n\n const ua = req.headers.get('user-agent') || ''\n if (isAiBot(ua)) {\n return { reason: 'ua-rewrite', strippedPath: pathname }\n }\n\n if (pathname.endsWith('.md')) {\n return { reason: 'md-suffix', strippedPath: pathname.replace(/\\.md$/, '') }\n }\n\n const accept = req.headers.get('accept') || ''\n if (accept.includes('text/markdown')) {\n return { reason: 'accept-header', strippedPath: pathname }\n }\n\n return null\n}\n\nexport interface MarkdownHeadersInput {\n /**\n * If provided, rendered as `x-markdown-tokens` so agents can budget context\n * before parsing the body. Typically `Math.ceil(body.length / 4)`.\n */\n tokens?: number\n /**\n * Content-Signal directive (see contentsignals.org). Defaults to\n * `'search=yes, ai-input=yes, ai-train=no'` — change if you want to permit\n * training or restrict indexing.\n */\n contentSignal?: string\n}\n\n/**\n * Build the set of response headers to attach to a Markdown response. Safe\n * defaults: UTF-8 text/markdown, Vary: accept, and a Content-Signal directive\n * that permits search + agent input but denies training.\n */\nexport function markdownHeaders(input: MarkdownHeadersInput = {}): Record<string, string> {\n const headers: Record<string, string> = {\n 'Content-Type': 'text/markdown; charset=utf-8',\n 'Content-Signal': input.contentSignal ?? 'search=yes, ai-input=yes, ai-train=no',\n Vary: 'accept'\n }\n if (typeof input.tokens === 'number' && input.tokens > 0) {\n headers['x-markdown-tokens'] = Math.max(1, Math.ceil(input.tokens)).toString()\n }\n return headers\n}\n\nexport interface SynthesizePointerInput {\n origin: string\n pathname: string\n /** URL of the site's curated index, usually `/llms.txt`. 
*/\n llmsTxtUrl?: string\n /** URL of the full enumerated index, usually `/llms-full.txt`. */\n llmsFullTxtUrl?: string\n /** URL of the machine-readable path manifest, usually `/md/index.json`. */\n markdownIndexUrl?: string\n /** Site name to title the pointer document. Defaults to the origin hostname. */\n siteName?: string\n}\n\n/**\n * Generate a minimal pointer Markdown document for URLs that don't have a\n * pre-built mirror. Keeps the `Accept: text/markdown` contract intact\n * site-wide — agents always get *something* parseable, not a 404.\n */\nexport function synthesizeMarkdownPointer(input: SynthesizePointerInput): string {\n const site =\n input.siteName ??\n (() => {\n try {\n return new URL(input.origin).hostname\n } catch {\n return input.origin\n }\n })()\n const url = `${input.origin}${input.pathname}`\n const lines: string[] = [`# ${site}`, '', `This page (${url}) does not have a dedicated Markdown mirror yet.`, '']\n const links: string[] = []\n if (input.llmsTxtUrl) links.push(`- [${input.llmsTxtUrl}](${input.llmsTxtUrl}) — curated index of docs`)\n if (input.llmsFullTxtUrl)\n links.push(`- [${input.llmsFullTxtUrl}](${input.llmsFullTxtUrl}) — full enumerated index`)\n if (input.markdownIndexUrl)\n links.push(`- [${input.markdownIndexUrl}](${input.markdownIndexUrl}) — JSON index of all Markdown paths`)\n if (links.length) {\n lines.push('For machine-readable documentation, see:', '', ...links, '')\n }\n return lines.join('\\n')\n}\n"]} |
+1
-1
| { | ||
| "name": "@apideck/agent-analytics", | ||
| "version": "0.6.0", | ||
| "version": "0.7.0", | ||
| "description": "Track AI agent and bot traffic to your Next.js / Vercel app — PostHog, webhooks, or any custom analytics backend. Detects Claude, ChatGPT, Perplexity, Google-Extended, and more.", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain riskPackage contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
188160
13.02%1032
12.66%