isbot
Advanced tools
Comparing version 4.0.0 to 4.1.0
@@ -18,2 +18,6 @@ /** | ||
/** | ||
* Create a custom isbot function from a list of pattern strings. | ||
*/ | ||
export declare const createIsbotFromList: (list: string[]) => (userAgent: string) => boolean; | ||
/** | ||
* Find the first part of the user agent that matches a bot pattern. | ||
@@ -20,0 +24,0 @@ */ |
@@ -27,3 +27,3 @@ var isbot = (() => { | ||
// src/pattern.ts | ||
var regex = / daum[ /]| deusu\/| yadirectfetcher|(?:^| )site|(?:^|[^g])news|(?<! (?:channel\/|google\/))google(?!(app|\/google| pixel))|(?<! cu)bot|(?<! ya(?:yandex)?)search|(?<!(?:lib))http|@[a-z]|\(at\)[a-z]|\(github\.com\/|\[at\][a-z]|^12345|^<|^[\w \.\-\(\)]+(\/v?\d+(\.\d+)?(\.\d{1,10})?)?$|^[^ ]{50,}$|^active|^ad muncher|^amaya|^anglesharp\/|^anonymous|^avsdevicesdk\/|^axios\/|^bidtellect\/|^biglotron|^btwebclient\/|^castro|^clamav[ /]|^client\/|^cobweb\/|^coccoc|^custom|^ddg[_-]android|^discourse|^dispatch\/\d|^downcast\/|^duckduckgo|^facebook|^fdm[ /]\d|^getright\/|^gozilla\/|^hatena|^hobbit|^hotzonu|^hwcdn\/|^jeode\/|^jetty\/|^jigsaw|^linkdex|^lwp[-: ]|^metauri|^microsoft bits|^movabletype|^mozilla\/\d\.\d \(compatible;?\)$|^mozilla\/\d\.\d \w*$|^navermailapp|^netsurf|^offline explorer|^php|^postman|^postrank|^python|^read|^reed|^restsharp\/|^snapchat|^space bison|^svn|^swcd |^taringa|^test certificate info|^thumbor\/|^tumblr\/|^user-agent:mozilla|^valid|^venus\/fedoraplanet|^w3c|^webbandit\/|^webcopier|^wget|^whatsapp|^xenu link sleuth|^yahoo|^yandex|^zdm\/\d|^zoom marketplace\/|^{{.*}}$|adbeat\.com|appinsights|archive|ask jeeves\/teoma|bit\.ly\/|bluecoat drtr|browsex|burpcollaborator|capture|catch|check|chrome-lighthouse|chromeframe|classifier|cloud|crawl|cryptoapi|dareboost|datanyze|dataprovider|dejaclick|dmbrowser|download|evc-batch\/|feed|firephp|freesafeip|gomezagent|headlesschrome\/|httrack|hubspot marketing grader|hydra|ibisbrowser|images|inspect|iplabel|ips-agent|java(?!;)|library|mail\.ru\/|manager|monitor|morningscore\/|neustar wpm|nutch|offbyone|optimize|pageburst|pagespeed|parser|perl|phantom|pingdom|powermarks|preview|proxy|ptst[ /]\d|reader|rexx;|rigor|rss|scan|scrape|serp ?reputation ?management|server|sogou|sparkler\/|speedcurve|spider|splash|statuscake|stumbleupon\.com|supercleaner|synapse|synthetic|torrent|tracemyfile|transcoder|trendsmapresolver|twingly 
recon|url|virtuoso|wappalyzer|webglance|webkit2png|websitemetadataretriever|whatcms\/|wordpress|zgrab/i; | ||
var regex = / daum[ /]| deusu\/| yadirectfetcher|(?:^| )site|(?:^|[^g])news|(?<! (?:channel\/|google\/))google(?!(app|\/google| pixel))|(?<! cu)bot|(?<! ya(?:yandex)?)search|(?<!(?:lib))http|(?<!hm)score|@[a-z]|\(at\)[a-z]|\(github\.com\/|\[at\][a-z]|^12345|^<|^[\w \.\-\(?:\):]+(?:\/v?\d+(\.\d+)?(?:\.\d{1,10})?)?(?:,|$)|^[^ ]{50,}$|^active|^ad muncher|^amaya|^anglesharp\/|^avsdevicesdk\/|^bidtellect\/|^biglotron|^btwebclient\/|^clamav[ /]|^client\/|^cobweb\/|^coccoc|^custom|^ddg[_-]android|^discourse|^dispatch\/\d|^downcast\/|^duckduckgo|^facebook|^fdm[ /]\d|^getright\/|^gozilla\/|^hatena|^hobbit|^hotzonu|^hwcdn\/|^jeode\/|^jetty\/|^jigsaw|^linkdex|^metauri|^microsoft bits|^movabletype|^mozilla\/\d\.\d \(compatible;?\)$|^mozilla\/\d\.\d \w*$|^navermailapp|^netsurf|^offline explorer|^php|^postman|^postrank|^python|^read|^reed|^restsharp\/|^snapchat|^space bison|^svn|^swcd |^taringa|^thumbor\/|^tumblr\/|^user-agent:|^valid|^venus\/fedoraplanet|^w3c|^webbandit\/|^webcopier|^wget|^whatsapp|^xenu link sleuth|^yahoo|^yandex|^zdm\/\d|^zoom marketplace\/|^{{.*}}$|adbeat\.com|appinsights|archive|ask jeeves\/teoma|bit\.ly\/|bluecoat drtr|browsex|burpcollaborator|capture|catch|check|chrome-lighthouse|chromeframe|classifier|cloud|crawl|cryptoapi|dareboost|datanyze|dataprovider|dejaclick|dmbrowser|download|evc-batch\/|feed|firephp|freesafeip|gomezagent|headless|httrack|hubspot marketing grader|hydra|ibisbrowser|images|inspect|iplabel|ips-agent|java(?!;)|library|mail\.ru\/|manager|monitor|neustar wpm|nutch|offbyone|optimize|pageburst|pagespeed|parser|perl|phantom|pingdom|powermarks|preview|proxy|ptst[ /]\d|reader|reputation|resolver|retriever|rexx;|rigor|rss|scan|scrape|server|sogou|sparkler\/|speedcurve|spider|splash|statuscake|stumbleupon\.com|supercleaner|synapse|synthetic|torrent|tracemyfile|transcoder|twingly recon|url|virtuoso|wappalyzer|webglance|webkit2png|whatcms\/|wordpress|zgrab/i; | ||
@@ -30,0 +30,0 @@ // src/index.ts |
30
index.js
@@ -23,2 +23,3 @@ var __defProp = Object.defineProperty; | ||
createIsbot: () => createIsbot, | ||
createIsbotFromList: () => createIsbotFromList, | ||
isbot: () => isbot, | ||
@@ -35,3 +36,3 @@ isbotMatch: () => isbotMatch, | ||
// src/pattern.ts | ||
var regex = / daum[ /]| deusu\/| yadirectfetcher|(?:^| )site|(?:^|[^g])news|(?<! (?:channel\/|google\/))google(?!(app|\/google| pixel))|(?<! cu)bot|(?<! ya(?:yandex)?)search|(?<!(?:lib))http|@[a-z]|\(at\)[a-z]|\(github\.com\/|\[at\][a-z]|^12345|^<|^[\w \.\-\(\)]+(\/v?\d+(\.\d+)?(\.\d{1,10})?)?$|^[^ ]{50,}$|^active|^ad muncher|^amaya|^anglesharp\/|^anonymous|^avsdevicesdk\/|^axios\/|^bidtellect\/|^biglotron|^btwebclient\/|^castro|^clamav[ /]|^client\/|^cobweb\/|^coccoc|^custom|^ddg[_-]android|^discourse|^dispatch\/\d|^downcast\/|^duckduckgo|^facebook|^fdm[ /]\d|^getright\/|^gozilla\/|^hatena|^hobbit|^hotzonu|^hwcdn\/|^jeode\/|^jetty\/|^jigsaw|^linkdex|^lwp[-: ]|^metauri|^microsoft bits|^movabletype|^mozilla\/\d\.\d \(compatible;?\)$|^mozilla\/\d\.\d \w*$|^navermailapp|^netsurf|^offline explorer|^php|^postman|^postrank|^python|^read|^reed|^restsharp\/|^snapchat|^space bison|^svn|^swcd |^taringa|^test certificate info|^thumbor\/|^tumblr\/|^user-agent:mozilla|^valid|^venus\/fedoraplanet|^w3c|^webbandit\/|^webcopier|^wget|^whatsapp|^xenu link sleuth|^yahoo|^yandex|^zdm\/\d|^zoom marketplace\/|^{{.*}}$|adbeat\.com|appinsights|archive|ask jeeves\/teoma|bit\.ly\/|bluecoat drtr|browsex|burpcollaborator|capture|catch|check|chrome-lighthouse|chromeframe|classifier|cloud|crawl|cryptoapi|dareboost|datanyze|dataprovider|dejaclick|dmbrowser|download|evc-batch\/|feed|firephp|freesafeip|gomezagent|headlesschrome\/|httrack|hubspot marketing grader|hydra|ibisbrowser|images|inspect|iplabel|ips-agent|java(?!;)|library|mail\.ru\/|manager|monitor|morningscore\/|neustar wpm|nutch|offbyone|optimize|pageburst|pagespeed|parser|perl|phantom|pingdom|powermarks|preview|proxy|ptst[ /]\d|reader|rexx;|rigor|rss|scan|scrape|serp ?reputation ?management|server|sogou|sparkler\/|speedcurve|spider|splash|statuscake|stumbleupon\.com|supercleaner|synapse|synthetic|torrent|tracemyfile|transcoder|trendsmapresolver|twingly 
recon|url|virtuoso|wappalyzer|webglance|webkit2png|websitemetadataretriever|whatcms\/|wordpress|zgrab/i; | ||
var regex = / daum[ /]| deusu\/| yadirectfetcher|(?:^| )site|(?:^|[^g])news|(?<! (?:channel\/|google\/))google(?!(app|\/google| pixel))|(?<! cu)bot|(?<! ya(?:yandex)?)search|(?<!(?:lib))http|(?<!hm)score|@[a-z]|\(at\)[a-z]|\(github\.com\/|\[at\][a-z]|^12345|^<|^[\w \.\-\(?:\):]+(?:\/v?\d+(\.\d+)?(?:\.\d{1,10})?)?(?:,|$)|^[^ ]{50,}$|^active|^ad muncher|^amaya|^anglesharp\/|^avsdevicesdk\/|^bidtellect\/|^biglotron|^btwebclient\/|^clamav[ /]|^client\/|^cobweb\/|^coccoc|^custom|^ddg[_-]android|^discourse|^dispatch\/\d|^downcast\/|^duckduckgo|^facebook|^fdm[ /]\d|^getright\/|^gozilla\/|^hatena|^hobbit|^hotzonu|^hwcdn\/|^jeode\/|^jetty\/|^jigsaw|^linkdex|^metauri|^microsoft bits|^movabletype|^mozilla\/\d\.\d \(compatible;?\)$|^mozilla\/\d\.\d \w*$|^navermailapp|^netsurf|^offline explorer|^php|^postman|^postrank|^python|^read|^reed|^restsharp\/|^snapchat|^space bison|^svn|^swcd |^taringa|^thumbor\/|^tumblr\/|^user-agent:|^valid|^venus\/fedoraplanet|^w3c|^webbandit\/|^webcopier|^wget|^whatsapp|^xenu link sleuth|^yahoo|^yandex|^zdm\/\d|^zoom marketplace\/|^{{.*}}$|adbeat\.com|appinsights|archive|ask jeeves\/teoma|bit\.ly\/|bluecoat drtr|browsex|burpcollaborator|capture|catch|check|chrome-lighthouse|chromeframe|classifier|cloud|crawl|cryptoapi|dareboost|datanyze|dataprovider|dejaclick|dmbrowser|download|evc-batch\/|feed|firephp|freesafeip|gomezagent|headless|httrack|hubspot marketing grader|hydra|ibisbrowser|images|inspect|iplabel|ips-agent|java(?!;)|library|mail\.ru\/|manager|monitor|neustar wpm|nutch|offbyone|optimize|pageburst|pagespeed|parser|perl|phantom|pingdom|powermarks|preview|proxy|ptst[ /]\d|reader|reputation|resolver|retriever|rexx;|rigor|rss|scan|scrape|server|sogou|sparkler\/|speedcurve|spider|splash|statuscake|stumbleupon\.com|supercleaner|synapse|synthetic|torrent|tracemyfile|transcoder|twingly recon|url|virtuoso|wappalyzer|webglance|webkit2png|whatcms\/|wordpress|zgrab/i; | ||
@@ -49,2 +50,3 @@ // src/patterns.json | ||
"(?<!(?:lib))http", | ||
"(?<!hm)score", | ||
"@[a-z]", | ||
@@ -56,3 +58,3 @@ "\\(at\\)[a-z]", | ||
"^<", | ||
"^[\\w \\.\\-\\(\\)]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$", | ||
"^[\\w \\.\\-\\(?:\\):]+(?:/v?\\d+(\\.\\d+)?(?:\\.\\d{1,10})?)?(?:,|$)", | ||
"^[^ ]{50,}$", | ||
@@ -63,9 +65,6 @@ "^active", | ||
"^anglesharp/", | ||
"^anonymous", | ||
"^avsdevicesdk/", | ||
"^axios/", | ||
"^bidtellect/", | ||
"^biglotron", | ||
"^btwebclient/", | ||
"^castro", | ||
"^clamav[ /]", | ||
@@ -93,3 +92,2 @@ "^client/", | ||
"^linkdex", | ||
"^lwp[-: ]", | ||
"^metauri", | ||
@@ -115,6 +113,5 @@ "^microsoft bits", | ||
"^taringa", | ||
"^test certificate info", | ||
"^thumbor/", | ||
"^tumblr/", | ||
"^user-agent:mozilla", | ||
"^user-agent:", | ||
"^valid", | ||
@@ -161,3 +158,3 @@ "^venus/fedoraplanet", | ||
"gomezagent", | ||
"headlesschrome/", | ||
"headless", | ||
"httrack", | ||
@@ -176,3 +173,2 @@ "hubspot marketing grader", | ||
"monitor", | ||
"morningscore/", | ||
"neustar wpm", | ||
@@ -193,2 +189,5 @@ "nutch", | ||
"reader", | ||
"reputation", | ||
"resolver", | ||
"retriever", | ||
"rexx;", | ||
@@ -199,3 +198,2 @@ "rigor", | ||
"scrape", | ||
"serp ?reputation ?management", | ||
"server", | ||
@@ -215,3 +213,2 @@ "sogou", | ||
"transcoder", | ||
"trendsmapresolver", | ||
"twingly recon", | ||
@@ -223,3 +220,2 @@ "url", | ||
"webkit2png", | ||
"websitemetadataretriever", | ||
"whatcms/", | ||
@@ -235,5 +231,9 @@ "wordpress", | ||
var createIsbot = (customPattern) => (userAgent) => Boolean(userAgent) && customPattern.test(userAgent); | ||
var createIsbotFromList = (list2) => { | ||
const pattern2 = new RegExp(list2.join("|"), "i"); | ||
return (userAgent) => Boolean(userAgent) && pattern2.test(userAgent); | ||
}; | ||
var isbotMatch = (userAgent) => userAgent.match(pattern)?.[0]; | ||
var isbotMatches = (userAgent) => list.map((part) => userAgent.match(new RegExp(part, "i"))?.[0]).filter(Boolean); | ||
var isbotPattern = (userAgent) => list.find((patten) => new RegExp(patten, "i").test(userAgent)) ?? null; | ||
var isbotPatterns = (userAgent) => list.filter((patten) => new RegExp(patten, "i").test(userAgent)); | ||
var isbotPattern = (userAgent) => list.find((pattern2) => new RegExp(pattern2, "i").test(userAgent)) ?? null; | ||
var isbotPatterns = (userAgent) => list.filter((pattern2) => new RegExp(pattern2, "i").test(userAgent)); |
{ | ||
"name": "isbot", | ||
"version": "4.0.0", | ||
"version": "4.1.0", | ||
"description": "🤖 Recognise bots/crawlers/spiders using the user agent string.", | ||
@@ -5,0 +5,0 @@ "keywords": [ |
# isbot 🤖/👨🦰 | ||
[![](https://img.shields.io/npm/v/isbot.svg?style=flat-square)](https://www.npmjs.com/package/isbot) [![](https://img.shields.io/npm/dt/isbot?style=flat-square)](https://www.npmjs.com/package/isbot) [![](https://img.shields.io/circleci/build/github/omrilotan/isbot?style=flat-square)](https://circleci.com/gh/omrilotan/isbot) [![](https://img.shields.io/github/last-commit/omrilotan/isbot?style=flat-square)](https://github.com/omrilotan/isbot/graphs/commit-activity) [![](https://data.jsdelivr.com/v1/package/npm/isbot/badge)](https://www.jsdelivr.com/package/npm/isbot) | ||
[![](https://img.shields.io/npm/v/isbot/next?style=flat-square)](https://www.npmjs.com/package/isbot/v/next) [![](https://img.shields.io/npm/dt/isbot?style=flat-square)](https://www.npmjs.com/package/isbot) [![](https://img.shields.io/circleci/build/github/omrilotan/isbot?style=flat-square)](https://circleci.com/gh/omrilotan/isbot) [![](https://img.shields.io/github/last-commit/omrilotan/isbot?style=flat-square)](https://github.com/omrilotan/isbot/graphs/commit-activity) [![](https://data.jsdelivr.com/v1/package/npm/isbot/badge)](https://www.jsdelivr.com/package/npm/isbot) | ||
@@ -11,2 +11,4 @@ [![](./page/isbot.svg)](https://isbot.js.org) | ||
> | ||
> ### Version 4 will become the "latest" version on npm in January 2024 | ||
> | ||
> `npm i isbot@4` or `npm i isbot@next` | ||
@@ -49,35 +51,38 @@ | ||
| import | Type | Description | | ||
| ------------- | --------------------------------------------------- | ------------------------------------------------------------------------- | | ||
| pattern | _{RegExp}_ | The regular expression used to identify bots | | ||
| list | _{string[]}_ | List of all individual pattern parts | | ||
| isbotMatch | _{(userAgent: string): string \| null}_ | The substring matched by the regular expression | | ||
| isbotMatches | _{(userAgent: string): string[]}_ | All substrings matched by the regular expression | | ||
| isbotPattern | _{(userAgent: string): string \| null}_ | The regular expression used to identify bot substring in the user agent | | ||
| isbotPatterns | _{(userAgent: string): string[]}_ | All regular expressions used to identify bot substrings in the user agent | | ||
| createIsbot | _{(pattern: RegExp): (userAgent: string): boolean}_ | Create a custom isbot function | | ||
| import | Type | Description | | ||
| ------------------- | --------------------------------------------------- | ---------------------------------------------------------------------------- | | ||
| pattern | _{RegExp}_ | The regular expression used to identify bots | | ||
| list | _{string[]}_ | List of all individual pattern parts | | ||
| isbotMatch | _{(userAgent: string): string \| null}_ | The substring matched by the regular expression | | ||
| isbotMatches | _{(userAgent: string): string[]}_ | All substrings matched by the regular expression | | ||
| isbotPattern | _{(userAgent: string): string \| null}_ | The regular expression used to identify bot substring in the user agent | | ||
| isbotPatterns | _{(userAgent: string): string[]}_ | All regular expressions used to identify bot substrings in the user agent | | ||
| createIsbot | _{(pattern: RegExp): (userAgent: string): boolean}_ | Create a custom isbot function | | ||
| createIsbotFromList | _{(list: string[]): (userAgent: string): boolean}_ | Create a custom isbot function from a list of string representations of patterns | ||
## Examples | ||
## Example usages of helper functions | ||
### Create a custom isbot function ignoring Chrome Lighthouse | ||
Create a custom isbot that does not consider Chrome Lighthouse user agents to be bots. | ||
```ts | ||
import { createIsbot, list } from "isbot"; | ||
import { createIsbotFromList, isbotMatches, list } from "isbot"; | ||
const isbot = createIsbot( | ||
new RegExp( | ||
list | ||
.filter((record) => !new RegExp(record, "i").test("Chrome-Lighthouse")) | ||
.join("|"), | ||
"i", | ||
), | ||
const ChromeLighthouseUserAgentStrings: string[] = [ | ||
"mozilla/5.0 (macintosh; intel mac os x 10_15_7) applewebkit/537.36 (khtml, like gecko) chrome/94.0.4590.2 safari/537.36 chrome-lighthouse", | ||
"mozilla/5.0 (linux; android 7.0; moto g (4)) applewebkit/537.36 (khtml, like gecko) chrome/94.0.4590.2 mobile safari/537.36 chrome-lighthouse", | ||
]; | ||
const patternsToRemove: Set<string> = new Set( | ||
ChromeLighthouseUserAgentStrings.map(isbotMatches).flat(), | ||
); | ||
const isbot = createIsbotFromList( | ||
list.filter((record) => patternsToRemove.has(record) === false), | ||
); | ||
``` | ||
### Create a custom isbot function including another pattern | ||
Create a custom isbot that also treats an additional pattern, not originally included in the package, as a bot. | ||
```ts | ||
import { createIsbot, list } from "isbot"; | ||
import { createIsbotFromList, list } from "isbot"; | ||
const isbot = createIsbot(new RegExp(list.concat("shmulik").join("|"), "i")); | ||
const isbot = createIsbotFromList(list.concat("shmulik")); | ||
``` | ||
@@ -133,4 +138,8 @@ | ||
Remove `isbot` function default export | ||
Remove `isbot` function default export in favour of a named export. | ||
```ts | ||
import { isbot } from "isbot"; | ||
``` | ||
### [**Version 3**](https://github.com/omrilotan/isbot/releases/tag/v3.0.0) | ||
@@ -137,0 +146,0 @@ |
Sorry, the diff of this file is not supported yet
27253
512
163