Comparing version 2.4.2 to 2.5.0
@@ -5,18 +5,24 @@ /** | ||
*/ | ||
declare function testUserAgent(userAgent: string): boolean; | ||
declare function isbot(userAgent: string): boolean; | ||
/** | ||
* Extend the built-in list of bot user agent | ||
* @param additionalFilters An array of user agents | ||
*/ | ||
declare function extend(additionalFilters: string[]): void; | ||
declare namespace isbot { | ||
/** | ||
* Extend the built-in list of bot user agent | ||
* @param additionalFilters An array of user agents | ||
*/ | ||
function extend(additionalFilters: string[]): void; | ||
/** | ||
* Removes a set of user agent from the built-in list | ||
* @param excludedFilters An array of user agents | ||
*/ | ||
declare function exclude(excludedFilters: string[]): void; | ||
/** | ||
* Removes a set of user agent from the built-in list | ||
* @param excludedFilters An array of user agents | ||
*/ | ||
function exclude(excludedFilters: string[]): void; | ||
testUserAgent.extend = extend; | ||
testUserAgent.exclude = exclude; | ||
export = testUserAgent; | ||
/** | ||
* Return the respective match for bot user agent rule | ||
* @param userAgent The user agent string to look up | ||
*/ | ||
function find(userAgent: string): string; | ||
} | ||
export = isbot; |
67
index.js
@@ -1,26 +0,55 @@ | ||
var list = require('./list.json'); | ||
var regex; | ||
function update() { | ||
regex = new RegExp('(' + list.join('|') + ')', 'i'); | ||
var list = require('./list.json') | ||
var regex | ||
/** | ||
* Refresh the local regex variable (closure) | ||
*/ | ||
function update () { | ||
regex = new RegExp('(' + list.join('|') + ')', 'i') | ||
} | ||
update(); | ||
update() | ||
module.exports = function(userAgent) { | ||
return regex.test(userAgent); | ||
}; | ||
/** | ||
* Check if string matches known crawler patterns | ||
* @param {string} userAgent | ||
* @return {boolean} | ||
*/ | ||
module.exports = function (userAgent) { | ||
return regex.test(userAgent) | ||
} | ||
module.exports.extend = function(additionalFilters){ | ||
list = list.concat(additionalFilters); | ||
update(); | ||
/** | ||
* Get the match for strings' known crawler pattern | ||
* @param {string} userAgent | ||
* @return {string} | ||
*/ | ||
module.exports.find = function (userAgent) { | ||
var match = userAgent.match(regex) | ||
return match && match[0] | ||
} | ||
module.exports.exclude = function(excludedFilters){ | ||
var i = excludedFilters.length; | ||
while (i--) { | ||
var index = list.lastIndexOf(excludedFilters[i]); | ||
if (index > 0) { | ||
list.splice(index, 1); | ||
} | ||
/** | ||
* Extend patterns for known crawlers | ||
* @param {array} additionalFilters | ||
* @return {void} | ||
*/ | ||
module.exports.extend = function (additionalFilters) { | ||
list = list.concat(additionalFilters) | ||
update() | ||
} | ||
/** | ||
* Exclude patterns from bot pattern rule | ||
* @param {array} excludedFilters | ||
* @return {void} | ||
*/ | ||
module.exports.exclude = function (excludedFilters) { | ||
var i = excludedFilters.length | ||
while (i--) { | ||
var index = list.lastIndexOf(excludedFilters[i]) | ||
if (index > -1) { | ||
list.splice(index, 1) | ||
} | ||
update(); | ||
} | ||
update() | ||
} |
108
list.json
[ | ||
" Daum/", | ||
" DeuSu/", | ||
" MuckRack/", | ||
" Sysomos/", | ||
" um-LN/", | ||
"!Susie", | ||
@@ -11,5 +16,24 @@ "/www\\.answerbus\\.com", | ||
"\\(privoxy/", | ||
"^AHC/", | ||
"^Amazon CloudFront", | ||
"^axios/", | ||
"^Disqus/", | ||
"^Friendica", | ||
"^Hatena", | ||
"^http_get", | ||
"^Jetty/", | ||
"^MeltwaterNews", | ||
"^MixnodeCache/", | ||
"^newspaper/", | ||
"^NextCloud-News/", | ||
"^ng/", | ||
"A6-Indexer", | ||
"Aberja Checkomat", | ||
"^NING", | ||
"^Nuzzel", | ||
"^okhttp", | ||
"^sentry/", | ||
"^Thinklab", | ||
"^Tiny Tiny RSS/", | ||
"^Traackr.com", | ||
"^Upflow/", | ||
"^Zabbix", | ||
"Abonti", | ||
@@ -39,5 +63,9 @@ "Aboundex", | ||
"Anturis Agent", | ||
"AnyEvent-HTTP", | ||
"Apache-HttpClient", | ||
"APIs-Google", | ||
"Aport", | ||
"AppEngine-Google", | ||
"appie", | ||
"AppInsights", | ||
"Arachmo", | ||
@@ -49,3 +77,2 @@ "arachnode\\.net", | ||
"archive", | ||
"archive-com", | ||
"archiving", | ||
@@ -83,5 +110,5 @@ "asafaweb\\.com", | ||
"binlar", | ||
"Blackboard Safeassign", | ||
"BlackWidow", | ||
"blaiz-bee/", | ||
"bloglines-images/", | ||
"bloglines/", | ||
@@ -99,7 +126,6 @@ "Blogpulse", | ||
"bot", | ||
"bpimagewalker/", | ||
"BrandVerity/", | ||
"BravoBrian", | ||
"Browsershots", | ||
"bsdseek/", | ||
"BStop\\.BravoBrian\\.it Agent Detector", | ||
"btwebclient/", | ||
@@ -112,3 +138,2 @@ "BUbiNG", | ||
"butterfly", | ||
"Butterfly/", | ||
"BW-C-2", | ||
@@ -131,6 +156,5 @@ "bwh3_user_agent", | ||
"check", | ||
"checklinks/", | ||
"CheckWeb", | ||
"China Local Browse", | ||
"Chitika ContentHit", | ||
"Chrome-Lighthouse", | ||
"CJB\\.NET Proxy", | ||
@@ -169,2 +193,3 @@ "classify", | ||
"DAP x", | ||
"DareBoost", | ||
"datacha0s/", | ||
@@ -180,3 +205,2 @@ "datafountains/dmoz", | ||
"deepak-usc/isi", | ||
"del\\.icio\\.us-thumbnails/", | ||
"delegate/", | ||
@@ -194,3 +218,2 @@ "DepSpid", | ||
"DnloadMage", | ||
"DNS-Tools Header-Analyzer", | ||
"docomo/", | ||
@@ -218,3 +241,2 @@ "DomainAppender", | ||
"EBrowse \\d", | ||
"ec2linkfinder", | ||
"ecairn\\.com/grabber", | ||
@@ -228,6 +250,5 @@ "echo!/", | ||
"EmailWolf", | ||
"Embedly/", | ||
"Embedly", | ||
"envolk", | ||
"ESurf15", | ||
"europarchive\\.org", | ||
"evaliant", | ||
@@ -241,4 +262,2 @@ "eventax/", | ||
"extract", | ||
"ExtractorPro", | ||
"Extreme Picture Finder", | ||
"EyeCatcher", | ||
@@ -252,3 +271,2 @@ "eyes", | ||
"FastBug", | ||
"favcollector/", | ||
"Faveeo/", | ||
@@ -266,3 +284,2 @@ "FavIconizer", | ||
"findlink", | ||
"FindLinks", | ||
"findthatfile", | ||
@@ -273,2 +290,3 @@ "firefly/", | ||
"flexum/", | ||
"FlipboardProxy/", | ||
"FlipboardRSS/", | ||
@@ -289,3 +307,2 @@ "fluffy", | ||
"geek-tools\\.org", | ||
"genderanalyzer", | ||
"genieknows", | ||
@@ -300,2 +317,3 @@ "Genieo", | ||
"go-ahead-got-it/", | ||
"Go-http-client", | ||
"GoBeez", | ||
@@ -310,3 +328,7 @@ "goblin/", | ||
"Google Page Speed Insights", | ||
"Google Web Preview", | ||
"Google Wireless Transcoder", | ||
"Google-PhysicalWeb", | ||
"Google-Structured-Data-Testing-Tool", | ||
"google-xrawler", | ||
"GoogleImageProxy", | ||
@@ -325,6 +347,3 @@ "gopher", | ||
"haste/", | ||
"Hatena Mobile Gateway/", | ||
"Hatena Pagetitle Agent/", | ||
"Hatena RSS/", | ||
"hatenascreenshot/", | ||
"HeadlessChrome/", | ||
"helix/", | ||
@@ -340,7 +359,5 @@ "heritrix", | ||
"html2", | ||
"htmlparser/", | ||
"http-header-abfrage/", | ||
"http://anonymouse\\.org/", | ||
"http://ask\\.24x\\.info/", | ||
"http://ozysoftware\\.com/index\\.html", | ||
"http://www\\.ip2location\\.com", | ||
@@ -366,4 +383,2 @@ "http://www\\.monogol\\.de", | ||
"iframely/", | ||
"igdeSpyder", | ||
"igetter/", | ||
"IlTrovatore-Setaccio", | ||
@@ -407,3 +422,2 @@ "imageengine/", | ||
"JBH Agent 2\\.0", | ||
"jchecklinks/", | ||
"JemmaTheTourist", | ||
@@ -431,3 +445,2 @@ "JetCar", | ||
"Larbin", | ||
"lecodechecker/", | ||
"leech", | ||
@@ -440,3 +453,2 @@ "leia/", | ||
"Lincoln State Web Browser", | ||
"link check", | ||
"Link Commander", | ||
@@ -448,3 +460,2 @@ "Link Valet", | ||
"Linkguard", | ||
"linklint-checkonly/", | ||
"linkman", | ||
@@ -454,8 +465,4 @@ "LinkPimpin", | ||
"Links2Go", | ||
"links?\\s?check", | ||
"linkscan/", | ||
"linkscan/x\\.x", | ||
"linksonar/", | ||
"LinkStash", | ||
"linksweeper/", | ||
"LinkTiger", | ||
@@ -465,2 +472,3 @@ "LinkWalker", | ||
"Lipperhey SEO Service", | ||
"Lipperhey Site Explorer", | ||
"Lipperhey-Kaus-Australis/", | ||
@@ -477,3 +485,2 @@ "loader", | ||
"mabontland", | ||
"Mac Finder", | ||
"mack", | ||
@@ -490,6 +497,6 @@ "magicwml/", | ||
"MasterSeek", | ||
"Mastodon/", | ||
"Mata Hari/", | ||
"mediaget", | ||
"Mediapartners-Google", | ||
"MegaIndex\\.ru", | ||
"MegaSheep", | ||
@@ -499,7 +506,5 @@ "Megite", | ||
"metainspector/", | ||
"MetaProducts Download Express", | ||
"metaspinner/", | ||
"metatagsdir/", | ||
"MetaURI", | ||
"MFHttpScan", | ||
"MicroBaz", | ||
@@ -509,2 +514,3 @@ "Microsoft_Internet_Explorer_5", | ||
"Mindjet MindManager", | ||
"Miniflux/", | ||
"miniflux\\.net", | ||
@@ -532,3 +538,2 @@ "Missouri College Browse", | ||
"MVAClient", | ||
"mygetright/", | ||
"naofavicon4ie/", | ||
@@ -549,2 +554,3 @@ "naparek\\.cz", | ||
"nico/", | ||
"Nmap Scripting Engine", | ||
"NORAD National Defence Network", | ||
@@ -556,3 +562,2 @@ "Norton-Safeweb", | ||
"nutch", | ||
"NutchCVS", | ||
"Nymesis", | ||
@@ -565,2 +570,3 @@ "ocelli/", | ||
"OliverPerry", | ||
"omgili", | ||
"Onet\\.pl", | ||
@@ -570,6 +576,6 @@ "Oracle Application", | ||
"OSSProxy", | ||
"outbrain", | ||
"ow\\.ly", | ||
"ownCloud News/", | ||
"ozelot/", | ||
"Page Analyzer", | ||
"Page Valet/", | ||
@@ -586,2 +592,3 @@ "page2rss", | ||
"PBrowse", | ||
"Pcore-HTTP", | ||
"pd02_1", | ||
@@ -615,4 +622,4 @@ "Peew", | ||
"Powermarks", | ||
"PR-CY.RU", | ||
"pricepi\\.com", | ||
"privacyfinder/", | ||
"prlog\\.ru", | ||
@@ -631,3 +638,2 @@ "pro-sitemaps\\.com", | ||
"Qango\\.com Web Directory", | ||
"Qseero", | ||
"QuepasaCreep", | ||
@@ -646,3 +652,2 @@ "Qwantify", | ||
"responsecodetest/", | ||
"RetrevoPageAnalyzer", | ||
"retrieve", | ||
@@ -652,2 +657,3 @@ "rico/", | ||
"Rival IQ", | ||
"Rivva", | ||
"RMA/1\\.0", | ||
@@ -690,7 +696,6 @@ "RoboPal", | ||
"Site24x7", | ||
"site[-\\s]?check", | ||
"site[-\\s]?scan", | ||
"SiteBar", | ||
"SiteCondor", | ||
"siteexplorer\\.info", | ||
"Siteimprove", | ||
"SiteRecon", | ||
@@ -711,2 +716,3 @@ "SiteSnagger", | ||
"smartwit\\.com", | ||
"Snacktory", | ||
"Snappy", | ||
@@ -722,7 +728,5 @@ "sniff", | ||
"Sphider", | ||
"spida/", | ||
"spider", | ||
"spinne/", | ||
"spy", | ||
"spyder", | ||
"squidclam", | ||
@@ -755,3 +759,2 @@ "Squider", | ||
"Theophrastus", | ||
"therarestparser/", | ||
"thumb", | ||
@@ -780,4 +783,4 @@ "TinEye", | ||
"vakes/", | ||
"validator", | ||
"vb wininet", | ||
"venus/fedoraplanet", | ||
"verifier", | ||
@@ -795,3 +798,5 @@ "verify", | ||
"VYU2", | ||
"W3C-mobileOK/", | ||
"w3c-webcon/", | ||
"W3C_Unicorn/", | ||
"w3dt\\.net", | ||
@@ -814,2 +819,3 @@ "Wappalyzer", | ||
"webcraft", | ||
"WebDataStats/", | ||
"Webglimpse", | ||
@@ -816,0 +822,0 @@ "webgobbler/", |
{ | ||
"name": "isbot", | ||
"version": "2.4.2", | ||
"version": "2.5.0", | ||
"description": "detects bots/crawlers/spiders via the user agent.", | ||
@@ -8,4 +8,7 @@ "main": "index.js", | ||
"scripts": { | ||
"test": "mocha test.js", | ||
"sort": "node ./scripts/sort.js" | ||
"prepare": "./scripts/download-crawlers-json.sh", | ||
"sort": "./scripts/sort.js", | ||
"test": "mocha tests/*.js", | ||
"lint": "standard", | ||
"ts": "tsc" | ||
}, | ||
@@ -30,6 +33,8 @@ "repository": { | ||
"devDependencies": { | ||
"chai": "^3.2.0", | ||
"eslint-plugin-log": "^1.2.3", | ||
"mocha": "^2.2.5", | ||
"should": "^7.0.2" | ||
"standard": "^14.3.1", | ||
"typescript": "^3.6.4", | ||
"user-agents": "^1.0.412" | ||
} | ||
} |
# isbot [![](https://img.shields.io/npm/v/isbot.svg)](https://www.npmjs.com/package/isbot) | ||
Detect bots/crawlers/spiders via the user agent. | ||
[![Tests Status](https://github.com/gorangajic/isbot/workflows/Test/badge.svg)](https://github.com/gorangajic/isbot/actions) | ||
@@ -8,19 +10,22 @@ | ||
```console | ||
$ npm install isbot --save | ||
$ npm i isbot | ||
``` | ||
### usage | ||
## Usage | ||
### Simple detection | ||
```js | ||
isBot(req.headers['user-agent']) | ||
isBot("Googlebot/2.1 (+http://www.google.com/bot.html)") // true | ||
isBot('Googlebot/2.1 (+http://www.google.com/bot.html)') // true | ||
isBot("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36") // false | ||
isBot('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36') // false | ||
``` | ||
### extending | ||
### Extending more user agents | ||
Add rules to user agent match RegExp | ||
```js | ||
isBot("Mozilla/5.0") // false | ||
isBot('Mozilla/5.0') // false | ||
@@ -36,6 +41,7 @@ var myList = [ | ||
isBot("Mozilla/5.0") // true | ||
isBot('Mozilla/5.0') // true | ||
``` | ||
### excluding | ||
### Excluding known crawlers | ||
Remove rules from the user agent match RegExp (see existing rules in `list.json` file) | ||
@@ -54,1 +60,19 @@ ```js | ||
``` | ||
### `extend` and `exclude` use case | ||
Use [lookbehind assertion](https://github.com/tc39/proposal-regexp-lookbehind), introduced in V8 version 4.9 to exclude "Cubot" from "bot" rule | ||
```js | ||
isBot.exclude(['bot']); | ||
isBot.extend(['(?<! cu)bot']); // Recognise cubot browser as legit browser | ||
isBot('Mozilla/5.0 (Linux; Android 8.0.0; CUBOT_P20) ...') // false | ||
isBot('Googlebot/2.1 (+http://www.google.com/bot.html)') // true | ||
``` | ||
### Verbose result | ||
Return the respective match for bot user agent rule | ||
```js | ||
isBot.find('Googlebot/2.1 (+http://www.google.com/bot.html)') // 'bot' | ||
``` |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain risk: A new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
20327
76
0
5
6
936
1