isbot - npm Package Compare versions

Comparing version 2.4.2 to 2.5.0

index.d.ts

		@@ -5,18 +5,24 @@ /**
		*/
		declare function testUserAgent(userAgent: string): boolean;
		declare function isbot(userAgent: string): boolean;

		/**
		* Extend the built-in list of bot user agent
		* @param additionalFilters An array of user agents
		*/
		declare function extend(additionalFilters: string[]): void;
		declare namespace isbot {
		/**
		* Extend the built-in list of bot user agent
		* @param additionalFilters An array of user agents
		*/
		function extend(additionalFilters: string[]): void;

		/**
		* Removes a set of user agent from the built-in list
		* @param excludedFilters An array of user agents
		*/
		declare function exclude(excludedFilters: string[]): void;
		/**
		* Removes a set of user agent from the built-in list
		* @param excludedFilters An array of user agents
		*/
		function exclude(excludedFilters: string[]): void;

		testUserAgent.extend = extend;
		testUserAgent.exclude = exclude;
		export = testUserAgent;
		/**
		* Return the respective match for bot user agent rule
		* @param userAgent The user agent string to search for a bot pattern
		*/
		function find(userAgent: string): string;
		}

		export = isbot;

index.js

		@@ -1,26 +0,55 @@
		var list = require('./list.json');
		var regex;
		function update() {
		regex = new RegExp('(' + list.join('\|') + ')', 'i');
		var list = require('./list.json')
		var regex

		/**
		* Refresh the local regex variable (closure)
		*/
		function update () {
		regex = new RegExp('(' + list.join('\|') + ')', 'i')
		}
		update();
		update()

		module.exports = function(userAgent) {
		return regex.test(userAgent);
		};
		/**
		* Check if string matches known crawler patterns
		* @param {string} userAgent
		* @return {boolean}
		*/
		module.exports = function (userAgent) {
		return regex.test(userAgent)
		}

		module.exports.extend = function(additionalFilters){
		list = list.concat(additionalFilters);
		update();
		/**
		* Get the part of the string that matches a known crawler pattern
		* @param {string} userAgent
		* @return {string}
		*/
		module.exports.find = function (userAgent) {
		var match = userAgent.match(regex)
		return match && match[0]
		}

		module.exports.exclude = function(excludedFilters){
		var i = excludedFilters.length;
		while (i--) {
		var index = list.lastIndexOf(excludedFilters[i]);
		if (index > 0) {
		list.splice(index, 1);
		}
		/**
		* Extend patterns for known crawlers
		* @param {array} additionalFilters
		* @return {void}
		*/
		module.exports.extend = function (additionalFilters) {
		list = list.concat(additionalFilters)
		update()
		}

		/**
		* Exclude patterns from bot pattern rule
		* @param {array} excludedFilters
		* @return {void}
		*/
		module.exports.exclude = function (excludedFilters) {
		var i = excludedFilters.length
		while (i--) {
		var index = list.lastIndexOf(excludedFilters[i])
		if (index > -1) {
		list.splice(index, 1)
		}
		update();
		}
		update()
		}

108

list.json

		[
		" Daum/",
		" DeuSu/",
		" MuckRack/",
		" Sysomos/",
		" um-LN/",
		"!Susie",
		@@ -11,5 +16,24 @@ "/www\\.answerbus\\.com",
		"\\(privoxy/",
		"^AHC/",
		"^Amazon CloudFront",
		"^axios/",
		"^Disqus/",
		"^Friendica",
		"^Hatena",
		"^http_get",
		"^Jetty/",
		"^MeltwaterNews",
		"^MixnodeCache/",
		"^newspaper/",
		"^NextCloud-News/",
		"^ng/",
		"A6-Indexer",
		"Aberja Checkomat",
		"^NING",
		"^Nuzzel",
		"^okhttp",
		"^sentry/",
		"^Thinklab",
		"^Tiny Tiny RSS/",
		"^Traackr.com",
		"^Upflow/",
		"^Zabbix",
		"Abonti",
		@@ -39,5 +63,9 @@ "Aboundex",
		"Anturis Agent",
		"AnyEvent-HTTP",
		"Apache-HttpClient",
		"APIs-Google",
		"Aport",
		"AppEngine-Google",
		"appie",
		"AppInsights",
		"Arachmo",
		@@ -49,3 +77,2 @@ "arachnode\\.net",
		"archive",
		"archive-com",
		"archiving",
		@@ -83,5 +110,5 @@ "asafaweb\\.com",
		"binlar",
		"Blackboard Safeassign",
		"BlackWidow",
		"blaiz-bee/",
		"bloglines-images/",
		"bloglines/",
		@@ -99,7 +126,6 @@ "Blogpulse",
		"bot",
		"bpimagewalker/",
		"BrandVerity/",
		"BravoBrian",
		"Browsershots",
		"bsdseek/",
		"BStop\\.BravoBrian\\.it Agent Detector",
		"btwebclient/",
		@@ -112,3 +138,2 @@ "BUbiNG",
		"butterfly",
		"Butterfly/",
		"BW-C-2",
		@@ -131,6 +156,5 @@ "bwh3_user_agent",
		"check",
		"checklinks/",
		"CheckWeb",
		"China Local Browse",
		"Chitika ContentHit",
		"Chrome-Lighthouse",
		"CJB\\.NET Proxy",
		@@ -169,2 +193,3 @@ "classify",
		"DAP x",
		"DareBoost",
		"datacha0s/",
		@@ -180,3 +205,2 @@ "datafountains/dmoz",
		"deepak-usc/isi",
		"del\\.icio\\.us-thumbnails/",
		"delegate/",
		@@ -194,3 +218,2 @@ "DepSpid",
		"DnloadMage",
		"DNS-Tools Header-Analyzer",
		"docomo/",
		@@ -218,3 +241,2 @@ "DomainAppender",
		"EBrowse \\d",
		"ec2linkfinder",
		"ecairn\\.com/grabber",
		@@ -228,6 +250,5 @@ "echo!/",
		"EmailWolf",
		"Embedly/",
		"Embedly",
		"envolk",
		"ESurf15",
		"europarchive\\.org",
		"evaliant",
		@@ -241,4 +262,2 @@ "eventax/",
		"extract",
		"ExtractorPro",
		"Extreme Picture Finder",
		"EyeCatcher",
		@@ -252,3 +271,2 @@ "eyes",
		"FastBug",
		"favcollector/",
		"Faveeo/",
		@@ -266,3 +284,2 @@ "FavIconizer",
		"findlink",
		"FindLinks",
		"findthatfile",
		@@ -273,2 +290,3 @@ "firefly/",
		"flexum/",
		"FlipboardProxy/",
		"FlipboardRSS/",
		@@ -289,3 +307,2 @@ "fluffy",
		"geek-tools\\.org",
		"genderanalyzer",
		"genieknows",
		@@ -300,2 +317,3 @@ "Genieo",
		"go-ahead-got-it/",
		"Go-http-client",
		"GoBeez",
		@@ -310,3 +328,7 @@ "goblin/",
		"Google Page Speed Insights",
		"Google Web Preview",
		"Google Wireless Transcoder",
		"Google-PhysicalWeb",
		"Google-Structured-Data-Testing-Tool",
		"google-xrawler",
		"GoogleImageProxy",
		@@ -325,6 +347,3 @@ "gopher",
		"haste/",
		"Hatena Mobile Gateway/",
		"Hatena Pagetitle Agent/",
		"Hatena RSS/",
		"hatenascreenshot/",
		"HeadlessChrome/",
		"helix/",
		@@ -340,7 +359,5 @@ "heritrix",
		"html2",
		"htmlparser/",
		"http-header-abfrage/",
		"http://anonymouse\\.org/",
		"http://ask\\.24x\\.info/",
		"http://ozysoftware\\.com/index\\.html",
		"http://www\\.ip2location\\.com",
		@@ -366,4 +383,2 @@ "http://www\\.monogol\\.de",
		"iframely/",
		"igdeSpyder",
		"igetter/",
		"IlTrovatore-Setaccio",
		@@ -407,3 +422,2 @@ "imageengine/",
		"JBH Agent 2\\.0",
		"jchecklinks/",
		"JemmaTheTourist",
		@@ -431,3 +445,2 @@ "JetCar",
		"Larbin",
		"lecodechecker/",
		"leech",
		@@ -440,3 +453,2 @@ "leia/",
		"Lincoln State Web Browser",
		"link check",
		"Link Commander",
		@@ -448,3 +460,2 @@ "Link Valet",
		"Linkguard",
		"linklint-checkonly/",
		"linkman",
		@@ -454,8 +465,4 @@ "LinkPimpin",
		"Links2Go",
		"links?\\s?check",
		"linkscan/",
		"linkscan/x\\.x",
		"linksonar/",
		"LinkStash",
		"linksweeper/",
		"LinkTiger",
		@@ -465,2 +472,3 @@ "LinkWalker",
		"Lipperhey SEO Service",
		"Lipperhey Site Explorer",
		"Lipperhey-Kaus-Australis/",
		@@ -477,3 +485,2 @@ "loader",
		"mabontland",
		"Mac Finder",
		"mack",
		@@ -490,6 +497,6 @@ "magicwml/",
		"MasterSeek",
		"Mastodon/",
		"Mata Hari/",
		"mediaget",
		"Mediapartners-Google",
		"MegaIndex\\.ru",
		"MegaSheep",
		@@ -499,7 +506,5 @@ "Megite",
		"metainspector/",
		"MetaProducts Download Express",
		"metaspinner/",
		"metatagsdir/",
		"MetaURI",
		"MFHttpScan",
		"MicroBaz",
		@@ -509,2 +514,3 @@ "Microsoft_Internet_Explorer_5",
		"Mindjet MindManager",
		"Miniflux/",
		"miniflux\\.net",
		@@ -532,3 +538,2 @@ "Missouri College Browse",
		"MVAClient",
		"mygetright/",
		"naofavicon4ie/",
		@@ -549,2 +554,3 @@ "naparek\\.cz",
		"nico/",
		"Nmap Scripting Engine",
		"NORAD National Defence Network",
		@@ -556,3 +562,2 @@ "Norton-Safeweb",
		"nutch",
		"NutchCVS",
		"Nymesis",
		@@ -565,2 +570,3 @@ "ocelli/",
		"OliverPerry",
		"omgili",
		"Onet\\.pl",
		@@ -570,6 +576,6 @@ "Oracle Application",
		"OSSProxy",
		"outbrain",
		"ow\\.ly",
		"ownCloud News/",
		"ozelot/",
		"Page Analyzer",
		"Page Valet/",
		@@ -586,2 +592,3 @@ "page2rss",
		"PBrowse",
		"Pcore-HTTP",
		"pd02_1",
		@@ -615,4 +622,4 @@ "Peew",
		"Powermarks",
		"PR-CY.RU",
		"pricepi\\.com",
		"privacyfinder/",
		"prlog\\.ru",
		@@ -631,3 +638,2 @@ "pro-sitemaps\\.com",
		"Qango\\.com Web Directory",
		"Qseero",
		"QuepasaCreep",
		@@ -646,3 +652,2 @@ "Qwantify",
		"responsecodetest/",
		"RetrevoPageAnalyzer",
		"retrieve",
		@@ -652,2 +657,3 @@ "rico/",
		"Rival IQ",
		"Rivva",
		"RMA/1\\.0",
		@@ -690,7 +696,6 @@ "RoboPal",
		"Site24x7",
		"site[-\\s]?check",
		"site[-\\s]?scan",
		"SiteBar",
		"SiteCondor",
		"siteexplorer\\.info",
		"Siteimprove",
		"SiteRecon",
		@@ -711,2 +716,3 @@ "SiteSnagger",
		"smartwit\\.com",
		"Snacktory",
		"Snappy",
		@@ -722,7 +728,5 @@ "sniff",
		"Sphider",
		"spida/",
		"spider",
		"spinne/",
		"spy",
		"spyder",
		"squidclam",
		@@ -755,3 +759,2 @@ "Squider",
		"Theophrastus",
		"therarestparser/",
		"thumb",
		@@ -780,4 +783,4 @@ "TinEye",
		"vakes/",
		"validator",
		"vb wininet",
		"venus/fedoraplanet",
		"verifier",
		@@ -795,3 +798,5 @@ "verify",
		"VYU2",
		"W3C-mobileOK/",
		"w3c-webcon/",
		"W3C_Unicorn/",
		"w3dt\\.net",
		@@ -814,2 +819,3 @@ "Wappalyzer",
		"webcraft",
		"WebDataStats/",
		"Webglimpse",
		@@ -816,0 +822,0 @@ "webgobbler/",

package.json

		{
		"name": "isbot",
		"version": "2.4.2",
		"version": "2.5.0",
		"description": "detects bots/crawlers/spiders via the user agent.",
		@@ -8,4 +8,7 @@ "main": "index.js",
		"scripts": {
		"test": "mocha test.js",
		"sort": "node ./scripts/sort.js"
		"prepare": "./scripts/download-crawlers-json.sh",
		"sort": "./scripts/sort.js",
		"test": "mocha tests/*.js",
		"lint": "standard",
		"ts": "tsc"
		},
		@@ -30,6 +33,8 @@ "repository": {
		"devDependencies": {
		"chai": "^3.2.0",
		"eslint-plugin-log": "^1.2.3",
		"mocha": "^2.2.5",
		"should": "^7.0.2"
		"standard": "^14.3.1",
		"typescript": "^3.6.4",
		"user-agents": "^1.0.412"
		}
		}

README.md

		# isbot [![](https://img.shields.io/npm/v/isbot.svg)](https://www.npmjs.com/package/isbot)

		Detect bots/crawlers/spiders via the user agent.

		[![Tests Status](https://github.com/gorangajic/isbot/workflows/Test/badge.svg)](https://github.com/gorangajic/isbot/actions)
		@@ -8,19 +10,22 @@
		```console
		$ npm install isbot --save
		$ npm i isbot
		```

		### usage
		## Usage

		### Simple detection

		```js
		isBot(req.headers['user-agent'])

		isBot("Googlebot/2.1 (+http://www.google.com/bot.html)") // true
		isBot('Googlebot/2.1 (+http://www.google.com/bot.html)') // true

		isBot("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36") // false
		isBot('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36') // false
		```

		### extending
		### Extending more user agents
		Add rules to the user agent match RegExp

		```js
		isBot("Mozilla/5.0") // false
		isBot('Mozilla/5.0') // false

		@@ -36,6 +41,7 @@ var myList = [

		isBot("Mozilla/5.0") // true
		isBot('Mozilla/5.0') // true
		```

		### excluding
		### Excluding known crawlers
		Remove rules from the user agent match RegExp (see the existing rules in the `list.json` file)

		@@ -54,1 +60,19 @@ ```js
		```

		### `extend` and `exclude` use case
		Use a [lookbehind assertion](https://github.com/tc39/proposal-regexp-lookbehind), introduced in V8 version 4.9, to exclude "Cubot" from the "bot" rule

		```js
		isBot.exclude(['bot']);
		isBot.extend(['(?<! cu)bot']); // Recognise cubot browser as legit browser

		isBot('Mozilla/5.0 (Linux; Android 8.0.0; CUBOT_P20) ...') // false
		isBot('Googlebot/2.1 (+http://www.google.com/bot.html)') // true
		```

		### Verbose result
		Return the respective match for bot user agent rule

		```js
		isBot.find('Googlebot/2.1 (+http://www.google.com/bot.html)') // 'bot'
		```

.github/workflows/test.yml

scripts/sort.js

isbot - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics