crawler-find-word
Advanced tools
Comparing version 0.1.1 to 0.1.2
{ | ||
"name": "crawler-find-word", | ||
"version": "0.1.1", | ||
"version": "0.1.2", | ||
"description": "crawler service", | ||
@@ -12,8 +12,8 @@ "main": "crawler-find-word.js", | ||
"dependencies": { | ||
"cheerio": "^0.19.0", | ||
"chai": "^4.1.2", | ||
"cheerio": "^1.0.0-rc.2", | ||
"mocha": "^5.1.0", | ||
"request": "^2.65.0", | ||
"url-parse": "^1.0.5", | ||
"request": "^2.65.0", | ||
"valid-url": "^1.0.9", | ||
"mocha": "^5.1.0", | ||
"chai": "^4.1.2" | ||
"valid-url": "^1.0.9" | ||
}, | ||
@@ -29,4 +29,14 @@ "engines": { | ||
"keywords": [ | ||
"crawler","crawling","scraper","spider","search","find", | ||
"word","phrase","javascript","node","nodejs","url" | ||
"crawler", | ||
"crawling", | ||
"scraper", | ||
"spider", | ||
"search", | ||
"find", | ||
"word", | ||
"phrase", | ||
"javascript", | ||
"node", | ||
"nodejs", | ||
"url" | ||
], | ||
@@ -33,0 +43,0 @@ "bugs": { |
@@ -7,20 +7,24 @@ ## Simple but powerful crawler - find phrase deep in the web | ||
Deep crawl to find word in the body of web pages by base url | ||
Simple but powerful, popular and production crawling/scraping package for Node. | ||
Deep crawl to find a word in the body of web pages, starting from a base URL.
Simple but powerful, popular and production-ready crawling/scraping package for Node.
We are strict about our code, so we chose to use 'travis-ci' and 'npm audit'.
Discuss bugs you found or features you want in our Slack: click this [invitation](https://join.slack.com/t/crawler-find-word/shared_invite/enQtMzYyOTcyNjE5MDEzLTc4NWM3Y2QyZmNiMWY5OWJhMGVkMzNlZWEyOGQ2NWVjMmMwMjFlZWUyNDkwOTRkZmVmMjIxZDMzNjU0ZTdhZWU)
## Features: | ||
Configurable level of maximum pages to visit | ||
Configurable root URL and Word to search | ||
Use event driven API, Raise 'Done' event when process ends. | ||
Return useful statistical data.
Use Cheerio to find word or phrase in the DOM. | ||
Tested with Mocha and Chai | ||
Configurable level of maximum pages to visit. | ||
Configurable root URL and Word to search. | ||
Uses an event-driven API; raises a 'done' event when the process ends.
Return useful statistical data.
Use Cheerio to find word or phrase in the DOM. | ||
Tested with Mocha and Chai. | ||
## Future features | ||
Add 'Error' event handling. | ||
Priority queue of requests. | ||
Control rate limit. | ||
Charset detection and conversion. | ||
Add 'Error' event handling. | ||
Priority queue of requests. | ||
Control rate limit. | ||
Charset detection and conversion. | ||
@@ -27,0 +31,0 @@ ## Demo |
@@ -13,3 +13,11 @@ 'use strict'; | ||
srv.eventHandler.on('done', print); | ||
srv.crawl('https://cnn.com/', 'trump', 2); | ||
/**
 * 'done' listener for the demo: takes the last entry off `srv.pages` (via
 * `pop()`, so the entry is removed) and reports, by throwing, whether the
 * searched word was found on that page.
 *
 * @throws {Error} 'Word Found' when the entry's `isWordFound` flag is truthy,
 *                 'Word did not Found' otherwise.
 */
var knownPageGetData = function(){
    var lastPage = srv.pages.pop();
    // `exception` is not a JavaScript built-in; `new exception(...)` raised a
    // ReferenceError instead of the intended message. Use Error instead.
    if (lastPage.isWordFound) {
        throw new Error('Word Found');
    }
    throw new Error('Word did not Found');
};
srv.eventHandler.on('done', knownPageGetData); | ||
srv.crawl('https://codeburst.io/javascript-unit-testing-using-mocha-and-chai-1d97d9f18e71', 'Mocha', 1); |
@@ -23,2 +23,22 @@ // 'use strict'; | ||
/**
 * 'done' listener used by the tests: takes the last entry off `srv.pages`
 * (via `pop()`, so the entry is removed) and signals the outcome by throwing.
 *
 * Error objects are thrown rather than bare strings so a stack trace is
 * available; the messages are unchanged, so message-based matching such as
 * chai's `.to.throw("Word Found")` keeps working.
 *
 * @throws {Error} 'Word Found' when the entry's `isWordFound` flag is truthy,
 *                 'Word did not Found' otherwise.
 */
var knownPageGetData = function(){
    var lastPage = srv.pages.pop();
    if (lastPage.isWordFound) {
        throw new Error('Word Found');
    }
    throw new Error('Word did not Found');
};
// Test helper: subscribes knownPageGetData to the 'done' event, then starts a
// crawl of a fixed article URL searching for 'Mocha'. The trailing 1 is
// presumably the maximum number of pages to visit (confirm against srv.crawl).
var knownPageWithMatch = function () {
    var articleUrl = 'https://codeburst.io/javascript-unit-testing-using-mocha-and-chai-1d97d9f18e71';
    var emitter = srv.eventHandler;
    emitter.on('done', knownPageGetData);
    srv.crawl(articleUrl, 'Mocha', 1);
};
// Test helper: subscribes knownPageGetData to the 'done' event, then starts a
// crawl of a fixed article URL searching for 'xxx'. The trailing 1 is
// presumably the maximum number of pages to visit (confirm against srv.crawl).
var knownPageNoMatch = function () {
    var articleUrl = 'https://codeburst.io/javascript-unit-testing-using-mocha-and-chai-1d97d9f18e71';
    var emitter = srv.eventHandler;
    emitter.on('done', knownPageGetData);
    srv.crawl(articleUrl, 'xxx', 1);
};
srv.eventHandler.on('done', save); | ||
@@ -31,5 +51,3 @@ | ||
}); | ||
}); | ||
describe('#crawler-service', function() { | ||
it('should no path to dig throw exception', function() { | ||
@@ -39,5 +57,3 @@ | ||
}); | ||
}); | ||
describe('#crawler-service', function() { | ||
it('should no value for max pages to visit throw exception', function() { | ||
@@ -47,2 +63,25 @@ | ||
}); | ||
}); | ||
// After a one-second delay, asserts that invoking knownPageWithMatch throws
// 'Word Found'. The test takes Mocha's `done` callback so the runner waits for
// the timer, and assertion failures are forwarded to `done` instead of being
// discarded by an empty catch (previously the test could never fail).
// NOTE(review): knownPageWithMatch only throws if the 'done' listener runs
// synchronously inside srv.crawl — confirm; otherwise assert from within the
// 'done' listener instead.
it('should be match for the page and the phrase', function(done) {
    setTimeout(function () {
        try {
            expect(knownPageWithMatch).to.throw("Word Found");
            done();
        } catch (e) {
            done(e);
        }
    }, 1000);
});
// After a one-second delay, asserts that invoking knownPageNoMatch throws
// 'Word did not Found'. The test takes Mocha's `done` callback so the runner
// waits for the timer, and assertion failures are forwarded to `done` instead
// of being discarded by an empty catch (previously the test could never fail).
// NOTE(review): knownPageNoMatch only throws if the 'done' listener runs
// synchronously inside srv.crawl — confirm; otherwise assert from within the
// 'done' listener instead.
it('should be match for the page but no for the phrase', function(done) {
    setTimeout(function () {
        try {
            expect(knownPageNoMatch).to.throw("Word did not Found");
            done();
        } catch (e) {
            done(e);
        }
    }, 1000);
});
}); |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
10273
7
217
56
+ Addedcheerio@1.0.0(transitive)
+ Addedcheerio-select@2.1.0(transitive)
+ Addedcss-select@5.1.0(transitive)
+ Addedcss-what@6.1.0(transitive)
+ Addeddom-serializer@2.0.0(transitive)
+ Addeddomelementtype@2.3.0(transitive)
+ Addeddomhandler@5.0.3(transitive)
+ Addeddomutils@3.1.0(transitive)
+ Addedencoding-sniffer@0.2.0(transitive)
+ Addedentities@4.5.0(transitive)
+ Addedhtmlparser2@9.1.0(transitive)
+ Addediconv-lite@0.6.3(transitive)
+ Addednth-check@2.1.1(transitive)
+ Addedparse5@7.2.1(transitive)
+ Addedparse5-htmlparser2-tree-adapter@7.1.0(transitive)
+ Addedparse5-parser-stream@7.1.2(transitive)
+ Addedundici@6.21.0(transitive)
+ Addedwhatwg-encoding@3.1.1(transitive)
+ Addedwhatwg-mimetype@4.0.0(transitive)
- Removedcheerio@0.19.0(transitive)
- Removedcore-util-is@1.0.3(transitive)
- Removedcss-select@1.0.0(transitive)
- Removedcss-what@1.0.0(transitive)
- Removeddom-serializer@0.1.1(transitive)
- Removeddomelementtype@1.3.1(transitive)
- Removeddomhandler@2.3.0(transitive)
- Removeddomutils@1.4.31.5.1(transitive)
- Removedentities@1.0.01.1.2(transitive)
- Removedhtmlparser2@3.8.3(transitive)
- Removedisarray@0.0.1(transitive)
- Removedlodash@3.10.1(transitive)
- Removednth-check@1.0.2(transitive)
- Removedreadable-stream@1.1.14(transitive)
- Removedstring_decoder@0.10.31(transitive)
Updatedcheerio@^1.0.0-rc.2