Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

crawler-find-word

Package Overview
Dependencies
Maintainers
1
Versions
4
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

crawler-find-word - npm Package Compare versions

Comparing version 0.1.1 to 0.1.2

.vscode/launch.json

26

package.json
{
"name": "crawler-find-word",
"version": "0.1.1",
"version": "0.1.2",
"description": "crawler service",

@@ -12,8 +12,8 @@ "main": "crawler-find-word.js",

"dependencies": {
"cheerio": "^0.19.0",
"chai": "^4.1.2",
"cheerio": "^1.0.0-rc.2",
"mocha": "^5.1.0",
"request": "^2.65.0",
"url-parse": "^1.0.5",
"request": "^2.65.0",
"valid-url": "^1.0.9",
"mocha": "^5.1.0",
"chai": "^4.1.2"
"valid-url": "^1.0.9"
},

@@ -29,4 +29,14 @@ "engines": {

"keywords": [
"crawler","crawling","scraper","spider","search","find",
"word","phrase","javascript","node","nodejs","url"
"crawler",
"crawling",
"scraper",
"spider",
"search",
"find",
"word",
"phrase",
"javascript",
"node",
"nodejs",
"url"
],

@@ -33,0 +43,0 @@ "bugs": {

@@ -7,20 +7,24 @@ ## Simple but powerful crawler - find phrase deep in the web

Deep crawl to find word in the body of web pages by base url
Simple but powerful, popular and production crawling/scraping package for Node.
Deep crawl to find word in the body of web pages by base url.
Simple but powerful, popular and production crawling/scraping package for Node.
We are strict about our code, so we chose to use 'travis-ci' and 'npm audit'.
Report bugs you find or request features in our Slack: click this [invitation](https://join.slack.com/t/crawler-find-word/shared_invite/enQtMzYyOTcyNjE5MDEzLTc4NWM3Y2QyZmNiMWY5OWJhMGVkMzNlZWEyOGQ2NWVjMmMwMjFlZWUyNDkwOTRkZmVmMjIxZDMzNjU0ZTdhZWU)
## Features:
Configurable level of maximum pages to visit
Configurable root URL and Word to search
Use event driven API, Raise 'Done' event when process ends.
Return useful statistical data.
Use Cheerio to find word or phrase in the DOM.
Tested with Mocha and Chai
Configurable level of maximum pages to visit.
Configurable root URL and Word to search.
Use event driven API, Raise 'Done' event when process ends.
Return useful statistical data.
Use Cheerio to find word or phrase in the DOM.
Tested with Mocha and Chai.
## Future features
Add 'Error' event handling.
Priority queue of requests.
Control rate limit.
Charset detection and conversion.
Add 'Error' event handling.
Priority queue of requests.
Control rate limit.
Charset detection and conversion.

@@ -27,0 +31,0 @@ ## Demo

@@ -13,3 +13,11 @@ 'use strict';

// Register `print` as a listener for the crawler's 'done' event, then start a
// crawl from https://cnn.com/ looking for the word 'trump'.
// NOTE(review): the third argument (2) is presumably the maximum number of
// pages to visit — confirm against srv.crawl.
srv.eventHandler.on('done', print);
srv.crawl('https://cnn.com/', 'trump', 2);
/**
 * 'done' listener for the demo crawl: removes the last entry of `srv.pages`
 * and reports the outcome by throwing.
 *
 * @throws {Error} 'Word Found' when the popped page has a truthy
 *   `isWordFound`, otherwise 'Word did not Found'.
 */
var knownPageGetData = function(){
    var u = srv.pages.pop();
    // Use the built-in Error: JavaScript has no global `exception`
    // constructor, so `new exception(...)` would raise a ReferenceError
    // instead of carrying the intended message.
    if(u.isWordFound)
        throw new Error('Word Found');
    else
        throw new Error('Word did not Found');
};
// Register knownPageGetData as a listener for the crawler's 'done' event, then
// start a crawl of the codeburst article looking for the word 'Mocha'.
// NOTE(review): the third argument (1) is presumably the maximum number of
// pages to visit — confirm against srv.crawl.
srv.eventHandler.on('done', knownPageGetData);
srv.crawl('https://codeburst.io/javascript-unit-testing-using-mocha-and-chai-1d97d9f18e71', 'Mocha', 1);

@@ -23,2 +23,22 @@ // 'use strict';

/**
 * 'done' listener used by the tests: removes the last entry of `srv.pages`
 * and throws a plain string describing whether the word was found on it.
 *
 * @throws {string} 'Word Found' when the popped page has a truthy
 *   `isWordFound`, otherwise 'Word did not Found'.
 */
var knownPageGetData = function () {
    var lastPage = srv.pages.pop();
    var outcome = lastPage.isWordFound ? 'Word Found' : 'Word did not Found';
    throw outcome;
};
/**
 * Registers knownPageGetData as a 'done' listener and starts a crawl of the
 * codeburst mocha/chai article searching for the word 'Mocha'.
 */
var knownPageWithMatch = function () {
    var articleUrl = 'https://codeburst.io/javascript-unit-testing-using-mocha-and-chai-1d97d9f18e71';
    var phrase = 'Mocha';

    srv.eventHandler.on('done', knownPageGetData);
    srv.crawl(articleUrl, phrase, 1);
};
/**
 * Registers knownPageGetData as a 'done' listener and starts a crawl of the
 * codeburst mocha/chai article searching for the string 'xxx'.
 */
var knownPageNoMatch = function () {
    var articleUrl = 'https://codeburst.io/javascript-unit-testing-using-mocha-and-chai-1d97d9f18e71';
    var phrase = 'xxx';

    srv.eventHandler.on('done', knownPageGetData);
    srv.crawl(articleUrl, phrase, 1);
};
// Register `save` as a listener for the crawler's 'done' event.
srv.eventHandler.on('done', save);

@@ -31,5 +51,3 @@

});
});
describe('#crawler-service', function() {
it('should no path to dig throw exception', function() {

@@ -39,5 +57,3 @@

});
});
describe('#crawler-service', function() {
it('should no value for max pages to visit throw exception', function() {

@@ -47,2 +63,25 @@

});
});
// The assertion runs inside a timer, so the test takes mocha's `done` callback
// and only finishes once the timer body has executed. Assertion failures are
// forwarded to `done` so mocha reports them rather than silently passing.
// NOTE(review): `.to.throw` only observes a synchronous throw from
// knownPageWithMatch; if the crawler emits 'done' asynchronously this test
// will now fail visibly — confirm how srv.crawl raises the event.
it('should be match for the page and the phrase', function(done) {
    setTimeout(function () {
        try {
            expect(knownPageWithMatch).to.throw("Word Found");
            done();
        } catch (e) {
            // Hand the failure to mocha instead of discarding it.
            done(e);
        }
    }, 1000);
});
// The assertion runs inside a timer, so the test takes mocha's `done` callback
// and only finishes once the timer body has executed. Assertion failures are
// forwarded to `done` so mocha reports them rather than silently passing.
// NOTE(review): `.to.throw` only observes a synchronous throw from
// knownPageNoMatch; if the crawler emits 'done' asynchronously this test
// will now fail visibly — confirm how srv.crawl raises the event.
it('should be match for the page but no for the phrase', function(done) {
    setTimeout(function () {
        try {
            expect(knownPageNoMatch).to.throw("Word did not Found");
            done();
        } catch (e) {
            // Hand the failure to mocha instead of discarding it.
            done(e);
        }
    }, 1000);
});
});

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc