Socket
Socket
Sign in · Demo · Install

js-crawler

Package Overview
Dependencies
57
Maintainers
1
Versions
23
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 0.3.13 to 0.3.14

e2e/static/non_http_https_links/page1.html

13

crawler.js

@@ -257,4 +257,4 @@ var request = require('request');

/**
 * Reports whether a response is HTML text, judged by its `content-type` header.
 *
 * Only the guarded, boolean-coerced form is kept. An earlier unguarded
 * `return response.headers && ...` placed ahead of it dereferenced
 * `response` directly (throwing when it was undefined), leaked the raw
 * match array instead of a boolean, and made the guarded return unreachable.
 *
 * @param {Object} [response] - Response object; may be omitted entirely.
 *     When present, `response.headers['content-type']` is inspected.
 * @returns {boolean} `true` only when the content type starts with `text/html`.
 */
Crawler.prototype._isTextContent = function(response) {
  // Each step short-circuits, so a missing response/headers/header yields a falsy value.
  var contentType = response && response.headers && response.headers['content-type'];
  // Boolean(...) collapses undefined / null / match-array into a strict true/false.
  return Boolean(contentType && contentType.match(/^text\/html.*$/));
};

@@ -290,2 +290,7 @@

/**
 * Decides whether a link uses a protocol this crawler follows.
 *
 * A link is supported when it has no scheme at all (relative or
 * protocol-relative reference) or when its scheme is `http` or `https`.
 *
 * The scheme is read from the START of the link rather than searched for as a
 * substring, so that:
 *   - `ftp://host/?next=http://x` is rejected (an embedded `http://` no longer counts),
 *   - `/search?q=mailto:someone` is accepted (an embedded `mailto:` no longer counts),
 *   - scheme-only links such as `javascript:` or `tel:` are rejected,
 *   - scheme case is ignored (`HTTP://...` is accepted).
 *
 * NOTE(review): a bare `host:port/path` reference (e.g. `localhost:3000/x`) is
 * now read as scheme `localhost` and rejected, which is how URL syntax defines
 * it — confirm no caller relies on the old leniency.
 *
 * @param {string} link - Raw link value as found in the page.
 * @returns {boolean} `true` if the link should be considered for crawling.
 */
Crawler.prototype._isLinkProtocolSupported = function(link) {
  if (typeof link !== 'string') {
    return false;
  }
  // Scheme grammar: a letter followed by letters, digits, "+", "." or "-", then ":".
  var schemeMatch = /^\s*([a-z][a-z0-9+.\-]*):/i.exec(link);
  if (!schemeMatch) {
    // No scheme: the link is resolved against the page URL, which is http(s).
    return true;
  }
  var scheme = schemeMatch[1].toLowerCase();
  return scheme === 'http' || scheme === 'https';
};
Crawler.prototype._getAllUrls = function(defaultBaseUrl, body) {

@@ -308,3 +313,5 @@ var self = this;

.uniq()
.filter(this.shouldCrawl)
.filter(function(link) {
return self._isLinkProtocolSupported(link) && self.shouldCrawl(link);
})
.value();

@@ -311,0 +318,0 @@

@@ -172,2 +172,29 @@ var Crawler = require('../crawler');

// End-to-end scenario: the crawl starts at page1.html of the
// non_http_https_links fixture and is expected to report only page1.html and
// page2.html when it finishes.
describe('references contain links to non-http resources', () => {
  it('should ignore mailto link', (done) => {
    // Filled by the success handler; the final assertion uses the list handed
    // to the finished handler instead.
    const visitedUrls = [];
    const expectedUrls = [
      'http://localhost:3000/non_http_https_links/page1.html',
      'http://localhost:3000/non_http_https_links/page2.html'
    ];

    const onSuccess = (page) => {
      visitedUrls.push(page.url);
    };

    const onFailure = (error) => {
      console.log(error);
      // Deliberately failing expectation: any crawl error fails the spec.
      expect('Error while crawling').toEqual('');
      done();
    };

    // `reportedUrls` is whatever the crawler passes to `finished`
    // (presumably every crawled URL — verify against the crawler API).
    const onFinished = (reportedUrls) => {
      expect(reportedUrls.sort()).toEqual(expectedUrls.sort());
      done();
    };

    crawler.crawl({
      url: 'http://localhost:3000/non_http_https_links/page1.html',
      success: onSuccess,
      failure: onFailure,
      finished: onFinished
    });
  });
});
//TODO: Redirect with another HTTP code? 301?

@@ -174,0 +201,0 @@ //TODO: Binary content, links are not analyzed in binary content, binary content itself is not returned (as it can be too large)(?)

{
"name": "js-crawler",
"version": "0.3.13",
"version": "0.3.14",
"description": "Web crawler for Node.js",

@@ -5,0 +5,0 @@ "main": "crawler.js",

@@ -70,2 +70,12 @@ var Crawler = require('../crawler');

// A page whose only anchor is a mailto: link must yield no URLs.
it('should ignore mailto links', function() {
  var html = '<a href="mailto:someone@somewhere.com"></a>';
  var foundUrls = crawler._getAllUrls(baseUrl, html);
  expect(foundUrls).toEqual([]);
});
// A page whose only anchor is an ftp:// link must yield no URLs.
it('should ignore ftp links', function() {
  var html = '<a href="ftp://myserver.org"></a>';
  var foundUrls = crawler._getAllUrls(baseUrl, html);
  expect(foundUrls).toEqual([]);
});
describe('ignoreRelative option', function() {

@@ -391,2 +401,6 @@

});
// Calling _isTextContent with no argument must give exactly `false`.
it('if response is not defined, content is not considered to be text', function() {
  var isText = crawler._isTextContent();
  expect(isText).toBe(false);
});
});

@@ -393,0 +407,0 @@

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc