json-web-crawler
Advanced tools
Comparing version
@@ -1,2 +0,2 @@ | ||
console.log('Crawl the popular list at Kickstarter. \n'); | ||
console.info('Crawl the popular list at Kickstarter. \n'); | ||
@@ -63,3 +63,3 @@ const setting = { | ||
Crawler(content, setting) | ||
.then(console.log) | ||
.then(console.error) | ||
.catch(console.log); | ||
@@ -66,0 +66,0 @@ } |
@@ -1,2 +0,2 @@ | ||
console.log('Crawl Dota 2 description at Steam site. \n'); | ||
console.info('Crawl Dota 2 description at Steam site. \n'); | ||
@@ -11,3 +11,3 @@ const setting = { | ||
}, | ||
name: { | ||
appName: { | ||
outOfContainer: true, | ||
@@ -51,3 +51,6 @@ elem: '.apphub_AppName', | ||
elem: '.game_description_snippet', | ||
get: 'text' | ||
get: 'text', | ||
process(value) { | ||
return value.split(', '); | ||
} | ||
}, | ||
@@ -67,3 +70,3 @@ releaseDate: { | ||
Crawler(content, setting) | ||
.catch(console.log) | ||
.catch(console.error) | ||
.then(console.log); | ||
@@ -70,0 +73,0 @@ } |
20
index.js
@@ -74,3 +74,3 @@ 'use strict' | ||
function pageNotFound($, pageNF) { // Not tested yet | ||
function pageNotFound($, pageNF) { // Not completely tested yet | ||
let result = []; | ||
@@ -124,3 +124,5 @@ | ||
return (typeof collectOptions.combineWith !== 'undefined') ? tmpArr.join(collectOptions.combineWith) : tmpArr; | ||
return (typeof collectOptions.combineWith !== 'undefined' && collectOptions.combineWith !== null) | ||
? tmpArr.join(collectOptions.combineWith) | ||
: tmpArr; | ||
}); | ||
@@ -135,4 +137,8 @@ } | ||
if(json.process && json.process.length) | ||
return process(result, json.process); | ||
if(json.process) { | ||
switch(true) { | ||
case (json.process instanceof Array): return process(result, json.process); | ||
case (typeof json.process === 'function'): return json.process(result); | ||
} | ||
} | ||
@@ -211,6 +217,10 @@ return result; | ||
function process(data, processList) { | ||
for(let job of processList) | ||
for(let job of processList) { | ||
if(typeof data !== 'string') | ||
break; | ||
data = _[job[0]](data, job[1], job[2]); | ||
} | ||
return data; | ||
} |
{ | ||
"name": "json-web-crawler", | ||
"version": "0.6.4", | ||
"version": "0.6.5", | ||
"description": "Crawl website by json", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -14,23 +14,13 @@ # Json Web Crawler | ||
```javascript | ||
var Crawl = require('json-web-crawler'); | ||
const Crawl = require('json-web-crawler'); | ||
Crawl('HTML content', your json setting) | ||
.then(console.log) | ||
.catch(console.log); | ||
.catch(console.error); | ||
``` | ||
## Different from 0.0.6 | ||
## Settings | ||
You can compare it in demo | ||
1. `limit`, `range`, `focus` and `ignore` combine to `listOption` | ||
2. `keys` rename to crawl | ||
3. no `name` key in crawl | ||
4. no `use` key, only `process` | ||
## Variables | ||
It's messy, I know. | ||
```javascript | ||
var setting = { | ||
const settings = { | ||
// If match one of this checklist, it will return page not found error. | ||
@@ -55,10 +45,9 @@ pageNotFound: [{ | ||
// If type is 'list', you may need to set these values below. | ||
// If type is 'list', you can set these values below (Optional). | ||
// ================================================================= | ||
// Optional, use if you don't want to crawl the whole list. ** ALL START FROM 0 ** | ||
listOption: ['limit', 10], // eq(0) ~ eq(9) | ||
// listOption: ['range', 6, 12], // eq(6) ~ eq(11)*, if without end, it will continue to the last one | ||
// listOption: ['focus', 0, 3, 7], // [eq(0), eq(3), eq(7)] | ||
// listOption: ['ignore', 1, 2, 5], // Elements you want to ignore it. You can use -1, -2 to count from backward. | ||
// ['range', 6, 12], // eq(6) ~ eq(11)*, if without end, it will continue to the last one | ||
// ['focus', 0, 3, 7], // [eq(0), eq(3), eq(7)] | ||
// ['ignore', 1, 2, 5], // Elements you want to ignore. You can use -1, -2 to count backward from the end. | ||
// ================================================================= | ||
@@ -72,14 +61,14 @@ | ||
get: 'text', | ||
// get: 'num' | ||
// get: 'html' | ||
// get: 'length' // => $element.length | ||
// get: 'attrName' // => $elem.attr('attrName') | ||
// get: 'data-dataName' // => $elem.data('dataName') | ||
// get: 'data-dataName:X' | ||
// X is optional, if data is an array, set 'data-dataName:0' will return $elem.data('dataAttribute')[0] | ||
// If data is an object, set 'data-dataName:id' will return $elem.data('dataAttribute')['id'] | ||
// If X not exist, it will return the whole data | ||
// 'num' | ||
// 'html' | ||
// 'length' // => $element.length | ||
// 'attrName' // => $elem.attr('attrName') | ||
// 'data-dataName' // => $elem.data('dataName') | ||
// 'data-dataName:X' | ||
// X is optional, if data is an array, set 'data-dataName:0' will return $elem.data('dataAttribute')[0] | ||
// If data is an object, set 'data-dataName:id' will return $elem.data('dataAttribute')['id'] | ||
// If X not exist, it will return the whole data | ||
// Optional, if you want to do something else after 'get' | ||
// You can use functions that exist in [lodash](https://lodash.com/docs). | ||
// Optional, if you want to do something else after 'get' (string type only) | ||
// You can use some simple functions that exist in [lodash](https://lodash.com/docs). | ||
process: [ | ||
@@ -90,13 +79,20 @@ ['match', /regex here/, number], // => str.match(/regex here/)[number], return array if no number, but will cause other process won't work | ||
['substring', 0, 3], | ||
['prepend', 'text'], // => 'text' + get | ||
['append', 'text'], // => get + 'text' | ||
['prepend', 'text'], // => 'text' + value | ||
['append', 'text'], // => value + 'text' | ||
['indexOf', 'text'] // => return number | ||
['independent function'], // like encodeURI, encodeURIComponent, unescape, etc... | ||
['INDEPENDENT_FUNCTION'], // like encodeURI, encodeURIComponent, unescape, etc... | ||
// Because lodash has `escape` & `unescape` functions with the same names but different behavior, | ||
// so both in pure js renamed to `encode` & `decode` here. | ||
// the original `escape` & `unescape` functions are renamed to `encode` & `decode` instead. | ||
], | ||
default: '' // return default value when elem not found, or value is null or undefined (`process` will be ignored) | ||
// Or you want to DIY, you can use function instead | ||
process(value) { | ||
// do something | ||
return newValue; | ||
}, | ||
default: '' // return default value when elem not found, null or undefined (`process` will be ignored) | ||
}, | ||
keyName2: { | ||
elem: 'table tbody thead', | ||
elem: 'table tbody', | ||
@@ -112,3 +108,3 @@ // If the value you want is separated across several elements, use collect to get all elems | ||
}, { | ||
get: 'href' // If no elem, the default is parent elem (table tbody) | ||
get: 'href' // If no elem, the default is parent elem $(table tbody) | ||
}], | ||
@@ -115,0 +111,0 @@ |
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
19683
2.54%10
11.11%466
2.64%148
-2.63%