Socket
Book a DemoInstallSign in
Socket

json-web-crawler

Package Overview
Dependencies
Maintainers
1
Versions
20
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

json-web-crawler - npm Package Compare versions

Comparing version

to
0.6.5

different from 0.0.6.md

4

demo/kickstarter_popular.js

@@ -1,2 +0,2 @@

console.log('Crawl the popular list at Kickstarter. \n');
console.info('Crawl the popular list at Kickstarter. \n');

@@ -63,3 +63,3 @@ const setting = {

Crawler(content, setting)
.then(console.log)
.then(console.error)
.catch(console.log);

@@ -66,0 +66,0 @@ }

@@ -1,2 +0,2 @@

console.log('Crawl Dota 2 description at Steam site. \n');
console.info('Crawl Dota 2 description at Steam site. \n');

@@ -11,3 +11,3 @@ const setting = {

},
name: {
appName: {
outOfContainer: true,

@@ -51,3 +51,6 @@ elem: '.apphub_AppName',

elem: '.game_description_snippet',
get: 'text'
get: 'text',
process(value) {
return value.split(', ');
}
},

@@ -67,3 +70,3 @@ releaseDate: {

Crawler(content, setting)
.catch(console.log)
.catch(console.error)
.then(console.log);

@@ -70,0 +73,0 @@ }

@@ -74,3 +74,3 @@ 'use strict'

function pageNotFound($, pageNF) { // Not tested yet
function pageNotFound($, pageNF) { // Not completed tested yet
let result = [];

@@ -124,3 +124,5 @@

return (typeof collectOptions.combineWith !== 'undefined') ? tmpArr.join(collectOptions.combineWith) : tmpArr;
return (typeof collectOptions.combineWith !== 'undefined' && collectOptions.combineWith !== null)
? tmpArr.join(collectOptions.combineWith)
: tmpArr;
});

@@ -135,4 +137,8 @@ }

if(json.process && json.process.length)
return process(result, json.process);
if(json.process) {
switch(true) {
case (json.process instanceof Array): return process(result, json.process);
case (typeof json.process === 'function'): return json.process(result);
}
}

@@ -211,6 +217,10 @@ return result;

function process(data, processList) {
for(let job of processList)
for(let job of processList) {
if(typeof data !== 'string')
break;
data = _[job[0]](data, job[1], job[2]);
}
return data;
}
{
"name": "json-web-crawler",
"version": "0.6.4",
"version": "0.6.5",
"description": "Crawl website by json",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -14,23 +14,13 @@ # Json Web Crawler

```javascript
var Crawl = require('json-web-crawler');
const Crawl = require('json-web-crawler');
Crawl('HTML content', your json setting)
.then(console.log)
.catch(console.log);
.catch(console.error);
```
## Different from 0.0.6
## Settings
You can compare it in demo
1. `limit`, `range`, `focus` and `ignore` combine to `listOption`
2. `keys` rename to crawl
3. no `name` key in crawl
4. no `use` key, only `process`
## Variables
It's messy, I know.
```javascript
var setting = {
const settings = {
// If match one of this checklist, it will return page not found error.

@@ -55,10 +45,9 @@ pageNotFound: [{

// If type is 'list', you may need to set these values below.
// If type is 'list', you can set these values below (Optional).
// =================================================================
// Optional, use if you don't want to crawl the whole list. ** ALL INDEXES START FROM 0 **
listOption: ['limit', 10], // eq(0) ~ eq(9)
// listOption: ['range', 6, 12], // eq(6) ~ eq(11)*, if without end, it will continue to the last one
// listOption: ['focus', 0, 3, 7], // [eq(0), eq(3), eq(7)]
// listOption: ['ignore', 1, 2, 5], // Elements you want to ignore it. You can use -1, -2 to count from backward.
// ['range', 6, 12], // eq(6) ~ eq(11)*, if without end, it will continue to the last one
// ['focus', 0, 3, 7], // [eq(0), eq(3), eq(7)]
// ['ignore', 1, 2, 5], // Elements you want to ignore. You can use -1, -2 to count backward from the end.
// =================================================================

@@ -72,14 +61,14 @@

get: 'text',
// get: 'num'
// get: 'html'
// get: 'length' // => $element.length
// get: 'attrName' // => $elem.attr('attrName')
// get: 'data-dataName' // => $elem.data('dataNAme')
// get: 'data-dataName:X'
// X is optional, if data is an array, set 'data-dataName:0' will return $elem.data('dataAttribute')[0]
// If data is an object, set 'data-dataName:id' will return $elem.data('dataAttribute')['id']
// If X not exist, it will return the whole data
// 'num'
// 'html'
// 'length' // => $element.length
// 'attrName' // => $elem.attr('attrName')
// 'data-dataName' // => $elem.data('dataName')
// 'data-dataName:X'
// X is optional, if data is an array, set 'data-dataName:0' will return $elem.data('dataAttribute')[0]
// If data is an object, set 'data-dataName:id' will return $elem.data('dataAttribute')['id']
// If X not exist, it will return the whole data
// Optional, if you want to do something else after 'get'
// You can use functions that exist in (lodash)[https://lodash.com/docs].
// Optional, if you want to do something else after 'get' (string type only)
// You can use some simple functions that exist in [lodash](https://lodash.com/docs).
process: [

@@ -90,13 +79,20 @@ ['match', /regex here/, number], // => str.match(/regex here/)[number], return array if no number, but will cause other process won't work

['substring', 0, 3],
['prepend', 'text'], // => 'text' + get
['append', 'text'], // => get + 'text'
['prepend', 'text'], // => 'text' + value
['append', 'text'], // => value + 'text'
['indexOf', 'text'] // => return number
['independent function'], // like encodeURI, encodeURIComponent, unescape, etc...
['INDEPENDENT_FUNCTION'], // like encodeURI, encodeURIComponent, unescape, etc...
// lodash has `escape` & `unescape` functions with the same names but different behavior,
// so both in pure js renamed to `encode` & `decode` here.
// the original `escape` & `unescape` functions are renamed to `encode` & `decode` instead.
],
default: '' // return default value when elem not found, or value is null or undefined (`process` will be ignored)
// Or you want to DIY, you can use function instead
process(value) {
// do something
return newValue;
},
default: '' // return default value when elem is not found, or the value is null or undefined (`process` will be ignored)
},
keyName2: {
elem: 'table tbody thead',
elem: 'table tbody',

@@ -112,3 +108,3 @@ // If the value you want is separated into several elements, use collect to get all elems

}, {
get: 'href' // If no elem, the default is parent elem (table tbody)
get: 'href' // If no elem, the default is parent elem $(table tbody)
}],

@@ -115,0 +111,0 @@

SocketSocket SOC 2 Logo

Product

About

Packages

Stay in touch

Get open source security insights delivered straight into your inbox.

  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc

U.S. Patent No. 12,346,443 & 12,314,394. Other pending.