Comparing version 5.3.2 to 6.0.0
@@ -27,4 +27,5 @@ /// <reference types="cheerio" /> | ||
data: T, | ||
status: number, | ||
statusText: string, | ||
$: cheerio.Cheerio, | ||
response: any, | ||
body: string | ||
@@ -38,3 +39,2 @@ } | ||
declare function scrapeIt<T>(url: string | object, opts: scrapeIt.ScrapeOptions, cb: (err: any, res: scrapeIt.ScrapeResult<T>) => void): void; | ||
export = scrapeIt; |
@@ -5,3 +5,2 @@ "use strict"; | ||
typpy = require("typpy"), | ||
assured = require("assured"), | ||
scrapeHTML = require("scrape-it-core"); | ||
@@ -26,21 +25,9 @@ | ||
*/ | ||
function scrapeIt(url, opts, cb) { | ||
cb = assured(cb); | ||
req(url, function (err, $, res, body) { | ||
if (err) { | ||
return cb(err); | ||
} | ||
try { | ||
var scrapedData = scrapeIt.scrapeHTML($, opts); | ||
cb(null, { | ||
data: scrapedData, | ||
$: $, | ||
response: res, | ||
body: body | ||
}); | ||
} catch (err) { | ||
cb(err); | ||
} | ||
async function scrapeIt(url, opts) { | ||
var res = await req(url); | ||
var scrapedData = scrapeIt.scrapeHTML(res.$, opts); | ||
return Object.assign(res, { | ||
data: scrapedData, | ||
body: res.data | ||
}); | ||
return cb._; | ||
} | ||
@@ -47,0 +34,0 @@ |
@@ -14,3 +14,3 @@ { | ||
"license": "MIT", | ||
"version": "5.3.2", | ||
"version": "6.0.0", | ||
"main": "lib/index.js", | ||
@@ -37,3 +37,2 @@ "types": "lib/index.d.ts", | ||
"cli": "scrape-it-cli", | ||
"description": "Want to save time or not using Node.js? Try our [hosted API](https://scrape-it.saasify.sh).", | ||
"installation": [ | ||
@@ -74,5 +73,5 @@ { | ||
"dependencies": { | ||
"@types/cheerio": "^0.22.29", | ||
"@types/cheerio": "^0.22.31", | ||
"assured": "^1.0.15", | ||
"cheerio-req": "^1.2.4", | ||
"cheerio-req": "^2.0.0", | ||
"scrape-it-core": "^1.0.0", | ||
@@ -82,3 +81,3 @@ "typpy": "^2.3.13" | ||
"devDependencies": { | ||
"lien": "^3.3.1", | ||
"lien": "^3.4.2", | ||
"tester": "^1.4.5" | ||
@@ -97,2 +96,4 @@ }, | ||
"index.js", | ||
"index.d.ts", | ||
"package-lock.json", | ||
"bloggify.js", | ||
@@ -102,2 +103,2 @@ "bloggify.json", | ||
] | ||
} | ||
} |
245
README.md
@@ -41,3 +41,2 @@ <!-- Please do not edit this file. Edit the `blah` field in the `package.json` instead. If in doubt, open an issue. --> | ||
Want to save time or not using Node.js? Try our [hosted API](https://scrape-it.saasify.sh). | ||
@@ -54,3 +53,2 @@ | ||
## :cloud: Installation | ||
@@ -122,89 +120,91 @@ | ||
} | ||
}).then(({ data, response }) => { | ||
console.log(`Status Code: ${response.statusCode}`) | ||
}).then(({ data, status }) => { | ||
console.log(`Status Code: ${status}`) | ||
console.log(data) | ||
}) | ||
}); | ||
// Callback interface | ||
scrapeIt("https://ionicabizau.net", { | ||
// Fetch the articles | ||
articles: { | ||
listItem: ".article" | ||
, data: { | ||
// Get the article date and convert it into a Date object | ||
createdAt: { | ||
selector: ".date" | ||
, convert: x => new Date(x) | ||
} | ||
// Async-Await | ||
(async () => { | ||
const { data } = await scrapeIt("https://ionicabizau.net", { | ||
// Fetch the articles | ||
articles: { | ||
listItem: ".article" | ||
, data: { | ||
// Get the title | ||
, title: "a.article-title" | ||
// Get the article date and convert it into a Date object | ||
createdAt: { | ||
selector: ".date" | ||
, convert: x => new Date(x) | ||
} | ||
// Nested list | ||
, tags: { | ||
listItem: ".tags > span" | ||
} | ||
// Get the title | ||
, title: "a.article-title" | ||
// Get the content | ||
, content: { | ||
selector: ".article-content" | ||
, how: "html" | ||
} | ||
// Nested list | ||
, tags: { | ||
listItem: ".tags > span" | ||
} | ||
// Get attribute value of root listItem by omitting the selector | ||
, classes: { | ||
attr: "class" | ||
// Get the content | ||
, content: { | ||
selector: ".article-content" | ||
, how: "html" | ||
} | ||
// Get attribute value of root listItem by omitting the selector | ||
, classes: { | ||
attr: "class" | ||
} | ||
} | ||
} | ||
} | ||
// Fetch the blog pages | ||
, pages: { | ||
listItem: "li.page" | ||
, name: "pages" | ||
, data: { | ||
title: "a" | ||
, url: { | ||
selector: "a" | ||
, attr: "href" | ||
// Fetch the blog pages | ||
, pages: { | ||
listItem: "li.page" | ||
, name: "pages" | ||
, data: { | ||
title: "a" | ||
, url: { | ||
selector: "a" | ||
, attr: "href" | ||
} | ||
} | ||
} | ||
} | ||
// Fetch some other data from the page | ||
, title: ".header h1" | ||
, desc: ".header h2" | ||
, avatar: { | ||
selector: ".header img" | ||
, attr: "src" | ||
} | ||
}, (err, { data }) => { | ||
console.log(err || data) | ||
}) | ||
// { articles: | ||
// [ { createdAt: Mon Mar 14 2016 00:00:00 GMT+0200 (EET), | ||
// title: 'Pi Day, Raspberry Pi and Command Line', | ||
// tags: [Object], | ||
// content: '<p>Everyone knows (or should know)...a" alt=""></p>\n', | ||
// classes: [Object] }, | ||
// { createdAt: Thu Feb 18 2016 00:00:00 GMT+0200 (EET), | ||
// title: 'How I ported Memory Blocks to modern web', | ||
// tags: [Object], | ||
// content: '<p>Playing computer games is a lot of fun. ...', | ||
// classes: [Object] }, | ||
// { createdAt: Mon Nov 02 2015 00:00:00 GMT+0200 (EET), | ||
// title: 'How to convert JSON to Markdown using json2md', | ||
// tags: [Object], | ||
// content: '<p>I love and ...', | ||
// classes: [Object] } ], | ||
// pages: | ||
// [ { title: 'Blog', url: '/' }, | ||
// { title: 'About', url: '/about' }, | ||
// { title: 'FAQ', url: '/faq' }, | ||
// { title: 'Training', url: '/training' }, | ||
// { title: 'Contact', url: '/contact' } ], | ||
// title: 'Ionică Bizău', | ||
// desc: 'Web Developer, Linux geek and Musician', | ||
// avatar: '/images/logo.png' } | ||
// Fetch some other data from the page | ||
, title: ".header h1" | ||
, desc: ".header h2" | ||
, avatar: { | ||
selector: ".header img" | ||
, attr: "src" | ||
} | ||
}) | ||
console.log(data) | ||
// { articles: | ||
// [ { createdAt: Mon Mar 14 2016 00:00:00 GMT+0200 (EET), | ||
// title: 'Pi Day, Raspberry Pi and Command Line', | ||
// tags: [Object], | ||
// content: '<p>Everyone knows (or should know)...a" alt=""></p>\n', | ||
// classes: [Object] }, | ||
// { createdAt: Thu Feb 18 2016 00:00:00 GMT+0200 (EET), | ||
// title: 'How I ported Memory Blocks to modern web', | ||
// tags: [Object], | ||
// content: '<p>Playing computer games is a lot of fun. ...', | ||
// classes: [Object] }, | ||
// { createdAt: Mon Nov 02 2015 00:00:00 GMT+0200 (EET), | ||
// title: 'How to convert JSON to Markdown using json2md', | ||
// tags: [Object], | ||
// content: '<p>I love and ...', | ||
// classes: [Object] } ], | ||
// pages: | ||
// [ { title: 'Blog', url: '/' }, | ||
// { title: 'About', url: '/about' }, | ||
// { title: 'FAQ', url: '/faq' }, | ||
// { title: 'Training', url: '/training' }, | ||
// { title: 'Contact', url: '/contact' } ], | ||
// title: 'Ionică Bizău', | ||
// desc: 'Web Developer, Linux geek and Musician', | ||
// avatar: '/images/logo.png' } | ||
})() | ||
``` | ||
@@ -386,62 +386,67 @@ | ||
- `@web-master/node-web-scraper` | ||
- `proxylist` | ||
- `macoolka-network` | ||
- `@tryghost/mg-webscraper` | ||
- `mit-ocw-scraper` | ||
- `beervana-scraper` | ||
- `cnn-market` | ||
- `bandcamp-scraper` | ||
- `@tryghost/mg-webscraper` | ||
- `blockchain-notifier` | ||
- `dncli` | ||
- `degusta-scrapper` | ||
- `trump-cabinet-picks` | ||
- `cevo-lookup` | ||
- `camaleon` | ||
- `scrape-vinmonopolet` | ||
- `do-fn` | ||
- `scrapos-worker` | ||
- `university-news-notifier` | ||
- `selfrefactor` | ||
- `parn` | ||
- `picarto-lib` | ||
- `fa.js` | ||
- `mix-dl` | ||
- `jishon` | ||
- `sahibinden` | ||
- `sahibindenServer` | ||
- `sgdq-collector` | ||
- `ubersetzung` | ||
- `ui-studentsearch` | ||
- `paklek-cli` | ||
- `egg-crawler` | ||
- `@thetrg/gibson` | ||
- `jobs-fetcher` | ||
- `fmgo-marketdata` | ||
- `rayko-tools` | ||
- `leximaven` | ||
- `beervana-scraper` | ||
- `codinglove-scraper` | ||
- `sgdq-collector` | ||
- `vandalen.rhyme.js` | ||
- `uniwue-lernplaetze-scraper` | ||
- `node-red-contrib-getdata-website` | ||
- `startpage-quick-search` | ||
- `wikitools` | ||
- `spon-market` | ||
- `macoolka-net-scrape` | ||
- `fmgo-marketdata` | ||
- `gatsby-source-bandcamp` | ||
- `salesforcerelease-parser` | ||
- `yu-ncov-scrape-dxy` | ||
- `rs-api` | ||
- `startpage-quick-search` | ||
- `carirs` | ||
- `helyesiras` | ||
- `covidau` | ||
- `3abn` | ||
- `cevo-lookup` | ||
- `sahibinden` | ||
- `dncli` | ||
- `flamescraper` | ||
- `codementor` | ||
- `scrape-it-cli` | ||
- `codementor` | ||
- `jishon` | ||
- `@thetrg/gibson` | ||
- `blockchain-notifier` | ||
- `camaleon` | ||
- `parn` | ||
- `@lukekarrys/ebp` | ||
- `selfrefactor` | ||
- `yu-ncov-scrape-dxy` | ||
- `u-pull-it-ne-parts-finder` | ||
- `apixpress` | ||
- `growapi` | ||
- `steam-workshop-scraper` | ||
- `scrape-vinmonopolet` | ||
- `paklek-cli` | ||
- `rs-api` | ||
- `sahibindenServer` | ||
- `salesforcerelease-parser` | ||
- `picarto-lib` | ||
- `ui-studentsearch` | ||
- `macoolka-net-scrape` | ||
- `node-red-contrib-scrape-it` | ||
- `egg-crawler` | ||
- `uniwue-lernplaetze-scraper` | ||
- `simple-ai-alpha` | ||
- `ubersetzung` | ||
- `blankningsregistret` | ||
- `scrapos-worker` | ||
- `do-fn` | ||
- `bible-scraper` | ||
- `covidau` | ||
- `jobs-fetcher` | ||
- `trump-cabinet-picks` | ||
- `leximaven` | ||
- `proxylist` | ||
- `@ben-wormald/bandcamp-scraper` | ||
- `bible-scraper` | ||
- `flamescraper` | ||
- `fa.js` | ||
- `growapi` | ||
- `node-red-contrib-scrape-it` | ||
- `carirs` | ||
- `steam-workshop-scraper` | ||
- `macoolka-network` | ||
- `apixpress` | ||
- `degusta-scrapper` | ||
- `nurlresolver` | ||
@@ -448,0 +453,0 @@ |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
479
21691
139
+ Addedaxios@1.7.7(transitive)
+ Addedcheerio-req@2.0.0(transitive)
+ Addedform-data@4.0.0(transitive)
+ Addedproxy-from-env@1.1.0(transitive)
- Removedabab@1.0.4(transitive)
- Removedacorn@2.7.0(transitive)
- Removedacorn-globals@1.0.9(transitive)
- Removedajv@6.12.6(transitive)
- Removedasn1@0.2.6(transitive)
- Removedassert-plus@1.0.0(transitive)
- Removedaws-sign2@0.7.0(transitive)
- Removedaws4@1.13.2(transitive)
- Removedbcrypt-pbkdf@1.0.2(transitive)
- Removedcaseless@0.12.0(transitive)
- Removedcheerio@0.20.0(transitive)
- Removedcheerio-req@1.2.4(transitive)
- Removedcore-util-is@1.0.2, 1.0.3(transitive)
- Removedcss-select@1.2.0(transitive)
- Removedcss-what@2.1.3(transitive)
- Removedcssom@0.3.8(transitive)
- Removedcssstyle@0.2.37(transitive)
- Removeddashdash@1.14.1(transitive)
- Removeddeep-is@0.1.4(transitive)
- Removeddom-serializer@0.1.1(transitive)
- Removeddomelementtype@1.3.1(transitive)
- Removeddomhandler@2.3.0(transitive)
- Removeddomutils@1.5.1(transitive)
- Removedecc-jsbn@0.1.2(transitive)
- Removedentities@1.0.0, 1.1.2(transitive)
- Removedescodegen@1.14.3(transitive)
- Removedesprima@4.0.1(transitive)
- Removedestraverse@4.3.0(transitive)
- Removedesutils@2.0.3(transitive)
- Removedextend@3.0.2(transitive)
- Removedextsprintf@1.3.0(transitive)
- Removedfast-deep-equal@3.1.3(transitive)
- Removedfast-json-stable-stringify@2.1.0(transitive)
- Removedfast-levenshtein@2.0.6(transitive)
- Removedforever-agent@0.6.1(transitive)
- Removedform-data@2.3.3(transitive)
- Removedgetpass@0.1.7(transitive)
- Removedhar-schema@2.0.0(transitive)
- Removedhar-validator@5.1.5(transitive)
- Removedhtmlparser2@3.8.3(transitive)
- Removedhttp-signature@1.2.0(transitive)
- Removedinherits@2.0.4(transitive)
- Removedis-typedarray@1.0.0(transitive)
- Removedisarray@0.0.1(transitive)
- Removedisstream@0.1.2(transitive)
- Removedjsbn@0.1.1(transitive)
- Removedjsdom@7.2.2(transitive)
- Removedjson-schema@0.4.0(transitive)
- Removedjson-schema-traverse@0.4.1(transitive)
- Removedjson-stringify-safe@5.0.1(transitive)
- Removedjsprim@1.4.2(transitive)
- Removedlevn@0.3.0(transitive)
- Removedlodash@4.17.21(transitive)
- Removednth-check@1.0.2(transitive)
- Removednwmatcher@1.4.4(transitive)
- Removedoauth-sign@0.9.0(transitive)
- Removedoptionator@0.8.3(transitive)
- Removedparse5@1.5.1(transitive)
- Removedperformance-now@2.1.0(transitive)
- Removedprelude-ls@1.1.2(transitive)
- Removedpsl@1.9.0(transitive)
- Removedpunycode@2.3.1(transitive)
- Removedqs@6.5.3(transitive)
- Removedreadable-stream@1.1.14(transitive)
- Removedrequest@2.88.2(transitive)
- Removedsafe-buffer@5.2.1(transitive)
- Removedsax@1.4.1(transitive)
- Removedsource-map@0.6.1(transitive)
- Removedsshpk@1.18.0(transitive)
- Removedstring_decoder@0.10.31(transitive)
- Removedsymbol-tree@3.2.4(transitive)
- Removedtinyreq@3.4.2(transitive)
- Removedtough-cookie@2.5.0(transitive)
- Removedtr46@0.0.3(transitive)
- Removedtunnel-agent@0.6.0(transitive)
- Removedtweetnacl@0.14.5(transitive)
- Removedtype-check@0.3.2(transitive)
- Removedul@5.2.15(transitive)
- Removeduri-js@4.4.1(transitive)
- Removeduuid@3.4.0(transitive)
- Removedverror@1.10.0(transitive)
- Removedwebidl-conversions@2.0.1(transitive)
- Removedwhatwg-url-compat@0.6.5(transitive)
- Removedword-wrap@1.2.5(transitive)
- Removedxml-name-validator@2.0.1(transitive)
Updated@types/cheerio@^0.22.31
Updatedcheerio-req@^2.0.0