Socket
Socket
Sign in | Demo | Install

scrape-it

Package Overview
Dependencies
Maintainers
1
Versions
47
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

scrape-it - npm Package Compare versions

Comparing version 5.3.2 to 6.0.0

4

lib/index.d.ts

@@ -27,4 +27,5 @@ /// <reference types="cheerio" />

data: T,
status: number,
statusText: string,
$: cheerio.Cheerio,
response: any,
body: string

@@ -38,3 +39,2 @@ }

declare function scrapeIt<T>(url: string | object, opts: scrapeIt.ScrapeOptions, cb: (err: any, res: scrapeIt.ScrapeResult<T>) => void): void;
export = scrapeIt;

@@ -5,3 +5,2 @@ "use strict";

typpy = require("typpy"),
assured = require("assured"),
scrapeHTML = require("scrape-it-core");

@@ -26,21 +25,9 @@

*/
function scrapeIt(url, opts, cb) {
cb = assured(cb);
req(url, function (err, $, res, body) {
if (err) {
return cb(err);
}
try {
var scrapedData = scrapeIt.scrapeHTML($, opts);
cb(null, {
data: scrapedData,
$: $,
response: res,
body: body
});
} catch (err) {
cb(err);
}
async function scrapeIt(url, opts) {
var res = await req(url);
var scrapedData = scrapeIt.scrapeHTML(res.$, opts);
return Object.assign(res, {
data: scrapedData,
body: res.data
});
return cb._;
}

@@ -47,0 +34,0 @@

@@ -14,3 +14,3 @@ {

"license": "MIT",
"version": "5.3.2",
"version": "6.0.0",
"main": "lib/index.js",

@@ -37,3 +37,2 @@ "types": "lib/index.d.ts",

"cli": "scrape-it-cli",
"description": "Want to save time or not using Node.js? Try our [hosted API](https://scrape-it.saasify.sh).",
"installation": [

@@ -74,5 +73,5 @@ {

"dependencies": {
"@types/cheerio": "^0.22.29",
"@types/cheerio": "^0.22.31",
"assured": "^1.0.15",
"cheerio-req": "^1.2.4",
"cheerio-req": "^2.0.0",
"scrape-it-core": "^1.0.0",

@@ -82,3 +81,3 @@ "typpy": "^2.3.13"

"devDependencies": {
"lien": "^3.3.1",
"lien": "^3.4.2",
"tester": "^1.4.5"

@@ -97,2 +96,4 @@ },

"index.js",
"index.d.ts",
"package-lock.json",
"bloggify.js",

@@ -102,2 +103,2 @@ "bloggify.json",

]
}
}

@@ -41,3 +41,2 @@ <!-- Please do not edit this file. Edit the `blah` field in the `package.json` instead. If in doubt, open an issue. -->

Want to save time or not using Node.js? Try our [hosted API](https://scrape-it.saasify.sh).

@@ -54,3 +53,2 @@

## :cloud: Installation

@@ -122,89 +120,91 @@

}
}).then(({ data, response }) => {
console.log(`Status Code: ${response.statusCode}`)
}).then(({ data, status }) => {
console.log(`Status Code: ${status}`)
console.log(data)
})
});
// Callback interface
scrapeIt("https://ionicabizau.net", {
// Fetch the articles
articles: {
listItem: ".article"
, data: {
// Get the article date and convert it into a Date object
createdAt: {
selector: ".date"
, convert: x => new Date(x)
}
// Async-Await
(async () => {
const { data } = await scrapeIt("https://ionicabizau.net", {
// Fetch the articles
articles: {
listItem: ".article"
, data: {
// Get the title
, title: "a.article-title"
// Get the article date and convert it into a Date object
createdAt: {
selector: ".date"
, convert: x => new Date(x)
}
// Nested list
, tags: {
listItem: ".tags > span"
}
// Get the title
, title: "a.article-title"
// Get the content
, content: {
selector: ".article-content"
, how: "html"
}
// Nested list
, tags: {
listItem: ".tags > span"
}
// Get attribute value of root listItem by omitting the selector
, classes: {
attr: "class"
// Get the content
, content: {
selector: ".article-content"
, how: "html"
}
// Get attribute value of root listItem by omitting the selector
, classes: {
attr: "class"
}
}
}
}
// Fetch the blog pages
, pages: {
listItem: "li.page"
, name: "pages"
, data: {
title: "a"
, url: {
selector: "a"
, attr: "href"
// Fetch the blog pages
, pages: {
listItem: "li.page"
, name: "pages"
, data: {
title: "a"
, url: {
selector: "a"
, attr: "href"
}
}
}
}
// Fetch some other data from the page
, title: ".header h1"
, desc: ".header h2"
, avatar: {
selector: ".header img"
, attr: "src"
}
}, (err, { data }) => {
console.log(err || data)
})
// { articles:
// [ { createdAt: Mon Mar 14 2016 00:00:00 GMT+0200 (EET),
// title: 'Pi Day, Raspberry Pi and Command Line',
// tags: [Object],
// content: '<p>Everyone knows (or should know)...a" alt=""></p>\n',
// classes: [Object] },
// { createdAt: Thu Feb 18 2016 00:00:00 GMT+0200 (EET),
// title: 'How I ported Memory Blocks to modern web',
// tags: [Object],
// content: '<p>Playing computer games is a lot of fun. ...',
// classes: [Object] },
// { createdAt: Mon Nov 02 2015 00:00:00 GMT+0200 (EET),
// title: 'How to convert JSON to Markdown using json2md',
// tags: [Object],
// content: '<p>I love and ...',
// classes: [Object] } ],
// pages:
// [ { title: 'Blog', url: '/' },
// { title: 'About', url: '/about' },
// { title: 'FAQ', url: '/faq' },
// { title: 'Training', url: '/training' },
// { title: 'Contact', url: '/contact' } ],
// title: 'Ionică Bizău',
// desc: 'Web Developer, Linux geek and Musician',
// avatar: '/images/logo.png' }
// Fetch some other data from the page
, title: ".header h1"
, desc: ".header h2"
, avatar: {
selector: ".header img"
, attr: "src"
}
})
console.log(data)
// { articles:
// [ { createdAt: Mon Mar 14 2016 00:00:00 GMT+0200 (EET),
// title: 'Pi Day, Raspberry Pi and Command Line',
// tags: [Object],
// content: '<p>Everyone knows (or should know)...a" alt=""></p>\n',
// classes: [Object] },
// { createdAt: Thu Feb 18 2016 00:00:00 GMT+0200 (EET),
// title: 'How I ported Memory Blocks to modern web',
// tags: [Object],
// content: '<p>Playing computer games is a lot of fun. ...',
// classes: [Object] },
// { createdAt: Mon Nov 02 2015 00:00:00 GMT+0200 (EET),
// title: 'How to convert JSON to Markdown using json2md',
// tags: [Object],
// content: '<p>I love and ...',
// classes: [Object] } ],
// pages:
// [ { title: 'Blog', url: '/' },
// { title: 'About', url: '/about' },
// { title: 'FAQ', url: '/faq' },
// { title: 'Training', url: '/training' },
// { title: 'Contact', url: '/contact' } ],
// title: 'Ionică Bizău',
// desc: 'Web Developer, Linux geek and Musician',
// avatar: '/images/logo.png' }
})()
```

@@ -386,62 +386,67 @@

- `@web-master/node-web-scraper`
- `proxylist`
- `macoolka-network`
- `@tryghost/mg-webscraper`
- `mit-ocw-scraper`
- `beervana-scraper`
- `cnn-market`
- `bandcamp-scraper`
- `@tryghost/mg-webscraper`
- `blockchain-notifier`
- `dncli`
- `degusta-scrapper`
- `trump-cabinet-picks`
- `cevo-lookup`
- `camaleon`
- `scrape-vinmonopolet`
- `do-fn`
- `scrapos-worker`
- `university-news-notifier`
- `selfrefactor`
- `parn`
- `picarto-lib`
- `fa.js`
- `mix-dl`
- `jishon`
- `sahibinden`
- `sahibindenServer`
- `sgdq-collector`
- `ubersetzung`
- `ui-studentsearch`
- `paklek-cli`
- `egg-crawler`
- `@thetrg/gibson`
- `jobs-fetcher`
- `fmgo-marketdata`
- `rayko-tools`
- `leximaven`
- `beervana-scraper`
- `codinglove-scraper`
- `sgdq-collector`
- `vandalen.rhyme.js`
- `uniwue-lernplaetze-scraper`
- `node-red-contrib-getdata-website`
- `startpage-quick-search`
- `wikitools`
- `spon-market`
- `macoolka-net-scrape`
- `fmgo-marketdata`
- `gatsby-source-bandcamp`
- `salesforcerelease-parser`
- `yu-ncov-scrape-dxy`
- `rs-api`
- `startpage-quick-search`
- `carirs`
- `helyesiras`
- `covidau`
- `3abn`
- `cevo-lookup`
- `sahibinden`
- `dncli`
- `flamescraper`
- `codementor`
- `scrape-it-cli`
- `codementor`
- `jishon`
- `@thetrg/gibson`
- `blockchain-notifier`
- `camaleon`
- `parn`
- `@lukekarrys/ebp`
- `selfrefactor`
- `yu-ncov-scrape-dxy`
- `u-pull-it-ne-parts-finder`
- `apixpress`
- `growapi`
- `steam-workshop-scraper`
- `scrape-vinmonopolet`
- `paklek-cli`
- `rs-api`
- `sahibindenServer`
- `salesforcerelease-parser`
- `picarto-lib`
- `ui-studentsearch`
- `macoolka-net-scrape`
- `node-red-contrib-scrape-it`
- `egg-crawler`
- `uniwue-lernplaetze-scraper`
- `simple-ai-alpha`
- `ubersetzung`
- `blankningsregistret`
- `scrapos-worker`
- `do-fn`
- `bible-scraper`
- `covidau`
- `jobs-fetcher`
- `trump-cabinet-picks`
- `leximaven`
- `proxylist`
- `@ben-wormald/bandcamp-scraper`
- `bible-scraper`
- `flamescraper`
- `fa.js`
- `growapi`
- `node-red-contrib-scrape-it`
- `carirs`
- `steam-workshop-scraper`
- `macoolka-network`
- `apixpress`
- `degusta-scrapper`
- `nurlresolver`

@@ -448,0 +453,0 @@

Sorry, the diff of this file is not supported yet

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc