Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

scrapq

Package Overview
Dependencies
Maintainers
1
Versions
17
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

scrapq - npm Package Compare versions

Comparing version 1.3.2 to 1.3.3

dist/controls.d.ts

23

dist/index.d.ts

@@ -1,22 +0,11 @@

/// <reference types="cheerio" />
export { scrap } from './scrapper';
export { Query, Selector, TypeOfSelector, TypeOfQuery } from './types';
export { ScrapQuery, ScrapSelector } from './scrapper';
import { attrCreator as attr } from './selectors/attr';
import { existsCreator as exists } from './selectors/exists';
import { htmlCreator as html } from './selectors/html';
import { textCreator as text } from './selectors/text';
import { selectCreator as select } from "./selectors/select";
import { countCreator as count } from "./selectors/count";
import { linkCreator as link } from "./selectors/link";
import { AnySelector, text, count, attr, html } from "./selectors";
import { Query, GetResult } from "./helpers";
import { AnyControl, list } from "./controls";
export declare function scrap<Q extends Query | AnyControl | AnySelector>(html: string, query: Q): GetResult<Q>;
export declare const $: {
if: <T extends import("./selectors/attr").Attr<any> | import("./selectors/exists").Exists<any> | import("./selectors/html").Html<any> | Query | import("./controls/list").List<any> | import("./selectors/select").Select<any> | import("./controls/if").If<any, any, any, any> | import("./selectors/text").Text<any> | import("./selectors/count").Count<any> | import("./selectors/link").Link<any>, F extends import("./selectors/attr").Attr<any> | import("./selectors/exists").Exists<any> | import("./selectors/html").Html<any> | Query | import("./controls/list").List<any> | import("./selectors/select").Select<any> | import("./controls/if").If<any, any, any, any> | import("./selectors/text").Text<any> | import("./selectors/count").Count<any> | import("./selectors/link").Link<any>>(selector: string, condition: (el: Cheerio) => boolean, truthy: T, falsey: F) => import("./controls/if").If<T, F, import("./types").TypeOf<T>, import("./types").TypeOf<F>>;
list: <Q extends import("./selectors/attr").Attr<any> | import("./selectors/exists").Exists<any> | import("./selectors/html").Html<any> | Query | import("./controls/list").List<any> | import("./selectors/select").Select<any> | import("./controls/if").If<any, any, any, any> | import("./selectors/text").Text<any> | import("./selectors/count").Count<any> | import("./selectors/link").Link<any>>(selector: string, data: Q, predicate?: ((el: Cheerio, index: number) => boolean) | undefined) => import("./controls/list").List<Q extends Query ? TypeOfQuery<Q> : Q extends Selector ? TypeOfSelector<Q> : never>;
attr: typeof attr;
exists: typeof exists;
html: typeof html;
text: typeof text;
select: typeof select;
count: typeof count;
link: typeof link;
list: typeof list;
html: typeof html;
};
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var scrapper_1 = require("./scrapper");
exports.scrap = scrapper_1.scrap;
// Import
var attr_1 = require("./selectors/attr");
var exists_1 = require("./selectors/exists");
var html_1 = require("./selectors/html");
var text_1 = require("./selectors/text");
var select_1 = require("./selectors/select");
var count_1 = require("./selectors/count");
var link_1 = require("./selectors/link");
var list_1 = require("./controls/list");
var if_1 = require("./controls/if");
var cheerio_1 = require("cheerio");
var selectors_1 = require("./selectors");
var controls_1 = require("./controls");
/**
 * Tells whether `test` is a selector/control descriptor rather than a nested query.
 *
 * A value qualifies when both a "type" and a "select" property are reachable on it
 * (the `in` operator is used, so inherited properties count as well).
 *
 * @param test value to inspect; the `in` operator throws a TypeError for primitives
 * @returns true when both properties are present, false otherwise
 */
function isSelectorOrControl(test) {
    return ("type" in test) && ("select" in test);
}
function scrapSelector($, context, selector) {
switch (selector.type) {
case "text": {
var text_1 = (selector.select === "")
? $(context).text()
: $(selector.select, context).text();
return text_1;
}
case "attr": {
var el = (selector.select === "")
? context
: $(selector.select, context);
var attr_1 = el.attr(selector.attr);
return attr_1.trim();
}
case "count": {
var els = $(selector.select, context);
var count_1 = els.length;
return count_1;
}
case "list": {
var result = [];
var els = $(selector.select, context);
for (var i = 0; i < els.length; i++) {
var el = els.eq(i);
var scrapedEl = isSelectorOrControl(selector.query)
? scrapSelector($, el, selector.query)
: scrapQuery($, el, selector.query, {});
result.push(scrapedEl);
}
return result;
}
case "html": {
if (selector.select === "") {
var html_1 = $(context).html();
return html_1;
}
else {
var el = $(selector.select, context);
var html_2 = el.html();
return (typeof html_2 === "string")
? html_2.trim()
: html_2;
}
}
default: {
throw new Error("Undefined selector \"" + JSON.stringify(selector) + "\"");
}
}
}
function scrapQuery($, context, query, ref) {
Object.keys(query).forEach(function (prop) {
var val = query[prop];
if (isSelectorOrControl(val)) {
ref[prop] = scrapSelector($, context, val);
}
else {
ref[prop] = scrapQuery($, context, val, {});
}
});
return ref;
}
/**
 * Entry point: loads `html` with cheerio and resolves `query` against the document root.
 *
 * @param html markup to scrape
 * @param query a single selector/control, or a query object of selectors/controls
 * @returns the selector's value, or an object holding one result per query property
 */
function scrap(html, query) {
    var $ = cheerio_1.load(html);
    var root = $.root();
    return isSelectorOrControl(query)
        ? scrapSelector($, root, query)
        : scrapQuery($, root, query, {});
}
exports.scrap = scrap;
exports.$ = {
if: if_1.ifCreator,
list: list_1.listCreator,
attr: attr_1.attrCreator,
exists: exists_1.existsCreator,
html: html_1.htmlCreator,
text: text_1.textCreator,
select: select_1.selectCreator,
count: count_1.countCreator,
link: link_1.linkCreator
attr: selectors_1.attr,
text: selectors_1.text,
count: selectors_1.count,
list: controls_1.list,
html: selectors_1.html
};
//# sourceMappingURL=index.js.map

@@ -1,28 +0,96 @@

export { scrap } from './scrapper';
import { load } from "cheerio";
import { AnySelector, text, count, attr, html } from "./selectors";
import { Query, GetResult } from "./helpers";
import { AnyControl, list } from "./controls";
export { Query, Selector, TypeOfSelector, TypeOfQuery } from './types';
export { ScrapQuery, ScrapSelector } from './scrapper';
/**
 * Type guard telling a selector/control descriptor apart from a nested query.
 *
 * A value qualifies when both a "type" and a "select" property are reachable on it
 * (the `in` operator is used, so inherited properties count as well).
 *
 * @param test value to inspect; the `in` operator throws a TypeError for primitives
 * @returns true when both properties are present, false otherwise
 */
function isSelectorOrControl(test: any): test is (AnySelector | AnyControl) {
  return ("type" in test) && ("select" in test);
}
// Import
import { attrCreator as attr } from './selectors/attr';
import { existsCreator as exists } from './selectors/exists';
import { htmlCreator as html } from './selectors/html';
import { textCreator as text } from './selectors/text';
import { selectCreator as select } from "./selectors/select";
import { countCreator as count } from "./selectors/count";
import { linkCreator as link } from "./selectors/link";
/**
 * Evaluates one selector/control descriptor against `context` and returns the scraped value.
 *
 * Behaviour per `selector.type`:
 *  - "text":  `.text()` of `context` when `select` is "", otherwise of `$(select, context)`
 *  - "attr":  value of attribute `selector.attr`, trimmed when it is a string; a non-string
 *             result of `.attr()` (attribute or element not found) is returned unchanged
 *  - "count": number of elements matched by `$(select, context)`
 *  - "list":  array with one entry per matched element, each entry produced by
 *             `selector.query` (a selector/control or a nested query object)
 *  - "html":  `.html()` of `context` when `select` is "", otherwise the `.html()` of
 *             `$(select, context)`, trimmed when it is a string
 *
 * @param $ cheerio instance returned by `load`, invoked as `$(context)` / `$(select, context)`
 * @param context element wrapper the selector is resolved against
 * @param selector descriptor carrying `type`, `select` and type-specific fields
 * @returns the scraped value; its shape depends on `selector.type`
 * @throws Error when `selector.type` is none of the values listed above
 */
function scrapSelector($: CheerioStatic, context: Cheerio, selector: AnySelector | AnyControl) {
  switch (selector.type) {
    case "text": {
      return (selector.select === "")
        ? $(context).text()
        : $(selector.select, context).text();
    }
    case "attr": {
      const target = (selector.select === "")
        ? context
        : $(selector.select, context);
      const value = target.attr(selector.attr);
      // .attr() does not yield a string when the attribute is absent; trimming
      // unconditionally would throw, so mirror the guard used for "html" below.
      return (typeof value === "string")
        ? value.trim()
        : value;
    }
    case "count": {
      return $(selector.select, context).length;
    }
    case "list": {
      const result: any[] = [];
      const els = $(selector.select, context);
      for (let i = 0; i < els.length; i++) {
        const el = els.eq(i);
        // Each list item is either a single selector/control or a nested query object.
        const scrapedEl = isSelectorOrControl(selector.query)
          ? scrapSelector($, el, selector.query)
          : scrapQuery($, el, selector.query, {});
        result.push(scrapedEl);
      }
      return result;
    }
    case "html": {
      if (selector.select === "") {
        // The context's own markup is returned as-is (not trimmed).
        return $(context).html();
      }
      const markup = $(selector.select, context).html();
      return (typeof markup === "string")
        ? markup.trim()
        : markup;
    }
    default: {
      throw new Error(`Undefined selector "${JSON.stringify(selector)}"`);
    }
  }
}
import { listCreator as List } from './controls/list';
import { ifCreator as If } from './controls/if';
/**
 * Resolves every own enumerable property of `query` against `context` and stores
 * each result on `ref` under the same property name.
 *
 * A property holding a selector/control is evaluated directly; any other value is
 * treated as a nested query and resolved recursively into a fresh object.
 *
 * @param $ cheerio instance forwarded to the selector evaluation
 * @param context element wrapper the query is resolved against
 * @param query object mapping result keys to selectors, controls or nested queries
 * @param ref object that receives the results (mutated in place)
 * @returns `ref`, the same object that was passed in
 */
function scrapQuery<Q extends Query>($: CheerioStatic, context: Cheerio, query: Q, ref: any): GetResult<Q> {
  for (const prop of Object.keys(query)) {
    const val = query[prop];
    ref[prop] = isSelectorOrControl(val)
      ? scrapSelector($, context, val)
      : scrapQuery($, context, val, {});
  }
  return ref;
}
/**
 * Entry point: loads `html` with cheerio and resolves `query` against the document root.
 *
 * @param html markup to scrape
 * @param query a single selector/control, or a query object of selectors/controls
 * @returns the selector's value, or an object holding one result per query property
 */
export function scrap<Q extends Query | AnyControl | AnySelector>(
  html: string,
  query: Q
): GetResult<Q> {
  const $ = load(html);
  const root = $.root();
  // The result shape is described by GetResult<Q>; the implementation is untyped, hence the cast.
  const scraped = isSelectorOrControl(query)
    ? scrapSelector($, root, query)
    : scrapQuery($, root, query as Query, {});
  return scraped as any;
}
export const $ = {
if: If,
list: List,
attr,
exists,
html,
text,
select,
count,
link
attr,
text,
count,
list,
html
};
{
"name": "scrapq",
"version": "1.3.2",
"version": "1.3.3",
"description": "Lightweight Typescript library for scrapping html",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"typings": "./dist/index.d.ts",

@@ -12,7 +13,5 @@ "scripts": {

"dist": "npm run test; npm run build; npm run build:web",
"build": "tsc",
"build": "tsc && npm run build:web && npm run build:dts",
"build:web": "webpack --mode=production",
"prettify": "npm run prettify:src; npm run prettify:test",
"prettify:src": "prettier --write ./lib/**/*.ts",
"prettify:test": "prettier --write ./test/**/*.ts"
"build:dts": "dts-bundle --name scrapq --main ./dist/index.d.ts --baseDir ./dist/external/"
},

@@ -33,7 +32,7 @@ "keywords": [

"@types/node": "^10.5.1",
"dts-bundle": "^0.7.3",
"jasmine": "^3.1.0",
"jasmine-ts": "^0.2.1",
"prettier": "^1.13.7",
"ts-loader": "^5.3.3",
"typescript": "^3.0.3",
"typescript": "^3.4.0-dev.20190321",
"webpack": "^4.29.0",

@@ -40,0 +39,0 @@ "webpack-cli": "^3.2.1"

@@ -39,28 +39,2 @@ import { scrap, $ } from '../lib';

it('should exists .title', () => {
const result = scrap(STR_TO_SCRAP, {
hasTitle: $.exists('h1.title')
});
expect(result.hasTitle).toBe(true);
});
it('should not exists .castle', () => {
const result = scrap(STR_TO_SCRAP, {
hasCastle: $.exists('.castle')
});
expect(result.hasCastle).toBe(false);
});
it('should exists .msg inside list', () => {
const result = scrap(STR_TO_SCRAP, {
items: $.list('li', {
hasMsg: $.exists('span.msg')
})
});
expect(result.items.length).toBe(3);
expect(result.items[0].hasMsg).toBe(false);
expect(result.items[1].hasMsg).toBe(true);
expect(result.items[2].hasMsg).toBe(false);
});
it('should scrap text from <li><span/>', () => {

@@ -86,9 +60,2 @@ const result = scrap(STR_TO_SCRAP, {

it('should use custom selector', () => {
const result = scrap(STR_TO_SCRAP, {
title: $.select('h1', (el) => el.text())
});
expect(result.title).toBe('Hello');
});
it('should get list of texts', () => {

@@ -130,25 +97,2 @@ const result = scrap(STR_TO_SCRAP, {

it('should get link from an <a/> element', () => {
const result = scrap(STR_TO_SCRAP, {
link: $.link('a')
});
expect(result.link).toBe('/read-more');
});
it('should not get link from non-existing element', () => {
const result = scrap(STR_TO_SCRAP, {
link: $.link('tr')
});
expect(result.link).toBeUndefined();
});
it('should use predicate filter on list selector', () => {
const result = scrap(STR_TO_SCRAP, {
items: $.list('span', {
msg: $.text('')
}, (el) => el.hasClass('msg'))
});
expect(result.items[0].msg).toBe('Ciao');
});
it('should use only selector to scrap title', () => {

@@ -165,12 +109,2 @@ const title = scrap(STR_TO_SCRAP, $.text('.title'));

it('should use truthy condition', () => {
const result = scrap(STR_TO_SCRAP, $.if('.title', (el) => !!el, $.text('.title'), $.text('.msg')));
expect(result).toBe('Hello');
});
it('should use falsey condition', () => {
const result = scrap(STR_TO_SCRAP, $.if('.notexisting', (el) => !el, $.text('.title'), { msg: $.text('.msg') }));
expect(result).toEqual({ msg: 'Ciao' });
});
});

@@ -20,11 +20,2 @@ import { scrap, $ } from "../../lib";

});
it("should use predicate to only scrap .msg", () => {
const result = scrap(
html,
$.list("span", $.text(""), el => el.hasClass("msg"))
);
expect(result.length).toBe(1);
expect(result).toEqual(["Ciao"]);
});
});

@@ -11,6 +11,6 @@ import { readFileSync } from "fs";

text: $.text("li > a"),
link: $.link("li > a"),
link: $.attr("li > a", "href"),
submenu: $.list("ul.dropdown-menu>li", {
text: $.text("li > a"),
link: $.link("li > a")
link: $.attr("li > a", "href")
})

@@ -17,0 +17,0 @@ }),

@@ -10,7 +10,2 @@ import { scrap, $ } from "../../lib";

it("should scrap href attr from an <a/> and split it by -", () => {
const result = scrap(html, $.attr("a", "href", attr => attr.split("-")));
expect(result).toEqual(["/read", "more"]);
});
it("should scrap data-extra from <div/>", () => {

@@ -17,0 +12,0 @@ const result = scrap(html, $.attr(".footer", "data-extra"));

@@ -10,7 +10,2 @@ import { scrap, $ } from "../../lib";

it("should count <li/> elements and convert it to string", () => {
const result = scrap(html, $.count("li", count => count.toString()));
expect(result).toBe("3");
});
it("should count <h1/>", () => {

@@ -17,0 +12,0 @@ const result = scrap(html, $.count("h1"));

@@ -10,15 +10,2 @@ import { scrap, $ } from "../../lib";

it("should get html from <ul/> and get lenght", () => {
const result = scrap(
html,
$.html("ul>li:first-child", html => html.length)
);
expect(result).toBe(22);
});
// it("should not get html from non exists element", () => {
// const result = scrap(html, $.html('h3'));
// expect(result).toBe(null);
// });
it("should count using query", () => {

@@ -25,0 +12,0 @@ const result = scrap(html, {

@@ -10,12 +10,2 @@ import { scrap, $ } from "../../lib";

it("should get text from .msg and get length", () => {
const result = scrap(html, $.text(".msg", text => text.length));
expect(result).toBe(4);
});
// it("should get text from <ul/>", () => {
// const result = scrap(html, Q.text('ul'));
// expect(result).toBe('');
// });
it("should not get text from non existing element", () => {

@@ -22,0 +12,0 @@ const result = scrap(html, $.text("h3"));

@@ -6,3 +6,3 @@ {

"module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */
"lib": ["es2015","es2016","es2017"], /* Specify library files to be included in the compilation. */
"lib": ["es2015","es2016","es2017","es2018"], /* Specify library files to be included in the compilation. */
// "allowJs": true, /* Allow javascript files to be compiled. */

@@ -17,2 +17,3 @@ // "checkJs": true, /* Report errors in .js files. */

"removeComments": false, /* Do not emit comments to output. */
"resolveJsonModule": true,
// "noEmit": true, /* Do not emit outputs. */

@@ -19,0 +20,0 @@ // "importHelpers": true, /* Import emit helpers from 'tslib'. */

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc