Comparing version 1.3.2 to 1.3.3
@@ -1,22 +0,11 @@ | ||
/// <reference types="cheerio" /> | ||
export { scrap } from './scrapper'; | ||
export { Query, Selector, TypeOfSelector, TypeOfQuery } from './types'; | ||
export { ScrapQuery, ScrapSelector } from './scrapper'; | ||
import { attrCreator as attr } from './selectors/attr'; | ||
import { existsCreator as exists } from './selectors/exists'; | ||
import { htmlCreator as html } from './selectors/html'; | ||
import { textCreator as text } from './selectors/text'; | ||
import { selectCreator as select } from "./selectors/select"; | ||
import { countCreator as count } from "./selectors/count"; | ||
import { linkCreator as link } from "./selectors/link"; | ||
import { AnySelector, text, count, attr, html } from "./selectors"; | ||
import { Query, GetResult } from "./helpers"; | ||
import { AnyControl, list } from "./controls"; | ||
export declare function scrap<Q extends Query | AnyControl | AnySelector>(html: string, query: Q): GetResult<Q>; | ||
export declare const $: { | ||
if: <T extends import("./selectors/attr").Attr<any> | import("./selectors/exists").Exists<any> | import("./selectors/html").Html<any> | Query | import("./controls/list").List<any> | import("./selectors/select").Select<any> | import("./controls/if").If<any, any, any, any> | import("./selectors/text").Text<any> | import("./selectors/count").Count<any> | import("./selectors/link").Link<any>, F extends import("./selectors/attr").Attr<any> | import("./selectors/exists").Exists<any> | import("./selectors/html").Html<any> | Query | import("./controls/list").List<any> | import("./selectors/select").Select<any> | import("./controls/if").If<any, any, any, any> | import("./selectors/text").Text<any> | import("./selectors/count").Count<any> | import("./selectors/link").Link<any>>(selector: string, condition: (el: Cheerio) => boolean, truthy: T, falsey: F) => import("./controls/if").If<T, F, import("./types").TypeOf<T>, import("./types").TypeOf<F>>; | ||
list: <Q extends import("./selectors/attr").Attr<any> | import("./selectors/exists").Exists<any> | import("./selectors/html").Html<any> | Query | import("./controls/list").List<any> | import("./selectors/select").Select<any> | import("./controls/if").If<any, any, any, any> | import("./selectors/text").Text<any> | import("./selectors/count").Count<any> | import("./selectors/link").Link<any>>(selector: string, data: Q, predicate?: ((el: Cheerio, index: number) => boolean) | undefined) => import("./controls/list").List<Q extends Query ? TypeOfQuery<Q> : Q extends Selector ? TypeOfSelector<Q> : never>; | ||
attr: typeof attr; | ||
exists: typeof exists; | ||
html: typeof html; | ||
text: typeof text; | ||
select: typeof select; | ||
count: typeof count; | ||
link: typeof link; | ||
list: typeof list; | ||
html: typeof html; | ||
}; |
"use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
var scrapper_1 = require("./scrapper"); | ||
exports.scrap = scrapper_1.scrap; | ||
// Import | ||
var attr_1 = require("./selectors/attr"); | ||
var exists_1 = require("./selectors/exists"); | ||
var html_1 = require("./selectors/html"); | ||
var text_1 = require("./selectors/text"); | ||
var select_1 = require("./selectors/select"); | ||
var count_1 = require("./selectors/count"); | ||
var link_1 = require("./selectors/link"); | ||
var list_1 = require("./controls/list"); | ||
var if_1 = require("./controls/if"); | ||
var cheerio_1 = require("cheerio"); | ||
var selectors_1 = require("./selectors"); | ||
var controls_1 = require("./controls"); | ||
/**
 * Decides whether `test` is a selector/control descriptor rather than a
 * nested query object.
 *
 * A descriptor is recognised purely by shape: it exposes both a `type` and a
 * `select` property (own or inherited, because the `in` operator is used).
 *
 * @param {*} test Value taken from a query, or the query itself.
 * @returns {boolean} `true` when both properties are present, otherwise `false`.
 * @throws {TypeError} When `test` is `null`, `undefined` or a primitive. The
 *   `in` operator raises a TypeError for such operands anyway; raising it
 *   explicitly produces a message that names the actual problem.
 */
function isSelectorOrControl(test) {
    var kind = (test === null) ? "null" : typeof test;
    if (kind !== "object" && kind !== "function") {
        throw new TypeError("Invalid query value: expected a selector, control or nested query object, got " + kind);
    }
    return ("type" in test) && ("select" in test);
}
/**
 * Evaluates one selector or control descriptor against `context`.
 *
 * Dispatches on `selector.type`:
 *  - "text":  text of the selected elements, or of `context` itself when
 *             `selector.select` is the empty string.
 *  - "attr":  trimmed value of attribute `selector.attr`; a non-string result
 *             (attribute or element missing) is returned unchanged.
 *  - "count": number of elements matched by `selector.select`.
 *  - "list":  array with one entry per matched element, each produced by
 *             evaluating `selector.query` with that element as context.
 *  - "html":  inner HTML of the selected elements (trimmed when it is a
 *             string), or of `context` itself when `selector.select` is "".
 *
 * @param $ Function used to run selections (`$(selector, context)`).
 * @param context Element(s) the selection is scoped to.
 * @param selector Descriptor carrying `type`, `select` and type-specific fields.
 * @returns The scraped value; its shape depends on `selector.type`.
 * @throws {Error} When `selector.type` is not one of the types listed above.
 */
function scrapSelector($, context, selector) {
    switch (selector.type) {
        case "text": {
            // An empty selector string means "use the context itself".
            return (selector.select === "")
                ? $(context).text()
                : $(selector.select, context).text();
        }
        case "attr": {
            var target = (selector.select === "")
                ? context
                : $(selector.select, context);
            var value = target.attr(selector.attr);
            // `.attr()` does not return a string when nothing matched or the
            // attribute is absent; pass that result through instead of
            // crashing on `.trim()` (mirrors the guard in the "html" case).
            return (typeof value === "string")
                ? value.trim()
                : value;
        }
        case "count": {
            return $(selector.select, context).length;
        }
        case "list": {
            var items = [];
            var matches = $(selector.select, context);
            for (var i = 0; i < matches.length; i++) {
                var item = matches.eq(i);
                // The per-item query is either a single descriptor or a
                // nested query object.
                items.push(isSelectorOrControl(selector.query)
                    ? scrapSelector($, item, selector.query)
                    : scrapQuery($, item, selector.query, {}));
            }
            return items;
        }
        case "html": {
            if (selector.select === "") {
                // NOTE(review): returned untrimmed, unlike the branch below —
                // confirm this asymmetry is intended.
                return $(context).html();
            }
            var markup = $(selector.select, context).html();
            return (typeof markup === "string")
                ? markup.trim()
                : markup;
        }
        default: {
            throw new Error("Undefined selector \"" + JSON.stringify(selector) + "\"");
        }
    }
}
/**
 * Resolves every own enumerable property of `query` and stores the outcome
 * under the same key on `ref`.
 *
 * A property holding a selector/control descriptor is evaluated with
 * `scrapSelector`; any other property is treated as a nested query and
 * resolved recursively into a fresh object.
 *
 * @param $ Selection function forwarded to the evaluators.
 * @param context Element(s) the selections are scoped to.
 * @param query Object whose properties are descriptors or nested queries.
 * @param ref Object that receives the results; it is mutated and returned.
 * @returns The same `ref` object, populated.
 */
function scrapQuery($, context, query, ref) {
    var props = Object.keys(query);
    for (var i = 0; i < props.length; i++) {
        var prop = props[i];
        var entry = query[prop];
        ref[prop] = isSelectorOrControl(entry)
            ? scrapSelector($, context, entry)
            : scrapQuery($, context, entry, {});
    }
    return ref;
}
/**
 * Entry point: loads `html` with cheerio and evaluates `query` against the
 * document root.
 *
 * @param html Markup to load.
 * @param query Either a single selector/control descriptor or a query object
 *   whose properties are descriptors / nested queries.
 * @returns The value produced by `scrapSelector` for a descriptor, or the
 *   populated result object produced by `scrapQuery` for a query object.
 */
function scrap(html, query) {
    var $ = cheerio_1.load(html);
    var root = $.root();
    return isSelectorOrControl(query)
        ? scrapSelector($, root, query)
        : scrapQuery($, root, query, {});
}
exports.scrap = scrap; | ||
exports.$ = { | ||
if: if_1.ifCreator, | ||
list: list_1.listCreator, | ||
attr: attr_1.attrCreator, | ||
exists: exists_1.existsCreator, | ||
html: html_1.htmlCreator, | ||
text: text_1.textCreator, | ||
select: select_1.selectCreator, | ||
count: count_1.countCreator, | ||
link: link_1.linkCreator | ||
attr: selectors_1.attr, | ||
text: selectors_1.text, | ||
count: selectors_1.count, | ||
list: controls_1.list, | ||
html: selectors_1.html | ||
}; | ||
//# sourceMappingURL=index.js.map |
112
lib/index.ts
@@ -1,28 +0,96 @@ | ||
export { scrap } from './scrapper'; | ||
import { load } from "cheerio"; | ||
import { AnySelector, text, count, attr, html } from "./selectors"; | ||
import { Query, GetResult } from "./helpers"; | ||
import { AnyControl, list } from "./controls"; | ||
export { Query, Selector, TypeOfSelector, TypeOfQuery } from './types'; | ||
export { ScrapQuery, ScrapSelector } from './scrapper'; | ||
/**
 * Type guard telling selector/control descriptors apart from nested query
 * objects.
 *
 * A descriptor is recognised purely by shape: it exposes both a `type` and a
 * `select` property (own or inherited, because the `in` operator is used).
 *
 * @param test Value taken from a query, or the query itself.
 * @returns `true` when both properties are present, otherwise `false`.
 * @throws TypeError when `test` is `null`, `undefined` or a primitive. The
 *   `in` operator raises a TypeError for such operands anyway; raising it
 *   explicitly produces a message that names the actual problem.
 */
function isSelectorOrControl(test: any): test is (AnySelector | AnyControl) {
  const kind = test === null ? "null" : typeof test;
  if (kind !== "object" && kind !== "function") {
    throw new TypeError(
      `Invalid query value: expected a selector, control or nested query object, got ${kind}`
    );
  }
  return "type" in test && "select" in test;
}
// Import | ||
import { attrCreator as attr } from './selectors/attr'; | ||
import { existsCreator as exists } from './selectors/exists'; | ||
import { htmlCreator as html } from './selectors/html'; | ||
import { textCreator as text } from './selectors/text'; | ||
import { selectCreator as select } from "./selectors/select"; | ||
import { countCreator as count } from "./selectors/count"; | ||
import { linkCreator as link } from "./selectors/link"; | ||
/**
 * Evaluates one selector or control descriptor against `context`.
 *
 * Dispatches on `selector.type`:
 *  - `"text"`:  text of the selected elements, or of `context` itself when
 *               `selector.select` is the empty string.
 *  - `"attr"`:  trimmed value of attribute `selector.attr`; a non-string
 *               result (attribute or element missing) is returned unchanged.
 *  - `"count"`: number of elements matched by `selector.select`.
 *  - `"list"`:  array with one entry per matched element, each produced by
 *               evaluating `selector.query` with that element as context.
 *  - `"html"`:  inner HTML of the selected elements (trimmed when it is a
 *               string), or of `context` itself when `selector.select` is "".
 *
 * @param $ Cheerio instance used to run selections.
 * @param context Element(s) the selection is scoped to.
 * @param selector Descriptor carrying `type`, `select` and type-specific fields.
 * @returns The scraped value; its shape depends on `selector.type`.
 * @throws Error when `selector.type` is not one of the types listed above.
 */
function scrapSelector($: CheerioStatic, context: Cheerio, selector: AnySelector | AnyControl) {
  switch (selector.type) {
    case "text": {
      // An empty selector string means "use the context itself".
      return selector.select === ""
        ? $(context).text()
        : $(selector.select, context).text();
    }
    case "attr": {
      const target = selector.select === "" ? context : $(selector.select, context);
      const value = target.attr(selector.attr);
      // At runtime `.attr()` does not return a string when nothing matched or
      // the attribute is absent; pass that result through instead of crashing
      // on `.trim()` (mirrors the guard in the "html" case).
      return typeof value === "string" ? value.trim() : value;
    }
    case "count": {
      return $(selector.select, context).length;
    }
    case "list": {
      const items: any[] = [];
      const matches = $(selector.select, context);
      for (let i = 0; i < matches.length; i++) {
        const item = matches.eq(i);
        // The per-item query is either a single descriptor or a nested
        // query object.
        items.push(
          isSelectorOrControl(selector.query)
            ? scrapSelector($, item, selector.query)
            : scrapQuery($, item, selector.query, {})
        );
      }
      return items;
    }
    case "html": {
      if (selector.select === "") {
        // NOTE(review): returned untrimmed, unlike the branch below —
        // confirm this asymmetry is intended.
        return $(context).html();
      }
      const markup = $(selector.select, context).html();
      return typeof markup === "string" ? markup.trim() : markup;
    }
    default: {
      throw new Error(`Undefined selector "${JSON.stringify(selector)}"`);
    }
  }
}
import { listCreator as List } from './controls/list'; | ||
import { ifCreator as If } from './controls/if'; | ||
/**
 * Resolves every own enumerable property of `query` and stores the outcome
 * under the same key on `ref`.
 *
 * A property holding a selector/control descriptor is evaluated with
 * `scrapSelector`; any other property is treated as a nested query and
 * resolved recursively into a fresh object.
 *
 * @param $ Cheerio instance forwarded to the evaluators.
 * @param context Element(s) the selections are scoped to.
 * @param query Object whose properties are descriptors or nested queries.
 * @param ref Object that receives the results; it is mutated and returned.
 * @returns The same `ref` object, populated.
 */
function scrapQuery<Q extends Query>($: CheerioStatic, context: Cheerio, query: Q, ref: any): GetResult<Q> {
  for (const prop of Object.keys(query)) {
    const entry = query[prop];
    ref[prop] = isSelectorOrControl(entry)
      ? scrapSelector($, context, entry)
      : scrapQuery($, context, entry, {});
  }
  return ref;
}
/**
 * Entry point: loads `html` with cheerio and evaluates `query` against the
 * document root.
 *
 * @param html Markup to load.
 * @param query Either a single selector/control descriptor or a query object
 *   whose properties are descriptors / nested queries.
 * @returns The value produced by `scrapSelector` for a descriptor, or the
 *   populated result object produced by `scrapQuery` for a query object.
 */
export function scrap<Q extends Query | AnyControl | AnySelector>(
  html: string,
  query: Q
): GetResult<Q> {
  const $ = load(html);
  const root = $.root();
  // The evaluators' return types are not expressed in terms of GetResult<Q>,
  // so the result is carried as `any`, exactly as the casts it replaces did.
  const scraped: any = isSelectorOrControl(query)
    ? scrapSelector($, root, query)
    : scrapQuery($, root, query as Query, {});
  return scraped;
}
export const $ = { | ||
if: If, | ||
list: List, | ||
attr, | ||
exists, | ||
html, | ||
text, | ||
select, | ||
count, | ||
link | ||
attr, | ||
text, | ||
count, | ||
list, | ||
html | ||
}; |
{ | ||
"name": "scrapq", | ||
"version": "1.3.2", | ||
"version": "1.3.3", | ||
"description": "Lightweight Typescript library for scrapping html", | ||
"main": "./dist/index.js", | ||
"types": "./dist/index.d.ts", | ||
"typings": "./dist/index.d.ts", | ||
@@ -12,7 +13,5 @@ "scripts": { | ||
"dist": "npm run test; npm run build; npm run build:web", | ||
"build": "tsc", | ||
"build": "tsc && npm run build:web && npm run build:dts", | ||
"build:web": "webpack --mode=production", | ||
"prettify": "npm run prettify:src; npm run prettify:test", | ||
"prettify:src": "prettier --write ./lib/**/*.ts", | ||
"prettify:test": "prettier --write ./test/**/*.ts" | ||
"build:dts": "dts-bundle --name scrapq --main ./dist/index.d.ts --baseDir ./dist/external/" | ||
}, | ||
@@ -33,7 +32,7 @@ "keywords": [ | ||
"@types/node": "^10.5.1", | ||
"dts-bundle": "^0.7.3", | ||
"jasmine": "^3.1.0", | ||
"jasmine-ts": "^0.2.1", | ||
"prettier": "^1.13.7", | ||
"ts-loader": "^5.3.3", | ||
"typescript": "^3.0.3", | ||
"typescript": "^3.4.0-dev.20190321", | ||
"webpack": "^4.29.0", | ||
@@ -40,0 +39,0 @@ "webpack-cli": "^3.2.1" |
@@ -39,28 +39,2 @@ import { scrap, $ } from '../lib'; | ||
it('should exists .title', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
hasTitle: $.exists('h1.title') | ||
}); | ||
expect(result.hasTitle).toBe(true); | ||
}); | ||
it('should not exists .castle', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
hasCastle: $.exists('.castle') | ||
}); | ||
expect(result.hasCastle).toBe(false); | ||
}); | ||
it('should exists .msg inside list', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
items: $.list('li', { | ||
hasMsg: $.exists('span.msg') | ||
}) | ||
}); | ||
expect(result.items.length).toBe(3); | ||
expect(result.items[0].hasMsg).toBe(false); | ||
expect(result.items[1].hasMsg).toBe(true); | ||
expect(result.items[2].hasMsg).toBe(false); | ||
}); | ||
it('should scrap text from <li><span/>', () => { | ||
@@ -86,9 +60,2 @@ const result = scrap(STR_TO_SCRAP, { | ||
it('should use custom selector', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
title: $.select('h1', (el) => el.text()) | ||
}); | ||
expect(result.title).toBe('Hello'); | ||
}); | ||
it('should get list of texts', () => { | ||
@@ -130,25 +97,2 @@ const result = scrap(STR_TO_SCRAP, { | ||
it('should get link from an <a/> element', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
link: $.link('a') | ||
}); | ||
expect(result.link).toBe('/read-more'); | ||
}); | ||
it('should not get link from non-existing element', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
link: $.link('tr') | ||
}); | ||
expect(result.link).toBeUndefined(); | ||
}); | ||
it('should use predicate filter on list selector', () => { | ||
const result = scrap(STR_TO_SCRAP, { | ||
items: $.list('span', { | ||
msg: $.text('') | ||
}, (el) => el.hasClass('msg')) | ||
}); | ||
expect(result.items[0].msg).toBe('Ciao'); | ||
}); | ||
it('should use only selector to scrap title', () => { | ||
@@ -165,12 +109,2 @@ const title = scrap(STR_TO_SCRAP, $.text('.title')); | ||
it('should use truthy condition', () => { | ||
const result = scrap(STR_TO_SCRAP, $.if('.title', (el) => !!el, $.text('.title'), $.text('.msg'))); | ||
expect(result).toBe('Hello'); | ||
}); | ||
it('should use falsey condition', () => { | ||
const result = scrap(STR_TO_SCRAP, $.if('.notexisting', (el) => !el, $.text('.title'), { msg: $.text('.msg') })); | ||
expect(result).toEqual({ msg: 'Ciao' }); | ||
}); | ||
}); |
@@ -20,11 +20,2 @@ import { scrap, $ } from "../../lib"; | ||
}); | ||
it("should use predicate to only scrap .msg", () => { | ||
const result = scrap( | ||
html, | ||
$.list("span", $.text(""), el => el.hasClass("msg")) | ||
); | ||
expect(result.length).toBe(1); | ||
expect(result).toEqual(["Ciao"]); | ||
}); | ||
}); |
@@ -11,6 +11,6 @@ import { readFileSync } from "fs"; | ||
text: $.text("li > a"), | ||
link: $.link("li > a"), | ||
link: $.attr("li > a", "href"), | ||
submenu: $.list("ul.dropdown-menu>li", { | ||
text: $.text("li > a"), | ||
link: $.link("li > a") | ||
link: $.attr("li > a", "href") | ||
}) | ||
@@ -17,0 +17,0 @@ }), |
@@ -10,7 +10,2 @@ import { scrap, $ } from "../../lib"; | ||
it("should scrap href attr from an <a/> and split it by -", () => { | ||
const result = scrap(html, $.attr("a", "href", attr => attr.split("-"))); | ||
expect(result).toEqual(["/read", "more"]); | ||
}); | ||
it("should scrap data-extra from <div/>", () => { | ||
@@ -17,0 +12,0 @@ const result = scrap(html, $.attr(".footer", "data-extra")); |
@@ -10,7 +10,2 @@ import { scrap, $ } from "../../lib"; | ||
it("should count <li/> elements and convert it to string", () => { | ||
const result = scrap(html, $.count("li", count => count.toString())); | ||
expect(result).toBe("3"); | ||
}); | ||
it("should count <h1/>", () => { | ||
@@ -17,0 +12,0 @@ const result = scrap(html, $.count("h1")); |
@@ -10,15 +10,2 @@ import { scrap, $ } from "../../lib"; | ||
it("should get html from <ul/> and get lenght", () => { | ||
const result = scrap( | ||
html, | ||
$.html("ul>li:first-child", html => html.length) | ||
); | ||
expect(result).toBe(22); | ||
}); | ||
// it("should not get html from non exists element", () => { | ||
// const result = scrap(html, $.html('h3')); | ||
// expect(result).toBe(null); | ||
// }); | ||
it("should count using query", () => { | ||
@@ -25,0 +12,0 @@ const result = scrap(html, { |
@@ -10,12 +10,2 @@ import { scrap, $ } from "../../lib"; | ||
it("should get text from .msg and get length", () => { | ||
const result = scrap(html, $.text(".msg", text => text.length)); | ||
expect(result).toBe(4); | ||
}); | ||
// it("should get text from <ul/>", () => { | ||
// const result = scrap(html, Q.text('ul')); | ||
// expect(result).toBe(''); | ||
// }); | ||
it("should not get text from non existing element", () => { | ||
@@ -22,0 +12,0 @@ const result = scrap(html, $.text("h3")); |
@@ -6,3 +6,3 @@ { | ||
"module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */ | ||
"lib": ["es2015","es2016","es2017"], /* Specify library files to be included in the compilation. */ | ||
"lib": ["es2015","es2016","es2017","es2018"], /* Specify library files to be included in the compilation. */ | ||
// "allowJs": true, /* Allow javascript files to be compiled. */ | ||
@@ -17,2 +17,3 @@ // "checkJs": true, /* Report errors in .js files. */ | ||
"removeComments": false, /* Do not emit comments to output. */ | ||
"resolveJsonModule": true, | ||
// "noEmit": true, /* Do not emit outputs. */ | ||
@@ -19,0 +20,0 @@ // "importHelpers": true, /* Import emit helpers from 'tslib'. */ |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
2207363
46
2501