Socket
Socket
Sign in · Demo · Install

web-auto-extractor

Package Overview
Dependencies
99
Maintainers
1
Versions
38
Alerts
File Explorer

Advanced tools

Install Socket

Detect and block malicious and high-risk dependencies

Install

Comparing version 1.0.0-beta.2 to 1.0.0-beta.3

12

dist/parsers/jsonld-parser.js

@@ -11,7 +11,10 @@ 'use strict';

  var $html = (0, _utils.getCheerioObject)(html);
- var jsonldData = [];
+ var jsonldData = {};
  $html('script[type="application/ld+json"]').each(function (index, item) {
  try {
- jsonldData[index] = JSON.parse((0, _cheerio2.default)(item).text());
+ var parsedJSON = JSON.parse((0, _cheerio2.default)(item).text());
+ var type = parsedJSON['@type'];
+ jsonldData[type] = jsonldData[type] || [];
+ jsonldData[type].push(parsedJSON);
  } catch (e) {

@@ -22,6 +25,3 @@ console.log('Error in jsonld parse - ' + e);

- return {
- data: jsonldData,
- unnormalizedData: null
- };
+ return jsonldData;
  };

@@ -28,0 +28,0 @@

@@ -18,24 +18,13 @@ 'use strict';

  function getPropValue(tagName, attribs, TYPE, PROP) {
- var value = void 0,
- attr = void 0;
  if (attribs[TYPE]) {
- value = null;
- attr = null;
+ return null;
  } else if (tagName === 'a' || tagName === 'link') {
- value = attribs.href.trim();
- attr = 'href';
+ return attribs.href.trim();
  } else if (attribs.content) {
- value = attribs.content.trim();
- attr = 'content';
+ return attribs.content.trim();
  } else if (attribs[PROP] === 'image' && attribs.src) {
- value = attribs.src.trim();
- attr = 'src';
+ return attribs.src.trim();
  } else {
- value = null;
- attr = '@text';
+ return null;
  }
- return {
- value: value,
- attr: attr
- };
  }

@@ -75,5 +64,4 @@

  var tags = [];
- var props = [];
- var path = [];
  var topLevelScope = {};
+ var textForProp = null;

@@ -84,4 +72,2 @@ var parser = new _htmlparser2.default.Parser({

  var tag = false;
- var parentScope = void 0,
- scopeIndex = void 0;

@@ -91,9 +77,6 @@ if (attribs[TYPE]) {

  var newScope = {};
- parentScope = currentScope;
  currentScope[attribs[PROP]] = currentScope[attribs[PROP]] || [];
- scopeIndex = currentScope[attribs[PROP]].length;
  currentScope[attribs[PROP]].push(newScope);
  currentScope = newScope;
  } else {
- parentScope = topLevelScope;
  currentScope = {};

@@ -106,3 +89,2 @@

  topLevelScope[type] = topLevelScope[type] || [];
- scopeIndex = topLevelScope[type].length;
  topLevelScope[type].push(currentScope);

@@ -113,10 +95,4 @@ }

  if (currentScope) {
- var _getPropValue = getPropValue(tagName, attribs, TYPE, PROP);
+ var value = getPropValue(tagName, attribs, TYPE, PROP);
- var value = _getPropValue.value;
- var attr = _getPropValue.attr;
- var selector = void 0,
- name = void 0;
  if (attribs[TYPE]) {

@@ -131,37 +107,12 @@ var _getType2 = getType(attribs[TYPE]);

  currentScope['@type'] = _type;
- name = attribs[PROP] ? attribs[PROP] : _type;
- var parentSelector = parentScope['@selector'] ? parentScope['@selector'] + ' ' : '';
- var selfSelector = attribs[PROP] ? '[' + PROP + '="' + attribs[PROP] + '"]' : '[' + TYPE + '="' + attribs[TYPE] + '"]';
- currentScope['@selector'] = parentSelector + selfSelector + (':eq(' + scopeIndex + ')');
  tag = TYPE;
- selector = {
- select: currentScope['@selector'],
- extract: {
- attr: attr
- }
- };
- props.push({
- name: name,
- value: value,
- selector: selector,
- path: path.concat(name, scopeIndex)
- });
- path.push(name, scopeIndex);
  scopes.push(currentScope);
  } else if (attribs[PROP]) {
- selector = {
- select: currentScope['@selector'] + ' ' + ('[' + PROP + '="' + attribs[PROP] + '"]') + ':eq(0)',
- extract: {
- attr: attr
- }
- };
- value = !value && selector ? $(selector.select).text().trim() : value;
- currentScope[attribs[PROP]] = value;
- name = attribs[PROP];
- props.push({
- name: name,
- value: value,
- selector: selector,
- path: path.concat(name)
- });
+ if (!value) {
+ tag = PROP;
+ currentScope[attribs[PROP]] = '';
+ textForProp = attribs[PROP];
+ } else {
+ currentScope[attribs[PROP]] = value;
+ }
  }

@@ -171,8 +122,13 @@ }

},
ontext: function ontext(text) {
if (textForProp) {
scopes[scopes.length - 1][textForProp] += text.trim();
}
},
onclosetag: function onclosetag(tagname) {
var tag = tags.pop();
if (tag) {
if (tag === TYPE) {
(function () {
var scope = scopes.pop();
delete scope['@selector'];
if (!scope['@context']) {

@@ -186,5 +142,5 @@ delete scope['@context'];

  });
- path.pop();
- path.pop();
  })();
+ } else if (tag === PROP) {
+ textForProp = false;
  }

@@ -196,3 +152,3 @@ },

  onend: function onend() {
- resolve({ data: topLevelScope, unnormalizedData: props });
+ resolve(topLevelScope);
  }

@@ -199,0 +155,0 @@ });

  {
  "name": "web-auto-extractor",
- "version": "1.0.0-beta.2",
+ "version": "1.0.0-beta.3",
  "description": "Automatically extracts structured information from webpages",

@@ -5,0 +5,0 @@ "main": "dist/index.js",

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc