microdata-node
Advanced tools
Comparing version 0.0.5 to 0.0.6
161
lib/index.js
'use strict'; | ||
var urlUtil = require('url'); | ||
var Item = require('./Item'); | ||
function Item(spec) { | ||
var typeString = spec.type && spec.type.trim(); | ||
if (typeString) { | ||
this.type = typeString.split(/\s+/); | ||
} | ||
function unique(array) { | ||
return array.reduce(function (uniques, item) { | ||
if (uniques.indexOf(item) < 0) { | ||
uniques.push(item); | ||
} | ||
return uniques; | ||
}, []); | ||
} | ||
var idString = spec.id && spec.id.trim(); | ||
if (idString) { | ||
this.id = idString; | ||
function splitUnique(string) { | ||
string = string && string.trim(); | ||
if (string) { | ||
return unique(string.split(/\s+/)); | ||
} else { | ||
return undefined; | ||
} | ||
this.properties = {}; | ||
} | ||
Item.prototype.addProperty = function addProperty(name, value) { | ||
if (!this.properties[name]) this.properties[name] = []; | ||
this.properties[name].push(value); | ||
}; | ||
function parse($, config) { | ||
Item.prototype.serialize = function serialize() { | ||
var item = { | ||
properties: {} | ||
}; | ||
config = config || {}; | ||
var items = []; | ||
if (this.type) { | ||
item.type = this.type; | ||
} | ||
function walkNode(node, currentItem) { | ||
var prop = splitUnique(node.attr('itemprop')); | ||
if (this.id) { | ||
item.id = this.id; | ||
if (prop && currentItem) { | ||
prop.forEach(function (propName) { | ||
var value = parseProperty(node); | ||
currentItem.addProperty(propName, value); | ||
}); | ||
} else if (node.is('[itemscope]')) { | ||
var newItem = parseItem(node); | ||
items.push(newItem); | ||
} else { | ||
node.children().each(function (i, child) { | ||
walkNode($(child), currentItem); | ||
}); | ||
} | ||
} | ||
Object.keys(this.properties).forEach(function (propName) { | ||
var values = this.properties[propName]; | ||
function parseItem(node) { | ||
var item = new Item({ | ||
type: splitUnique(node.attr('itemtype')), | ||
id: node.attr('itemid') | ||
}); | ||
var serializedValues = values.map(function (value) { | ||
if (value instanceof Item) { | ||
return value.serialize(); | ||
} else { | ||
return value; | ||
} | ||
}, this); | ||
node.children().each(function (i, child) { | ||
walkNode($(child), item); | ||
}); | ||
item.properties[propName] = serializedValues; | ||
}, this); | ||
return item; | ||
} | ||
return item; | ||
}; | ||
function resolveAttribute(node, attr) { | ||
return node.attr(attr) || ''; | ||
} | ||
function parse($, config) { | ||
config = config || {}; | ||
function parseValue(node) { | ||
function resolveAttribute(attr) { | ||
return node.attr(attr) || ''; | ||
function resolveUrlAttribute(node, attr) { | ||
var relative = node.attr(attr); | ||
if (relative && config.base) { | ||
return urlUtil.resolve(config.base, relative) || ''; | ||
} else { | ||
return relative || ''; | ||
} | ||
} | ||
function resolveUrlAttribute(attr) { | ||
var relative = node.attr(attr); | ||
if (relative && config.base) { | ||
return urlUtil.resolve(config.base, relative) || ''; | ||
} else { | ||
return relative || ''; | ||
} | ||
} | ||
if (node.is('meta')) { | ||
return resolveAttribute('content'); | ||
function parseProperty(node) { | ||
if (node.is('[itemscope]')) { | ||
return parseItem(node); | ||
} else if (node.is('meta')) { | ||
return resolveAttribute(node, 'content'); | ||
} else if (node.is('audio,embed,iframe,img,source,track,video')) { | ||
return resolveUrlAttribute('src'); | ||
return resolveUrlAttribute(node, 'src'); | ||
} else if (node.is('a,area,link')) { | ||
return resolveUrlAttribute('href'); | ||
return resolveUrlAttribute(node, 'href'); | ||
} else if (node.is('object')) { | ||
return resolveUrlAttribute('data'); | ||
return resolveUrlAttribute(node, 'data'); | ||
} else if (node.is('data')) { | ||
return resolveAttribute('value'); | ||
return resolveAttribute(node, 'value'); | ||
} else if (node.is('meter')) { | ||
return resolveAttribute('value'); | ||
return resolveAttribute(node, 'value'); | ||
} else if (node.is('time')) { | ||
return resolveAttribute('datetime'); | ||
return resolveAttribute(node, 'datetime'); | ||
} else { | ||
return node.text() || ''; | ||
} | ||
return node.text() || ''; | ||
} | ||
var items = []; | ||
walkNode($.root()); | ||
function walkNode(node, parentItem) { | ||
var currentItem = parentItem; | ||
var newItem = null; | ||
if (node.attr('itemscope') !== undefined) { | ||
newItem = new Item({ | ||
type: node.attr('itemtype'), | ||
id: node.attr('itemid') | ||
}); | ||
} | ||
var prop = node.attr('itemprop'); | ||
if (prop === undefined) { | ||
if (newItem) { | ||
items.push(newItem); | ||
} | ||
} else if (currentItem) { | ||
var value = newItem || parseValue(node); | ||
currentItem.addProperty(prop, value); | ||
} | ||
node.children().each(function (i, child) { | ||
walkNode($(child), newItem || currentItem); | ||
}); | ||
} | ||
walkNode($.root(), null); | ||
return { | ||
@@ -125,0 +98,0 @@ items: items.map(function (item) { |
{ | ||
"name": "microdata-node", | ||
"version": "0.0.5", | ||
"version": "0.0.6", | ||
"description": "Cheerio based microdata parser", | ||
@@ -5,0 +5,0 @@ "main": "lib", |
@@ -18,3 +18,3 @@ /* global describe, it */ | ||
it('finds an empty scope', function () { | ||
it('finds an empty item', function () { | ||
var $ = cheerio.load('<div itemscope>hello</div>'); | ||
@@ -26,6 +26,8 @@ var result = parser.parse($); | ||
assert.isUndefined(result.items[0].type); | ||
assert.isUndefined(result.items[0].id); | ||
assert.isObject(result.items[0].properties); | ||
assert.lengthOf(Object.keys(result.items[0].properties), 0); | ||
}); | ||
it('finds a scope with a type', function () { | ||
it('finds an item with a type', function () { | ||
var $ = cheerio.load('<div itemscope itemtype="http://schema.org/Person">hello</div>'); | ||
@@ -38,3 +40,32 @@ var result = parser.parse($); | ||
it('finds a scope within an element', function () { | ||
it('finds an item with a global id', function () { | ||
var $ = cheerio.load('<div itemscope itemid="urn:isbn:0-330-34032-8">hello</div>'); | ||
var result = parser.parse($); | ||
assert.isArray(result.items); | ||
assert.lengthOf(result.items, 1); | ||
assert.deepEqual(result.items[0].id, 'urn:isbn:0-330-34032-8'); | ||
}); | ||
it('finds an item with multiple types', function () { | ||
var $ = cheerio.load('<div itemscope itemtype=" http://schema.org/Person http://schema.org/PostalAddress ">hello</div>'); | ||
var result = parser.parse($); | ||
assert.isArray(result.items); | ||
assert.lengthOf(result.items, 1); | ||
assert.deepEqual(result.items[0].type, [ | ||
'http://schema.org/Person', | ||
'http://schema.org/PostalAddress' | ||
]); | ||
}); | ||
it('finds an item with type defined twice', function () { | ||
var $ = cheerio.load('<div itemscope itemtype="http://schema.org/Person http://schema.org/Person">hello</div>'); | ||
var result = parser.parse($); | ||
assert.isArray(result.items); | ||
assert.lengthOf(result.items, 1); | ||
assert.deepEqual(result.items[0].type, [ | ||
'http://schema.org/Person' | ||
]); | ||
}); | ||
it('finds an item within an element', function () { | ||
var $ = cheerio.load('<div><div itemscope itemtype="http://schema.org/Person">hello</div></div>'); | ||
@@ -47,3 +78,3 @@ var result = parser.parse($); | ||
it('finds multiple scopes within an element', function () { | ||
it('finds multiple items within an element', function () { | ||
var $ = cheerio.load( | ||
@@ -62,3 +93,3 @@ '<div>' + | ||
it('finds a scope with properties', function () { | ||
it('finds an item with properties', function () { | ||
var $ = cheerio.load( | ||
@@ -79,3 +110,3 @@ '<div itemscope itemtype="http://schema.org/Person">' + | ||
it('finds a scope with childscopes', function () { | ||
it('finds an item with childitems', function () { | ||
var $ = cheerio.load( | ||
@@ -95,10 +126,13 @@ '<div itemscope itemtype="http://schema.org/Person">' + | ||
assert.deepEqual(result.items[0].properties.address1, [{ | ||
type: ['http://schema.org/PostalAddress'], | ||
properties: { street: ['street1'] } | ||
}]); | ||
assert.deepEqual(result.items[0].properties.address2, [{ | ||
type: ['http://schema.org/PostalAddress'], | ||
properties: { street: ['street2'] } | ||
}]); | ||
assert.deepEqual(result.items[0].properties, { | ||
address1: [{ | ||
type: ['http://schema.org/PostalAddress'], | ||
properties: { street: ['street1'] } | ||
}], | ||
address2: [{ | ||
type: ['http://schema.org/PostalAddress'], | ||
properties: { street: ['street2'] } | ||
}] | ||
}); | ||
}); | ||
@@ -121,2 +155,43 @@ | ||
it('handles empty propertynames', function () { | ||
var $ = cheerio.load( | ||
'<div itemscope itemtype="http://schema.org/Person">' + | ||
'<div itemprop="">Jan</div>' + | ||
'</div>' | ||
); | ||
var result = parser.parse($); | ||
assert.isArray(result.items); | ||
assert.lengthOf(result.items, 1); | ||
assert.deepEqual(result.items[0].properties, {}); | ||
}); | ||
it('handles multiple propertynames', function () { | ||
var $ = cheerio.load( | ||
'<div itemscope itemtype="http://schema.org/Person">' + | ||
'<div itemprop="name additionalName">Jan</div>' + | ||
'</div>' | ||
); | ||
var result = parser.parse($); | ||
assert.isArray(result.items); | ||
assert.lengthOf(result.items, 1); | ||
assert.deepEqual(result.items[0].properties, { | ||
name: ['Jan'], | ||
additionalName: ['Jan'] | ||
}); | ||
}); | ||
it('handles duplicated propertynames', function () { | ||
var $ = cheerio.load( | ||
'<div itemscope itemtype="http://schema.org/Person">' + | ||
'<div itemprop=" name name ">Jan</div>' + | ||
'</div>' | ||
); | ||
var result = parser.parse($); | ||
assert.isArray(result.items); | ||
assert.lengthOf(result.items, 1); | ||
assert.deepEqual(result.items[0].properties, { | ||
name: ['Jan'] | ||
}); | ||
}); | ||
}); |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: The package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
13206
9
301
1