html-metadata
Advanced tools
Comparing version 1.5.0 to 1.6.0
11
index.js
@@ -125,2 +125,13 @@ /** | ||
/** | ||
* Retrieves JSON-LD for given html object | ||
* | ||
* @param {Object} chtml html Cheerio object | ||
* @param {Function} [callback] optional callback function | ||
* @return {Object} BBPromise for JSON-LD | ||
*/ | ||
exports.parseJsonLd = function(chtml, callback){ | ||
return index.parseJsonLd(chtml).nodeify(callback); | ||
}; | ||
/** | ||
* Scrapes OpenGraph data given html object | ||
@@ -127,0 +138,0 @@ * |
@@ -342,2 +342,29 @@ /** | ||
/** | ||
* Returns JSON-LD provided by page given HTML object | ||
* @param {Object} chtml html Cheerio object | ||
* @return {Object} BBPromise for JSON-LD | ||
*/ | ||
exports.parseJsonLd = BBPromise.method(function(chtml) { | ||
var json = []; | ||
var jsonLd = chtml('script[type="application/ld+json"]'); | ||
jsonLd.each(function() { | ||
var contents = chtml(this).text().trim(); | ||
try { | ||
contents = JSON.parse(contents); | ||
} catch (e) { | ||
// Fail silently, just in case there are valid tags | ||
return; | ||
} | ||
json.push(contents); | ||
}); | ||
if (json.length === 0) { | ||
throw new Error("No JSON-LD valid script tags present on page"); | ||
} | ||
return json.length > 1 ? json : json[0]; | ||
}); | ||
/** | ||
* Scrapes OpenGraph data given html object | ||
@@ -559,2 +586,3 @@ * @param {Object} chtml html Cheerio object | ||
'highwirePress': exports.parseHighwirePress, | ||
'jsonLd': exports.parseJsonLd, | ||
'openGraph': exports.parseOpenGraph, | ||
@@ -561,0 +589,0 @@ 'schemaOrg': exports.parseSchemaOrgMicrodata, |
{ | ||
"name": "html-metadata", | ||
"version": "1.5.0", | ||
"version": "1.6.0", | ||
"description": "Scrapes metadata of several different standards", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -6,3 +6,3 @@ html-metadata | ||
The aim of this library is to be a comprehensive source for extracting all html embedded metadata. Currently it supports Schema.org microdata using a third party library, a native BEPress, Dublin Core, Highwire Press, Open Graph, Twitter, EPrints, and COinS implementation, and some general metadata that doesn't belong to a particular standard (for instance, the content of the title tag, or meta description tags). | ||
The aim of this library is to be a comprehensive source for extracting all HTML-embedded metadata. Currently it supports Schema.org microdata using a third-party library; native implementations of BEPress, Dublin Core, Highwire Press, JSON-LD, Open Graph, Twitter, EPrints, and COinS; and some general metadata that doesn't belong to a particular standard (for instance, the content of the title tag, or meta description tags). | ||
@@ -9,0 +9,0 @@ Planned is support for RDFa, AGLS, and other yet unheard of metadata types. Contributions and requests for other metadata types welcome! |
@@ -11,2 +11,3 @@ 'use strict'; | ||
var assert = require('./utils/assert.js'); | ||
var fs = require('fs'); | ||
@@ -102,2 +103,17 @@ | ||
// Fetches a live page and checks that JSON-LD parsing of it fails.
it('should not find JSON-LD, reject promise', function() {
	return preq.get('http://example.com').then(function(response) {
		var page = cheerio.load(response.body);
		return assert.fails(meta.parseJsonLd(page));
	});
});
// Loads a static fixture from disk and checks that JSON-LD parsing of it fails.
it('should reject promise with malformed JSON-LD', function() {
	var fixture = fs.readFileSync('./test/static/turtle_article_errors.html');
	var page = cheerio.load(fixture);
	return assert.fails(meta.parseJsonLd(page));
});
//TODO: Add test for lacking general metadata | ||
@@ -104,0 +120,0 @@ //TODO: Add test for lacking any metadata |
@@ -172,2 +172,31 @@ 'use strict'; | ||
// Runs the scraper against each listed URL and checks that the jsonLd
// result contains an entry with the expected Organization keys.
describe('JSON-LD tests (for types of Organizations)', function() {
	var urls = ['http://www.theguardian.com/us', 'http://jsonld.com/', 'http://www.apple.com/'];

	urls.forEach(function(test) {
		describe(test, function() {
			it('should return an object or array', function() {
				return meta(test)
				.then(function(res) {
					// typeof null is also 'object', so rule it out explicitly
					assert.ok(res.jsonLd !== null && typeof res.jsonLd === 'object');
				});
			});

			it('should get correct JSON-LD data', function() {
				return meta(test)
				.then(function(res) {
					var result = res.jsonLd;
					// Several JSON-LD blocks arrive as an array; pick the
					// first one typed as an Organization
					if (Array.isArray(result)) {
						result = result.filter(function(r) {
							return r && r['@type'] === 'Organization';
						})[0];
					}
					// Fail with a readable assertion instead of a TypeError
					// when no Organization entry was found
					assert.ok(result, 'no JSON-LD Organization entry found for ' + test);
					['@context', '@type', 'url', 'logo'].forEach(function(key) {
						assert.ok(Object.prototype.hasOwnProperty.call(result, key));
					});
				});
			});
		});
	});
});
}); |
@@ -20,3 +20,3 @@ 'use strict'; | ||
it('should get correct info from turtle movie file', function() { | ||
expected = {"dublinCore":{"title":"Turtles of the Jungle","creator":"http://www.example.com/turtlelvr","description":"A 2008 film about jungle turtles.","date":"2012-02-04 12:00:00","type":"Image.Moving"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"openGraph":{"locale":"en_US","type":"video.movie","title":"Turtles of the Jungle","description":"A 2008 film about jungle turtles.","url":"http://example.com","site_name":"Awesome Turtle Movies Website","image":[{"url":"http://example.com/turtle.jpg"},{"url":"http://example.com/shell.jpg"}],"tag":["turtle","movie","awesome"],"director":"http://www.example.com/PhilTheTurtle","actor":["http://www.example.com/PatTheTurtle","http://www.example.com/SaminaTheTurtle"],"writer":"http://www.example.com/TinaTheTurtle","release_date":"2015-01-14T19:14:27+00:00","duration":"1000000"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":"@Turtlessssssssss","url":"http://www.example.com/turtles","title":"Turtles of the Jungle","description":"A 2008 film about jungle turtles.","player":{"url":"http://www.example.com/turtles/player","width":"400","height":"400","stream":{"url":"http://www.example.com/turtles/turtle.mp4","content_type":"video/mp4"}}}}; | ||
expected = JSON.parse(fs.readFileSync('./test/static/turtle_movie.json')); | ||
$ = cheerio.load(fs.readFileSync('./test/static/turtle_movie.html')); | ||
@@ -29,3 +29,3 @@ return meta.parseAll($).then(function(results){ | ||
it('should get correct info from turtle article file', function() { | ||
expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles 
Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}}; | ||
expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json')); | ||
$ = cheerio.load(fs.readFileSync('./test/static/turtle_article.html')); | ||
@@ -38,4 +38,4 @@ return meta.parseAll($).then(function(results){ | ||
it('should be case insensitive on Turtle Article file', function() { | ||
expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles 
Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}}; | ||
$ = cheerio.load(fs.readFileSync('./test/static/Turtle_Article.html')); | ||
expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json')); | ||
$ = cheerio.load(fs.readFileSync('./test/static/turtle_article_case.html')); | ||
return meta.parseAll($).then(function(results){ | ||
@@ -42,0 +42,0 @@ assert.deepEqual(results, expected); |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
74576
22
1419
2