Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

html-metadata

Package Overview
Dependencies
Maintainers
2
Versions
21
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

html-metadata - npm Package Compare versions

Comparing version 1.5.0 to 1.6.0

test/static/turtle_article_case.html

11

index.js

@@ -125,2 +125,13 @@ /**

/**
* Retrieves JSON-LD for given html object
*
* @param {Object} chtml html Cheerio object
* @param {Function} [callback] optional callback function
* @return {Object} BBPromise for JSON-LD
*/
exports.parseJsonLd = function(chtml, callback){
return index.parseJsonLd(chtml).nodeify(callback);
};
/**
* Scrapes OpenGraph data given html object

@@ -127,0 +138,0 @@ *

@@ -342,2 +342,29 @@ /**

/**
* Returns JSON-LD provided by page given HTML object
* @param {Object} chtml html Cheerio object
* @return {Object} BBPromise for JSON-LD
*/
exports.parseJsonLd = BBPromise.method(function(chtml) {
var json = [];
var jsonLd = chtml('script[type="application/ld+json"]');
jsonLd.each(function() {
var contents = chtml(this).text().trim();
try {
contents = JSON.parse(contents);
} catch (e) {
// Fail silently, just in case there are valid tags
return;
}
json.push(contents);
});
if (json.length === 0) {
throw new Error("No JSON-LD valid script tags present on page");
}
return json.length > 1 ? json : json[0];
});
/**
* Scrapes OpenGraph data given html object

@@ -559,2 +586,3 @@ * @param {Object} chtml html Cheerio object

'highwirePress': exports.parseHighwirePress,
'jsonLd': exports.parseJsonLd,
'openGraph': exports.parseOpenGraph,

@@ -561,0 +589,0 @@ 'schemaOrg': exports.parseSchemaOrgMicrodata,

2

package.json
{
"name": "html-metadata",
"version": "1.5.0",
"version": "1.6.0",
"description": "Scrapes metadata of several different standards",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -6,3 +6,3 @@ html-metadata

The aim of this library is to be a comprehensive source for extracting all html embedded metadata. Currently it supports Schema.org microdata using a third party library, a native BEPress, Dublin Core, Highwire Press, Open Graph, Twitter, EPrints, and COinS implementation, and some general metadata that doesn't belong to a particular standard (for instance, the content of the title tag, or meta description tags).
The aim of this library is to be a comprehensive source for extracting all html embedded metadata. Currently it supports Schema.org microdata using a third party library, a native BEPress, Dublin Core, Highwire Press, JSON-LD, Open Graph, Twitter, EPrints, and COinS implementation, and some general metadata that doesn't belong to a particular standard (for instance, the content of the title tag, or meta description tags).

@@ -9,0 +9,0 @@ Planned is support for RDFa, AGLS, and other yet unheard of metadata types. Contributions and requests for other metadata types welcome!

@@ -11,2 +11,3 @@ 'use strict';

var assert = require('./utils/assert.js');
var fs = require('fs');

@@ -102,2 +103,17 @@

// A page whose markup yields no parseable JSON-LD must produce a failing promise.
it('should not find JSON-LD, reject promise', function() {
	return preq.get('http://example.com').then(function(response) {
		var loadedPage = cheerio.load(response.body);
		return assert.fails(meta.parseJsonLd(loadedPage));
	});
});
// Uses a static fixture so the malformed JSON-LD case needs no request.
it('should reject promise with malformed JSON-LD', function() {
	var fixtureHtml = fs.readFileSync('./test/static/turtle_article_errors.html');
	var loadedPage = cheerio.load(fixtureHtml);
	return assert.fails(meta.parseJsonLd(loadedPage));
});
//TODO: Add test for lacking general metadata

@@ -104,0 +120,0 @@ //TODO: Add test for lacking any metadata

@@ -172,2 +172,31 @@ 'use strict';

// Runs the same two JSON-LD checks against each listed URL.
// NOTE(review): meta(url) presumably fetches the live page, which would make
// these tests network-dependent; confirm against the scraper entry point.
describe('JSON-LD tests (for types of Organizations)', function() {
	[
		'http://www.theguardian.com/us',
		'http://jsonld.com/',
		'http://www.apple.com/'
	].forEach(function(pageUrl) {
		describe(pageUrl, function() {
			it('should return an object or array', function() {
				return meta(pageUrl).then(function(res) {
					assert.ok(typeof res.jsonLd === 'object');
				});
			});

			it('should get correct JSON-LD data', function() {
				return meta(pageUrl).then(function(res) {
					var organization = res.jsonLd;
					// With several JSON-LD entries, check the first Organization one
					if (res.jsonLd instanceof Array) {
						organization = res.jsonLd.filter(function(entry) {
							return entry['@type'] === 'Organization';
						})[0];
					}

					var requiredKeys = ['@context', '@type', 'url', 'logo'];
					for (var i = 0; i < requiredKeys.length; i++) {
						assert.ok(organization.hasOwnProperty(requiredKeys[i]));
					}
				});
			});
		});
	});
});
});

@@ -20,3 +20,3 @@ 'use strict';

it('should get correct info from turtle movie file', function() {
expected = {"dublinCore":{"title":"Turtles of the Jungle","creator":"http://www.example.com/turtlelvr","description":"A 2008 film about jungle turtles.","date":"2012-02-04 12:00:00","type":"Image.Moving"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"openGraph":{"locale":"en_US","type":"video.movie","title":"Turtles of the Jungle","description":"A 2008 film about jungle turtles.","url":"http://example.com","site_name":"Awesome Turtle Movies Website","image":[{"url":"http://example.com/turtle.jpg"},{"url":"http://example.com/shell.jpg"}],"tag":["turtle","movie","awesome"],"director":"http://www.example.com/PhilTheTurtle","actor":["http://www.example.com/PatTheTurtle","http://www.example.com/SaminaTheTurtle"],"writer":"http://www.example.com/TinaTheTurtle","release_date":"2015-01-14T19:14:27+00:00","duration":"1000000"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":"@Turtlessssssssss","url":"http://www.example.com/turtles","title":"Turtles of the Jungle","description":"A 2008 film about jungle turtles.","player":{"url":"http://www.example.com/turtles/player","width":"400","height":"400","stream":{"url":"http://www.example.com/turtles/turtle.mp4","content_type":"video/mp4"}}}};
expected = JSON.parse(fs.readFileSync('./test/static/turtle_movie.json'));
$ = cheerio.load(fs.readFileSync('./test/static/turtle_movie.html'));

@@ -29,3 +29,3 @@ return meta.parseAll($).then(function(results){

it('should get correct info from turtle article file', function() {
expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles 
Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}};
expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json'));
$ = cheerio.load(fs.readFileSync('./test/static/turtle_article.html'));

@@ -38,4 +38,4 @@ return meta.parseAll($).then(function(results){

it('should be case insensitive on Turtle Article file', function() {
expected = {"bePress":{"series_title":"Turtles","author":"Turtle Lvr","author_institution":"Mediawiki","title":"Turtles are AWESOME!!1","date":"2012","pdf_url":"http://www.example.com/turtlelvr/pdf","abstract_html_url":"http://www.example.com/turtlelvr","publisher":"Turtles Society","online_date":"2012/02/04"},"coins":[{"ctx_ver":"Z39.88-2004","rft_id":"info:doi/http://dx.doi.org/10.5555/12345678","rfr_id":"info:sid/crossref.org:search","rft_val_fmt":"info:ofi/fmt:kev:mtx:journal","rft":{"atitle":"Toward a Unified Theory of High-Energy Metaphysics: Silly String Theory","jtitle":"Journal of Psychoceramics","date":"2008","volume":"5","issue":"11","spage":"1","epage":"3","aufirst":"Josiah","aulast":"Carberry","genre":"article","au":["Josiah Carberry"]}}],"dublinCore":{"title":"Turtles are AWESOME!!1","creator":"http://www.example.com/turtlelvr","description":"Exposition on the awesomeness of turtles","date":"2012-02-04 12:00:00","type":"Text.Article"},"general":{"author":"Turtle Lvr","authorlink":"http://examples.com/turtlelvr","canonical":"http://example.com/turtles","description":"Exposition on the awesomeness of turtles","publisher":"https://mediawiki.org","robots":"we welcome our robot overlords","shortlink":"http://example.com/c","title":"Turtles are AWESOME!!1 | Awesome Turtles Website", "lang":"en"},"highwirePress":{"journal_title":"Turtles","issn":"1234-5678","doi":"10.1000/123","publication_date":"2012-02-04","title":"Turtles are AWESOME!!1","author":"Turtle Lvr","author_institution":"Mediawiki","volume":"150","issue":"1","firstpage":"123","lastpage":"456","publisher":"Turtles Society","abstract":"Exposition on the awesomeness of turtles."},"openGraph":{"locale":"en_US","type":"article","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","url":"http://example.com","site_name":"Awesome Turtles 
Website","image":[{"url":"http://example.com/turtle.jpg","secure_url":"https://secure.example.com/turtle.jpg","type":"image/jpeg","width":"400","height":"300"},{"url":"http://example.com/shell.jpg","width":"200","height":"150"}],"audio":{"url":"http://example.com/sound.mp3","secure_url":"https://secure.example.com/sound.mp3","type":"audio/mpeg"},"tag":["turtles","are","awesome"],"section":["Turtles are tough","Turtles are flawless","Turtles are cute"],"published_time":"2012-02-04T12:00:00+00:00","modified_time":"2015-01-14T19:14:27+00:00","author":"http://examples.com/turtlelvr","publisher":"http://mediawiki.org"},"eprints":{"title":"Turtles are AWESOME!!1","creators_name":"http://www.example.com/turtlelvr","abstract":"Exposition on the awesomeness of turtles","datestamp":"2012-02-04 12:00:00","type":"article"},"twitter":{"card":"summary","site":"@Turtlessssssssss","creator":["@Turtlessssssssss","@Turtlezzzzzzzzzz"],"url":"http://www.example.com/turtles","title":"Turtles are AWESOME!!1","description":"Exposition on the awesomeness of turtles","image":{"url":"http://example.com/turtles.jpg","alt":"It's a bunch of turtles!"},"app":{"url":{"iphone":"turtle://","googleplay":"turtle://"},"id":{"iphone":"000","googleplay":"superturtlearticle.androidapp"}}}};
$ = cheerio.load(fs.readFileSync('./test/static/Turtle_Article.html'));
expected = JSON.parse(fs.readFileSync('./test/static/turtle_article.json'));
$ = cheerio.load(fs.readFileSync('./test/static/turtle_article_case.html'));
return meta.parseAll($).then(function(results){

@@ -42,0 +42,0 @@ assert.deepEqual(results, expected);

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc