Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

open-graph-scraper

Package Overview
Dependencies
Maintainers
1
Versions
108
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

open-graph-scraper - npm Package Compare versions

Comparing version 3.0.2 to 3.1.0

3

CHANGELOG.md

@@ -0,1 +1,4 @@

### 3.1.0
- Adding ability to extract meta from HTML string
### 3.0.2

@@ -2,0 +5,0 @@ - Adding CHANGELOG.md

126

lib/openGraphScraper.js

@@ -38,2 +38,10 @@ 'use strict';

const setOptionsAndReturnOpenGraphResults = function (options, callback) {
if (options.html) {
if (options.url) {
return callback(true, { error: 'Must specify either `url` or `html`, not both', success: false, requestUrl: options.url, errorDetails: 'Must specify either `url` or `html`, not both' }, null);
}
const ogObject = extractMetaTags(options.html, options);
return callback(false, { data: ogObject, success: true }, null);
}
let validate = utils.validate(options.url, options.timeout);

@@ -101,3 +109,2 @@

const peekSize = options.peekSize || 1024;
let ogObject = {};

@@ -119,66 +126,75 @@ request(options, function (error, response, body) {

const $ = cheerio.load(body);
const meta = $('meta');
const keys = Object.keys(meta);
let ogObject = extractMetaTags(body, options);
if (options.withCharset) {
ogObject.charset = charset(response.headers, body, peekSize);
}
return callback(null, ogObject, response);
}
});
};
keys.forEach(function (key) {
if (!(meta[key].attribs && (meta[key].attribs.property || meta[key].attribs.name))) {
return;
/*
* extract meta tags from html string
* @param string body - html string
* @param object options - options the user has set
*/
const extractMetaTags = (body, options) => {
let ogObject = {};
const $ = cheerio.load(body);
const meta = $('meta');
const keys = Object.keys(meta);
keys.forEach(function (key) {
if (!(meta[key].attribs && (meta[key].attribs.property || meta[key].attribs.name))) {
return;
}
const property = meta[key].attribs.property || meta[key].attribs.name;
const content = meta[key].attribs.content || meta[key].attribs.value;
fields.forEach(function (item) {
if (property === item.property) {
if (!item.multiple) {
ogObject[item.fieldName] = content;
} else if (!ogObject[item.fieldName]) {
ogObject[item.fieldName] = [content];
} else if (Array.isArray(ogObject[item.fieldName])) {
ogObject[item.fieldName].push(content);
}
const property = meta[key].attribs.property || meta[key].attribs.name;
const content = meta[key].attribs.content || meta[key].attribs.value;
fields.forEach(function (item) {
if (property === item.property) {
if (!item.multiple) {
ogObject[item.fieldName] = content;
} else if (!ogObject[item.fieldName]) {
ogObject[item.fieldName] = [content];
} else if (Array.isArray(ogObject[item.fieldName])) {
ogObject[item.fieldName].push(content);
}
}
});
});
// set the ogImage or fallback to ogImageURL or ogImageSecureURL
ogObject.ogImage = ogObject.ogImage ? ogObject.ogImage : (ogObject.ogImageURL ? ogObject.ogImageURL : (ogObject.ogImageSecureURL ? ogObject.ogImageSecureURL : []));
if (!ogObject.ogImage || !ogObject.ogImage.length) {
delete ogObject['ogImage'];
}
});
});
// sets up all the media stuff
ogObject = media.mediaSetup(ogObject, options);
// set the ogImage or fallback to ogImageURL or ogImageSecureURL
ogObject.ogImage = ogObject.ogImage ? ogObject.ogImage : (ogObject.ogImageURL ? ogObject.ogImageURL : (ogObject.ogImageSecureURL ? ogObject.ogImageSecureURL : []));
if (!ogObject.ogImage || !ogObject.ogImage.length) {
delete ogObject['ogImage'];
}
// Check for 'only get open graph info'
if (!options.onlyGetOpenGraphInfo) {
// Get title tag if og title was not provided
if (!ogObject.ogTitle && $('head > title').text() && $('head > title').text().length > 0) {
ogObject.ogTitle = $('head > title').text();
// sets up all the media stuff
ogObject = media.mediaSetup(ogObject, options);
// Check for 'only get open graph info'
if (!options.onlyGetOpenGraphInfo) {
// Get title tag if og title was not provided
if (!ogObject.ogTitle && $('head > title').text() && $('head > title').text().length > 0) {
ogObject.ogTitle = $('head > title').text();
}
// Get meta description tag if og description was not provided
if (!ogObject.ogDescription && $('head > meta[name="description"]').attr('content') && $('head > meta[name="description"]').attr('content').length > 0) {
ogObject.ogDescription = $('head > meta[name="description"]').attr('content');
}
// Get first image as og:image if there is no og:image tag.
const ogImageFallback = options.ogImageFallback === undefined ? true : options.ogImageFallback;
if (!ogObject.ogImage && ogImageFallback) {
const supportedImageExts = ['jpg', 'jpeg', 'png'];
$('img').each(function (i, elem) {
if ($(elem).attr('src') && $(elem).attr('src').length > 0 && supportedImageExts.indexOf($(elem).attr('src').split('.').pop()) !== -1) {
ogObject.ogImage = {
url: $(elem).attr('src')
};
return false;
}
// Get meta description tag if og description was not provided
if (!ogObject.ogDescription && $('head > meta[name="description"]').attr('content') && $('head > meta[name="description"]').attr('content').length > 0) {
ogObject.ogDescription = $('head > meta[name="description"]').attr('content');
}
// Get first image as og:image if there is no og:image tag.
const ogImageFallback = options.ogImageFallback === undefined ? true : options.ogImageFallback;
if (!ogObject.ogImage && ogImageFallback) {
const supportedImageExts = ['jpg', 'jpeg', 'png'];
$('img').each(function (i, elem) {
if ($(elem).attr('src') && $(elem).attr('src').length > 0 && supportedImageExts.indexOf($(elem).attr('src').split('.').pop()) !== -1) {
ogObject.ogImage = {
url: $(elem).attr('src')
};
return false;
}
});
}
}
// console.log('ogObject',ogObject);
return callback(null, ogObject, response);
});
}
});
}
return ogObject;
};

@@ -185,0 +201,0 @@

{
"name": "open-graph-scraper",
"description": "Node.js scraper module for Open Graph and Twitter Card info",
"version": "3.0.2",
"version": "3.1.0",
"license": "MIT",

@@ -6,0 +6,0 @@ "main": "index.js",

@@ -101,2 +101,14 @@ openGraphScraper

It's possible to pass in an HTML string instead of a URL. There won't be a response object.
```
const htmlString = /* html string goes here */;
const ogs = require('open-graph-scraper');
const options = {'html': htmlString};
ogs(options, function (error, results) {
console.log('error:', error); // This returns true or false: true if there was an error. The error itself is inside the results object.
console.log('results:', results);
});
```
### Results JSON

@@ -103,0 +115,0 @@ Check the return for a ```success``` flag. If success is set to true, then the url input was valid. Otherwise it will be set to false. The above example will return something like...

@@ -5,2 +5,10 @@ 'use strict';

const expect = require('expect.js');
// Minimal HTML document fixture: its <head> holds a single og:title meta tag
// ("Test page"). Passed as the `html` option by the HTML-string test cases.
const HTML_STRING = `
<html>
<head>
<meta property="og:title" content="Test page"/>
</head>
<body></body>
</html>
`;

@@ -788,2 +796,28 @@ describe('GET OG', function () {

});
// Passing both `url` and `html` must be rejected: the callback receives
// error === true and a failure result that echoes the requested URL.
it('Invalid Call - Can\'t request URL and pass in HTML string', (done) => {
  const imageUrl = 'https://upload.wikimedia.org/wikipedia/commons/a/a2/Overlook_Hong_Kong_Island_north_coast,_Victoria_Harbour_and_Kowloon_from_middle_section_of_Lugard_Road_at_daytime_(enlarged_version_and_better_contrast,_revised).jpg';
  const conflictingOptions = { url: imageUrl, html: HTML_STRING };

  app(conflictingOptions, (error, result) => {
    console.log('error:', error);
    console.log('result:', result);

    expect(error).to.be(true);
    expect(result.success).to.be(false);
    expect(result.requestUrl).to.be(imageUrl);
    expect(result.error).to.be('Must specify either `url` or `html`, not both');
    done();
  });
});
// Passing only `html` succeeds: error === false, and the og:title from the
// fixture is surfaced as result.data.ogTitle.
it('Valid Call - pass in HTML string', (done) => {
  const htmlOnlyOptions = { html: HTML_STRING };

  app(htmlOnlyOptions, (error, result) => {
    console.log('error:', error);
    console.log('result:', result);

    expect(error).to.be(false);
    expect(result.success).to.be(true);
    expect(result.data.ogTitle).to.be('Test page');
    done();
  });
});
});
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc