open-graph-scraper
Advanced tools
Comparing version 3.0.2 to 3.1.0
@@ -0,1 +1,4 @@ | ||
### 3.1.0 | ||
- Adding ability to extract meta from HTML string | ||
### 3.0.2 | ||
@@ -2,0 +5,0 @@ - Adding CHANGELOG.md |
@@ -38,2 +38,10 @@ 'use strict'; | ||
const setOptionsAndReturnOpenGraphResults = function (options, callback) { | ||
if (options.html) { | ||
if (options.url) { | ||
return callback(true, { error: 'Must specify either `url` or `html`, not both', success: false, requestUrl: options.url, errorDetails: 'Must specify either `url` or `html`, not both' }, null); | ||
} | ||
const ogObject = extractMetaTags(options.html, options); | ||
return callback(false, { data: ogObject, success: true }, null); | ||
} | ||
let validate = utils.validate(options.url, options.timeout); | ||
@@ -101,3 +109,2 @@ | ||
const peekSize = options.peekSize || 1024; | ||
let ogObject = {}; | ||
@@ -119,66 +126,75 @@ request(options, function (error, response, body) { | ||
const $ = cheerio.load(body); | ||
const meta = $('meta'); | ||
const keys = Object.keys(meta); | ||
let ogObject = extractMetaTags(body, options); | ||
if (options.withCharset) { | ||
ogObject.charset = charset(response.headers, body, peekSize); | ||
} | ||
return callback(null, ogObject, response); | ||
} | ||
}); | ||
}; | ||
keys.forEach(function (key) { | ||
if (!(meta[key].attribs && (meta[key].attribs.property || meta[key].attribs.name))) { | ||
return; | ||
/* | ||
* extract meta tags from html string | ||
* @param string body - html string | ||
* @param object options - options the user has set | ||
*/ | ||
const extractMetaTags = (body, options) => { | ||
let ogObject = {}; | ||
const $ = cheerio.load(body); | ||
const meta = $('meta'); | ||
const keys = Object.keys(meta); | ||
keys.forEach(function (key) { | ||
if (!(meta[key].attribs && (meta[key].attribs.property || meta[key].attribs.name))) { | ||
return; | ||
} | ||
const property = meta[key].attribs.property || meta[key].attribs.name; | ||
const content = meta[key].attribs.content || meta[key].attribs.value; | ||
fields.forEach(function (item) { | ||
if (property === item.property) { | ||
if (!item.multiple) { | ||
ogObject[item.fieldName] = content; | ||
} else if (!ogObject[item.fieldName]) { | ||
ogObject[item.fieldName] = [content]; | ||
} else if (Array.isArray(ogObject[item.fieldName])) { | ||
ogObject[item.fieldName].push(content); | ||
} | ||
const property = meta[key].attribs.property || meta[key].attribs.name; | ||
const content = meta[key].attribs.content || meta[key].attribs.value; | ||
fields.forEach(function (item) { | ||
if (property === item.property) { | ||
if (!item.multiple) { | ||
ogObject[item.fieldName] = content; | ||
} else if (!ogObject[item.fieldName]) { | ||
ogObject[item.fieldName] = [content]; | ||
} else if (Array.isArray(ogObject[item.fieldName])) { | ||
ogObject[item.fieldName].push(content); | ||
} | ||
} | ||
}); | ||
}); | ||
// set the ogImage or fallback to ogImageURL or ogImageSecureURL | ||
ogObject.ogImage = ogObject.ogImage ? ogObject.ogImage : (ogObject.ogImageURL ? ogObject.ogImageURL : (ogObject.ogImageSecureURL ? ogObject.ogImageSecureURL : [])); | ||
if (!ogObject.ogImage || !ogObject.ogImage.length) { | ||
delete ogObject['ogImage']; | ||
} | ||
}); | ||
}); | ||
// sets up all the media stuff | ||
ogObject = media.mediaSetup(ogObject, options); | ||
// set the ogImage or fallback to ogImageURL or ogImageSecureURL | ||
ogObject.ogImage = ogObject.ogImage ? ogObject.ogImage : (ogObject.ogImageURL ? ogObject.ogImageURL : (ogObject.ogImageSecureURL ? ogObject.ogImageSecureURL : [])); | ||
if (!ogObject.ogImage || !ogObject.ogImage.length) { | ||
delete ogObject['ogImage']; | ||
} | ||
// Check for 'only get open graph info' | ||
if (!options.onlyGetOpenGraphInfo) { | ||
// Get title tag if og title was not provided | ||
if (!ogObject.ogTitle && $('head > title').text() && $('head > title').text().length > 0) { | ||
ogObject.ogTitle = $('head > title').text(); | ||
// sets up all the media stuff | ||
ogObject = media.mediaSetup(ogObject, options); | ||
// Check for 'only get open graph info' | ||
if (!options.onlyGetOpenGraphInfo) { | ||
// Get title tag if og title was not provided | ||
if (!ogObject.ogTitle && $('head > title').text() && $('head > title').text().length > 0) { | ||
ogObject.ogTitle = $('head > title').text(); | ||
} | ||
// Get meta description tag if og description was not provided | ||
if (!ogObject.ogDescription && $('head > meta[name="description"]').attr('content') && $('head > meta[name="description"]').attr('content').length > 0) { | ||
ogObject.ogDescription = $('head > meta[name="description"]').attr('content'); | ||
} | ||
// Get first image as og:image if there is no og:image tag. | ||
const ogImageFallback = options.ogImageFallback === undefined ? true : options.ogImageFallback; | ||
if (!ogObject.ogImage && ogImageFallback) { | ||
const supportedImageExts = ['jpg', 'jpeg', 'png']; | ||
$('img').each(function (i, elem) { | ||
if ($(elem).attr('src') && $(elem).attr('src').length > 0 && supportedImageExts.indexOf($(elem).attr('src').split('.').pop()) !== -1) { | ||
ogObject.ogImage = { | ||
url: $(elem).attr('src') | ||
}; | ||
return false; | ||
} | ||
// Get meta description tag if og description was not provided | ||
if (!ogObject.ogDescription && $('head > meta[name="description"]').attr('content') && $('head > meta[name="description"]').attr('content').length > 0) { | ||
ogObject.ogDescription = $('head > meta[name="description"]').attr('content'); | ||
} | ||
// Get first image as og:image if there is no og:image tag. | ||
const ogImageFallback = options.ogImageFallback === undefined ? true : options.ogImageFallback; | ||
if (!ogObject.ogImage && ogImageFallback) { | ||
const supportedImageExts = ['jpg', 'jpeg', 'png']; | ||
$('img').each(function (i, elem) { | ||
if ($(elem).attr('src') && $(elem).attr('src').length > 0 && supportedImageExts.indexOf($(elem).attr('src').split('.').pop()) !== -1) { | ||
ogObject.ogImage = { | ||
url: $(elem).attr('src') | ||
}; | ||
return false; | ||
} | ||
}); | ||
} | ||
} | ||
// console.log('ogObject',ogObject); | ||
return callback(null, ogObject, response); | ||
}); | ||
} | ||
}); | ||
} | ||
return ogObject; | ||
}; | ||
@@ -185,0 +201,0 @@ |
{ | ||
"name": "open-graph-scraper", | ||
"description": "Node.js scraper module for Open Graph and Twitter Card info", | ||
"version": "3.0.2", | ||
"version": "3.1.0", | ||
"license": "MIT", | ||
@@ -6,0 +6,0 @@ "main": "index.js", |
@@ -101,2 +101,14 @@ openGraphScraper | ||
It's possible to pass in an HTML string instead of a URL. There won't be a response object. | ||
``` | ||
const htmlString = /* html string goes here */; | ||
const ogs = require('open-graph-scraper'); | ||
const options = {'html': htmlString}; | ||
ogs(options, function (error, results) { | ||
console.log('error:', error); // This returns true or false: true if there was an error. The error itself is inside the results object. | ||
console.log('results:', results); | ||
}); | ||
``` | ||
### Results JSON | ||
@@ -103,0 +115,0 @@ Check the return for a ```success``` flag. If success is set to true, then the url input was valid. Otherwise it will be set to false. The above example will return something like... |
@@ -5,2 +5,10 @@ 'use strict'; | ||
const expect = require('expect.js'); | ||
const HTML_STRING = ` | ||
<html> | ||
<head> | ||
<meta property="og:title" content="Test page"/> | ||
</head> | ||
<body></body> | ||
</html> | ||
`; | ||
@@ -788,2 +796,28 @@ describe('GET OG', function () { | ||
}); | ||
it('Invalid Call - Can\'t request URL and pass in HTML string', function (done) { | ||
app({ | ||
'url': 'https://upload.wikimedia.org/wikipedia/commons/a/a2/Overlook_Hong_Kong_Island_north_coast,_Victoria_Harbour_and_Kowloon_from_middle_section_of_Lugard_Road_at_daytime_(enlarged_version_and_better_contrast,_revised).jpg', | ||
'html': HTML_STRING | ||
}, function (error, result) { | ||
console.log('error:', error); | ||
console.log('result:', result); | ||
expect(error).to.be(true); | ||
expect(result.success).to.be(false); | ||
expect(result.requestUrl).to.be('https://upload.wikimedia.org/wikipedia/commons/a/a2/Overlook_Hong_Kong_Island_north_coast,_Victoria_Harbour_and_Kowloon_from_middle_section_of_Lugard_Road_at_daytime_(enlarged_version_and_better_contrast,_revised).jpg'); | ||
expect(result.error).to.be('Must specify either `url` or `html`, not both'); | ||
done(); | ||
}); | ||
}); | ||
it('Valid Call - pass in HTML string', function (done) { | ||
app({ | ||
'html': HTML_STRING | ||
}, function (error, result) { | ||
console.log('error:', error); | ||
console.log('result:', result); | ||
expect(error).to.be(false); | ||
expect(result.success).to.be(true); | ||
expect(result.data.ogTitle).to.be('Test page'); | ||
done(); | ||
}); | ||
}); | ||
}); |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
90081
1909
152