open-graph-scraper
Advanced tools
Comparing version 4.3.0 to 4.3.1
# Change Log | ||
## 4.3.1 | ||
- Small code clean up and adding tests | ||
- Updating Dependencies | ||
## 4.3.0 | ||
@@ -4,0 +8,0 @@ - Adding support for request headers |
const { findImageTypeFromUrl, isImageTypeValid } = require('./utils'); | ||
const doesElementExist = (selector, attribute, $) => { | ||
if ($(selector).attr(attribute) && $(selector).attr(attribute).length > 0) return true; | ||
return false; | ||
}; | ||
/**
 * Reports whether `$(selector).attr(attribute)` yields a non-empty value.
 *
 * @param {string} selector - selector handed to `$`
 * @param {string} attribute - attribute name read from the selection
 * @param {Function} $ - query function; only `$(selector).attr(attribute)` is called on it
 * @returns {boolean} `true` when the attribute value is truthy and its length is > 0,
 *   otherwise `false` (never `undefined` or an empty string)
 */
const doesElementExist = (selector, attribute, $) => {
  // Run the query once and reuse the value instead of selecting twice.
  const value = $(selector).attr(attribute);
  // Coerce explicitly: a bare `value && value.length > 0` would hand callers
  // `undefined` or '' rather than a boolean when the attribute is missing/empty.
  return Boolean(value) && value.length > 0;
};
@@ -53,2 +52,3 @@ const fallback = (ogObject, options, $) => { | ||
}); | ||
if (ogObject.ogImage.length === 0) delete ogObject.ogImage; | ||
} else if (ogObject.ogImage && ogObject.ogImage.url && !ogObject.ogImage.type) { | ||
@@ -124,4 +124,2 @@ // if there isn't a type, try to pull it from the URL | ||
ogObject.ogDate = $('time[datetime]').attr('datetime'); | ||
} else if (doesElementExist('time[datetime][pubdate]', 'datetime', $)) { | ||
ogObject.ogDate = $('time[datetime][pubdate]').attr('datetime'); | ||
} | ||
@@ -128,0 +126,0 @@ } |
151
lib/media.js
@@ -0,1 +1,2 @@ | ||
/* eslint-disable max-len */ | ||
const fields = require('./fields'); | ||
@@ -73,8 +74,4 @@ | ||
exports.mediaSetup = (ogObject, options) => { | ||
/* Combine image/width/height/type | ||
and sort for priority */ | ||
if (ogObject.ogImage | ||
|| ogObject.ogImageWidth | ||
|| ogObject.twitterImageHeight | ||
|| ogObject.ogImageType) { | ||
// sets missing ogImage image/width/height/type values to [null] if one of these exists | ||
if (ogObject.ogImage || ogObject.ogImageWidth || ogObject.twitterImageHeight || ogObject.ogImageType) { | ||
ogObject.ogImage = ogObject.ogImage ? ogObject.ogImage : [null]; | ||
@@ -86,10 +83,8 @@ ogObject.ogImageWidth = ogObject.ogImageWidth ? ogObject.ogImageWidth : [null]; | ||
const ogImages = zip(ogObject.ogImage, | ||
ogObject.ogImageWidth, | ||
ogObject.ogImageHeight, | ||
ogObject.ogImageType) | ||
.map(mediaMapper).sort(mediaSorter); | ||
// format images | ||
const ogImages = zip(ogObject.ogImage, ogObject.ogImageWidth, ogObject.ogImageHeight, ogObject.ogImageType) | ||
.map(mediaMapper) | ||
.sort(mediaSorter); | ||
/* Combine video/width/height/type | ||
and sort for priority */ | ||
// sets missing ogVideo video/width/height/type values to [null] if one of these exists | ||
if (ogObject.ogVideo || ogObject.ogVideoWidth || ogObject.ogVideoHeight || ogObject.ogVideoType) { | ||
@@ -102,17 +97,11 @@ ogObject.ogVideo = ogObject.ogVideo ? ogObject.ogVideo : [null]; | ||
const ogVideos = zip(ogObject.ogVideo, | ||
ogObject.ogVideoWidth, | ||
ogObject.ogVideoHeight, | ||
ogObject.ogVideoType) | ||
.map(mediaMapper).sort(mediaSorter); | ||
// format videos | ||
const ogVideos = zip(ogObject.ogVideo, ogObject.ogVideoWidth, ogObject.ogVideoHeight, ogObject.ogVideoType) | ||
.map(mediaMapper) | ||
.sort(mediaSorter); | ||
/* Combine twitter image/width/height/alt | ||
and sort for priority */ | ||
if (ogObject.twitterImageSrc | ||
|| ogObject.twitterImage | ||
|| ogObject.twitterImageWidth | ||
|| ogObject.twitterImageHeight | ||
|| ogObject.twitterImageAlt) { | ||
// if twitterImage isn't there, try twitterImageSrc | ||
ogObject.twitterImage = ogObject.twitterImage ? ogObject.twitterImage : ogObject.twitterImageSrc; | ||
// sets missing twitter image/width/height/alt values to [null] if one of these exists | ||
if (ogObject.twitterImageSrc || ogObject.twitterImage || ogObject.twitterImageWidth || ogObject.twitterImageHeight || ogObject.twitterImageAlt) { | ||
ogObject.twitterImageSrc = ogObject.twitterImageSrc ? ogObject.twitterImageSrc : [null]; | ||
ogObject.twitterImage = ogObject.twitterImage ? ogObject.twitterImage : ogObject.twitterImageSrc; // default to twitterImageSrc | ||
ogObject.twitterImageWidth = ogObject.twitterImageWidth ? ogObject.twitterImageWidth : [null]; | ||
@@ -123,14 +112,9 @@ ogObject.twitterImageHeight = ogObject.twitterImageHeight ? ogObject.twitterImageHeight : [null]; | ||
const twitterImages = zip(ogObject.twitterImage, | ||
ogObject.twitterImageWidth, | ||
ogObject.twitterImageHeight, | ||
ogObject.twitterImageAlt) | ||
.map(mediaMapperTwitterImage).sort(mediaSorter); | ||
// format twitter images | ||
const twitterImages = zip(ogObject.twitterImage, ogObject.twitterImageWidth, ogObject.twitterImageHeight, ogObject.twitterImageAlt) | ||
.map(mediaMapperTwitterImage) | ||
.sort(mediaSorter); | ||
/* Combine twitter player/width/height/stream | ||
and sort for priority */ | ||
if (ogObject.twitterPlayer | ||
|| ogObject.twitterPlayerWidth | ||
|| ogObject.twitterPlayerHeight | ||
|| ogObject.twitterPlayerStream) { | ||
// sets missing twitter player/width/height/stream values to [null] if one of these exists | ||
if (ogObject.twitterPlayer || ogObject.twitterPlayerWidth || ogObject.twitterPlayerHeight || ogObject.twitterPlayerStream) { | ||
ogObject.twitterPlayer = ogObject.twitterPlayer ? ogObject.twitterPlayer : [null]; | ||
@@ -142,10 +126,8 @@ ogObject.twitterPlayerWidth = ogObject.twitterPlayerWidth ? ogObject.twitterPlayerWidth : [null]; | ||
const twitterPlayers = zip(ogObject.twitterPlayer, | ||
ogObject.twitterPlayerWidth, | ||
ogObject.twitterPlayerHeight, | ||
ogObject.twitterPlayerStream) | ||
.map(mediaMapperTwitterPlayer).sort(mediaSorter); | ||
// format twitter player | ||
const twitterPlayers = zip(ogObject.twitterPlayer, ogObject.twitterPlayerWidth, ogObject.twitterPlayerHeight, ogObject.twitterPlayerStream) | ||
.map(mediaMapperTwitterPlayer) | ||
.sort(mediaSorter); | ||
/* Combine music:song url, track, disk | ||
and sort in the right album order */ | ||
// sets missing music song/songTrack/songDisc values to [null] if one of these exists | ||
if (ogObject.musicSong || ogObject.musicSongTrack || ogObject.musicSongDisc) { | ||
@@ -157,65 +139,28 @@ ogObject.musicSong = ogObject.musicSong ? ogObject.musicSong : [null]; | ||
const musicSongs = zip(ogObject.musicSong, | ||
ogObject.musicSongTrack, | ||
ogObject.musicSongDisc) | ||
.map(mediaMapperMusicSong).sort(mediaSorterMusicSong); | ||
// format music songs | ||
const musicSongs = zip(ogObject.musicSong, ogObject.musicSongTrack, ogObject.musicSongDisc) | ||
.map(mediaMapperMusicSong) | ||
.sort(mediaSorterMusicSong); | ||
// Delete temporary fields | ||
fields.filter((item) => ( | ||
item.multiple && item.fieldName && ( | ||
item.fieldName.startsWith('ogImage') | ||
|| item.fieldName.startsWith('ogVideo') | ||
|| item.fieldName.startsWith('twitter') | ||
|| item.fieldName.startsWith('musicSong') | ||
) | ||
)).forEach((item) => { | ||
delete ogObject[item.fieldName]; | ||
}); | ||
// remove old values since everything will live under the main property | ||
fields.filter((item) => (item.multiple && item.fieldName && item.fieldName.match('(ogImage|ogVideo|twitter|musicSong).*'))) | ||
.forEach((item) => { | ||
delete ogObject[item.fieldName]; | ||
}); | ||
// Select the best image | ||
if (ogImages.length) { | ||
if (options.allMedia) { | ||
ogObject.ogImage = ogImages; | ||
} else { | ||
[ogObject.ogImage] = ogImages; | ||
} | ||
if (options.allMedia) { | ||
if (ogImages.length) ogObject.ogImage = ogImages; | ||
if (ogVideos.length) ogObject.ogVideo = ogVideos; | ||
if (twitterImages.length) ogObject.twitterImage = twitterImages; | ||
if (twitterPlayers.length) ogObject.twitterPlayer = twitterPlayers; | ||
if (musicSongs.length) ogObject.musicSong = musicSongs; | ||
} else { | ||
if (ogImages.length) [ogObject.ogImage] = ogImages; | ||
if (ogVideos.length) [ogObject.ogVideo] = ogVideos; | ||
if (twitterImages.length) [ogObject.twitterImage] = twitterImages; | ||
if (twitterPlayers.length) [ogObject.twitterPlayer] = twitterPlayers; | ||
if (musicSongs.length) [ogObject.musicSong] = musicSongs; | ||
} | ||
// Select the best video | ||
if (ogVideos.length) { | ||
if (options.allMedia) { | ||
ogObject.ogVideo = ogVideos; | ||
} else { | ||
[ogObject.ogVideo] = ogVideos; | ||
} | ||
} | ||
// Select the best twitter image | ||
if (twitterImages.length) { | ||
if (options.allMedia) { | ||
ogObject.twitterImage = twitterImages; | ||
} else { | ||
[ogObject.twitterImage] = twitterImages; | ||
} | ||
} | ||
// Select the best player | ||
if (twitterPlayers.length) { | ||
if (options.allMedia) { | ||
ogObject.twitterPlayer = twitterPlayers; | ||
} else { | ||
[ogObject.twitterPlayer] = twitterPlayers; | ||
} | ||
} | ||
// Select the best music:song | ||
if (musicSongs.length) { | ||
if (options.allMedia) { | ||
ogObject.musicSong = musicSongs; | ||
} else { | ||
[ogObject.musicSong] = musicSongs; | ||
} | ||
} | ||
return ogObject; | ||
}; |
@@ -19,8 +19,6 @@ const cheerio = require('cheerio'); | ||
const $ = cheerio.load(body); | ||
const allMeta = $('meta'); | ||
allMeta.each((index, meta) => { | ||
if (!meta.attribs || (!meta.attribs.property && !meta.attribs.name)) { | ||
return; | ||
} | ||
// find all of the open graph info in the meta tags | ||
$('meta').each((index, meta) => { | ||
if (!meta.attribs || (!meta.attribs.property && !meta.attribs.name)) return; | ||
const property = meta.attribs.property || meta.attribs.name; | ||
@@ -48,11 +46,6 @@ const content = meta.attribs.content || meta.attribs.value; | ||
// remove ogObject.ogImage if there is nothing found | ||
if (!ogObject.ogImage || !ogObject.ogImage.length) { | ||
delete ogObject.ogImage; | ||
} | ||
// sets up all the media stuff | ||
// formats the multiple media values | ||
ogObject = media.mediaSetup(ogObject, options); | ||
// Check for 'only get open graph info' | ||
// if onlyGetOpenGraphInfo isn't set, run the open graph fallbacks | ||
if (!options.onlyGetOpenGraphInfo) { | ||
@@ -62,7 +55,3 @@ ogObject = fallback(ogObject, options, $); | ||
// remove ogObject.ogImage if there is nothing found | ||
if (ogObject.ogImage && ogObject.ogImage.length === 0) { | ||
delete ogObject.ogImage; | ||
} | ||
// removes any undefs | ||
ogObject = utils.removeNestedUndefinedValues(ogObject); | ||
@@ -79,5 +68,4 @@ | ||
const requestAndResultsFormatter = async (options) => { | ||
const peekSize = options.peekSize || 1024; | ||
const requestUrl = options.url; | ||
delete options.url; | ||
delete options.url; // setting options.url messes with got | ||
@@ -88,2 +76,3 @@ return request.get(requestUrl, options) | ||
let formatBody = response.body; | ||
if (response && response.statusCode && (response.statusCode.toString().substring(0, 1) === '4' || response.statusCode.toString().substring(0, 1) === '5')) { | ||
@@ -94,5 +83,6 @@ throw new Error('Server has returned a 400/500 error code'); | ||
} | ||
if (options.runChar) { | ||
const char = charset | ||
.find(response.headers, formatBody, peekSize) || chardet.detect(formatBody); | ||
.find(response.headers, formatBody, options.peekSize) || chardet.detect(formatBody); | ||
if (char) { | ||
@@ -112,6 +102,7 @@ // eslint-disable-next-line no-useless-catch | ||
if (options.withCharset) { | ||
ogObject.charset = charset.find(response.headers, formatBody, peekSize); | ||
ogObject.charset = charset.find(response.headers, formatBody, options.peekSize); | ||
} | ||
ogObject.requestUrl = options.url; | ||
ogObject.success = true; | ||
return { ogObject, response }; | ||
@@ -133,5 +124,3 @@ }) | ||
if (options.html) { | ||
if (options.url) { | ||
throw new Error('Must specify either `url` or `html`, not both'); | ||
} | ||
if (options.url) throw new Error('Must specify either `url` or `html`, not both'); | ||
const ogObject = extractMetaTags(options.html, options); | ||
@@ -145,7 +134,8 @@ ogObject.requestUrl = null; | ||
if (!validate.returnInputUrl) throw new Error('Invalid URL'); | ||
if (!validate.url) throw new Error('Invalid URL'); | ||
options.url = validate.returnInputUrl; | ||
options.timeout = validate.returnInputTimeout; | ||
options.url = validate.url; | ||
options.timeout = validate.timeout; | ||
options.decompress = options.decompress || true; | ||
options.peekSize = options.peekSize || 1024; | ||
options.followRedirect = options.followRedirect || true; | ||
@@ -172,8 +162,8 @@ options.maxRedirects = options.maxRedirects || 10; | ||
// trying to limit non html pages | ||
if (validate.returnInputUrl.includes('.jpg') | ||
|| validate.returnInputUrl.includes('.jpeg') | ||
|| validate.returnInputUrl.includes('.png') | ||
|| validate.returnInputUrl.includes('.mp3') | ||
|| validate.returnInputUrl.includes('.zip') | ||
|| validate.returnInputUrl.includes('.pdf')) { | ||
if (options.url.includes('.jpg') | ||
|| options.url.includes('.jpeg') | ||
|| options.url.includes('.png') | ||
|| options.url.includes('.mp3') | ||
|| options.url.includes('.zip') | ||
|| options.url.includes('.pdf')) { | ||
throw new Error('Must scrape an HTML page'); | ||
@@ -180,0 +170,0 @@ } |
@@ -7,3 +7,3 @@ const validator = require('validator'); | ||
*/ | ||
const isInputUrlValid = (url) => typeof url === 'string' && url.length > 0 && validator.isURL(url, [{ | ||
const isUrlValid = (url) => typeof url === 'string' && url.length > 0 && validator.isURL(url, [{ | ||
protocols: ['http', 'https'], | ||
@@ -32,3 +32,3 @@ require_tld: true, | ||
*/ | ||
const isInputTimeoutValid = (inputTimeout) => typeof inputTimeout === 'number' && /^\d{1,10}$/.test(inputTimeout); | ||
/**
 * Reports whether `timeout` is an acceptable timeout value: a non-negative
 * integer of at most ten decimal digits.
 *
 * @param {*} timeout - candidate value; anything that is not a number is rejected
 * @returns {boolean} `true` for integers in the range 0..9999999999, otherwise `false`
 */
const isTimeoutValid = (timeout) => (
  // Explicit numeric check instead of coercing the number to a string for a
  // /^\d{1,10}$/ test; the accepted set is the same (no fractions, negatives,
  // NaN/Infinity, or values above ten digits). Number.isInteger is already
  // false for non-number inputs, so no separate typeof guard is needed.
  Number.isInteger(timeout) && timeout >= 0 && timeout <= 9999999999
);
@@ -39,5 +39,5 @@ /* | ||
*/ | ||
exports.validate = (inputUrl, inputTimeout) => ({ | ||
returnInputUrl: isInputUrlValid(inputUrl) ? coerceUrl(inputUrl) : null, | ||
returnInputTimeout: isInputTimeoutValid(inputTimeout) ? inputTimeout : 2000, | ||
exports.validate = (url, timeout) => ({ | ||
url: isUrlValid(url) ? coerceUrl(url) : null, | ||
timeout: isTimeoutValid(timeout) ? timeout : 2000, | ||
}); | ||
@@ -44,0 +44,0 @@ |
{ | ||
"name": "open-graph-scraper", | ||
"description": "Node.js scraper module for Open Graph and Twitter Card info", | ||
"version": "4.3.0", | ||
"version": "4.3.1", | ||
"license": "MIT", | ||
@@ -27,6 +27,6 @@ "main": "index.js", | ||
"dependencies": { | ||
"chardet": "^1.1.0", | ||
"chardet": "^1.2.1", | ||
"cheerio": "^1.0.0-rc.3", | ||
"got": "^11.3.0", | ||
"iconv-lite": "^0.6.0", | ||
"got": "^11.4.0", | ||
"iconv-lite": "^0.6.1", | ||
"validator": "^13.0.0" | ||
@@ -40,15 +40,15 @@ }, | ||
"chai": "^4.2.0", | ||
"eslint": "^7.3.1", | ||
"eslint": "^7.4.0", | ||
"eslint-config-airbnb": "^18.0.1", | ||
"eslint-plugin-import": "^2.19.1", | ||
"eslint-plugin-import": "^2.22.0", | ||
"eslint-plugin-jsx-a11y": "^6.2.3", | ||
"eslint-plugin-mocha": "^7.0.1", | ||
"eslint-plugin-promise": "^4.2.1", | ||
"eslint-plugin-react": "^7.17.0", | ||
"eslint-plugin-react-hooks": "^4.0.4", | ||
"markdownlint-cli": "^0.23.1", | ||
"eslint-plugin-react": "^7.20.3", | ||
"eslint-plugin-react-hooks": "^4.0.5", | ||
"markdownlint-cli": "^0.23.2", | ||
"mocha": "^8.0.1", | ||
"nyc": "^15.0.1", | ||
"sinon": "^9.0.2", | ||
"snyk": "^1.348.1" | ||
"snyk": "^1.360.0" | ||
}, | ||
@@ -55,0 +55,0 @@ "repository": { |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
49411
1326
Updated chardet@^1.2.1
Updated got@^11.4.0
Updated iconv-lite@^0.6.1