Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

open-graph-scraper

Package Overview
Dependencies
Maintainers
1
Versions
108
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

open-graph-scraper - npm Package Compare versions

Comparing version 4.3.0 to 4.3.1

4

CHANGELOG.md
# Change Log
## 4.3.1
- Small code clean up and adding tests
- Updating Dependencies
## 4.3.0

@@ -4,0 +8,0 @@ - Adding support for request headers

10

lib/fallback.js
const { findImageTypeFromUrl, isImageTypeValid } = require('./utils');
const doesElementExist = (selector, attribute, $) => {
if ($(selector).attr(attribute) && $(selector).attr(attribute).length > 0) return true;
return false;
};
const doesElementExist = (selector, attribute, $) => (
$(selector).attr(attribute) && $(selector).attr(attribute).length > 0
);

@@ -53,2 +52,3 @@ const fallback = (ogObject, options, $) => {

});
if (ogObject.ogImage.length === 0) delete ogObject.ogImage;
} else if (ogObject.ogImage && ogObject.ogImage.url && !ogObject.ogImage.type) {

@@ -124,4 +124,2 @@ // if there isn't a type, try to pull it from the URL

ogObject.ogDate = $('time[datetime]').attr('datetime');
} else if (doesElementExist('time[datetime][pubdate]', 'datetime', $)) {
ogObject.ogDate = $('time[datetime][pubdate]').attr('datetime');
}

@@ -128,0 +126,0 @@ }

@@ -0,1 +1,2 @@

/* eslint-disable max-len */
const fields = require('./fields');

@@ -73,8 +74,4 @@

exports.mediaSetup = (ogObject, options) => {
/* Combine image/width/height/type
and sort for priority */
if (ogObject.ogImage
|| ogObject.ogImageWidth
|| ogObject.twitterImageHeight
|| ogObject.ogImageType) {
// sets ogImage image/width/height/type to null if one these exists
if (ogObject.ogImage || ogObject.ogImageWidth || ogObject.twitterImageHeight || ogObject.ogImageType) {
ogObject.ogImage = ogObject.ogImage ? ogObject.ogImage : [null];

@@ -86,10 +83,8 @@ ogObject.ogImageWidth = ogObject.ogImageWidth ? ogObject.ogImageWidth : [null];

const ogImages = zip(ogObject.ogImage,
ogObject.ogImageWidth,
ogObject.ogImageHeight,
ogObject.ogImageType)
.map(mediaMapper).sort(mediaSorter);
// format images
const ogImages = zip(ogObject.ogImage, ogObject.ogImageWidth, ogObject.ogImageHeight, ogObject.ogImageType)
.map(mediaMapper)
.sort(mediaSorter);
/* Combine video/width/height/type
and sort for priority */
// sets ogVideo video/width/height/type to null if one these exists
if (ogObject.ogVideo || ogObject.ogVideoWidth || ogObject.ogVideoHeight || ogObject.ogVideoType) {

@@ -102,17 +97,11 @@ ogObject.ogVideo = ogObject.ogVideo ? ogObject.ogVideo : [null];

const ogVideos = zip(ogObject.ogVideo,
ogObject.ogVideoWidth,
ogObject.ogVideoHeight,
ogObject.ogVideoType)
.map(mediaMapper).sort(mediaSorter);
// format videos
const ogVideos = zip(ogObject.ogVideo, ogObject.ogVideoWidth, ogObject.ogVideoHeight, ogObject.ogVideoType)
.map(mediaMapper)
.sort(mediaSorter);
/* Combine twitter image/width/height/alt
and sort for priority */
if (ogObject.twitterImageSrc
|| ogObject.twitterImage
|| ogObject.twitterImageWidth
|| ogObject.twitterImageHeight
|| ogObject.twitterImageAlt) {
// if twitterImage isn't there, try twitterImageSrc
ogObject.twitterImage = ogObject.twitterImage ? ogObject.twitterImage : ogObject.twitterImageSrc;
// sets twitter image image/width/height/type to null if one these exists
if (ogObject.twitterImageSrc || ogObject.twitterImage || ogObject.twitterImageWidth || ogObject.twitterImageHeight || ogObject.twitterImageAlt) {
ogObject.twitterImageSrc = ogObject.twitterImageSrc ? ogObject.twitterImageSrc : [null];
ogObject.twitterImage = ogObject.twitterImage ? ogObject.twitterImage : ogObject.twitterImageSrc; // deafult to twitterImageSrc
ogObject.twitterImageWidth = ogObject.twitterImageWidth ? ogObject.twitterImageWidth : [null];

@@ -123,14 +112,9 @@ ogObject.twitterImageHeight = ogObject.twitterImageHeight ? ogObject.twitterImageHeight : [null];

const twitterImages = zip(ogObject.twitterImage,
ogObject.twitterImageWidth,
ogObject.twitterImageHeight,
ogObject.twitterImageAlt)
.map(mediaMapperTwitterImage).sort(mediaSorter);
// format twitter images
const twitterImages = zip(ogObject.twitterImage, ogObject.twitterImageWidth, ogObject.twitterImageHeight, ogObject.twitterImageAlt)
.map(mediaMapperTwitterImage)
.sort(mediaSorter);
/* Combine twitter player/width/height/stream
and sort for priority */
if (ogObject.twitterPlayer
|| ogObject.twitterPlayerWidth
|| ogObject.twitterPlayerHeight
|| ogObject.twitterPlayerStream) {
// sets twitter player/width/height/stream to null if one these exists
if (ogObject.twitterPlayer || ogObject.twitterPlayerWidth || ogObject.twitterPlayerHeight || ogObject.twitterPlayerStream) {
ogObject.twitterPlayer = ogObject.twitterPlayer ? ogObject.twitterPlayer : [null];

@@ -142,10 +126,8 @@ ogObject.twitterPlayerWidth = ogObject.twitterPlayerWidth ? ogObject.twitterPlayerWidth : [null];

const twitterPlayers = zip(ogObject.twitterPlayer,
ogObject.twitterPlayerWidth,
ogObject.twitterPlayerHeight,
ogObject.twitterPlayerStream)
.map(mediaMapperTwitterPlayer).sort(mediaSorter);
// format twitter player
const twitterPlayers = zip(ogObject.twitterPlayer, ogObject.twitterPlayerWidth, ogObject.twitterPlayerHeight, ogObject.twitterPlayerStream)
.map(mediaMapperTwitterPlayer)
.sort(mediaSorter);
/* Combine music:song url, track, disk
and sort in the right album order */
// sets music song/songTrack/songDisc to null if one these exists
if (ogObject.musicSong || ogObject.musicSongTrack || ogObject.musicSongDisc) {

@@ -157,65 +139,28 @@ ogObject.musicSong = ogObject.musicSong ? ogObject.musicSong : [null];

const musicSongs = zip(ogObject.musicSong,
ogObject.musicSongTrack,
ogObject.musicSongDisc)
.map(mediaMapperMusicSong).sort(mediaSorterMusicSong);
// format music songs
const musicSongs = zip(ogObject.musicSong, ogObject.musicSongTrack, ogObject.musicSongDisc)
.map(mediaMapperMusicSong)
.sort(mediaSorterMusicSong);
// Devare temporary fields
fields.filter((item) => (
item.multiple && item.fieldName && (
item.fieldName.startsWith('ogImage')
|| item.fieldName.startsWith('ogVideo')
|| item.fieldName.startsWith('twitter')
|| item.fieldName.startsWith('musicSong')
)
)).forEach((item) => {
delete ogObject[item.fieldName];
});
// remove old values since everything will live under the main property
fields.filter((item) => (item.multiple && item.fieldName && item.fieldName.match('(ogImage|ogVideo|twitter|musicSong).*')))
.forEach((item) => {
delete ogObject[item.fieldName];
});
// Select the best image
if (ogImages.length) {
if (options.allMedia) {
ogObject.ogImage = ogImages;
} else {
[ogObject.ogImage] = ogImages;
}
if (options.allMedia) {
if (ogImages.length) ogObject.ogImage = ogImages;
if (ogVideos.length) ogObject.ogVideo = ogVideos;
if (twitterImages.length) ogObject.twitterImage = twitterImages;
if (twitterPlayers.length) ogObject.twitterPlayer = twitterPlayers;
if (musicSongs.length) ogObject.musicSong = musicSongs;
} else {
if (ogImages.length) [ogObject.ogImage] = ogImages;
if (ogVideos.length) [ogObject.ogVideo] = ogVideos;
if (twitterImages.length) [ogObject.twitterImage] = twitterImages;
if (twitterPlayers.length) [ogObject.twitterPlayer] = twitterPlayers;
if (musicSongs.length) [ogObject.musicSong] = musicSongs;
}
// Select the best video
if (ogVideos.length) {
if (options.allMedia) {
ogObject.ogVideo = ogVideos;
} else {
[ogObject.ogVideo] = ogVideos;
}
}
// Select the best twitter image
if (twitterImages.length) {
if (options.allMedia) {
ogObject.twitterImage = twitterImages;
} else {
[ogObject.twitterImage] = twitterImages;
}
}
// Select the best player
if (twitterPlayers.length) {
if (options.allMedia) {
ogObject.twitterPlayer = twitterPlayers;
} else {
[ogObject.twitterPlayer] = twitterPlayers;
}
}
// Select the best music:song
if (musicSongs.length) {
if (options.allMedia) {
ogObject.musicSong = musicSongs;
} else {
[ogObject.musicSong] = musicSongs;
}
}
return ogObject;
};

@@ -19,8 +19,6 @@ const cheerio = require('cheerio');

const $ = cheerio.load(body);
const allMeta = $('meta');
allMeta.each((index, meta) => {
if (!meta.attribs || (!meta.attribs.property && !meta.attribs.name)) {
return;
}
// find all of the open graph info in the meta tags
$('meta').each((index, meta) => {
if (!meta.attribs || (!meta.attribs.property && !meta.attribs.name)) return;
const property = meta.attribs.property || meta.attribs.name;

@@ -48,11 +46,6 @@ const content = meta.attribs.content || meta.attribs.value;

// remove ogObject.ogImage if there is nothing found
if (!ogObject.ogImage || !ogObject.ogImage.length) {
delete ogObject.ogImage;
}
// sets up all the media stuff
// formats the multiple media values
ogObject = media.mediaSetup(ogObject, options);
// Check for 'only get open graph info'
// if onlyGetOpenGraphInfo isn't set, run the open graph fallbacks
if (!options.onlyGetOpenGraphInfo) {

@@ -62,7 +55,3 @@ ogObject = fallback(ogObject, options, $);

// remove ogObject.ogImage if there is nothing found
if (ogObject.ogImage && ogObject.ogImage.length === 0) {
delete ogObject.ogImage;
}
// removes any undefs
ogObject = utils.removeNestedUndefinedValues(ogObject);

@@ -79,5 +68,4 @@

const requestAndResultsFormatter = async (options) => {
const peekSize = options.peekSize || 1024;
const requestUrl = options.url;
delete options.url;
delete options.url; // setting options.url messes with got

@@ -88,2 +76,3 @@ return request.get(requestUrl, options)

let formatBody = response.body;
if (response && response.statusCode && (response.statusCode.toString().substring(0, 1) === '4' || response.statusCode.toString().substring(0, 1) === '5')) {

@@ -94,5 +83,6 @@ throw new Error('Server has returned a 400/500 error code');

}
if (options.runChar) {
const char = charset
.find(response.headers, formatBody, peekSize) || chardet.detect(formatBody);
.find(response.headers, formatBody, options.peekSize) || chardet.detect(formatBody);
if (char) {

@@ -112,6 +102,7 @@ // eslint-disable-next-line no-useless-catch

if (options.withCharset) {
ogObject.charset = charset.find(response.headers, formatBody, peekSize);
ogObject.charset = charset.find(response.headers, formatBody, options.peekSize);
}
ogObject.requestUrl = options.url;
ogObject.success = true;
return { ogObject, response };

@@ -133,5 +124,3 @@ })

if (options.html) {
if (options.url) {
throw new Error('Must specify either `url` or `html`, not both');
}
if (options.url) throw new Error('Must specify either `url` or `html`, not both');
const ogObject = extractMetaTags(options.html, options);

@@ -145,7 +134,8 @@ ogObject.requestUrl = null;

if (!validate.returnInputUrl) throw new Error('Invalid URL');
if (!validate.url) throw new Error('Invalid URL');
options.url = validate.returnInputUrl;
options.timeout = validate.returnInputTimeout;
options.url = validate.url;
options.timeout = validate.timeout;
options.decompress = options.decompress || true;
options.peekSize = options.peekSize || 1024;
options.followRedirect = options.followRedirect || true;

@@ -172,8 +162,8 @@ options.maxRedirects = options.maxRedirects || 10;

// trying to limit non html pages
if (validate.returnInputUrl.includes('.jpg')
|| validate.returnInputUrl.includes('.jpeg')
|| validate.returnInputUrl.includes('.png')
|| validate.returnInputUrl.includes('.mp3')
|| validate.returnInputUrl.includes('.zip')
|| validate.returnInputUrl.includes('.pdf')) {
if (options.url.includes('.jpg')
|| options.url.includes('.jpeg')
|| options.url.includes('.png')
|| options.url.includes('.mp3')
|| options.url.includes('.zip')
|| options.url.includes('.pdf')) {
throw new Error('Must scrape an HTML page');

@@ -180,0 +170,0 @@ }

@@ -7,3 +7,3 @@ const validator = require('validator');

*/
const isInputUrlValid = (url) => typeof url === 'string' && url.length > 0 && validator.isURL(url, [{
const isUrlValid = (url) => typeof url === 'string' && url.length > 0 && validator.isURL(url, [{
protocols: ['http', 'https'],

@@ -32,3 +32,3 @@ require_tld: true,

*/
const isInputTimeoutValid = (inputTimeout) => typeof inputTimeout === 'number' && /^\d{1,10}$/.test(inputTimeout);
const isTimeoutValid = (timeout) => typeof timeout === 'number' && /^\d{1,10}$/.test(timeout);

@@ -39,5 +39,5 @@ /*

*/
exports.validate = (inputUrl, inputTimeout) => ({
returnInputUrl: isInputUrlValid(inputUrl) ? coerceUrl(inputUrl) : null,
returnInputTimeout: isInputTimeoutValid(inputTimeout) ? inputTimeout : 2000,
exports.validate = (url, timeout) => ({
url: isUrlValid(url) ? coerceUrl(url) : null,
timeout: isTimeoutValid(timeout) ? timeout : 2000,
});

@@ -44,0 +44,0 @@

{
"name": "open-graph-scraper",
"description": "Node.js scraper module for Open Graph and Twitter Card info",
"version": "4.3.0",
"version": "4.3.1",
"license": "MIT",

@@ -27,6 +27,6 @@ "main": "index.js",

"dependencies": {
"chardet": "^1.1.0",
"chardet": "^1.2.1",
"cheerio": "^1.0.0-rc.3",
"got": "^11.3.0",
"iconv-lite": "^0.6.0",
"got": "^11.4.0",
"iconv-lite": "^0.6.1",
"validator": "^13.0.0"

@@ -40,15 +40,15 @@ },

"chai": "^4.2.0",
"eslint": "^7.3.1",
"eslint": "^7.4.0",
"eslint-config-airbnb": "^18.0.1",
"eslint-plugin-import": "^2.19.1",
"eslint-plugin-import": "^2.22.0",
"eslint-plugin-jsx-a11y": "^6.2.3",
"eslint-plugin-mocha": "^7.0.1",
"eslint-plugin-promise": "^4.2.1",
"eslint-plugin-react": "^7.17.0",
"eslint-plugin-react-hooks": "^4.0.4",
"markdownlint-cli": "^0.23.1",
"eslint-plugin-react": "^7.20.3",
"eslint-plugin-react-hooks": "^4.0.5",
"markdownlint-cli": "^0.23.2",
"mocha": "^8.0.1",
"nyc": "^15.0.1",
"sinon": "^9.0.2",
"snyk": "^1.348.1"
"snyk": "^1.360.0"
},

@@ -55,0 +55,0 @@ "repository": {

Socket · Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc