open-graph-scraper
Advanced tools
Comparing version 4.11.1 to 5.0.0
# Change Log | ||
## 5.0.0 | ||
- Updating to `got` version 12! | ||
- Adding typescript support. | ||
- The `retry` option is now an object -> https://github.com/sindresorhus/got/blob/main/documentation/7-retry.md#retry | ||
- The `timeout` option is now an object -> https://github.com/sindresorhus/got/blob/main/documentation/6-timeout.md#timeout-options | ||
- Dropping callback support. If you still want to use callbacks, you can use `callbackify` -> https://nodejs.org/api/util.html#util_util_callbackify_original | ||
- Auth errors will now be passed back to the client and will no longer just be `Page not found` errors. | ||
- Dropping support for node12 since `got` no longer supports it. | ||
- Removing `options.encoding`. | ||
- Updating Dependencies | ||
## 4.11.1 | ||
@@ -4,0 +16,0 @@ |
100
index.js
const openGraphScraper = require('./lib/openGraphScraper'); | ||
/* | ||
* run | ||
* @param string options - options the user has set | ||
* @param function callback and promise | ||
/** | ||
* @typedef {object} customMetaTags | ||
* @property {boolean} multiple - is there more than one of these tags on a page (normally this is false) | ||
* @property {string} property - meta tag name/property attribute | ||
* @property {string} fieldName - name of the result variable | ||
*/ | ||
exports.run = async (options, callback) => { | ||
const hasCallback = typeof callback === 'function'; | ||
if (hasCallback) { | ||
let results; | ||
try { | ||
results = await openGraphScraper(options); | ||
} catch (exception) { | ||
const returnError = { | ||
/** | ||
* You can find the `isUrl` settings details at https://github.com/validatorjs/validator.js | ||
* @typedef {object} validatorSettings | ||
* @property {string[]} protocols | ||
* @property {boolean} require_tld | ||
* @property {boolean} require_protocol | ||
* @property {boolean} require_host | ||
* @property {boolean} require_valid_protocol | ||
* @property {boolean} allow_underscores | ||
* @property {boolean} host_whitelist | ||
* @property {boolean} host_blacklist | ||
* @property {boolean} allow_trailing_dot | ||
* @property {boolean} allow_protocol_relative_urls | ||
* @property {boolean} disallow_auth | ||
*/ | ||
/** | ||
* `open-graph-scraper` uses [got](https://github.com/sindresorhus/got) for requests and most of | ||
* [got's options](https://github.com/sindresorhus/got/blob/main/documentation/2-options.md) | ||
* should work as `open-graph-scraper` options. | ||
* | ||
* @param {object} options - The options used by Open Graph Scraper | ||
* @param {string} options.url - URL of the site. (Required) | ||
* @param {string} [options.html] - You can pass in an HTML string to run ogs on it. (use without options.url) | ||
* @param {string[]} [options.blacklist] - Pass in an array of sites you don't want ogs to run on. | ||
* @param {boolean} [options.onlyGetOpenGraphInfo] - Only fetch open graph info and don't fall back on anything else. | ||
* @param {boolean} [options.ogImageFallback] - Fetch other images if no open graph ones are found. | ||
* @param {customMetaTags[]} [options.customMetaTags] - Here you can define custom meta tags you want to scrape. | ||
* @param {boolean} [options.allMedia] - By default, OGS will only send back the first image/video it finds. | ||
* @param {number} [options.peekSize] - Sets the peekSize for the request. | ||
* @param {number} [options.downloadLimit] - Maximum size of the content downloaded from the server, in bytes. | ||
* @param {validatorSettings} [options.urlValidatorSettings] - Sets the options used by validator.js for testing the URL | ||
* @returns {{error: boolean, result: object, response: object}} Object with the Open Graph results with the given page | ||
* | ||
*/ | ||
const run = async (options) => { | ||
let results; | ||
try { | ||
results = await openGraphScraper(options); | ||
} catch (exception) { | ||
const returnError = { | ||
error: true, | ||
result: { | ||
success: false, | ||
@@ -20,34 +57,13 @@ requestUrl: options.url, | ||
errorDetails: exception, | ||
}; | ||
return callback(true, returnError); | ||
} | ||
return callback(false, results.ogObject, results.response); | ||
}, | ||
}; | ||
throw returnError; | ||
} | ||
// eslint-disable-next-line no-async-promise-executor | ||
return new Promise(async (resolve, reject) => { | ||
let results; | ||
try { | ||
results = await openGraphScraper(options); | ||
} catch (exception) { | ||
const returnError = { | ||
error: true, | ||
result: { | ||
success: false, | ||
requestUrl: options.url, | ||
error: exception.message, | ||
errorDetails: exception, | ||
}, | ||
}; | ||
reject(returnError); | ||
return; | ||
} | ||
const returnValues = { | ||
error: false, | ||
result: results.ogObject, | ||
response: results.response, | ||
}; | ||
resolve(returnValues); | ||
}); | ||
return { | ||
error: false, | ||
result: results.ogObject, | ||
response: results.response, | ||
}; | ||
}; | ||
module.exports = (options, callback) => exports.run(options, callback); | ||
module.exports = run; |
@@ -8,6 +8,9 @@ const cheerio = require('cheerio'); | ||
/* | ||
* extract meta tags from html string | ||
* @param string body - html string | ||
* @param string options - options the user has set | ||
/** | ||
* extract all of the meta tags needed for ogs | ||
* | ||
* @param {object} body - the body of the got request | ||
* @param {object} options - options for ogs | ||
* @return {object} object with ogs results | ||
* | ||
*/ | ||
@@ -25,3 +28,3 @@ exports.extractMetaTags = (body, options) => { | ||
metaFields.forEach((item) => { | ||
if (property.toLowerCase() === item.property.toLowerCase()) { | ||
if (item && property.toLowerCase() === item.property.toLowerCase()) { | ||
if (!item.multiple) { | ||
@@ -28,0 +31,0 @@ ogObject[item.fieldName] = content; |
@@ -7,2 +7,11 @@ const { findImageTypeFromUrl, isImageTypeValid, isUrlValid } = require('./utils'); | ||
/** | ||
* ogs fallbacks | ||
* | ||
* @param {object} ogObject - the current ogObject | ||
* @param {object} options - options for ogs | ||
* @param {object} $ - cheerio.load() of the current html | ||
* @return {object} object with ogs results with updated fallback values | ||
* | ||
*/ | ||
const fallback = (ogObject, options, $) => { | ||
@@ -9,0 +18,0 @@ // title fallback |
@@ -0,1 +1,7 @@ | ||
/** | ||
* array of meta tags ogs is looking for | ||
* | ||
* @return {array} array of meta tags | ||
* | ||
*/ | ||
module.exports = [ | ||
@@ -2,0 +8,0 @@ { |
@@ -67,7 +67,9 @@ /* eslint-disable max-len */ | ||
/* | ||
* media setup | ||
* @param string ogObject - return open open graph info | ||
* @param string options - options the user has set | ||
* @param function callback | ||
/** | ||
* formats the multiple media values | ||
* | ||
* @param {object} ogObject - the current ogObject | ||
* @param {object} options - options for ogs | ||
* @return {object} object with ogs results with updated media values | ||
* | ||
*/ | ||
@@ -74,0 +76,0 @@ exports.mediaSetup = (ogObject, options) => { |
const { extractMetaTags } = require('./extract'); | ||
const { requestAndResultsFormatter } = require('./request'); | ||
const charset = require('./charset'); | ||
const utils = require('./utils'); | ||
/* | ||
* set options and return open graph results | ||
* @param string options - options the user has set | ||
* @param function callback | ||
/** | ||
* sets up options for the got request and calls extract on html | ||
* | ||
* @param {object} options - options for ogs | ||
* @return {object} object with ogs results | ||
* | ||
*/ | ||
const setOptionsAndReturnOpenGraphResults = async (options) => { | ||
options.customMetaTags = options.customMetaTags || []; // setting customMetaTags up here because of HTML | ||
const { ogsOptions, gotOptions } = utils.optionSetupAndSplit(options); | ||
if (options.html) { | ||
if (options.url) throw new Error('Must specify either `url` or `html`, not both'); | ||
const ogObject = extractMetaTags(options.html, options); | ||
if (ogsOptions.html) { | ||
if (ogsOptions.url) throw new Error('Must specify either `url` or `html`, not both'); | ||
const ogObject = extractMetaTags(ogsOptions.html, ogsOptions); | ||
ogObject.requestUrl = null; | ||
ogObject.success = true; | ||
return { ogObject, response: { body: options.html } }; | ||
return { ogObject, response: { body: ogsOptions.html } }; | ||
} | ||
if (!options.urlValidatorSettings) { | ||
// set the default URL validator Settings | ||
options.urlValidatorSettings = { | ||
protocols: ['http', 'https'], | ||
require_tld: true, | ||
require_protocol: false, | ||
require_host: true, | ||
require_valid_protocol: true, | ||
allow_underscores: false, | ||
host_whitelist: false, | ||
host_blacklist: false, | ||
allow_trailing_dot: false, | ||
allow_protocol_relative_urls: false, | ||
disallow_auth: false, | ||
}; | ||
} | ||
const formattedUrl = utils.validateAndFormatURL(ogsOptions.url, ogsOptions.urlValidatorSettings); | ||
const validate = utils.validate(options.url, options.timeout, options.urlValidatorSettings); | ||
if (!formattedUrl.url) throw new Error('Invalid URL'); | ||
if (!validate.url) throw new Error('Invalid URL'); | ||
ogsOptions.url = formattedUrl.url; | ||
gotOptions.url = formattedUrl.url; | ||
options.url = validate.url; | ||
options.timeout = validate.timeout; | ||
options = { | ||
decompress: true, | ||
peekSize: 1024, | ||
followRedirect: true, | ||
maxRedirects: 10, | ||
retry: 2, | ||
onlyGetOpenGraphInfo: false, | ||
ogImageFallback: true, | ||
allMedia: false, | ||
headers: {}, | ||
responseType: 'buffer', | ||
agent: null, | ||
downloadLimit: 1000000, | ||
...options, | ||
}; | ||
// trying to limit non html pages | ||
if (utils.isThisANonHTMLUrl(ogsOptions.url)) throw new Error('Must scrape an HTML page'); | ||
if (options.encoding === null) { | ||
// eslint-disable-next-line no-console | ||
console.log('Setting `options.encoding` to `null` has been deprecated. You should no longer need to do this.'); | ||
options.encoding = 'utf8'; | ||
// eslint-disable-next-line max-len | ||
if (ogsOptions.blacklist && ogsOptions.blacklist.some((blacklistedHostname) => ogsOptions.url.includes(blacklistedHostname))) { | ||
throw new Error('Host name has been black listed'); | ||
} | ||
if (process.browser) { | ||
options.decompress = false; | ||
} | ||
try { | ||
const { requestBody, response } = await requestAndResultsFormatter(gotOptions, ogsOptions); | ||
// trying to limit non html pages | ||
if (utils.isThisANonHTMLUrl(options.url)) throw new Error('Must scrape an HTML page'); | ||
const ogObject = extractMetaTags(requestBody, ogsOptions); | ||
if (options.blacklist && options.blacklist.some((blacklistedHostname) => options.url.includes(blacklistedHostname))) { | ||
throw new Error('Host name has been black listed'); | ||
} | ||
if (!ogsOptions.onlyGetOpenGraphInfo) { | ||
ogObject.charset = charset.find(response.headers, requestBody, ogsOptions.peekSize); | ||
} | ||
ogObject.requestUrl = ogsOptions.url; | ||
ogObject.success = true; | ||
try { | ||
const results = await requestAndResultsFormatter(options); | ||
return results; | ||
// setting response.rawBody to the parsed body since response.body is a buffer | ||
response.rawBody = requestBody; | ||
return { ogObject, response }; | ||
} catch (exception) { | ||
@@ -87,8 +61,6 @@ if (exception && (exception.code === 'ENOTFOUND' || exception.code === 'EHOSTUNREACH' || exception.code === 'ENETUNREACH')) { | ||
throw new Error('Time out'); | ||
} else if (exception && exception.message && exception.message.startsWith('Response code 4')) { | ||
throw new Error('Page not found'); | ||
} else if (exception && exception.message && exception.message.startsWith('Response code 5')) { | ||
throw new Error('Web server is returning error'); | ||
} else if (exception && exception.message && exception.message === 'Promise was canceled') { | ||
throw new Error(`Exceeded the download limit of ${options.downloadLimit} bytes`); | ||
throw new Error(`Exceeded the download limit of ${ogsOptions.downloadLimit} bytes`); | ||
} | ||
@@ -95,0 +67,0 @@ if (exception instanceof Error) throw exception; |
@@ -6,19 +6,20 @@ const chardet = require('chardet'); | ||
const charset = require('./charset'); | ||
const { extractMetaTags } = require('./extract'); | ||
/* | ||
* request and results formatter | ||
* @param string options - options the user has set | ||
/** | ||
* performs the got request and formats the body for ogs | ||
* | ||
* @param {object} gotOptions - options for got | ||
* @param {object} ogsOptions - options for ogs | ||
* @return {object} formatted request body and response | ||
* | ||
*/ | ||
exports.requestAndResultsFormatter = async (options) => { | ||
const requestUrl = options.url; | ||
delete options.url; // setting options.url messes with got | ||
exports.requestAndResultsFormatter = async (gotOptions, ogsOptions) => { | ||
const got = await gotClient(ogsOptions.downloadLimit); | ||
return gotClient.get(requestUrl, options) | ||
return got(gotOptions) | ||
.then((response) => { | ||
options.url = requestUrl; | ||
let formatBody = response.body; | ||
let requestBody = response.body; | ||
if (response && response.headers && response.headers['content-type'] && !response.headers['content-type'].includes('text/html')) { | ||
throw new Error('Page must return a header content-type with text/html'); | ||
if (response && response.headers && response.headers['content-type'] && !response.headers['content-type'].includes('text/')) { | ||
throw new Error('Page must return a header content-type with text/'); | ||
} | ||
@@ -32,7 +33,7 @@ | ||
const char = charset.find(response.headers, formatBody, options.peekSize) || chardet.detect(formatBody); | ||
if (char && typeof formatBody === 'object') { | ||
const char = charset.find(response.headers, requestBody, ogsOptions.peekSize) || chardet.detect(requestBody); | ||
if (char && typeof requestBody === 'object') { | ||
// eslint-disable-next-line no-useless-catch | ||
try { | ||
formatBody = iconv.decode(formatBody, char); | ||
requestBody = iconv.decode(requestBody, char); | ||
} catch (exception) { | ||
@@ -42,20 +43,12 @@ throw exception; | ||
} else { | ||
formatBody = formatBody.toString(); | ||
requestBody = requestBody.toString(); | ||
} | ||
const ogObject = extractMetaTags(formatBody, options); | ||
if (!options.onlyGetOpenGraphInfo) { | ||
ogObject.charset = charset.find(response.headers, formatBody, options.peekSize); | ||
if (!requestBody) { | ||
throw new Error('Page not found'); | ||
} | ||
ogObject.requestUrl = options.url; | ||
ogObject.success = true; | ||
// setting response.rawBody to the parsed body since response.body is a buffer | ||
response.rawBody = formatBody; | ||
return { ogObject, response }; | ||
return { requestBody, response }; | ||
}) | ||
.catch((error) => { | ||
options.url = requestUrl; | ||
if (error instanceof Error) throw error; | ||
@@ -62,0 +55,0 @@ throw new Error(error); |
189
lib/utils.js
const validator = require('validator'); | ||
const got = require('got'); | ||
/* | ||
* validates the url | ||
* @param string var - the url we want to scrape | ||
/** | ||
* Checks if URL is valid | ||
* | ||
* @param {string} url - url to be checked | ||
* @param {object} urlValidatorSettings - settings used by validator | ||
* @return {boolean} boolean value if the url is valid | ||
* | ||
*/ | ||
exports.isUrlValid = (url, urlValidatorSettings) => typeof url === 'string' && url.length > 0 && validator.isURL(url, urlValidatorSettings); | ||
/* | ||
* forces url to start with http:// | ||
* @param string var - the url we want to scrape | ||
/** | ||
* Forces url to start with http:// if it doesn't | ||
* | ||
* @param {string} url - url to be updated | ||
* @return {string} url that starts with http | ||
* | ||
*/ | ||
const coerceUrl = (url) => (/^(f|ht)tps?:\/\//i.test(url) ? url : `http://${url}`); | ||
/* | ||
* validate timeout - how long should we wait for a request | ||
* @param number var - the time we want to wait | ||
/** | ||
* validates and formats url | ||
* | ||
* @param {string} url - url to be checked and formatted | ||
* @param {object} urlValidatorSettings - settings used by validator | ||
* @return {object} object whose `url` property is the proper url, or null if the url is invalid | ||
* | ||
*/ | ||
const isTimeoutValid = (timeout) => typeof timeout === 'number' && /^\d{1,10}$/.test(timeout); | ||
/* | ||
* validates url and timeout | ||
* @param string var - user input url and timeout | ||
*/ | ||
exports.validate = (url, timeout, urlValidatorSettings) => ({ | ||
exports.validateAndFormatURL = (url, urlValidatorSettings) => ({ | ||
url: this.isUrlValid(url, urlValidatorSettings) ? coerceUrl(url) : null, | ||
timeout: isTimeoutValid(timeout) ? timeout : 2000, | ||
}); | ||
/* | ||
* findImageTypeFromUrl | ||
* @param string url - image url | ||
/** | ||
* finds the image type from a given url | ||
* | ||
* @param {string} url - url to be checked | ||
* @return {string} image type from url | ||
* | ||
*/ | ||
@@ -41,5 +47,8 @@ exports.findImageTypeFromUrl = (url) => { | ||
/* | ||
* isImageTypeValid | ||
* @param string type - image type | ||
/** | ||
* checks if image type is valid | ||
* | ||
* @param {string} type - type to be checked | ||
* @return {boolean} boolean value if type is valid | ||
* | ||
*/ | ||
@@ -51,5 +60,8 @@ exports.isImageTypeValid = (type) => { | ||
/* | ||
* isThisANonHTMLPage | ||
* @param string url - url of site | ||
/** | ||
* checks if URL is a non html page | ||
* | ||
* @param {string} url - url to be checked | ||
* @return {boolean} boolean value if url is non html | ||
* | ||
*/ | ||
@@ -62,5 +74,8 @@ exports.isThisANonHTMLUrl = (url) => { | ||
/* | ||
* removeNestedUndefinedValues | ||
* @param object object - an object | ||
/** | ||
* find and delete nested undefs | ||
* | ||
* @param {object} object - object to be cleaned | ||
* @return {object} object without nested undefs | ||
* | ||
*/ | ||
@@ -75,32 +90,96 @@ exports.removeNestedUndefinedValues = (object) => { | ||
/* | ||
* gotClient | ||
* limit the size of the content we fetch when performing the request | ||
/** | ||
* split the options object into ogs and got option objects | ||
* | ||
* @param {object} options - options that need to be split | ||
* @return {object} object with nested options for ogs and got | ||
* | ||
*/ | ||
exports.optionSetupAndSplit = (options) => { | ||
const ogsOptions = { | ||
allMedia: false, | ||
customMetaTags: [], | ||
downloadLimit: 1000000, | ||
ogImageFallback: true, | ||
onlyGetOpenGraphInfo: false, | ||
peekSize: 1024, | ||
urlValidatorSettings: { | ||
protocols: ['http', 'https'], | ||
require_tld: true, | ||
require_protocol: false, | ||
require_host: true, | ||
require_valid_protocol: true, | ||
allow_underscores: false, | ||
host_whitelist: false, | ||
host_blacklist: false, | ||
allow_trailing_dot: false, | ||
allow_protocol_relative_urls: false, | ||
disallow_auth: false, | ||
}, | ||
...options, | ||
}; | ||
const gotOptions = { | ||
decompress: true, | ||
followRedirect: true, | ||
headers: {}, | ||
maxRedirects: 10, | ||
responseType: 'buffer', | ||
...options, | ||
}; | ||
if (process.browser) { | ||
gotOptions.decompress = false; | ||
} | ||
// remove any OGS options from gotOptions since this will cause errors in got | ||
delete gotOptions.allMedia; | ||
delete gotOptions.blacklist; | ||
delete gotOptions.customMetaTags; | ||
delete gotOptions.downloadLimit; | ||
delete gotOptions.ogImageFallback; | ||
delete gotOptions.onlyGetOpenGraphInfo; | ||
delete gotOptions.peekSize; | ||
delete gotOptions.urlValidatorSettings; | ||
return { ogsOptions, gotOptions }; | ||
}; | ||
/** | ||
* gotClient - limit the size of the content we fetch when performing the request | ||
* from https://github.com/sindresorhus/got/blob/main/documentation/examples/advanced-creation.js | ||
* | ||
* @param {number} downloadLimit - the download limit in bytes, will close connection once it is exceeded | ||
* @return {function} got client with download limit | ||
* | ||
*/ | ||
exports.gotClient = got.extend({ | ||
handlers: [ | ||
(options, next) => { | ||
const { downloadLimit } = options; | ||
const promiseOrStream = next(options); | ||
exports.gotClient = async (downloadLimit) => { | ||
// https://github.com/sindresorhus/got/issues/1789 | ||
// eslint-disable-next-line import/no-unresolved | ||
const { got } = await import('got'); | ||
const destroy = (message) => { | ||
if (options.isStream) { | ||
promiseOrStream.destroy(new Error(message)); | ||
return; | ||
} | ||
promiseOrStream.cancel(message); | ||
}; | ||
return got.extend({ | ||
handlers: [ | ||
(options, next) => { | ||
const promiseOrStream = next(options); | ||
if (typeof downloadLimit === 'number') { | ||
promiseOrStream.on('downloadProgress', (progress) => { | ||
if (progress.transferred > downloadLimit && progress.percent !== 1) { | ||
destroy(`Exceeded the download limit of ${downloadLimit} bytes`); | ||
const destroy = (message) => { | ||
if (options.isStream) { | ||
promiseOrStream.destroy(new Error(message)); | ||
return; | ||
} | ||
}); | ||
} | ||
promiseOrStream.cancel(message); | ||
}; | ||
return promiseOrStream; | ||
}, | ||
], | ||
}); | ||
if (typeof downloadLimit === 'number') { | ||
promiseOrStream.on('downloadProgress', (progress) => { | ||
if (progress.transferred > downloadLimit && progress.percent !== 1) { | ||
destroy(`Exceeded the download limit of ${downloadLimit} bytes`); | ||
} | ||
}); | ||
} | ||
return promiseOrStream; | ||
}, | ||
], | ||
}); | ||
}; |
{ | ||
"name": "open-graph-scraper", | ||
"description": "Node.js scraper module for Open Graph and Twitter Card info", | ||
"version": "4.11.1", | ||
"version": "5.0.0", | ||
"license": "MIT", | ||
"main": "index.js", | ||
"types": "./dist/index.d.ts", | ||
"scripts": { | ||
"build:types": "npx -p typescript tsc", | ||
"eslint:fix": "eslint . --ext .js --fix", | ||
@@ -17,3 +19,3 @@ "eslint": "eslint . --ext .js", | ||
"engines": { | ||
"node": ">=12.x.x" | ||
"node": ">=14.16" | ||
}, | ||
@@ -26,5 +28,5 @@ "author": { | ||
"dependencies": { | ||
"chardet": "^1.4.0", | ||
"cheerio": "^1.0.0-rc.11", | ||
"got": "^11.8.5", | ||
"chardet": "^1.5.0", | ||
"cheerio": "^1.0.0-rc.12", | ||
"got": "^12.5.2", | ||
"iconv-lite": "^0.6.3", | ||
@@ -35,16 +37,21 @@ "validator": "^13.7.0" | ||
"CHANGELOG.md", | ||
"/dist", | ||
"/lib" | ||
], | ||
"devDependencies": { | ||
"@snyk/protect": "^1.954.0", | ||
"chai": "^4.3.6", | ||
"@babel/core": "^7.20.2", | ||
"@babel/eslint-parser": "^7.19.1", | ||
"@snyk/protect": "^1.1054.0", | ||
"chai": "^4.3.7", | ||
"eslint": "^8.27.0", | ||
"eslint-config-airbnb-base": "^15.0.0", | ||
"eslint-plugin-import": "^2.26.0", | ||
"eslint-plugin-mocha": "^10.0.5", | ||
"eslint-plugin-promise": "^6.0.0", | ||
"eslint": "^8.18.0", | ||
"mocha": "^10.0.0", | ||
"nyc": "^15.0.1", | ||
"sinon": "^14.0.0", | ||
"tunnel": "^0.0.6" | ||
"eslint-plugin-mocha": "^10.1.0", | ||
"eslint-plugin-promise": "^6.1.1", | ||
"mocha": "^10.1.0", | ||
"nock": "^13.2.9", | ||
"nyc": "^15.1.0", | ||
"sinon": "^14.0.2", | ||
"tunnel": "^0.0.6", | ||
"typescript": "^4.8.4" | ||
}, | ||
@@ -51,0 +58,0 @@ "repository": { |
@@ -16,19 +16,5 @@ # openGraphScraper | ||
Callback Example: | ||
```javascript | ||
const ogs = require('open-graph-scraper'); | ||
const options = { url: 'http://ogp.me/' }; | ||
ogs(options, (error, results, response) => { | ||
console.log('error:', error); // This returns true or false. True if there was an error. The error itself is inside the results object. | ||
console.log('results:', results); // This contains all of the Open Graph results | ||
console.log('response:', response); // This contains the HTML of page | ||
}); | ||
``` | ||
Promise Example: | ||
```javascript | ||
const ogs = require('open-graph-scraper'); | ||
const options = { url: 'http://ogp.me/' }; | ||
ogs(options) | ||
@@ -69,3 +55,2 @@ .then((data) => { | ||
| url | URL of the site. | | x | | ||
| timeout | Timeout of the request | 2000 ms | | | ||
| html | You can pass in an HTML string to run ogs on it. (use without options.url) | | | | ||
@@ -77,13 +62,7 @@ | blacklist | Pass in an array of sites you don't want ogs to run on. | [] | | | ||
| allMedia | By default, OGS will only send back the first image/video it finds | false | | | ||
| decompress | Set the accept-encoding to gzip/deflate | true | | | ||
| followRedirect | Defines if redirect responses should be followed automatically. | true | | | ||
| maxRedirects | Max number of redirects ogs will follow. | 10 | | | ||
| retry | Number of times ogs will retry the request. | 2 | | | ||
| headers | An object containing request headers. Useful for setting the user-agent | {} | | | ||
| peekSize | Sets the peekSize for the request | 1024 | | | ||
| agent | Used for Proxies, Look below for notes on how to use. | null | | | ||
| downloadLimit | Maximum size of the content downloaded from the server, in bytes | 1000000 (1MB) | | | ||
| urlValidatorSettings | Sets the options used by validator.js for testing the URL | [Here](https://github.com/jshemas/openGraphScraper/blob/master/lib/openGraphScraper.js#L21-L36) | | | ||
Note: `open-graph-scraper` uses [got](https://github.com/sindresorhus/got) for requests and most of [got's options](https://github.com/sindresorhus/got#options) should work as `open-graph-scraper` options. | ||
Note: `open-graph-scraper` uses [got](https://github.com/sindresorhus/got) for requests and most of [got's options](https://github.com/sindresorhus/got/blob/main/documentation/2-options.md) should work as `open-graph-scraper` options. | ||
@@ -111,3 +90,3 @@ ## Custom Meta Tag Example | ||
[Look here](https://github.com/sindresorhus/got#proxies) for more info on how to use proxies. | ||
[Look here](https://github.com/sindresorhus/got/blob/main/documentation/tips.md#proxying) for more info on how to use proxies. | ||
@@ -144,3 +123,3 @@ ```javascript | ||
const options = { | ||
url: "https://twitter.com/elonmusk/status/1364826301027115008", | ||
url: "https://www.wikipedia.org/", | ||
headers: { | ||
@@ -150,6 +129,8 @@ "user-agent": "Googlebot/2.1 (+http://www.google.com/bot.html)", | ||
}; | ||
ogs(options, (error, results) => { | ||
console.log("error:", error); // This returns true or false. True if there was an error. The error itself is inside the results object. | ||
console.log("results:", results); // This contains all of the Open Graph results | ||
}); | ||
ogs(options) | ||
.then((data) => { | ||
const { error, result, response } = data; | ||
console.log("error:", error); // This returns true or false. True if there was an error. The error itself is inside the results object. | ||
console.log("results:", result); // This contains all of the Open Graph results | ||
}) | ||
``` | ||
@@ -156,0 +137,0 @@ |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
70767
31
1744
15
139
+ Added@sindresorhus/is@5.6.0(transitive)
+ Added@szmarczak/http-timer@5.0.1(transitive)
+ Addedcacheable-lookup@7.0.0(transitive)
+ Addedcacheable-request@10.2.14(transitive)
+ Addedform-data-encoder@2.1.4(transitive)
+ Addedget-stream@6.0.1(transitive)
+ Addedgot@12.6.1(transitive)
+ Addedhttp2-wrapper@2.2.1(transitive)
+ Addedlowercase-keys@3.0.0(transitive)
+ Addedmimic-response@4.0.0(transitive)
+ Addednormalize-url@8.0.1(transitive)
+ Addedp-cancelable@3.0.0(transitive)
+ Addedresponselike@3.0.0(transitive)
- Removed@sindresorhus/is@4.6.0(transitive)
- Removed@szmarczak/http-timer@4.0.6(transitive)
- Removed@types/cacheable-request@6.0.3(transitive)
- Removed@types/keyv@3.1.4(transitive)
- Removed@types/node@22.9.0(transitive)
- Removed@types/responselike@1.0.3(transitive)
- Removedcacheable-lookup@5.0.4(transitive)
- Removedcacheable-request@7.0.4(transitive)
- Removedclone-response@1.0.3(transitive)
- Removedend-of-stream@1.4.4(transitive)
- Removedget-stream@5.2.0(transitive)
- Removedgot@11.8.6(transitive)
- Removedhttp2-wrapper@1.0.3(transitive)
- Removedlowercase-keys@2.0.0(transitive)
- Removedmimic-response@1.0.1(transitive)
- Removednormalize-url@6.1.0(transitive)
- Removedonce@1.4.0(transitive)
- Removedp-cancelable@2.1.1(transitive)
- Removedpump@3.0.2(transitive)
- Removedresponselike@2.0.1(transitive)
- Removedundici-types@6.19.8(transitive)
- Removedwrappy@1.0.2(transitive)
Updatedchardet@^1.5.0
Updatedcheerio@^1.0.0-rc.12
Updatedgot@^12.5.2