open-graph-scraper
Advanced tools
Comparing version 6.6.3 to 6.7.0
# Change Log | ||
## 6.7.0 | ||
- Replace `validator` with an internal version of `isUrl` so we have better control over how URL validation works. | ||
- Fix an issue where `JSON` parsing fails when YouTube escapes '&' as '\x26'. | ||
- Updating dependencies | ||
## 6.6.3 | ||
@@ -4,0 +10,0 @@ |
@@ -11,2 +11,3 @@ "use strict"; | ||
const media_1 = __importDefault(require("./media")); | ||
const utils_1 = require("./utils"); | ||
/** | ||
@@ -101,4 +102,5 @@ * extract all of the meta tags needed for ogs | ||
ogObject.jsonLD = []; | ||
const scriptText = $(script).text(); | ||
let scriptText = $(script).text(); | ||
if (scriptText) { | ||
scriptText = (0, utils_1.unescapeScriptText)(scriptText); | ||
ogObject.jsonLD.push(JSON.parse(scriptText)); | ||
@@ -105,0 +107,0 @@ } |
@@ -15,3 +15,4 @@ "use strict"; | ||
exports.isCustomMetaTagsValid = isCustomMetaTagsValid; | ||
const validator_1 = __importDefault(require("validator")); | ||
exports.unescapeScriptText = unescapeScriptText; | ||
const isUrl_1 = __importDefault(require("./isUrl")); | ||
exports.defaultUrlValidatorSettings = { | ||
@@ -40,3 +41,3 @@ allow_fragments: true, | ||
function isUrlValid(url, urlValidatorSettings) { | ||
return typeof url === 'string' && url.length > 0 && validator_1.default.isURL(url, urlValidatorSettings); | ||
return typeof url === 'string' && url.length > 0 && (0, isUrl_1.default)(url, urlValidatorSettings); | ||
} | ||
@@ -153,1 +154,36 @@ /** | ||
} | ||
/**
 * Unescape script text.
 *
 * Certain websites escape script text within script tags using JavaScript
 * `\xHH` sequences. JSON has no `\xHH` escape, so `JSON.parse()` rejects
 * such text. This rewrites every `\xHH` into a form `JSON.parse()` accepts.
 *
 * Known good escape sequences (left untouched):
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh
 *
 * ```js
 * JSON.parse('"\\u2611"'); // '☑'
 * ```
 *
 * Known bad escape sequences (rewritten):
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh
 *
 * ```js
 * JSON.parse('"\\x26"'); // throws SyntaxError
 * JSON.parse(unescapeScriptText('"\\x26"')); // '&'
 * ```
 *
 * Characters that cannot appear raw inside a JSON string are re-escaped
 * instead of being emitted literally: `\x22` becomes `\"`, `\x5c` becomes
 * `\\`, and control characters (`\x00`-`\x1f`) become `\u00HH`.
 *
 * @param {string} scriptText - the text of the script tag
 * @returns {string} unescaped script text
 */
function unescapeScriptText(scriptText) {
  // Match every backslash escape, not just `\xHH`, so that an escaped
  // backslash (`\\`) is consumed as a pair. Otherwise `\\x26` (a literal
  // backslash followed by the text "x26") would be mistaken for `\x26`.
  return scriptText.replace(/\\(?:x([0-9a-f]{2})|[\s\S])/ig, (escape, pair) => {
    if (pair === undefined) {
      // Some other escape sequence (`\\`, `\"`, `\n`, `\uHHHH`, ...): keep as is.
      return escape;
    }
    const charCode = parseInt(pair, 16);
    if (charCode === 34) {
      // A raw double quote would terminate the surrounding JSON string.
      return '\\"';
    }
    if (charCode === 92) {
      // A raw backslash would start a new (probably invalid) escape sequence.
      return '\\\\';
    }
    if (charCode < 32) {
      // Raw control characters are not allowed inside JSON strings.
      return '\\u00' + pair;
    }
    return String.fromCharCode(charCode);
  });
}
@@ -11,2 +11,3 @@ "use strict"; | ||
const media_1 = __importDefault(require("./media")); | ||
const utils_1 = require("./utils"); | ||
/** | ||
@@ -101,4 +102,5 @@ * extract all of the meta tags needed for ogs | ||
ogObject.jsonLD = []; | ||
const scriptText = $(script).text(); | ||
let scriptText = $(script).text(); | ||
if (scriptText) { | ||
scriptText = (0, utils_1.unescapeScriptText)(scriptText); | ||
ogObject.jsonLD.push(JSON.parse(scriptText)); | ||
@@ -105,0 +107,0 @@ } |
@@ -15,3 +15,4 @@ "use strict"; | ||
exports.isCustomMetaTagsValid = isCustomMetaTagsValid; | ||
const validator_1 = __importDefault(require("validator")); | ||
exports.unescapeScriptText = unescapeScriptText; | ||
const isUrl_1 = __importDefault(require("./isUrl")); | ||
exports.defaultUrlValidatorSettings = { | ||
@@ -40,3 +41,3 @@ allow_fragments: true, | ||
function isUrlValid(url, urlValidatorSettings) { | ||
return typeof url === 'string' && url.length > 0 && validator_1.default.isURL(url, urlValidatorSettings); | ||
return typeof url === 'string' && url.length > 0 && (0, isUrl_1.default)(url, urlValidatorSettings); | ||
} | ||
@@ -153,1 +154,36 @@ /** | ||
} | ||
/**
 * Unescape script text.
 *
 * Certain websites escape script text within script tags using JavaScript
 * `\xHH` sequences. JSON has no `\xHH` escape, so `JSON.parse()` rejects
 * such text. This rewrites every `\xHH` into a form `JSON.parse()` accepts.
 *
 * Known good escape sequences (left untouched):
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh
 *
 * ```js
 * JSON.parse('"\\u2611"'); // '☑'
 * ```
 *
 * Known bad escape sequences (rewritten):
 *
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh
 *
 * ```js
 * JSON.parse('"\\x26"'); // throws SyntaxError
 * JSON.parse(unescapeScriptText('"\\x26"')); // '&'
 * ```
 *
 * Characters that cannot appear raw inside a JSON string are re-escaped
 * instead of being emitted literally: `\x22` becomes `\"`, `\x5c` becomes
 * `\\`, and control characters (`\x00`-`\x1f`) become `\u00HH`.
 *
 * @param {string} scriptText - the text of the script tag
 * @returns {string} unescaped script text
 */
function unescapeScriptText(scriptText) {
  // Match every backslash escape, not just `\xHH`, so that an escaped
  // backslash (`\\`) is consumed as a pair. Otherwise `\\x26` (a literal
  // backslash followed by the text "x26") would be mistaken for `\x26`.
  return scriptText.replace(/\\(?:x([0-9a-f]{2})|[\s\S])/ig, (escape, pair) => {
    if (pair === undefined) {
      // Some other escape sequence (`\\`, `\"`, `\n`, `\uHHHH`, ...): keep as is.
      return escape;
    }
    const charCode = parseInt(pair, 16);
    if (charCode === 34) {
      // A raw double quote would terminate the surrounding JSON string.
      return '\\"';
    }
    if (charCode === 92) {
      // A raw backslash would start a new (probably invalid) escape sequence.
      return '\\\\';
    }
    if (charCode < 32) {
      // Raw control characters are not allowed inside JSON strings.
      return '\\u00' + pair;
    }
    return String.fromCharCode(charCode);
  });
}
{ | ||
"name": "open-graph-scraper", | ||
"description": "Node.js scraper module for Open Graph and Twitter Card info", | ||
"version": "6.6.3", | ||
"version": "6.7.0", | ||
"license": "MIT", | ||
@@ -41,4 +41,3 @@ "main": "./dist/cjs/index.js", | ||
"iconv-lite": "^0.6.3", | ||
"undici": "^6.19.2", | ||
"validator": "^13.12.0" | ||
"undici": "^6.19.4" | ||
}, | ||
@@ -54,6 +53,5 @@ "files": [ | ||
"@types/mocha": "^10.0.7", | ||
"@types/node": "^18.19.40", | ||
"@types/validator": "^13.12.0", | ||
"@typescript-eslint/eslint-plugin": "^7.16.1", | ||
"@typescript-eslint/parser": "^7.16.1", | ||
"@types/node": "^18.19.41", | ||
"@typescript-eslint/eslint-plugin": "^7.17.0", | ||
"@typescript-eslint/parser": "^7.17.0", | ||
"chai": "^4.4.1", | ||
@@ -65,3 +63,3 @@ "eslint": "^8.57.0", | ||
"eslint-plugin-mocha": "^10.4.3", | ||
"eslint-plugin-promise": "^6.4.0", | ||
"eslint-plugin-promise": "^6.6.0", | ||
"mocha": "^10.6.0", | ||
@@ -68,0 +66,0 @@ "nyc": "^17.0.0", |
@@ -86,1 +86,27 @@ import type { CustomMetaTags, OgObjectInteral, OpenGraphScraperOptions, ValidatorSettings } from './types'; | ||
export declare function isCustomMetaTagsValid(customMetaTags: CustomMetaTags[]): boolean; | ||
/** | ||
* Unescape script text. | ||
* | ||
* Certain websites escape script text within script tags, which can | ||
* interfere with `JSON.parse()`. Therefore, we need to unescape it. | ||
* | ||
* Known good escape sequences: | ||
* | ||
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#uhhhh | ||
* | ||
* ```js | ||
* JSON.parse('"\\u2611"'); // '☑' | ||
* ``` | ||
* | ||
* Known bad escape sequences: | ||
* | ||
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions/Character_escape#xhh | ||
* | ||
* ```js | ||
* JSON.parse('"\\x26"'); // throws SyntaxError; yields '&' only after unescaping | ||
* ``` | ||
* | ||
* @param {string} scriptText - the text of the script tag | ||
* @returns {string} unescaped script text | ||
*/ | ||
export declare function unescapeScriptText(scriptText: string): string; |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
182281
4
17
35
4846
- Removed validator@^13.12.0
- Removed validator@13.12.0 (transitive)
- Updated undici@^6.19.4