link-preview-js
Advanced tools
Comparing version 2.1.15 to 2.1.16
export declare const CONSTANTS: { | ||
REGEX_VALID_URL: RegExp; | ||
REGEX_LOOPBACK: RegExp; | ||
REGEX_CONTENT_TYPE_IMAGE: RegExp; | ||
@@ -4,0 +5,0 @@ REGEX_CONTENT_TYPE_AUDIO: RegExp; |
@@ -39,2 +39,9 @@ "use strict"; | ||
"$", "i"), | ||
REGEX_LOOPBACK: new RegExp("^" + | ||
"(?:(?:10|127)(?:\\.\\d{1,3}){3})" + | ||
"|" + | ||
"(?:(?:169\\.254|192\\.168|192\\.0)(?:\\.\\d{1,3}){2})" + | ||
"|" + | ||
"(?:172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" + | ||
"$", "i"), | ||
REGEX_CONTENT_TYPE_IMAGE: new RegExp("image/.*", "i"), | ||
@@ -44,3 +51,3 @@ REGEX_CONTENT_TYPE_AUDIO: new RegExp("audio/.*", "i"), | ||
REGEX_CONTENT_TYPE_TEXT: new RegExp("text/.*", "i"), | ||
REGEX_CONTENT_TYPE_APPLICATION: new RegExp("application/.*", "i") | ||
REGEX_CONTENT_TYPE_APPLICATION: new RegExp("application/.*", "i"), | ||
}; |
@@ -7,2 +7,3 @@ interface ILinkPreviewOptions { | ||
followRedirects?: boolean; | ||
resolveDNSHost?: (url: string) => Promise<string>; | ||
} | ||
@@ -9,0 +10,0 @@ interface IPreFetchedResource { |
@@ -48,9 +48,14 @@ "use strict"; | ||
var constants_1 = require("./constants"); | ||
var metaTag = function (doc, type, attr) { | ||
var nodes = doc("meta[" + attr + "='" + type + "']"); | ||
function throwOnLoopback(address) { | ||
if (constants_1.CONSTANTS.REGEX_LOOPBACK.test(address)) { | ||
throw new Error("SSRF request detected, trying to query host"); | ||
} | ||
} | ||
function metaTag(doc, type, attr) { | ||
var nodes = doc("meta[".concat(attr, "='").concat(type, "']")); | ||
return nodes.length ? nodes : null; | ||
}; | ||
var metaTagContent = function (doc, type, attr) { | ||
return doc("meta[" + attr + "='" + type + "']").attr("content"); | ||
}; | ||
} | ||
function metaTagContent(doc, type, attr) { | ||
return doc("meta[".concat(attr, "='").concat(type, "']")).attr("content"); | ||
} | ||
function getTitle(doc) { | ||
@@ -91,4 +96,4 @@ var title = metaTagContent(doc, "og:title", "property") || | ||
nodes = | ||
metaTag(doc, imagePropertyType + ":image", "property") || | ||
metaTag(doc, imagePropertyType + ":image", "name"); | ||
metaTag(doc, "".concat(imagePropertyType, ":image"), "property") || | ||
metaTag(doc, "".concat(imagePropertyType, ":image"), "name"); | ||
if (nodes) { | ||
@@ -163,7 +168,9 @@ nodes.each(function (_, node) { | ||
nodeType = nodeTypes[index]; | ||
if (nodeType.type === "tag") | ||
if (nodeType.type === "tag") { | ||
videoType = nodeType ? nodeType.attribs.content : null; | ||
} | ||
nodeSecureUrl = nodeSecureUrls[index]; | ||
if (nodeSecureUrl.type === "tag") | ||
if (nodeSecureUrl.type === "tag") { | ||
videoSecureUrl = nodeSecureUrl ? nodeSecureUrl.attribs.content : null; | ||
} | ||
videoObj = { | ||
@@ -202,3 +209,3 @@ url: video, | ||
// look for all icon tags | ||
nodes = doc("link[" + relSelector + "]"); | ||
nodes = doc("link[".concat(relSelector, "]")); | ||
// collect all images from icon tags | ||
@@ -310,3 +317,3 @@ if (nodes.length) { | ||
catch (e) { | ||
throw new Error("link-preview-js could not fetch link information " + e.toString()); | ||
throw new Error("link-preview-js could not fetch link information ".concat(e.toString())); | ||
} | ||
@@ -324,3 +331,3 @@ } | ||
return __awaiter(this, void 0, void 0, function () { | ||
var detectedUrl, timeout, controller, timeoutCounter, fetchOptions, fetchUrl, response, headers, normalizedResponse; | ||
var detectedUrl, resolvedUrl, timeout, controller, timeoutCounter, fetchOptions, fetchUrl, response, headers, normalizedResponse; | ||
var _c; | ||
@@ -340,2 +347,9 @@ return __generator(this, function (_d) { | ||
} | ||
if (!!!(options === null || options === void 0 ? void 0 : options.resolveDNSHost)) return [3 /*break*/, 2]; | ||
return [4 /*yield*/, options.resolveDNSHost(detectedUrl)]; | ||
case 1: | ||
resolvedUrl = _d.sent(); | ||
throwOnLoopback(resolvedUrl); | ||
_d.label = 2; | ||
case 2: | ||
timeout = (_a = options === null || options === void 0 ? void 0 : options.timeout) !== null && _a !== void 0 ? _a : 3000; | ||
@@ -354,3 +368,3 @@ controller = new abort_controller_1.default(); | ||
: detectedUrl; | ||
return [4 /*yield*/, cross_fetch_1.fetch(fetchUrl, fetchOptions).catch(function (e) { | ||
return [4 /*yield*/, (0, cross_fetch_1.fetch)(fetchUrl, fetchOptions).catch(function (e) { | ||
if (e.name === "AbortError") { | ||
@@ -361,3 +375,3 @@ throw new Error("Request timeout"); | ||
})]; | ||
case 1: | ||
case 3: | ||
response = _d.sent(); | ||
@@ -376,3 +390,3 @@ clearTimeout(timeoutCounter); | ||
return [4 /*yield*/, response.text()]; | ||
case 2: | ||
case 4: | ||
normalizedResponse = (_c.data = _d.sent(), | ||
@@ -379,0 +393,0 @@ _c); |
{ | ||
"name": "link-preview-js", | ||
"version": "2.1.15", | ||
"version": "2.1.16", | ||
"description": "Javascript module to extract and fetch HTTP link information from blocks of text.", | ||
@@ -34,3 +34,3 @@ "main": "build/index.js", | ||
"abort-controller": "^3.0.0", | ||
"cheerio": "1.0.0-rc.10", | ||
"cheerio": "1.0.0-rc.11", | ||
"cross-fetch": "3.1.5", | ||
@@ -46,19 +46,7 @@ "url": "0.11.0" | ||
"@types/jest": "^26.0.19", | ||
"@typescript-eslint/eslint-plugin": "^2.15.0", | ||
"@typescript-eslint/parser": "^2.15.0", | ||
"eslint": "^6.8.0", | ||
"eslint-config-airbnb": "^18.0.1", | ||
"eslint-config-prettier": "^6.9.0", | ||
"eslint-import-resolver-typescript": "^2.0.0", | ||
"eslint-plugin-import": "^2.19.1", | ||
"eslint-plugin-jest": "^23.3.0", | ||
"eslint-plugin-json": "^2.0.1", | ||
"eslint-plugin-jsx-a11y": "^6.2.3", | ||
"eslint-plugin-prettier": "^3.1.2", | ||
"eslint-plugin-react": "^7.17.0", | ||
"jest": "^26.6.3", | ||
"prettier": "2.2.1", | ||
"prettier": "2.7.1", | ||
"ts-jest": "^26.4.4", | ||
"typescript": "^4.1.3" | ||
"typescript": "^4.7.4" | ||
} | ||
} |
@@ -19,16 +19,18 @@ <h1 align="center">Link Preview JS</h1> | ||
> **READ BEFORE CREATING AN ISSUE** | ||
> | ||
> It's more than likely there is nothing wrong with the library for some simple reasons: | ||
> | ||
> - It's very simple: fetch html, parse html, look for OpenGraph html tags. | ||
> - The library will never break unless the entire web all of the sudden decides to break itself (by changing ALL OpenGraph tags ALL AT ONCE) | ||
> - If the target website you are trying to preview redirects you to a login page **the preview will "fail"** | ||
> - If the target website does not have OpenGraph tags **the preview will most likely "fail"** | ||
> - **You cannot preview (read: HTTP get) another web page from YOUR web page. This is an intentional security feature of browsers called CORS** | ||
> | ||
> If you haven't read this and create an issue "Amazon/Youtube/Instagram/Whatever doesn't work" I will just close the issue | ||
**Before creating an issue** | ||
Allows you to extract information from a HTTP url/link (or parse a HTML string) and retrieve meta information such as title, description, images, videos, etc. via **Facebook OpenGraph** tags. | ||
It's more than likely there is nothing wrong with the library: | ||
- It's very simple; fetch html, parse html, look for OpenGraph html tags. | ||
- Unless HTML or the OpenGraph standard change, the library will not break | ||
- If the target website you are trying to preview redirects you to a login page **the preview will "fail"**, because it will parse the login page | ||
- If the target website does not have OpenGraph tags **the preview will most likely "fail"**, there are some fallbacks but in general it will not work | ||
- **You cannot preview (fetch) another web page from YOUR web page. This is an intentional security feature of browsers called CORS** | ||
Any opened issue that does not take these points into account will just be closed. | ||
# Link Preview | ||
Allows you to extract information from an HTTP url/link (or parse an HTML string) and retrieve meta information such as title, description, images, videos, etc. via **OpenGraph** tags. | ||
## GOTCHAs | ||
@@ -38,3 +40,2 @@ | ||
- **This library acts as if the user would visit the page, sites might re-direct you to sign-up pages, consent screens, etc.** You can try to change the user-agent header (try with `google-bot` or with `Twitterbot`), but you need to work around these issues yourself. | ||
- When you are testing this library do not use **google.com**, it does not return the necessary headers and you will think the library is broken. | ||
@@ -91,2 +92,3 @@ ## API | ||
| followRedirects (**optional**) (default false) | For security reasons, the library does not automatically follow redirects, a malicious agent can exploit redirects to steal data, turn this on at your own risk | | ||
| resolveDNSHost (**optional**) | Function that resolves the final address of the detected/parsed URL to prevent SSRF attacks | | ||
@@ -105,2 +107,29 @@ ```javascript | ||
## SSRF Concerns | ||
Doing requests on behalf of your users or using user provided URLs is dangerous. One such attack is trying to fetch a domain which redirects to localhost, so that the user gets the contents of your server (doesn't affect mobile runtimes). In order to mitigate this attack you can use the resolveDNSHost option: | ||
```ts | ||
// example how to use node's dns resolver | ||
const dns = require("node:dns"); | ||
getLinkPreview("http://maliciousLocalHostRedirection.com", { | ||
resolveDNSHost: async (url: string) => { | ||
return new Promise((resolve, reject) => { | ||
dns.lookup(new URL(url).hostname, (err, address, family) => { | ||
if (err) { | ||
reject(err); | ||
return; | ||
} | ||
resolve(address); // if address resolves to localhost or '127.0.0.1' library will throw an error | ||
}); | ||
}); | ||
}, | ||
}).catch((e) => { | ||
// will throw a detected redirection to localhost | ||
}); | ||
``` | ||
This might add some latency to your request but prevents loopback attacks. | ||
## Response | ||
@@ -107,0 +136,0 @@ |
33025
7
545
200
+ Addedcheerio@1.0.0-rc.11(transitive)
+ Addedcheerio-select@2.1.0(transitive)
+ Addedcss-select@5.1.0(transitive)
+ Addeddom-serializer@2.0.0(transitive)
+ Addeddomhandler@5.0.3(transitive)
+ Addeddomutils@3.2.2(transitive)
+ Addedentities@4.5.0(transitive)
+ Addedhtmlparser2@8.0.2(transitive)
+ Addedparse5@7.2.1(transitive)
+ Addedparse5-htmlparser2-tree-adapter@7.1.0(transitive)
- Removedcheerio@1.0.0-rc.10(transitive)
- Removedcheerio-select@1.6.0(transitive)
- Removedcss-select@4.3.0(transitive)
- Removeddom-serializer@1.4.1(transitive)
- Removeddomhandler@4.3.1(transitive)
- Removeddomutils@2.8.0(transitive)
- Removedentities@2.2.0(transitive)
- Removedhtmlparser2@6.1.0(transitive)
- Removedparse5@6.0.1(transitive)
- Removedparse5-htmlparser2-tree-adapter@6.0.1(transitive)
Updatedcheerio@1.0.0-rc.11