link-preview-js
Advanced tools
Comparing version 2.0.4 to 2.0.5
@@ -5,3 +5,3 @@ import { getLinkPreview } from "../build/index"; | ||
it(`should extract link info from just URL`, async () => { | ||
const linkInfo = await getLinkPreview( | ||
const linkInfo: any = await getLinkPreview( | ||
`https://www.youtube.com/watch?v=wuClZjOdT30`, | ||
@@ -21,5 +21,3 @@ { headers: { "Accept-Language": `en-US` } }, | ||
expect(linkInfo.videos.length).toEqual(0); | ||
expect(linkInfo.favicons[0]).toEqual( | ||
`https://www.youtube.com/yts/img/favicon_32-vflOogEID.png`, | ||
); | ||
expect(linkInfo.favicons[0]).not.toBe(``); | ||
expect(linkInfo.contentType.toLowerCase()).toEqual( | ||
@@ -31,3 +29,3 @@ `text/html; charset=utf-8`, | ||
it(`should extract link info from a URL with a newline`, async () => { | ||
const linkInfo = await getLinkPreview( | ||
const linkInfo: any = await getLinkPreview( | ||
` | ||
@@ -49,5 +47,3 @@ https://www.youtube.com/watch?v=wuClZjOdT30, | ||
expect(linkInfo.videos.length).toEqual(0); | ||
expect(linkInfo.favicons[0]).toEqual( | ||
`https://www.youtube.com/yts/img/favicon_32-vflOogEID.png`, | ||
); | ||
expect(linkInfo.favicons[0]).not.toBe(``); | ||
expect(linkInfo.contentType.toLowerCase()).toEqual( | ||
@@ -59,3 +55,3 @@ `text/html; charset=utf-8`, | ||
it(`should extract link info from just text with a URL`, async () => { | ||
const linkInfo = await getLinkPreview( | ||
const linkInfo: any = await getLinkPreview( | ||
`This is some text blah blah https://www.youtube.com/watch?v=wuClZjOdT30 and more text`, | ||
@@ -75,5 +71,3 @@ { headers: { "Accept-Language": `en-US` } }, | ||
expect(linkInfo.videos.length).toEqual(0); | ||
expect(linkInfo.favicons[0]).toEqual( | ||
`https://www.youtube.com/yts/img/favicon_32-vflOogEID.png`, | ||
); | ||
expect(linkInfo.favicons[0]).toBeTruthy(); | ||
expect(linkInfo.contentType.toLowerCase()).toEqual( | ||
@@ -85,3 +79,3 @@ `text/html; charset=utf-8`, | ||
it(`should make request with different languages`, async () => { | ||
let linkInfo = await getLinkPreview(`https://www.hsbc.ca/`, { | ||
let linkInfo: any = await getLinkPreview(`https://www.hsbc.ca/`, { | ||
headers: { "Accept-Language": `fr` }, | ||
@@ -132,26 +126,22 @@ }); | ||
it(`should handle unknown content type urls`, async () => { | ||
const linkInfo = await getLinkPreview(`https://mjml.io/try-it-live`); | ||
expect(linkInfo.url).toEqual(`https://mjml.io/try-it-live`); | ||
expect(linkInfo.mediaType).toEqual(`website`); | ||
}); | ||
// This site changed? it is not returning application any more but rather website | ||
it.skip(`should handle application urls`, async () => { | ||
const linkInfo = await getLinkPreview( | ||
`https://mjml.io/try-it-live`, | ||
`https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`, | ||
); | ||
expect(linkInfo.url).toEqual( | ||
`https://mjml.io/try-it-live`, | ||
`https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`, | ||
); | ||
expect(linkInfo.mediaType).toEqual(`website`); | ||
expect(linkInfo.mediaType).toEqual(`application`); | ||
expect(linkInfo.contentType.toLowerCase()).toEqual(`application/pdf`); | ||
expect(linkInfo.favicons[0]).toBeTruthy(); | ||
}); | ||
// This site changed? it is not returning application any more but rather website | ||
// it(`should handle application urls`, async () => { | ||
// const linkInfo = await getLinkPreview( | ||
// `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`, | ||
// ); | ||
// expect(linkInfo.url).toEqual( | ||
// `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`, | ||
// ); | ||
// expect(linkInfo.mediaType).toEqual(`application`); | ||
// expect(linkInfo.contentType.toLowerCase()).toEqual(`application/pdf`); | ||
// expect(linkInfo.favicons[0]).toBeTruthy(); | ||
// }); | ||
it(`no link in text should fail gracefully`, async () => { | ||
@@ -171,5 +161,28 @@ await expect( | ||
it(`hould handle empty strings gracefully`, async () => { | ||
it(`should handle empty strings gracefully`, async () => { | ||
await expect(getLinkPreview(``)).rejects.toThrowErrorMatchingSnapshot(); | ||
}); | ||
it(`should handle a proxy url option`, async () => { | ||
// origin header is required by cors-anywhere | ||
const linkInfo: any = await getLinkPreview( | ||
`https://www.youtube.com/watch?v=wuClZjOdT30`, | ||
{ proxyUrl: `https://cors-anywhere.herokuapp.com/`, headers: { Origin: `http://localhost:8000`, "Accept-Language": `en-US` } }, | ||
); | ||
expect(linkInfo.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`); | ||
expect(linkInfo.siteName).toEqual(`YouTube`); | ||
expect(linkInfo.title).toEqual(`Geography Now! Germany`); | ||
expect(linkInfo.description).toBeTruthy(); | ||
expect(linkInfo.mediaType).toEqual(`video.other`); | ||
expect(linkInfo.images.length).toEqual(1); | ||
expect(linkInfo.images[0]).toEqual( | ||
`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`, | ||
); | ||
expect(linkInfo.videos.length).toEqual(0); | ||
expect(linkInfo.favicons[0]).not.toBe(``); | ||
expect(linkInfo.contentType.toLowerCase()).toEqual( | ||
`text/html; charset=utf-8`, | ||
); | ||
}); | ||
}); |
/** Optional settings accepted as the second argument of `getLinkPreview`. */
interface ILinkPreviewOptions { | ||
// Request headers handed to the fetch call (the tests pass e.g. "Accept-Language").
headers?: Record<string, string>; | ||
// Prefix used when looking up `<prefix>:image` meta tags; "og" is used when omitted.
imagesPropertyType?: string; | ||
// When set, it is prepended to the detected URL before fetching and removed
// again from the response URL to compute the final URL.
proxyUrl?: string; | ||
} | ||
@@ -5,0 +6,0 @@ export declare function getLinkPreview(text: string, options?: ILinkPreviewOptions): Promise<{ |
@@ -46,4 +46,9 @@ "use strict"; | ||
var constants_1 = require("./constants"); | ||
// Queries `doc` for every <meta> element whose attribute `attr` equals `type`
// (selector: meta[attr='type']).
// Returns the matched selection when it contains at least one node, otherwise
// null, so callers can chain a second lookup with `||`.
var metaTag = function (doc, type, attr) { | ||
var nodes = doc("meta[" + attr + "='" + type + "']"); | ||
return nodes.length ? nodes : null; | ||
}; | ||
// Returns the `content` attribute of the selection for meta[attr='type'].
// Callers combine several lookups with `||`, so any falsy result (including
// an empty string) falls through to the next lookup.
var metaTagContent = function (doc, type, attr) { return doc("meta[" + attr + "='" + type + "']").attr("content"); }; | ||
function getTitle(doc) { | ||
var title = doc("meta[property='og:title']").attr("content"); | ||
var title = metaTagContent(doc, "og:title", "property") || metaTagContent(doc, "og:title", "name"); | ||
if (!title) { | ||
@@ -55,24 +60,19 @@ title = doc("title").text(); | ||
function getSiteName(doc) { | ||
var siteName = doc("meta[property='og:site_name']").attr("content"); | ||
var siteName = metaTagContent(doc, "og:site_name", "property") || metaTagContent(doc, "og:site_name", "name"); | ||
return siteName; | ||
} | ||
function getDescription(doc) { | ||
var description = doc("meta[name=description]").attr("content"); | ||
if (description === undefined) { | ||
description = doc("meta[name=Description]").attr("content"); | ||
} | ||
if (description === undefined) { | ||
description = doc("meta[property='og:description']").attr("content"); | ||
} | ||
var description = metaTagContent(doc, "description", "name") || metaTagContent(doc, "Description", "name") || metaTagContent(doc, "og:description", "property"); | ||
return description; | ||
} | ||
function getMediaType(doc) { | ||
var node = doc("meta[name=medium]"); | ||
if (node.length) { | ||
var node = metaTag(doc, "medium", "name"); | ||
if (node) { | ||
var content = node.attr("content"); | ||
return content === "image" ? "photo" : content; | ||
} | ||
return doc("meta[property='og:type']").attr("content"); | ||
return (metaTagContent(doc, "og:type", "property") || metaTagContent(doc, "og:type", "name")); | ||
} | ||
function getImages(doc, rootUrl, imagesPropertyType) { | ||
var _a; | ||
var images = []; | ||
@@ -83,4 +83,4 @@ var nodes; | ||
var imagePropertyType = (imagesPropertyType !== null && imagesPropertyType !== void 0 ? imagesPropertyType : "og"); | ||
nodes = doc("meta[property='" + imagePropertyType + ":image']"); | ||
if (nodes.length) { | ||
nodes = metaTag(doc, imagePropertyType + ":image", "property") || metaTag(doc, imagePropertyType + ":image", "name"); | ||
if (nodes) { | ||
nodes.each(function (_, node) { | ||
@@ -102,3 +102,3 @@ src = node.attribs.content; | ||
nodes = doc("img"); | ||
if (nodes.length) { | ||
if ((_a = nodes) === null || _a === void 0 ? void 0 : _a.length) { | ||
dic = {}; | ||
@@ -121,2 +121,3 @@ images = []; | ||
function getVideos(doc) { | ||
var _a; | ||
var videos = []; | ||
@@ -134,10 +135,9 @@ var nodeTypes; | ||
var index; | ||
var nodes = doc("meta[property='og:video']"); | ||
var length = nodes.length; | ||
if (length) { | ||
nodeTypes = doc("meta[property='og:video:type']"); | ||
nodeSecureUrls = doc("meta[property='og:video:secure_url']"); | ||
width = doc("meta[property='og:video:width']").attr("content"); | ||
height = doc("meta[property='og:video:height']").attr("content"); | ||
for (index = 0; index < length; index += 1) { | ||
var nodes = metaTag(doc, "og:video", "property") || metaTag(doc, "og:video", "name"); | ||
if ((_a = nodes) === null || _a === void 0 ? void 0 : _a.length) { | ||
nodeTypes = metaTag(doc, "og:video:type", "property") || metaTag(doc, "og:video:type", "name"); | ||
nodeSecureUrls = metaTag(doc, "og:video:secure_url", "property") || metaTag(doc, "og:video:secure_url", "name"); | ||
width = metaTagContent(doc, "og:video:width", "property") || metaTagContent(doc, "og:video:width", "name"); | ||
height = metaTagContent(doc, "og:video:height", "property") || metaTagContent(doc, "og:video:height", "name"); | ||
for (index = 0; index < nodes.length; index += 1) { | ||
video = nodes[index].attribs.content; | ||
@@ -251,7 +251,7 @@ nodeType = nodeTypes[index]; | ||
function getLinkPreview(text, options) { | ||
var _a, _b; | ||
var _a, _b, _c, _d; | ||
return __awaiter(this, void 0, void 0, function () { | ||
var detectedUrl, fetchOptions, response, finalUrl, contentType, htmlString_1, htmlString_2, htmlString, e_1; | ||
return __generator(this, function (_c) { | ||
switch (_c.label) { | ||
var detectedUrl, fetchOptions, fetchUrl, response, finalUrl, contentType, htmlString_1, htmlString_2, htmlString, e_1; | ||
return __generator(this, function (_e) { | ||
switch (_e.label) { | ||
case 0: | ||
@@ -261,6 +261,3 @@ if (!text || typeof text !== "string") { | ||
} | ||
detectedUrl = text | ||
.replace(/\n/g, " ") | ||
.split(" ") | ||
.find(function (token) { return constants_1.CONSTANTS.REGEX_VALID_URL.test(token); }); | ||
detectedUrl = text.replace(/\n/g, " ").split(" ").find(function (token) { return constants_1.CONSTANTS.REGEX_VALID_URL.test(token); }); | ||
if (!detectedUrl) { | ||
@@ -270,9 +267,11 @@ throw new Error("link-preview-js did not receive a valid a url or text"); | ||
fetchOptions = { headers: (_b = (_a = options) === null || _a === void 0 ? void 0 : _a.headers, (_b !== null && _b !== void 0 ? _b : {})) }; | ||
_c.label = 1; | ||
fetchUrl = ((_c = options) === null || _c === void 0 ? void 0 : _c.proxyUrl) ? options.proxyUrl.concat(detectedUrl) : detectedUrl; | ||
_e.label = 1; | ||
case 1: | ||
_c.trys.push([1, 8, , 9]); | ||
return [4 /*yield*/, cross_fetch_1.fetch(detectedUrl, fetchOptions)]; | ||
_e.trys.push([1, 8, , 9]); | ||
return [4 /*yield*/, cross_fetch_1.fetch(fetchUrl, fetchOptions)]; | ||
case 2: | ||
response = _c.sent(); | ||
finalUrl = response.url; | ||
response = _e.sent(); | ||
finalUrl = ((_d = options) === null || _d === void 0 ? void 0 : _d.proxyUrl) ? response.url.replace(options.proxyUrl, "") | ||
: response.url; | ||
contentType = response.headers.get("content-type"); | ||
@@ -282,3 +281,3 @@ if (!!contentType) return [3 /*break*/, 4]; | ||
case 3: | ||
htmlString_1 = _c.sent(); | ||
htmlString_1 = _e.sent(); | ||
return [2 /*return*/, parseUnknownResponse(htmlString_1, finalUrl, options)]; | ||
@@ -303,3 +302,3 @@ case 4: | ||
case 5: | ||
htmlString_2 = _c.sent(); | ||
htmlString_2 = _e.sent(); | ||
return [2 /*return*/, parseTextResponse(htmlString_2, finalUrl, options, contentType)]; | ||
@@ -312,6 +311,6 @@ case 6: | ||
case 7: | ||
htmlString = _c.sent(); | ||
htmlString = _e.sent(); | ||
return [2 /*return*/, parseUnknownResponse(htmlString, finalUrl, options)]; | ||
case 8: | ||
e_1 = _c.sent(); | ||
e_1 = _e.sent(); | ||
throw new Error("link-preview-js could not fetch link information " + e_1.toString()); | ||
@@ -318,0 +317,0 @@ case 9: return [2 /*return*/]; |
70
index.ts
@@ -9,11 +9,17 @@ import cheerio from "cheerio-without-node-native"; | ||
imagesPropertyType?: string; | ||
proxyUrl?: string; | ||
} | ||
/**
 * Queries `doc` for every `<meta>` element whose attribute `attr` equals `type`
 * (selector: `meta[attr='type']`).
 *
 * @param doc  selector function for the parsed document (presumably the cheerio root — confirm against the caller)
 * @param type attribute value to match, e.g. `og:video`
 * @param attr attribute name to match on, e.g. `property` or `name`
 * @returns the matched selection when it has at least one node, otherwise `null`,
 *          so callers can chain a second lookup with `||`
 */
const metaTag = (doc: any, type: string, attr: string) => { | ||
const nodes = doc(`meta[${attr}='${type}']`); | ||
return nodes.length ? nodes : null; | ||
}; | ||
/**
 * Returns the `content` attribute of the selection for `meta[attr='type']`.
 * Callers combine several lookups with `||`, so any falsy result (including
 * an empty string) falls through to the next lookup.
 */
const metaTagContent = (doc: any, type: string, attr: string) => doc(`meta[${attr}='${type}']`).attr(`content`); | ||
function getTitle(doc: any) { | ||
let title = doc(`meta[property='og:title']`).attr(`content`); | ||
let title = metaTagContent(doc, `og:title`, `property`) || metaTagContent(doc, `og:title`, `name`); | ||
if (!title) { | ||
title = doc(`title`).text(); | ||
} | ||
return title; | ||
@@ -23,4 +29,3 @@ } | ||
function getSiteName(doc: any) { | ||
const siteName = doc(`meta[property='og:site_name']`).attr(`content`); | ||
const siteName = metaTagContent(doc, `og:site_name`, `property`) || metaTagContent(doc, `og:site_name`, `name`); | ||
return siteName; | ||
@@ -30,12 +35,3 @@ } | ||
function getDescription(doc: any) { | ||
let description = doc(`meta[name=description]`).attr(`content`); | ||
if (description === undefined) { | ||
description = doc(`meta[name=Description]`).attr(`content`); | ||
} | ||
if (description === undefined) { | ||
description = doc(`meta[property='og:description']`).attr(`content`); | ||
} | ||
const description = metaTagContent(doc, `description`, `name`) || metaTagContent(doc, `Description`, `name`) || metaTagContent(doc, `og:description`, `property`); | ||
return description; | ||
@@ -45,9 +41,8 @@ } | ||
function getMediaType(doc: any) { | ||
const node = doc(`meta[name=medium]`); | ||
if (node.length) { | ||
const node = metaTag(doc, `medium`, `name`); | ||
if (node) { | ||
const content = node.attr(`content`); | ||
return content === `image` ? `photo` : content; | ||
} | ||
return doc(`meta[property='og:type']`).attr(`content`); | ||
return (metaTagContent(doc, `og:type`, `property`) || metaTagContent(doc, `og:type`, `name`)); | ||
} | ||
@@ -62,5 +57,5 @@ | ||
const imagePropertyType = imagesPropertyType ?? `og`; | ||
nodes = doc(`meta[property='${imagePropertyType}:image']`); | ||
nodes = metaTag(doc, `${imagePropertyType}:image`, `property`) || metaTag(doc, `${imagePropertyType}:image`, `name`); | ||
if (nodes.length) { | ||
if (nodes) { | ||
nodes.each((_: number, node: any) => { | ||
@@ -83,3 +78,3 @@ src = node.attribs.content; | ||
if (nodes.length) { | ||
if (nodes?.length) { | ||
dic = {}; | ||
@@ -117,12 +112,11 @@ images = []; | ||
const nodes = doc(`meta[property='og:video']`); | ||
const { length } = nodes; | ||
const nodes = metaTag(doc, `og:video`, `property`) || metaTag(doc, `og:video`, `name`); | ||
if (length) { | ||
nodeTypes = doc(`meta[property='og:video:type']`); | ||
nodeSecureUrls = doc(`meta[property='og:video:secure_url']`); | ||
width = doc(`meta[property='og:video:width']`).attr(`content`); | ||
height = doc(`meta[property='og:video:height']`).attr(`content`); | ||
if (nodes?.length) { | ||
nodeTypes = metaTag(doc, `og:video:type`, `property`) || metaTag(doc, `og:video:type`, `name`); | ||
nodeSecureUrls = metaTag(doc, `og:video:secure_url`, `property`) || metaTag(doc, `og:video:secure_url`, `name`); | ||
width = metaTagContent(doc, `og:video:width`, `property`) || metaTagContent(doc, `og:video:width`, `name`); | ||
height = metaTagContent(doc, `og:video:height`, `property`) || metaTagContent(doc, `og:video:height`, `name`); | ||
for (index = 0; index < length; index += 1) { | ||
for (index = 0; index < nodes.length; index += 1) { | ||
video = nodes[index].attribs.content; | ||
@@ -159,3 +153,2 @@ | ||
// returns an array of URL's to favicon images | ||
@@ -271,6 +264,3 @@ function getFavicons(doc: any, rootUrl: string) { | ||
const detectedUrl = text | ||
.replace(/\n/g, ` `) | ||
.split(` `) | ||
.find((token) => CONSTANTS.REGEX_VALID_URL.test(token)); | ||
const detectedUrl = text.replace(/\n/g, ` `).split(` `).find((token) => CONSTANTS.REGEX_VALID_URL.test(token)); | ||
@@ -283,7 +273,11 @@ if (!detectedUrl) { | ||
const fetchUrl = options?.proxyUrl ? options.proxyUrl.concat(detectedUrl) : detectedUrl; | ||
try { | ||
const response = await fetch(detectedUrl, fetchOptions); | ||
const response = await fetch(fetchUrl, fetchOptions); | ||
// get final URL (after any redirects) | ||
const finalUrl = response.url; | ||
// get final URL (after any redirects, strip out proxy url from response url) | ||
const finalUrl = options?.proxyUrl | ||
? response.url.replace(options.proxyUrl, ``) | ||
: response.url; | ||
@@ -290,0 +284,0 @@ // get content type of response |
{ | ||
"name": "link-preview-js", | ||
"version": "2.0.4", | ||
"version": "2.0.5", | ||
"description": "Javascript module to extract and fetch HTTP link information from blocks of text. ", | ||
@@ -5,0 +5,0 @@ "main": "build/index.js", |
# link-preview-js | ||
[![CircleCI](https://circleci.com/gh/ospfranco/link-preview-js/tree/master.svg?style=svg)](https://circleci.com/gh/ospfranco/link-preview-js/tree/master) [![npm version](https://badge.fury.io/js/link-preview-js.svg)](https://badge.fury.io/js/link-preview-js) | ||
Typescript library that allows you to extract information from a URL or parse text and retrieve information from the first available link. | ||
## This library does not work on CORS protected environments, i.e: all the major browsers | ||
## Does not work in CORS-protected environments, i.e. **all browsers** | ||
Chrome, Firefox, Safari, etc DO NOT ALLOW YOU TO DO CROSS SITE REQUESTS therefore you cannot request another domain from your web application, read more about [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS). | ||
A (respectable) browser **DOES NOT ALLOW YOU TO DO CROSS ORIGIN REQUESTS**: you cannot make a request to a different domain from your web application. If you do not know how the *same-origin policy* works, you can read [this](https://dev.to/lydiahallie/cs-visualized-cors-5b8h) fantastic piece written by @lydiahallie | ||
## This library uses [cheerio-without-node-native](https://github.com/oyyd/cheerio-without-node-native). GitHub is now warning me that there are security vulnerabilities because the package has been abandoned. I'm not responsible for any security implications this might carry. I could use cheerio, but that means losing compatibility with RN, which actually might not be a bad idea... | ||
As of 23 April 2020: do not use https://google.com; it does not return the appropriate tags to be parsed | ||
## Security disclaimer | ||
This library uses [cheerio-without-node-native](https://github.com/oyyd/cheerio-without-node-native), which unfortunately has been abandoned and now has some security vulnerabilities (according to GitHub). I'm not responsible for any security implications this might carry. | ||
# Migration to 2.X.X | ||
As of 23 April 2020: do not use https://google.com; it does not return the appropriate meta tags to be parsed. Test with another domain so you don't waste your time. | ||
The api for version 2.X.X changed slightly, there is no longer a default unnamed export, only a named method export `getLinkPreview` | ||
## Install | ||
`$ yarn add link-preview-js` | ||
``` | ||
yarn add link-preview-js | ||
``` | ||
@@ -32,5 +30,9 @@ ## Usage | ||
// pass the link directly | ||
getLinkPreview('https://www.youtube.com/watch?v=MejbOFk7H6c') | ||
.then((data) => console.debug(data)); | ||
// OR | ||
// a chunk of text | ||
getLinkPreview('This is a text supposed to be parsed and the first link displayed https://www.youtube.com/watch?v=MejbOFk7H6c') | ||
@@ -135,5 +137,1 @@ .then((data) => console.debug(data)); | ||
MIT license | ||
## Sponsor | ||
If you find this package useful, please consider [sponsoring](https://github.com/sponsors/ospfranco); buying a coffee is enough, thanks! |
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
47521
24
937
136