Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

link-preview-js

Package Overview
Dependencies
Maintainers
1
Versions
39
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

link-preview-js - npm Package Compare versions

Comparing version 2.1.2 to 2.1.3

5

build/constants.js

@@ -1,2 +0,5 @@

export const CONSTANTS = {
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CONSTANTS = void 0;
exports.CONSTANTS = {
REGEX_VALID_URL: new RegExp("^" +

@@ -3,0 +6,0 @@ // protocol identifier

299

build/index.js

@@ -0,1 +1,2 @@

"use strict";
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {

@@ -10,15 +11,47 @@ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }

};
import cheerio from "cheerio";
import { fetch } from "cross-fetch";
import urlObj from "url";
import { CONSTANTS } from "./constants";
const metaTag = (doc, type, attr) => {
const nodes = doc(`meta[${attr}='${type}']`);
// Downlevel generator runtime (NOTE(review): presumably the helper emitted by the
// TypeScript compiler for ES5 targets — confirm against the build config).
// `body` is a state-machine function: it is called repeatedly with the state
// object `_` and returns an [opcode, value] pair that `step` interprets.
// Returns an iterator-like object exposing next / throw / return.
// An existing `this.__generator` is reused when one is already defined.
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the state handed to `body`: current label, last sent value (`sent()` rethrows
// when the pending op has bit 1 set), the try-region stack and pending ops.
// f = "currently executing" flag, y = delegate iterator, t = scratch, g = returned object.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
// next / throw / return feed opcodes 0 / 1 / 2 into step(); when Symbol exists the
// object is also made iterable by returning itself from Symbol.iterator.
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// Guard against re-entrant calls while a step is in progress (f is reset in `finally`).
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
// While a delegate iterator `y` is active, forward the operation to its
// next / throw / return and hand back its result until it reports done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
case 0: case 1: t = op; break;
// 4: hand a value to the caller without finishing (call sites annotate this opcode as "yield").
case 4: _.label++; return { value: op[1], done: false };
// 5: start delegating to the iterator in op[1].
case 5: _.label++; y = op[1]; op = [0]; continue;
// 7: pop the innermost try region and resume with the op saved in _.ops.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// Remaining opcodes are resolved against the innermost entry of _.trys (if any):
// with no active entry, opcodes 6 and 2 end the loop; otherwise the label is moved
// to one of the offsets stored in that entry.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
// Run the state machine one step; it returns the next [opcode, value] pair.
op = body.call(thisArg, _);
// Anything thrown by `body` is converted into opcode 6 and re-dispatched by the loop.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Loop ended: opcodes with bit 1 or bit 4 set rethrow op[1]; otherwise finish with done: true.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
// Module-interop shim: a value flagged with `__esModule` is passed through as-is,
// anything else (including null/undefined) is wrapped as `{ default: value }`.
// An existing `this.__importDefault` is reused when one is already defined.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getPreviewFromContent = exports.getLinkPreview = void 0;
var cheerio_1 = __importDefault(require("cheerio"));
var cross_fetch_1 = require("cross-fetch");
var url_1 = __importDefault(require("url"));
var constants_1 = require("./constants");
// Queries `doc` with the selector meta[<attr>='<type>'] and returns the matched
// collection, or null when that collection has no truthy `length`.
// NOTE(review): `doc` is presumably a cheerio root loaded from the page HTML — confirm at call sites.
var metaTag = function (doc, type, attr) {
var nodes = doc("meta[" + attr + "='" + type + "']");
return nodes.length ? nodes : null;
};
const metaTagContent = (doc, type, attr) => doc(`meta[${attr}='${type}']`).attr(`content`);
// Returns the `content` attribute read from the meta[<attr>='<type>'] selection,
// i.e. whatever `.attr("content")` yields for that selection.
var metaTagContent = function (doc, type, attr) { return doc("meta[" + attr + "='" + type + "']").attr("content"); };
function getTitle(doc) {
let title = metaTagContent(doc, `og:title`, `property`) || metaTagContent(doc, `og:title`, `name`);
var title = metaTagContent(doc, "og:title", "property") || metaTagContent(doc, "og:title", "name");
if (!title) {
title = doc(`title`).text();
title = doc("title").text();
}

@@ -28,29 +61,29 @@ return title;

function getSiteName(doc) {
const siteName = metaTagContent(doc, `og:site_name`, `property`) || metaTagContent(doc, `og:site_name`, `name`);
var siteName = metaTagContent(doc, "og:site_name", "property") || metaTagContent(doc, "og:site_name", "name");
return siteName;
}
function getDescription(doc) {
const description = metaTagContent(doc, `description`, `name`) || metaTagContent(doc, `Description`, `name`) || metaTagContent(doc, `og:description`, `property`);
var description = metaTagContent(doc, "description", "name") || metaTagContent(doc, "Description", "name") || metaTagContent(doc, "og:description", "property");
return description;
}
function getMediaType(doc) {
const node = metaTag(doc, `medium`, `name`);
var node = metaTag(doc, "medium", "name");
if (node) {
const content = node.attr(`content`);
return content === `image` ? `photo` : content;
var content = node.attr("content");
return content === "image" ? "photo" : content;
}
return (metaTagContent(doc, `og:type`, `property`) || metaTagContent(doc, `og:type`, `name`));
return (metaTagContent(doc, "og:type", "property") || metaTagContent(doc, "og:type", "name"));
}
function getImages(doc, rootUrl, imagesPropertyType) {
let images = [];
let nodes;
let src;
let dic = {};
const imagePropertyType = imagesPropertyType !== null && imagesPropertyType !== void 0 ? imagesPropertyType : `og`;
nodes = metaTag(doc, `${imagePropertyType}:image`, `property`) || metaTag(doc, `${imagePropertyType}:image`, `name`);
var images = [];
var nodes;
var src;
var dic = {};
var imagePropertyType = imagesPropertyType !== null && imagesPropertyType !== void 0 ? imagesPropertyType : "og";
nodes = metaTag(doc, imagePropertyType + ":image", "property") || metaTag(doc, imagePropertyType + ":image", "name");
if (nodes) {
nodes.each((_, node) => {
nodes.each(function (_, node) {
src = node.attribs.content;
if (src) {
src = urlObj.resolve(rootUrl, src);
src = url_1.default.resolve(rootUrl, src);
images.push(src);

@@ -61,13 +94,13 @@ }

if (images.length <= 0 && !imagesPropertyType) {
src = doc(`link[rel=image_src]`).attr(`href`);
src = doc("link[rel=image_src]").attr("href");
if (src) {
src = urlObj.resolve(rootUrl, src);
src = url_1.default.resolve(rootUrl, src);
images = [src];
}
else {
nodes = doc(`img`);
nodes = doc("img");
if (nodes === null || nodes === void 0 ? void 0 : nodes.length) {
dic = {};
images = [];
nodes.each((_, node) => {
nodes.each(function (_, node) {
src = node.attribs.src;

@@ -78,3 +111,3 @@ if (src && !dic[src]) {

// height = node.attribs.height;
images.push(urlObj.resolve(rootUrl, src));
images.push(url_1.default.resolve(rootUrl, src));
}

@@ -88,20 +121,20 @@ });

function getVideos(doc) {
const videos = [];
let nodeTypes;
let nodeSecureUrls;
let nodeType;
let nodeSecureUrl;
let video;
let videoType;
let videoSecureUrl;
let width;
let height;
let videoObj;
let index;
const nodes = metaTag(doc, `og:video`, `property`) || metaTag(doc, `og:video`, `name`);
var videos = [];
var nodeTypes;
var nodeSecureUrls;
var nodeType;
var nodeSecureUrl;
var video;
var videoType;
var videoSecureUrl;
var width;
var height;
var videoObj;
var index;
var nodes = metaTag(doc, "og:video", "property") || metaTag(doc, "og:video", "name");
if (nodes === null || nodes === void 0 ? void 0 : nodes.length) {
nodeTypes = metaTag(doc, `og:video:type`, `property`) || metaTag(doc, `og:video:type`, `name`);
nodeSecureUrls = metaTag(doc, `og:video:secure_url`, `property`) || metaTag(doc, `og:video:secure_url`, `name`);
width = metaTagContent(doc, `og:video:width`, `property`) || metaTagContent(doc, `og:video:width`, `name`);
height = metaTagContent(doc, `og:video:height`, `property`) || metaTagContent(doc, `og:video:height`, `name`);
nodeTypes = metaTag(doc, "og:video:type", "property") || metaTag(doc, "og:video:type", "name");
nodeSecureUrls = metaTag(doc, "og:video:secure_url", "property") || metaTag(doc, "og:video:secure_url", "name");
width = metaTagContent(doc, "og:video:width", "property") || metaTagContent(doc, "og:video:width", "name");
height = metaTagContent(doc, "og:video:height", "property") || metaTagContent(doc, "og:video:height", "name");
for (index = 0; index < nodes.length; index += 1) {

@@ -117,6 +150,6 @@ video = nodes[index].attribs.content;

type: videoType,
width,
height,
width: width,
height: height,
};
if (videoType && videoType.indexOf(`video/`) === 0) {
if (videoType && videoType.indexOf("video/") === 0) {
videos.splice(0, 0, videoObj);

@@ -133,23 +166,23 @@ }

function getDefaultFavicon(rootUrl) {
return urlObj.resolve(rootUrl, `/favicon.ico`);
return url_1.default.resolve(rootUrl, "/favicon.ico");
}
// returns an array of URL's to favicon images
function getFavicons(doc, rootUrl) {
const images = [];
let nodes = [];
let src;
const relSelectors = [
`rel=icon`,
`rel="shortcut icon"`,
`rel=apple-touch-icon`,
var images = [];
var nodes = [];
var src;
var relSelectors = [
"rel=icon",
"rel=\"shortcut icon\"",
"rel=apple-touch-icon",
];
relSelectors.forEach((relSelector) => {
relSelectors.forEach(function (relSelector) {
// look for all icon tags
nodes = doc(`link[${relSelector}]`);
nodes = doc("link[" + relSelector + "]");
// collect all images from icon tags
if (nodes.length) {
nodes.each((_, node) => {
nodes.each(function (_, node) {
src = node.attribs.href;
if (src) {
src = urlObj.resolve(rootUrl, src);
src = url_1.default.resolve(rootUrl, src);
images.push(src);

@@ -168,5 +201,5 @@ }

return {
url,
mediaType: `image`,
contentType,
url: url,
mediaType: "image",
contentType: contentType,
favicons: [getDefaultFavicon(url)],

@@ -177,5 +210,5 @@ };

return {
url,
mediaType: `audio`,
contentType,
url: url,
mediaType: "audio",
contentType: contentType,
favicons: [getDefaultFavicon(url)],

@@ -186,5 +219,5 @@ };

return {
url,
mediaType: `video`,
contentType,
url: url,
mediaType: "video",
contentType: contentType,
favicons: [getDefaultFavicon(url)],

@@ -195,17 +228,18 @@ };

return {
url,
mediaType: `application`,
contentType,
url: url,
mediaType: "application",
contentType: contentType,
favicons: [getDefaultFavicon(url)],
};
}
function parseTextResponse(body, url, options = {}, contentType) {
const doc = cheerio.load(body);
function parseTextResponse(body, url, options, contentType) {
if (options === void 0) { options = {}; }
var doc = cheerio_1.default.load(body);
return {
url,
url: url,
title: getTitle(doc),
siteName: getSiteName(doc),
description: getDescription(doc),
mediaType: getMediaType(doc) || `website`,
contentType,
mediaType: getMediaType(doc) || "website",
contentType: contentType,
images: getImages(doc, url, options.imagesPropertyType),

@@ -216,3 +250,4 @@ videos: getVideos(doc),

}
function parseUnknownResponse(body, url, options = {}, contentType) {
function parseUnknownResponse(body, url, options, contentType) {
if (options === void 0) { options = {}; }
return parseTextResponse(body, url, options, contentType);

@@ -222,7 +257,7 @@ }

try {
let contentType = response.headers[`content-type`];
var contentType = response.headers["content-type"];
// console.warn(`original content type`, contentType);
if (contentType === null || contentType === void 0 ? void 0 : contentType.indexOf(`;`)) {
if (contentType === null || contentType === void 0 ? void 0 : contentType.indexOf(";")) {
// eslint-disable-next-line prefer-destructuring
contentType = contentType.split(`;`)[0];
contentType = contentType.split(";")[0];
// console.warn(`splitting content type`, contentType);

@@ -238,23 +273,23 @@ }

// parse response depending on content type
if (CONSTANTS.REGEX_CONTENT_TYPE_IMAGE.test(contentType)) {
if (constants_1.CONSTANTS.REGEX_CONTENT_TYPE_IMAGE.test(contentType)) {
return parseImageResponse(response.url, contentType);
}
if (CONSTANTS.REGEX_CONTENT_TYPE_AUDIO.test(contentType)) {
if (constants_1.CONSTANTS.REGEX_CONTENT_TYPE_AUDIO.test(contentType)) {
return parseAudioResponse(response.url, contentType);
}
if (CONSTANTS.REGEX_CONTENT_TYPE_VIDEO.test(contentType)) {
if (constants_1.CONSTANTS.REGEX_CONTENT_TYPE_VIDEO.test(contentType)) {
return parseVideoResponse(response.url, contentType);
}
if (CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) {
const htmlString = response.data;
return parseTextResponse(htmlString, response.url, options, contentType);
if (constants_1.CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) {
var htmlString_1 = response.data;
return parseTextResponse(htmlString_1, response.url, options, contentType);
}
if (CONSTANTS.REGEX_CONTENT_TYPE_APPLICATION.test(contentType)) {
if (constants_1.CONSTANTS.REGEX_CONTENT_TYPE_APPLICATION.test(contentType)) {
return parseApplicationResponse(response.url, contentType);
}
const htmlString = response.data;
var htmlString = response.data;
return parseUnknownResponse(htmlString, response.url, options);
}
catch (e) {
throw new Error(`link-preview-js could not fetch link information ${e.toString()}`);
throw new Error("link-preview-js could not fetch link information " + e.toString());
}

@@ -269,31 +304,44 @@ }

*/
export function getLinkPreview(text, options) {
function getLinkPreview(text, options) {
var _a;
return __awaiter(this, void 0, void 0, function* () {
if (!text || typeof text !== `string`) {
throw new Error(`link-preview-js did not receive a valid url or text`);
}
const detectedUrl = text.replace(/\n/g, ` `).split(` `).find((token) => CONSTANTS.REGEX_VALID_URL.test(token));
if (!detectedUrl) {
throw new Error(`link-preview-js did not receive a valid a url or text`);
}
const fetchOptions = {
headers: (_a = options === null || options === void 0 ? void 0 : options.headers) !== null && _a !== void 0 ? _a : {},
redirect: `follow`,
};
const fetchUrl = (options === null || options === void 0 ? void 0 : options.proxyUrl) ? options.proxyUrl.concat(detectedUrl) : detectedUrl;
const response = yield fetch(fetchUrl, fetchOptions);
const headers = {};
response.headers.forEach((header, key) => {
headers[key] = header;
return __awaiter(this, void 0, void 0, function () {
var detectedUrl, fetchOptions, fetchUrl, response, headers, normalizedResponse;
var _b;
return __generator(this, function (_c) {
switch (_c.label) {
case 0:
if (!text || typeof text !== "string") {
throw new Error("link-preview-js did not receive a valid url or text");
}
detectedUrl = text.replace(/\n/g, " ").split(" ").find(function (token) { return constants_1.CONSTANTS.REGEX_VALID_URL.test(token); });
if (!detectedUrl) {
throw new Error("link-preview-js did not receive a valid a url or text");
}
fetchOptions = {
headers: (_a = options === null || options === void 0 ? void 0 : options.headers) !== null && _a !== void 0 ? _a : {},
redirect: "follow",
};
fetchUrl = (options === null || options === void 0 ? void 0 : options.proxyUrl) ? options.proxyUrl.concat(detectedUrl) : detectedUrl;
return [4 /*yield*/, cross_fetch_1.fetch(fetchUrl, fetchOptions)];
case 1:
response = _c.sent();
headers = {};
response.headers.forEach(function (header, key) {
headers[key] = header;
});
_b = {
url: (options === null || options === void 0 ? void 0 : options.proxyUrl) ? response.url.replace(options.proxyUrl, "")
: response.url,
headers: headers
};
return [4 /*yield*/, response.text()];
case 2:
normalizedResponse = (_b.data = _c.sent(),
_b);
return [2 /*return*/, parseResponse(normalizedResponse, options)];
}
});
const normalizedResponse = {
url: (options === null || options === void 0 ? void 0 : options.proxyUrl) ? response.url.replace(options.proxyUrl, ``)
: response.url,
headers,
data: yield response.text(),
};
return parseResponse(normalizedResponse, options);
});
}
exports.getLinkPreview = getLinkPreview;
/**

@@ -306,12 +354,15 @@ * Skip the library fetching the website for you, instead pass a response object

*/
export function getPreviewFromContent(response, options) {
return __awaiter(this, void 0, void 0, function* () {
if (!response || typeof response !== `object`) {
throw new Error(`link-preview-js did not receive a valid response object`);
}
if (!response.url) {
throw new Error(`link-preview-js did not receive a valid response object`);
}
return parseResponse(response, options);
function getPreviewFromContent(response, options) {
return __awaiter(this, void 0, void 0, function () {
return __generator(this, function (_a) {
if (!response || typeof response !== "object") {
throw new Error("link-preview-js did not receive a valid response object");
}
if (!response.url) {
throw new Error("link-preview-js did not receive a valid response object");
}
return [2 /*return*/, parseResponse(response, options)];
});
});
}
exports.getPreviewFromContent = getPreviewFromContent;
{
"name": "link-preview-js",
"version": "2.1.2",
"version": "2.1.3",
"description": "Javascript module to extract and fetch HTTP link information from blocks of text. ",

@@ -5,0 +5,0 @@ "main": "build/index.js",

@@ -1,18 +0,21 @@

# link-preview-js
Typescript library (27kb unpacked) that allows you to extract information from a HTTP url/link and/or parse text and retrieve information from the first available link.
Typescript library that allows you to extract information from a URL or parse text and retrieve information from the first available link.
## CORS
## Does not work on CORS protected environments, means: **all the browsers**
**BROWSERS DO NOT ALLOW YOU TO DO CROSS ORIGIN REQUESTS (CORS)**, you cannot request a different domain from your web app, period.
A (respectable) browser **DOES NOT ALLOW YOU TO DO CROSS ORIGIN REQUESTS**: you cannot make a request to a different domain from your web application. If you do not know how the *same-origin-policy* works, you can read [this](https://dev.to/lydiahallie/cs-visualized-cors-5b8h) fantastic piece.
If you do not know how the *same-origin-policy* works, you can read [this](https://dev.to/lydiahallie/cs-visualized-cors-5b8h).
This library therefore works on node (back-end environments) and certain mobile run-times (cordova or react-native)
## Disclaimer
This library no longer depends on a custom fork of cheerio and it is using the official cheerio lib! 🎉 all security issues should be resolved!
As Google continues to wage war upon the internet and break it in all the ways convenient only to Google, sometimes its domains break the standards. To be more specific: www.google.com does not return the required metadata, and YouTube does funny redirections on mobile, which also causes this library to return wrong data... 🖕 Google, feel free to submit a PR if you want to accommodate the trillion dollar corporation.
As Google continues to take over the web and break it in all the ways convenient only to Google, sometimes its domains break the standards. To be more specific: www.google.com does not return the required metadata, and YouTube does funny redirections on mobile, which also causes this library to return wrong data... 🖕 Google, feel free to try to fix these issues yourself by opening a PR, but I won't spend my time trying to accommodate all edge cases.
Current compilation target is ES6 which should run on a modern Node (12+) environment and react-native versions, it won't run on outdated versions (which you shouldn't be running anyways).
## Install
## How to use
### Install the lib
```

@@ -22,10 +25,8 @@ yarn add link-preview-js

## Usage
### Use the methods
Library exposes two methods:
`getLinkPreview`: you have to pass a string; it doesn't matter if it is just a URL or a piece of text that contains a URL, the library will take care of parsing it and returning the info of the first valid HTTP(S) URL it finds. (URL parsing is done via: https://gist.github.com/dperini/729294).
`getLinkPreview`: you have to pass a string; it doesn't matter if it is just a URL or a piece of text that contains a URL, the library will take care of parsing it and returning the info of the first valid HTTP(S) URL it finds. (URL parsing is done via: https://gist.github.com/dperini/729294).
`getPreviewFromContent`: useful for passing a pre-fetched Response object from an existing async/etc. call. Refer to example below for required object values.
`getPreviewFromContent`, useful for passing a pre-fetched Response object from an existing async/etc. call. Refer to example below for required object values.
```typescript

@@ -40,3 +41,3 @@ import { getLinkPreview, getPreviewFromContent } from 'link-preview-js';

// a chunk of text
// pass a chunk of text
getLinkPreview('This is a text supposed to be parsed and the first link displayed https://www.youtube.com/watch?v=MejbOFk7H6c')

@@ -48,3 +49,3 @@ .then((data) => console.debug(data));

// a pre-fetched response object
// pass a pre-fetched response object
// The passed response object should include, at minimum:

@@ -65,4 +66,2 @@ // {

})
```

@@ -98,3 +97,3 @@

```
```javascript
{

@@ -107,3 +106,3 @@ url: "https://www.youtube.com/watch?v=MejbOFk7H6c",

mediaType: "video.other",
contentType: "text/html; charset=utf-8"
contentType: "text/html; charset=utf-8",
videos: [],

@@ -116,3 +115,3 @@ favicons:["https://www.youtube.com/yts/img/favicon_32-vflOogEID.png","https://www.youtube.com/yts/img/favicon_48-vflVjB_Qk.png","https://www.youtube.com/yts/img/favicon_96-vflW9Ec0w.png","https://www.youtube.com/yts/img/favicon_144-vfliLAfaB.png","https://s.ytimg.com/yts/img/favicon-vfl8qSV2F.ico"]

```
```javascript
{

@@ -128,3 +127,3 @@ url: "https://media.npr.org/assets/img/2018/04/27/gettyimages-656523922nunes-4bb9a194ab2986834622983bb2f8fe57728a9e5f-s1100-c15.jpg",

```
```javascript
{

@@ -140,3 +139,3 @@ url: "https://ondemand.npr.org/anon.npr-mp3/npr/atc/2007/12/20071231_atc_13.mp3",

```
```javascript
{

@@ -152,3 +151,3 @@ url: "https://www.w3schools.com/html/mov_bbb.mp4",

```
```javascript
{

@@ -162,10 +161,4 @@ url: "https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf",

## Tests
```
yarn test
```
## License
MIT license
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc