Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

link-preview-js

Package Overview
Dependencies
Maintainers
1
Versions
39
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

link-preview-js - npm Package Compare versions

Comparing version 2.0.2 to 2.0.3

.travis.yml

28

__tests__/index.spec.ts

@@ -1,2 +0,2 @@

import { getLinkPreview } from "../index";
import { getLinkPreview } from "../build/index";

@@ -124,17 +124,27 @@ describe(`link preview`, () => {

// This site changed? it is not returning application any more but rather website
// eslint-disable-next-line jest/no-disabled-tests
it.skip(`should handle application urls`, async () => {
it(`should handle unknown content type urls`, async () => {
const linkInfo = await getLinkPreview(
`https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`,
`https://mjml.io/try-it-live`,
);
expect(linkInfo.url).toEqual(
`https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`,
`https://mjml.io/try-it-live`,
);
expect(linkInfo.mediaType).toEqual(`application`);
expect(linkInfo.contentType.toLowerCase()).toEqual(`application/pdf`);
expect(linkInfo.favicons[0]).toBeTruthy();
expect(linkInfo.mediaType).toEqual(`website`);
});
// This site changed? it is not returning application any more but rather website
// it(`should handle application urls`, async () => {
// const linkInfo = await getLinkPreview(
// `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`,
// );
// expect(linkInfo.url).toEqual(
// `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`,
// );
// expect(linkInfo.mediaType).toEqual(`application`);
// expect(linkInfo.contentType.toLowerCase()).toEqual(`application/pdf`);
// expect(linkInfo.favicons[0]).toBeTruthy();
// });
it(`no link in text should fail gracefully`, async () => {

@@ -141,0 +151,0 @@ await expect(

@@ -1,2 +0,1 @@

import { ResponseModel } from './Response.model';
interface ILinkPreviewOptions {

@@ -6,3 +5,24 @@ headers?: Record<string, string>;

}
export declare function getLinkPreview(text: string, options?: ILinkPreviewOptions): Promise<ResponseModel>;
export declare function getLinkPreview(text: string, options?: ILinkPreviewOptions): Promise<{
url: string;
mediaType: string;
contentType: string;
favicons: any[];
} | {
url: string;
title: any;
siteName: any;
description: any;
mediaType: any;
contentType: string | undefined;
images: string[];
videos: {
url: any;
secureUrl: any;
type: any;
width: any;
height: any;
}[];
favicons: any[];
}>;
export {};

@@ -42,31 +42,29 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
// eslint-disable-next-line @typescript-eslint/ban-ts-ignore
// @ts-ignore
var react_native_html_parser_1 = __importDefault(require("react-native-html-parser"));
var cheerio_without_node_native_1 = __importDefault(require("cheerio-without-node-native"));
var cross_fetch_1 = require("cross-fetch");
var url_1 = __importDefault(require("url"));
var constants_1 = require("./constants");
var Response_model_1 = require("./Response.model");
function getNodeValue(node) {
if (node) {
return node.attributes[1].nodeValue;
function getTitle(doc) {
var title = doc("meta[property='og:title']").attr("content");
if (!title) {
title = doc("title").text();
}
return null;
return title;
}
function getTitle(doc) {
return getNodeValue(doc.querySelect("meta[property='og:title']")[0]);
}
function getSiteName(doc) {
return getNodeValue(doc.querySelect("meta[property='og:site_name']")[0]);
var siteName = doc("meta[property='og:site_name']").attr("content");
return siteName;
}
function getDescription(doc) {
var description = getNodeValue(doc.querySelect("meta[name=Description]")[0]);
if (description == null) {
description = getNodeValue(doc.querySelect("meta[property='og:description']")[0]);
var description = doc("meta[name=description]").attr("content");
if (description === undefined) {
description = doc("meta[name=Description]").attr("content");
}
if (description === undefined) {
description = doc("meta[property='og:description']").attr("content");
}
return description;
}
// TODO I don't know what this returns, the node definitely has no length?
function getMediaType(doc) {
var node = doc.querySelect("meta[name=Description]")[0];
var node = doc("meta[name=medium]");
if (node.length) {

@@ -197,3 +195,3 @@ var content = node.attr("content");

function parseImageResponse(url, contentType) {
return new Response_model_1.ResponseModel({
return {
url: url,

@@ -203,6 +201,6 @@ mediaType: "image",

favicons: [getDefaultFavicon(url)],
});
};
}
function parseAudioResponse(url, contentType) {
return new Response_model_1.ResponseModel({
return {
url: url,

@@ -212,6 +210,6 @@ mediaType: "audio",

favicons: [getDefaultFavicon(url)],
});
};
}
function parseVideoResponse(url, contentType) {
return new Response_model_1.ResponseModel({
return {
url: url,

@@ -221,6 +219,6 @@ mediaType: "video",

favicons: [getDefaultFavicon(url)],
});
};
}
function parseApplicationResponse(url, contentType) {
return new Response_model_1.ResponseModel({
return {
url: url,

@@ -230,8 +228,8 @@ mediaType: "application",

favicons: [getDefaultFavicon(url)],
});
};
}
function parseTextResponse(body, url, options, contentType) {
if (options === void 0) { options = {}; }
var doc = new react_native_html_parser_1.default.DOMParser().parseFromString(body);
return new Response_model_1.ResponseModel({
var doc = cheerio_without_node_native_1.default.load(body);
return {
url: url,

@@ -246,8 +244,12 @@ title: getTitle(doc),

favicons: getFavicons(doc, url),
});
};
}
// Parser used by getLinkPreview when the response has no content-type header
// or the content type matched none of the known categories. It applies the
// same handling as text responses by forwarding every argument to
// parseTextResponse.
function parseUnknownResponse(body, url, options, contentType) {
// Compiled form of a default parameter: options becomes {} when it is undefined.
if (options === void 0) { options = {}; }
return parseTextResponse(body, url, options, contentType);
}
function getLinkPreview(text, options) {
var _a, _b;
return __awaiter(this, void 0, void 0, function () {
var detectedUrl, fetchOptions, response, finalUrl, contentType, htmlString, e_1;
var detectedUrl, fetchOptions, response, finalUrl, contentType, htmlString_1, htmlString_2, htmlString, e_1;
return __generator(this, function (_c) {

@@ -269,3 +271,3 @@ switch (_c.label) {

case 1:
_c.trys.push([1, 5, , 6]);
_c.trys.push([1, 8, , 9]);
return [4 /*yield*/, cross_fetch_1.fetch(detectedUrl, fetchOptions)];

@@ -276,5 +278,8 @@ case 2:

contentType = response.headers.get("content-type");
if (!contentType) {
throw new Error("link-preview-js could not determine content-type for link");
}
if (!!contentType) return [3 /*break*/, 4];
return [4 /*yield*/, response.text()];
case 3:
htmlString_1 = _c.sent();
return [2 /*return*/, parseUnknownResponse(htmlString_1, finalUrl, options)];
case 4:
if (contentType instanceof Array) {

@@ -294,16 +299,19 @@ // eslint-disable-next-line prefer-destructuring

}
if (!constants_1.CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) return [3 /*break*/, 4];
if (!constants_1.CONSTANTS.REGEX_CONTENT_TYPE_TEXT.test(contentType)) return [3 /*break*/, 6];
return [4 /*yield*/, response.text()];
case 3:
htmlString = _c.sent();
return [2 /*return*/, parseTextResponse(htmlString, finalUrl, options, contentType)];
case 4:
case 5:
htmlString_2 = _c.sent();
return [2 /*return*/, parseTextResponse(htmlString_2, finalUrl, options, contentType)];
case 6:
if (constants_1.CONSTANTS.REGEX_CONTENT_TYPE_APPLICATION.test(contentType)) {
return [2 /*return*/, parseApplicationResponse(finalUrl, contentType)];
}
throw new Error("Unknown content type for URL.");
case 5:
return [4 /*yield*/, response.text()];
case 7:
htmlString = _c.sent();
return [2 /*return*/, parseUnknownResponse(htmlString, finalUrl, options)];
case 8:
e_1 = _c.sent();
throw new Error("link-preview-js could not fetch link information " + e_1.toString());
case 6: return [2 /*return*/];
case 9: return [2 /*return*/];
}

@@ -310,0 +318,0 @@ });

@@ -1,8 +0,5 @@

// eslint-disable-next-line @typescript-eslint/ban-ts-ignore
// @ts-ignore
import DomParser from 'react-native-html-parser';
import cheerio from "cheerio-without-node-native";
import { fetch } from "cross-fetch";
import urlObj from "url";
import { CONSTANTS } from "./constants";
import { ResponseModel } from './Response.model';

@@ -14,54 +11,56 @@ interface ILinkPreviewOptions {

function getNodeValue(node?: any) {
if (node) {
return node.attributes[1].nodeValue;
function getTitle(doc: any) {
let title = doc(`meta[property='og:title']`).attr(`content`);
if (!title) {
title = doc(`title`).text();
}
return null;
}
function getTitle(doc: any) {
return getNodeValue(doc.querySelect(`meta[property='og:title']`)[0]);
return title;
}
function getSiteName(doc: any) {
return getNodeValue(doc.querySelect(`meta[property='og:site_name']`)[0]);
const siteName = doc(`meta[property='og:site_name']`).attr(`content`);
return siteName;
}
function getDescription(doc: any) {
let description = getNodeValue(doc.querySelect(`meta[name=Description]`)[0]);
let description = doc(`meta[name=description]`).attr(`content`);
if (description == null) {
description = getNodeValue(doc.querySelect(`meta[property='og:description']`)[0]);
if (description === undefined) {
description = doc(`meta[name=Description]`).attr(`content`);
}
if (description === undefined) {
description = doc(`meta[property='og:description']`).attr(`content`);
}
return description;
}
// TODO I don't know what this returns, the node definitely has no length?
function getMediaType(doc: any) {
return getNodeValue(doc.querySelect(`meta[property='og:type']`)[0]);
// const nodes = doc.querySelect(`meta[name=medium]`);
const node = doc(`meta[name=medium]`);
// if (nodes.length) {
// console.warn(`MEDIA TYPE NODES: ${nodes.length}`, nodes[0]);
// const value = getNodeValue(nodes[0]);
// return value === `image` ? `photo` : value;
// }
// return getNodeValue(doc.querySelect(`meta[property='og:type']`)[0]);
if (node.length) {
const content = node.attr(`content`);
return content === `image` ? `photo` : content;
}
return doc(`meta[property='og:type']`).attr(`content`);
}
function getImages(doc: any, rootUrl: string, imagesPropertyType?: string) {
const images: string[] = [];
let images: string[] = [];
let nodes;
let src;
// let dic: Record<string, boolean> = {};
let dic: Record<string, boolean> = {};
const imagePropertyType = imagesPropertyType ?? `og`;
const nodes = doc.querySelect(`meta[property='${imagePropertyType}:image']`);
nodes = doc(`meta[property='${imagePropertyType}:image']`);
if (nodes.length) {
nodes.forEach((node: any) => {
src = getNodeValue(node);
nodes.each((_: number, node: any) => {
src = node.attribs.content;
if (src) {
src = urlObj.resolve(rootUrl, src);
images.push(src);

@@ -72,25 +71,25 @@ }

// if (images.length <= 0 && !imagesPropertyType) {
// src = doc(`link[rel=image_src]`).attr(`href`);
// if (src) {
// src = urlObj.resolve(rootUrl, src);
// images = [src];
// } else {
// nodes = doc(`img`);
if (images.length <= 0 && !imagesPropertyType) {
src = doc(`link[rel=image_src]`).attr(`href`);
if (src) {
src = urlObj.resolve(rootUrl, src);
images = [src];
} else {
nodes = doc(`img`);
// if (nodes.length) {
// dic = {};
// images = [];
// nodes.each((_: number, node: any) => {
// src = node.attribs.src;
// if (src && !dic[src]) {
// dic[src] = true;
// // width = node.attribs.width;
// // height = node.attribs.height;
// images.push(urlObj.resolve(rootUrl, src));
// }
// });
// }
// }
// }
if (nodes.length) {
dic = {};
images = [];
nodes.each((_: number, node: any) => {
src = node.attribs.src;
if (src && !dic[src]) {
dic[src] = true;
// width = node.attribs.width;
// height = node.attribs.height;
images.push(urlObj.resolve(rootUrl, src));
}
});
}
}
}

@@ -114,36 +113,33 @@ return images;

const nodes = doc.querySelect(`meta[property='og:video']`);
const nodes = doc(`meta[property='og:video']`);
const { length } = nodes;
// console.warn(`ROPO VIDEO NODES`, nodes);
if (length) {
// nodeTypes = doc.querySelect(`meta[property='og:video:type']`);
// nodeSecureUrls = doc.querySelect(`meta[property='og:video:secure_url']`);
// width = doc.querySelect(`meta[property='og:video:width']`).attr(`content`);
// height = doc.querySelect(`meta[property='og:video:height']`).attr(`content`);
nodeTypes = doc(`meta[property='og:video:type']`);
nodeSecureUrls = doc(`meta[property='og:video:secure_url']`);
width = doc(`meta[property='og:video:width']`).attr(`content`);
height = doc(`meta[property='og:video:height']`).attr(`content`);
// for (index = 0; index < length; index += 1) {
// video = nodes[index].attribs.content;
for (index = 0; index < length; index += 1) {
video = nodes[index].attribs.content;
// nodeType = nodeTypes[index];
// videoType = nodeType ? nodeType.attribs.content : null;
nodeType = nodeTypes[index];
videoType = nodeType ? nodeType.attribs.content : null;
// nodeSecureUrl = nodeSecureUrls[index];
// videoSecureUrl = nodeSecureUrl ? nodeSecureUrl.attribs.content : null;
nodeSecureUrl = nodeSecureUrls[index];
videoSecureUrl = nodeSecureUrl ? nodeSecureUrl.attribs.content : null;
// videoObj = {
// url: video,
// secureUrl: videoSecureUrl,
// type: videoType,
// width,
// height,
// };
// if (videoType && videoType.indexOf(`video/`) === 0) {
// videos.splice(0, 0, videoObj);
// } else {
// videos.push(videoObj);
// }
// }
videoObj = {
url: video,
secureUrl: videoSecureUrl,
type: videoType,
width,
height,
};
if (videoType && videoType.indexOf(`video/`) === 0) {
videos.splice(0, 0, videoObj);
} else {
videos.push(videoObj);
}
}
}

@@ -162,4 +158,4 @@

function getFavicons(doc: any, rootUrl: string) {
let images: string[] = [];
const nodes = [];
const images = [];
let nodes = [];
let src;

@@ -169,4 +165,4 @@

`rel=icon`,
// `rel="shortcut icon"`,
// `rel=apple-touch-icon`,
`rel="shortcut icon"`,
`rel=apple-touch-icon`,
];

@@ -176,10 +172,14 @@

// look for all icon tags
const favicons = doc.querySelect(`link[${relSelector}]`).map((node: any) => {
// console.warn(`Favicons map iteration`, urlObj.resolve(rootUrl, value));
console.warn(`Favicons map iteration`, node);
const value = getNodeValue(node);
return urlObj.resolve(rootUrl, value);
});
nodes = doc(`link[${relSelector}]`);
images = images.concat(favicons);
// collect all images from icon tags
if (nodes.length) {
nodes.each((_: number, node: any) => {
src = node.attribs.href;
if (src) {
src = urlObj.resolve(rootUrl, src);
images.push(src);
}
});
}
});

@@ -196,3 +196,3 @@

function parseImageResponse(url: string, contentType: string) {
return new ResponseModel({
return {
url,

@@ -202,7 +202,7 @@ mediaType: `image`,

favicons: [getDefaultFavicon(url)],
});
};
}
function parseAudioResponse(url: string, contentType: string) {
return new ResponseModel({
return {
url,

@@ -212,7 +212,7 @@ mediaType: `audio`,

favicons: [getDefaultFavicon(url)],
});
};
}
function parseVideoResponse(url: string, contentType: string) {
return new ResponseModel({
return {
url,

@@ -222,7 +222,7 @@ mediaType: `video`,

favicons: [getDefaultFavicon(url)],
});
};
}
function parseApplicationResponse(url: string, contentType: string) {
return new ResponseModel({
return {
url,

@@ -232,3 +232,3 @@ mediaType: `application`,

favicons: [getDefaultFavicon(url)],
});
};
}

@@ -240,7 +240,7 @@

options: ILinkPreviewOptions = {},
contentType: string,
contentType?: string,
) {
const doc = new DomParser.DOMParser().parseFromString(body, `text/html`);
const doc = cheerio.load(body);
return new ResponseModel({
return {
url,

@@ -255,5 +255,13 @@ title: getTitle(doc),

favicons: getFavicons(doc, url),
});
};
}
/**
 * Parser for responses whose content type is absent or not recognised.
 * Forwards all arguments unchanged to parseTextResponse and returns its
 * result, so such responses get the same handling as text responses.
 *
 * @param body - response body text
 * @param url - URL the preview is being built for
 * @param options - link preview options; defaults to an empty object
 * @param contentType - content-type value, if one is known (callers in this
 *   file omit it)
 */
function parseUnknownResponse(
body: string,
url: string,
options: ILinkPreviewOptions = {},
contentType?: string,
) {
return parseTextResponse(body, url, options, contentType);
}

@@ -263,3 +271,3 @@ export async function getLinkPreview(

options?: ILinkPreviewOptions,
): Promise<ResponseModel> {
) {
if (!text || typeof text !== `string`) {

@@ -290,5 +298,4 @@ throw new Error(`link-preview-js did not receive a valid url or text`);

if (!contentType) {
throw new Error(
`link-preview-js could not determine content-type for link`,
);
const htmlString = await response.text();
return parseUnknownResponse(htmlString, finalUrl, options);
}

@@ -318,3 +325,4 @@

}
throw new Error(`Unknown content type for URL.`);
const htmlString = await response.text();
return parseUnknownResponse(htmlString, finalUrl, options);
} catch (e) {

@@ -321,0 +329,0 @@ throw new Error(

{
"name": "link-preview-js",
"version": "2.0.2",
"version": "2.0.3",
"description": "Javascript module to extract and fetch HTTP link information from blocks of text. ",

@@ -8,3 +8,4 @@ "main": "build/index.js",

"test": "jest --detectOpenHandles --colors",
"build": "tsc"
"build": "tsc",
"publish": "tsc && jest --detectOpenHandles && npm publish "
},

@@ -23,4 +24,4 @@ "keywords": [

"dependencies": {
"cheerio-without-node-native": "0.20.2",
"cross-fetch": "3.0.4",
"react-native-html-parser": "^0.1.0",
"url": "0.11.0"

@@ -47,2 +48,2 @@ },

}
}
}

@@ -5,20 +5,22 @@ # link-preview-js

Pure js library that allows you to extract information from a URL or parse text and retrieve information from the first available link.
Typescript library that allows you to extract information from a URL or parse text and retrieve information from the first available link.
# WARNING: THIS LIBRARY DOES NOT WORK ON CORS PROTECTED ENVIRONMENTS, RUNNING IT ON BROWSERS WON'T WORK
## This library does not work on CORS protected environments, i.e: all the major browsers
Chrome, Firefox, Safari, etc DO NOT ALLOW YOU TO DO CROSS SITE REQUESTS therefore you cannot use this library or even manually request another domain from your web application, read more about [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS). However you can use this library on React-Native or on your back-end to fetch link-information to your app.
Chrome, Firefox, Safari, etc DO NOT ALLOW YOU TO DO CROSS SITE REQUESTS therefore you cannot request another domain from your web application, read more about [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS).
## Known issues for React-Native
## This library uses [cheerio-without-native](https://github.com/oyyd/cheerio-without-node-native). GitHub is now warning me that there are security vulnerabilities because the package has been abandoned. I'm not responsible for any security implications this might carry. I could use cheerio, but that means losing compatibility with RN, which actually might not be a bad idea...
Apparently the fetch specification breaks on some older Samsung devices; this is not patchable in this library.
As of 23 April 2020: do not use https://google.com, as it does not return the appropriate tags to be parsed.
# 2.X.X
# Migration to 2.X.X
The api for version 2.X.X changed slightly, there is no longer a default unnamed export, only a named method export `getLinkPreview`, the library has also been completely re-written on typescript so you now have types and some minor bugs have been fixed.
The api for version 2.X.X changed slightly, there is no longer a default unnamed export, only a named method export `getLinkPreview`
## Usage
## Install
`$ yarn add link-preview-js`
## Usage
The library exposes just one method, `getLinkPreview`. You have to pass it a string; it doesn't matter if it is just a URL or a piece of text that contains a URL, the library will take care of parsing it and returning the info of the first valid HTTP(S) URL it finds.

@@ -31,4 +33,2 @@

...
getLinkPreview('https://www.youtube.com/watch?v=MejbOFk7H6c')

@@ -35,0 +35,0 @@ .then((data) => console.debug(data));

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc