🚀 Big News: Socket Acquires Coana to Bring Reachability Analysis to Every Appsec Team. Learn more
Socket
DemoInstallSign in
Socket

linkinator

Package Overview
Dependencies
Maintainers
1
Versions
114
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

linkinator - npm Package Compare versions

Comparing version

to
2.14.1

29

build/src/cli.js

@@ -64,2 +64,8 @@ #!/usr/bin/env node

--url-rewrite-search
Pattern to search for in urls. Must be used with --url-rewrite-replace.
--url-rewrite-replace
Expression used to replace search content. Must be used with --url-rewrite-search.
--verbosity

@@ -89,2 +95,4 @@ Override the default verbosity for this command. Available options are

retry: { type: 'boolean' },
urlRewriteSearch: { type: 'string' },
urlReWriteReplace: { type: 'string' },
},

@@ -100,5 +108,9 @@ booleanDefault: undefined,

flags = await config_1.getConfig(cli.flags);
if ((flags.urlRewriteReplace && !flags.urlRewriteSearch) ||
(flags.urlRewriteSearch && !flags.urlRewriteReplace)) {
throw new Error('The url-rewrite-replace flag must be used with the url-rewrite-search flag.');
}
const start = Date.now();
const verbosity = parseVerbosity(cli.flags);
const format = parseFormat(cli.flags);
const verbosity = parseVerbosity(flags);
const format = parseFormat(flags);
const logger = new logger_1.Logger(verbosity, format);

@@ -145,2 +157,10 @@ logger.error(`🏊‍♂️ crawling ${cli.input}`);

}
if (flags.urlRewriteSearch && flags.urlRewriteReplace) {
opts.urlRewriteExpressions = [
{
pattern: new RegExp(flags.urlRewriteSearch),
replacement: flags.urlRewriteReplace,
},
];
}
const result = await checker.check(opts);

@@ -237,8 +257,9 @@ const filteredResults = result.links.filter(link => {

const total = (Date.now() - start) / 1000;
const scannedLinks = result.links.filter(x => x.state !== index_1.LinkState.SKIPPED);
if (!result.passed) {
const borked = result.links.filter(x => x.state === index_1.LinkState.BROKEN);
logger.error(chalk.bold(`${chalk.red('ERROR')}: Detected ${borked.length} broken links. Scanned ${chalk.yellow(result.links.length.toString())} links in ${chalk.cyan(total.toString())} seconds.`));
logger.error(chalk.bold(`${chalk.red('ERROR')}: Detected ${borked.length} broken links. Scanned ${chalk.yellow(scannedLinks.length.toString())} links in ${chalk.cyan(total.toString())} seconds.`));
process.exit(1);
}
logger.error(chalk.bold(`🤖 Successfully scanned ${chalk.green(result.links.length.toString())} links in ${chalk.cyan(total.toString())} seconds.`));
logger.error(chalk.bold(`🤖 Successfully scanned ${chalk.green(scannedLinks.length.toString())} links in ${chalk.cyan(total.toString())} seconds.`));
}

@@ -245,0 +266,0 @@ function parseVerbosity(flags) {

@@ -8,2 +8,3 @@ export interface Flags {

silent?: boolean;
verbosity?: string;
timeout?: number;

@@ -14,3 +15,5 @@ markdown?: boolean;

retry?: boolean;
urlRewriteSearch?: string;
urlRewriteReplace?: string;
}
export declare function getConfig(flags: Flags): Promise<Flags>;

@@ -99,2 +99,11 @@ "use strict";

var _a;
// apply any regex url replacements
if (opts.checkOptions.urlRewriteExpressions) {
for (const exp of opts.checkOptions.urlRewriteExpressions) {
const newUrl = opts.url.href.replace(exp.pattern, exp.replacement);
if (opts.url.href !== newUrl) {
opts.url.href = newUrl;
}
}
}
// explicitly skip non-http[s] links before making the request

@@ -255,3 +264,4 @@ const proto = opts.url.protocol;

}
let crawl = (opts.checkOptions.recurse && ((_a = result.url) === null || _a === void 0 ? void 0 : _a.href.startsWith(opts.rootPath)));
let crawl = (opts.checkOptions.recurse &&
((_a = result.url) === null || _a === void 0 ? void 0 : _a.href.startsWith(opts.rootPath)));
// only crawl links that start with the same host

@@ -375,3 +385,4 @@ if (crawl) {

// trim the starting http://localhost:0000 if we stood up a local static server
if (((_a = options === null || options === void 0 ? void 0 : options.staticHttpServerHost) === null || _a === void 0 ? void 0 : _a.length) && (url === null || url === void 0 ? void 0 : url.startsWith(options.staticHttpServerHost))) {
if (((_a = options === null || options === void 0 ? void 0 : options.staticHttpServerHost) === null || _a === void 0 ? void 0 : _a.length) &&
(url === null || url === void 0 ? void 0 : url.startsWith(options.staticHttpServerHost))) {
newUrl = url.slice(options.staticHttpServerHost.length);

@@ -378,0 +389,0 @@ // add the full filesystem path back if we trimmed it

65

build/src/links.js
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.getLinks = void 0;
const cheerio = require("cheerio");
const htmlParser = require("htmlparser2");
const url_1 = require("url");

@@ -32,25 +32,25 @@ const linksAttr = {

};
// Create lookup table for tag name to attribute that contains URL:
// Reverse lookup table: tag name -> list of attributes on that tag that
// may contain URLs (inverts the attribute -> tags mapping in linksAttr).
const tagAttr = {};
for (const [attr, tags] of Object.entries(linksAttr)) {
    for (const tag of tags) {
        (tagAttr[tag] = tagAttr[tag] || []).push(attr);
    }
}
function getLinks(source, baseUrl) {
const $ = cheerio.load(source);
let realBaseUrl = baseUrl;
const base = $('base[href]');
if (base.length) {
// only first <base by specification
const htmlBaseUrl = base.first().attr('href');
realBaseUrl = getBaseUrl(htmlBaseUrl, baseUrl);
}
let baseSet = false;
const links = new Array();
const attrs = Object.keys(linksAttr);
for (const attr of attrs) {
const elements = linksAttr[attr].map(tag => `${tag}[${attr}]`).join(',');
$(elements).each((i, ele) => {
const element = ele;
if (!element.attribs) {
return;
const parser = new htmlParser.Parser({
onopentag(tag, attributes) {
// Allow alternate base URL to be specified in tag:
if (tag === 'base' && !baseSet) {
realBaseUrl = getBaseUrl(attributes.href, baseUrl);
baseSet = true;
}
const values = parseAttr(attr, element.attribs[attr]);
// ignore href properties for link tags where rel is likely to fail
const relValuesToIgnore = ['dns-prefetch', 'preconnect'];
if (element.tagName === 'link' &&
relValuesToIgnore.includes(element.attribs['rel'])) {
if (tag === 'link' && relValuesToIgnore.includes(attributes.rel)) {
return;

@@ -60,5 +60,5 @@ }

// the content actually looks like a url
if (element.tagName === 'meta' && element.attribs['content']) {
if (tag === 'meta' && attributes.content) {
try {
new url_1.URL(element.attribs['content']);
new url_1.URL(attributes.content);
}

@@ -69,10 +69,13 @@ catch (e) {

}
for (const v of values) {
if (v) {
const link = parseLink(v, realBaseUrl);
links.push(link);
if (tagAttr[tag]) {
for (const attr of tagAttr[tag]) {
if (attributes[attr]) {
links.push(parseLink(attributes[attr], realBaseUrl));
}
}
}
});
}
},
});
parser.write(source);
parser.end();
return links;

@@ -98,12 +101,2 @@ }

}
/**
 * Expand an attribute value into the candidate URL(s) it contains.
 * Most attributes hold a single URL; `srcset` holds comma-separated
 * "url descriptor" pairs, of which only the url part is kept.
 * @param {string} name - Attribute name (e.g. 'href', 'src', 'srcset').
 * @param {string} value - Raw attribute value.
 * @returns {string[]} One or more URL strings.
 */
function parseAttr(name, value) {
    if (name !== 'srcset') {
        return [value];
    }
    const candidates = value.split(',');
    return candidates.map((candidate) => candidate.trim().split(/\s+/)[0]);
}
function parseLink(link, baseUrl) {

@@ -110,0 +103,0 @@ try {

@@ -0,1 +1,5 @@

export interface UrlRewriteExpression {
pattern: RegExp;
replacement: string;
}
export interface CheckOptions {

@@ -12,2 +16,3 @@ concurrency?: number;

retry?: boolean;
urlRewriteExpressions?: UrlRewriteExpression[];
}

@@ -14,0 +19,0 @@ export interface InternalCheckOptions extends CheckOptions {

@@ -101,3 +101,3 @@ "use strict";

const pathParts = options.path[0].split(path.sep);
options.path = [path.sep + pathParts[pathParts.length - 1]];
options.path = [path.join('.', pathParts[pathParts.length - 1])];
options.serverRoot =

@@ -104,0 +104,0 @@ pathParts.slice(0, pathParts.length - 1).join(path.sep) || '.';

@@ -0,3 +1,5 @@

/// <reference types="node" />
import { EventEmitter } from 'events';
export interface QueueOptions {
concurrency?: number;
concurrency: number;
}

@@ -7,9 +9,14 @@ export interface QueueItemOptions {

}
export declare interface Queue {
on(event: 'done', listener: () => void): this;
}
export declare type AsyncFunction = () => Promise<void>;
export declare class Queue {
export declare class Queue extends EventEmitter {
private q;
private activeTimers;
private activeFunctions;
private concurrency;
constructor(options: QueueOptions);
add(fn: AsyncFunction, options?: QueueItemOptions): void;
private tick;
onIdle(): Promise<void>;
}
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Queue = void 0;
const p_queue_1 = require("p-queue");
class Queue {
const events_1 = require("events");
class Queue extends events_1.EventEmitter {
constructor(options) {
this.activeTimers = 0;
this.q = new p_queue_1.default({
concurrency: options.concurrency,
super();
this.q = [];
this.activeFunctions = 0;
this.concurrency = options.concurrency;
}
add(fn, options) {
const delay = (options === null || options === void 0 ? void 0 : options.delay) || 0;
const timeToRun = Date.now() + delay;
this.q.push({
fn,
timeToRun,
});
setTimeout(() => this.tick(), delay);
}
add(fn, options) {
if (options === null || options === void 0 ? void 0 : options.delay) {
setTimeout(() => {
this.q.add(fn);
this.activeTimers--;
}, options.delay);
this.activeTimers++;
tick() {
// Check if we're complete
if (this.activeFunctions === 0 && this.q.length === 0) {
this.emit('done');
return;
}
else {
this.q.add(fn);
for (let i = 0; i < this.q.length; i++) {
// Check if we have too many concurrent functions executing
if (this.activeFunctions >= this.concurrency) {
return;
}
// grab the element at the front of the array
const item = this.q.shift();
// make sure this element is ready to execute - if not, to the back of the stack
if (item.timeToRun > Date.now()) {
this.q.push(item);
}
else {
// this function is ready to go!
this.activeFunctions++;
item.fn().finally(() => {
this.activeFunctions--;
this.tick();
});
}
}
}
async onIdle() {
await this.q.onIdle();
await new Promise(resolve => {
if (this.activeTimers === 0) {
resolve();
return;
}
const timer = setInterval(async () => {
if (this.activeTimers === 0) {
await this.q.onIdle();
clearInterval(timer);
resolve();
return;
}
}, 500);
return new Promise(resolve => {
this.on('done', () => resolve());
});

@@ -40,0 +52,0 @@ }

@@ -6,8 +6,12 @@ "use strict";

const path = require("path");
const util = require("util");
const fs = require("fs");
const util_1 = require("util");
const marked = require("marked");
const serve = require("serve-handler");
const mime = require("mime");
const url_1 = require("url");
const escape = require("escape-html");
const enableDestroy = require("server-destroy");
const readFile = util.promisify(fs.readFile);
const readFile = util_1.promisify(fs.readFile);
const stat = util_1.promisify(fs.stat);
const readdir = util_1.promisify(fs.readdir);
/**

@@ -19,24 +23,6 @@ * Spin up a local HTTP server to serve static requests from disk

async function startWebServer(options) {
const root = path.resolve(options.root);
return new Promise((resolve, reject) => {
const server = http
.createServer(async (req, res) => {
const pathParts = req.url.split('/').filter(x => !!x);
if (pathParts.length > 0) {
const ext = path.extname(pathParts[pathParts.length - 1]);
if (options.markdown && ext.toLowerCase() === '.md') {
const filePath = path.join(path.resolve(options.root), req.url);
const data = await readFile(filePath, { encoding: 'utf-8' });
const result = marked(data, { gfm: true });
res.writeHead(200, {
'content-type': 'text/html',
});
res.end(result);
return;
}
}
return serve(req, res, {
public: options.root,
directoryListing: options.directoryListing,
});
})
.createServer((req, res) => handleRequest(req, res, root, options))
.listen(options.port, () => resolve(server))

@@ -48,2 +34,75 @@ .on('error', reject);

exports.startWebServer = startWebServer;
/**
 * Serve one HTTP request from the local static file root.
 *
 * Resolves the request path under `root`, redirects extension-less
 * directory hits to the trailing-slash form, optionally renders markdown
 * as HTML, and falls back to a directory listing (when enabled) or a 404.
 *
 * @param {import('http').IncomingMessage} req - Incoming request.
 * @param {import('http').ServerResponse} res - Response to write to.
 * @param {string} root - Absolute, resolved filesystem root to serve from.
 * @param {{port?: number, markdown?: boolean, directoryListing?: boolean}} options
 */
async function handleRequest(req, res, root, options) {
    var _a;
    const url = new url_1.URL(req.url || '/', `http://localhost:${options.port}`);
    const pathParts = url.pathname.split('/').filter(x => !!x);
    // originalPath keeps the directory itself (no implied index.html) so the
    // directory-listing fallback below can readdir() it.
    const originalPath = path.join(root, ...pathParts);
    if (url.pathname.endsWith('/')) {
        pathParts.push('index.html');
    }
    const localPath = path.join(root, ...pathParts);
    // Reject path traversal that would escape the served root.
    if (!localPath.startsWith(root)) {
        res.writeHead(500);
        res.end();
        return;
    }
    const maybeListing = options.directoryListing && localPath.endsWith(`${path.sep}index.html`);
    try {
        const stats = await stat(localPath);
        const isDirectory = stats.isDirectory();
        if (isDirectory) {
            // this means we got a path with no / at the end!
            const doc = "<html><body>Redirectin'</body></html>";
            res.statusCode = 301;
            res.setHeader('Content-Type', 'text/html; charset=UTF-8');
            res.setHeader('Content-Length', Buffer.byteLength(doc));
            res.setHeader('Location', req.url + '/');
            res.end(doc);
            return;
        }
    }
    catch (err) {
        // stat failed (e.g. ENOENT); only bail out now if a directory
        // listing can't possibly satisfy this request.
        if (!maybeListing) {
            return return404(res, err);
        }
    }
    try {
        let data = await readFile(localPath, { encoding: 'utf8' });
        // mime.getType returns null for unknown extensions; setHeader(null)
        // would throw, so fall back to a generic binary type.
        let mimeType = mime.getType(localPath) || 'application/octet-stream';
        const isMarkdown = (_a = req.url) === null || _a === void 0 ? void 0 : _a.toLocaleLowerCase().endsWith('.md');
        if (isMarkdown && options.markdown) {
            data = marked(data, { gfm: true });
            mimeType = 'text/html; charset=UTF-8';
        }
        res.setHeader('Content-Type', mimeType);
        res.setHeader('Content-Length', Buffer.byteLength(data));
        res.writeHead(200);
        res.end(data);
    }
    catch (err) {
        if (maybeListing) {
            // No index.html on disk: render a simple listing of the directory.
            try {
                const files = await readdir(originalPath);
                const fileList = files
                    // HTML-escape each name; the previous filter(f => escape(f))
                    // discarded the escaped value and emitted raw filenames.
                    .map(f => `<li><a href="${escape(f)}">${escape(f)}</a></li>`)
                    .join('\r\n');
                const data = `<html><body><ul>${fileList}</ul></body></html>`;
                res.writeHead(200);
                res.end(data);
                return;
            }
            catch (err) {
                return return404(res, err);
            }
        }
        else {
            return return404(res, err);
        }
    }
}
/**
 * Finalize a request with HTTP 404, echoing the underlying error as JSON
 * in the response body to aid debugging.
 * @param {import('http').ServerResponse} res - Response to finalize.
 * @param {unknown} err - The error that caused the miss.
 */
function return404(res, err) {
    const body = JSON.stringify(err);
    res.writeHead(404);
    res.end(body);
}
//# sourceMappingURL=server.js.map
{
"name": "linkinator",
"description": "Find broken links, missing images, etc in your HTML. Scurry around your site and find all those broken links.",
"version": "2.13.1",
"version": "2.14.1",
"license": "MIT",
"repository": "JustinBeckwith/linkinator",
"author": "Justin Beckwith",
"main": "build/src/index.js",

@@ -15,20 +16,20 @@ "types": "build/src/index.d.ts",

"prepare": "npm run compile",
"codecov": "c8 report --reporter=json && codecov -f coverage/*.json",
"compile": "tsc -p .",
"test": "c8 mocha build/test",
"fix": "gts fix",
"codecov": "c8 report --reporter=json && codecov -f coverage/*.json",
"lint": "gts lint",
"build-binaries": "pkg . --out-path build/binaries",
"docs-test": "npm link && linkinator ./README.md"
"docs-test": "node build/src/cli.js ./README.md"
},
"dependencies": {
"chalk": "^4.0.0",
"cheerio": "^1.0.0-rc.5",
"escape-html": "^1.0.3",
"gaxios": "^4.0.0",
"glob": "^7.1.6",
"htmlparser2": "^7.1.2",
"jsonexport": "^3.0.0",
"marked": "^1.2.5",
"marked": "^2.0.0",
"meow": "^9.0.0",
"p-queue": "^6.2.1",
"serve-handler": "^6.1.3",
"mime": "^2.5.0",
"server-destroy": "^1.0.1",

@@ -39,14 +40,15 @@ "update-notifier": "^5.0.0"

"@types/chai": "^4.2.7",
"@types/cheerio": "0.22.23",
"@types/escape-html": "^1.0.0",
"@types/glob": "^7.1.3",
"@types/marked": "^1.2.0",
"@types/marked": "^2.0.0",
"@types/meow": "^5.0.0",
"@types/mime": "^2.0.3",
"@types/mocha": "^8.0.0",
"@types/node": "^12.7.12",
"@types/serve-handler": "^6.1.0",
"@types/node": "^14.0.0",
"@types/server-destroy": "^1.0.0",
"@types/sinon": "^9.0.0",
"@types/sinon": "^10.0.0",
"@types/update-notifier": "^5.0.0",
"c8": "^7.0.0",
"chai": "^4.2.0",
"codecov": "^3.8.1",
"execa": "^5.0.0",

@@ -56,5 +58,6 @@ "gts": "^3.0.0",

"nock": "^13.0.0",
"pkg": "^4.4.9",
"semantic-release": "^17.0.0",
"sinon": "^9.0.0",
"pkg": "^5.0.0",
"semantic-release": "^18.0.0",
"sinon": "^11.0.0",
"strip-ansi": "^6.0.0",
"typescript": "^4.0.0"

@@ -79,8 +82,3 @@ },

"checker"
],
"c8": {
"exclude": [
"build/test"
]
}
]
}
# 🐿 linkinator
> A super simple site crawler and broken link checker.
[![npm version](https://img.shields.io/npm/v/linkinator.svg)](https://www.npmjs.org/package/linkinator)
[![Build Status](https://img.shields.io/github/workflow/status/JustinBeckwith/linkinator/ci/master)](https://github.com/JustinBeckwith/linkinator/actions?query=branch%3Amaster+workflow%3Aci)
[![codecov](https://img.shields.io/codecov/c/github/JustinBeckwith/linkinator/master)](https://codecov.io/gh/JustinBeckwith/linkinator)
[![npm version](https://img.shields.io/npm/v/linkinator)](https://www.npmjs.org/package/linkinator)
[![Build Status](https://img.shields.io/github/workflow/status/JustinBeckwith/linkinator/ci/main)](https://github.com/JustinBeckwith/linkinator/actions?query=branch%3Amain+workflow%3Aci)
[![codecov](https://img.shields.io/codecov/c/github/JustinBeckwith/linkinator/main)](https://codecov.io/gh/JustinBeckwith/linkinator)
[![Known Vulnerabilities](https://img.shields.io/snyk/vulnerabilities/github/JustinBeckwith/linkinator)](https://snyk.io/test/github/JustinBeckwith/linkinator)
[![Code Style: Google](https://img.shields.io/badge/code%20style-google-blueviolet.svg)](https://github.com/google/gts)
[![semantic-release](https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079.svg)](https://github.com/semantic-release/semantic-release)
[![Code Style: Google](https://img.shields.io/badge/code%20style-google-blueviolet)](https://github.com/google/gts)
[![semantic-release](https://img.shields.io/badge/%20%20%F0%9F%93%A6%F0%9F%9A%80-semantic--release-e10079)](https://github.com/semantic-release/semantic-release)
Behold my latest inator! The `linkinator` provides an API and CLI for crawling websites and validating links. It's got a ton of sweet features:
- 🔥 Easily perform scans on remote sites or local files

@@ -22,3 +24,3 @@ - 🔥 Scan any element that includes links, not just `<a href>`

```sh
$ npm install linkinator
npm install linkinator
```

@@ -32,3 +34,3 @@

```
```text
$ linkinator LOCATIONS [ --arguments ]

@@ -83,2 +85,8 @@

--url-rewrite-search
Pattern to search for in urls. Must be used with --url-rewrite-replace.
--url-rewrite-replace
Expression used to replace search content. Must be used with --url-rewrite-search.
--verbosity

@@ -94,3 +102,3 @@ Override the default verbosity for this command. Available options are

```sh
$ npx linkinator http://jbeckwith.com
npx linkinator http://jbeckwith.com
```

@@ -101,3 +109,3 @@

```sh
$ npx linkinator ./docs
npx linkinator ./docs
```

@@ -108,3 +116,3 @@

```sh
$ npx linkinator ./docs --recurse
npx linkinator ./docs --recurse
```

@@ -115,3 +123,3 @@

```sh
$ npx linkinator ./docs --skip www.googleapis.com
npx linkinator ./docs --skip www.googleapis.com
```

@@ -122,3 +130,3 @@

```sh
$ linkinator http://jbeckwith.com --skip '^(?!http://jbeckwith.com)'
linkinator http://jbeckwith.com --skip '^(?!http://jbeckwith.com)'
```

@@ -129,3 +137,3 @@

```sh
$ linkinator ./docs --format CSV
linkinator ./docs --format CSV
```

@@ -136,3 +144,3 @@

```sh
$ linkinator ./README.md --markdown
linkinator ./README.md --markdown
```

@@ -143,6 +151,7 @@

```sh
$ linkinator "**/*.md" --markdown
linkinator "**/*.md" --markdown
```
### Configuration file
You can pass options directly to the `linkinator` CLI, or you can define a config file. By default, `linkinator` will look for a `linkinator.config.json` file in the current working directory.

@@ -168,6 +177,7 @@

```sh
$ linkinator --config /some/path/your-config.json
linkinator --config /some/path/your-config.json
```
## GitHub Actions
You can use `linkinator` as a GitHub Action as well, using [JustinBeckwith/linkinator-action](https://github.com/JustinBeckwith/linkinator-action):

@@ -196,4 +206,6 @@

#### linkinator.check(options)
### linkinator.check(options)
Asynchronous method that runs a site wide scan. Options come in the form of an object that includes:
- `path` (string|string[]) - A fully qualified path to the url to be scanned, or the path(s) to the directory on disk that contains files to be scanned. *required*.

@@ -210,5 +222,8 @@ - `concurrency` (number) - The number of connections to make simultaneously. Defaults to 100.

- `directoryListing` (boolean) - Automatically serve a static file listing page when serving a directory. Defaults to `false`.
- `urlRewriteExpressions` (array) - Collection of objects that contain a search pattern, and replacement.
#### linkinator.LinkChecker()
### linkinator.LinkChecker()
Constructor method that can be used to create a new `LinkChecker` instance. This is particularly useful if you want to receive events as the crawler crawls. Exposes the following events:
- `pagestart` (string) - Provides the url that the crawler has just started to scan.

@@ -220,4 +235,6 @@ - `link` (object) - Provides an object with

### Simple example
### Examples
#### Simple example
```js

@@ -257,3 +274,3 @@ const link = require('linkinator');

### Complete example
#### Complete example

@@ -318,8 +335,11 @@ In most cases you're going to want to respond to events, as running the check command can kinda take a long time.

### Using a proxy
This library supports proxies via the `HTTP_PROXY` and `HTTPS_PROXY` environment variables. This [guide](https://www.golinuxcloud.com/set-up-proxy-http-proxy-environment-variable/) provides a nice overview of how to format and set these variables.
### Globbing
You may have noticed in the example, when using a glob the pattern is encapsulated in quotes:
```sh
$ linkinator "**/*.md" --markdown
linkinator "**/*.md" --markdown
```

@@ -330,11 +350,15 @@

### Debugging
Oftentimes when a link fails, it's an easy to spot typo, or a clear 404. Other times ... you may need more details on exactly what went wrong. To see a full call stack for the HTTP request failure, use `--verbosity DEBUG`:
```sh
$ linkinator https://jbeckwith.com --verbosity DEBUG
linkinator https://jbeckwith.com --verbosity DEBUG
```
### Controlling Output
The `--verbosity` flag offers preset options for controlling the output, but you may want more control. Using [`jq`](https://stedolan.github.io/jq/) and `--format JSON` - you can do just that!
```sh
$ linkinator https://jbeckwith.com --verbosity DEBUG --format JSON | jq '.links | .[] | select(.state | contains("BROKEN"))'
linkinator https://jbeckwith.com --verbosity DEBUG --format JSON | jq '.links | .[] | select(.state | contains("BROKEN"))'
```

@@ -341,0 +365,0 @@

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet