hast-util-sanitize
Advanced tools
Comparing version 4.1.0 to 5.0.0
@@ -1,3 +0,3 @@ | ||
export {sanitize} from './lib/index.js' | ||
export {defaultSchema} from './lib/schema.js' | ||
export type Schema = import('./lib/index.js').Schema | ||
export { sanitize } from "./lib/index.js"; | ||
export { defaultSchema } from "./lib/schema.js"; | ||
export type Schema = import('./lib/index.js').Schema; |
/** | ||
* Utility to sanitize a tree | ||
* Sanitize a tree. | ||
* | ||
* @param {Node} node | ||
* Hast tree to sanitize | ||
* @param {Schema} [schema] | ||
* Schema defining how to sanitize - defaults to Github style sanitation | ||
* @param {Readonly<Nodes>} node | ||
* Unsafe tree. | ||
* @param {Readonly<Schema> | null | undefined} [options] | ||
* Configuration (default: `defaultSchema`). | ||
* @returns {Nodes} | ||
* New, safe tree. | ||
*/ | ||
export function sanitize(node: Node, schema?: Schema | undefined): Node | ||
export type Root = import('hast').Root | ||
export type Content = import('hast').Content | ||
export type Element = import('hast').Element | ||
export type Properties = import('hast').Properties | ||
export type Node = Content | Root | ||
export function sanitize(node: Readonly<Nodes>, options?: Readonly<Schema> | null | undefined): Nodes; | ||
export type Comment = import('hast').Comment; | ||
export type Doctype = import('hast').Doctype; | ||
export type Element = import('hast').Element; | ||
export type ElementContent = import('hast').ElementContent; | ||
export type Nodes = import('hast').Nodes; | ||
export type Properties = import('hast').Properties; | ||
export type Root = import('hast').Root; | ||
export type RootContent = import('hast').RootContent; | ||
export type Text = import('hast').Text; | ||
/** | ||
* Possible property values. | ||
* Definition for a property. | ||
*/ | ||
export type PropertyValue = Properties[string] | ||
export type PropertyDefinition = [string, ...Array<Exclude<Properties[keyof Properties], Array<any>> | RegExp>] | string; | ||
/** | ||
* Possible primitive HTML attribute values. | ||
* Schema that defines what nodes and properties are allowed. | ||
* | ||
* The default schema is `defaultSchema`, which follows how GitHub cleans. | ||
* If any top-level key is missing in the given schema, the corresponding | ||
* value of the default schema is used. | ||
* | ||
* To extend the standard schema with a few changes, clone `defaultSchema` | ||
* like so: | ||
* | ||
* ```js | ||
* import deepmerge from 'deepmerge' | ||
* import {h} from 'hastscript' | ||
* import {defaultSchema, sanitize} from 'hast-util-sanitize' | ||
* | ||
* // This allows `className` on all elements. | ||
* const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}}) | ||
* | ||
* const tree = sanitize(h('div', {className: ['foo']}), schema) | ||
* | ||
* // `tree` still has `className`. | ||
* console.log(tree) | ||
* // { | ||
* // type: 'element', | ||
* // tagName: 'div', | ||
* // properties: {className: ['foo']}, | ||
* // children: [] | ||
* // } | ||
* ``` | ||
*/ | ||
export type PrimitivePropertyValue = string | number | boolean | ||
export type Schema = { | ||
/** | ||
* Whether to allow comment nodes (default: `false`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* allowComments: true | ||
* ``` | ||
*/ | ||
allowComments?: boolean | null | undefined; | ||
/** | ||
* Whether to allow doctype nodes (default: `false`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* allowDoctypes: true | ||
* ``` | ||
*/ | ||
allowDoctypes?: boolean | null | undefined; | ||
/** | ||
* Map of tag names to a list of tag names which are required ancestors | ||
* (default: `defaultSchema.ancestors`). | ||
* | ||
* Elements with these tag names will be ignored if they occur outside of one | ||
* of their allowed ancestors. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* ancestors: { | ||
* tbody: ['table'], | ||
* // … | ||
* tr: ['table'] | ||
* } | ||
* ``` | ||
*/ | ||
ancestors?: Record<string, Array<string>> | null | undefined; | ||
/** | ||
* Map of tag names to allowed property names (default: | ||
* `defaultSchema.attributes`). | ||
* | ||
* The special key `'*'` as a tag name defines property names allowed on all | ||
* elements. | ||
* | ||
* The special value `'data*'` as a property name can be used to allow all | ||
* `data` properties. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* attributes: { | ||
* a: ['ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy', …, 'href'], | ||
* // … | ||
* '*': [ | ||
* 'abbr', | ||
* 'accept', | ||
* 'acceptCharset', | ||
* // … | ||
* 'vAlign', | ||
* 'value', | ||
* 'width' | ||
* ] | ||
* } | ||
* ``` | ||
* | ||
* Instead of a single string in the array, which allows any property value | ||
* for the field, you can use an array to allow several values. | ||
* For example, `input: ['type']` allows `type` set to any value on `input`s. | ||
* But `input: [['type', 'checkbox', 'radio']]` allows `type` when set to | ||
* `'checkbox'` or `'radio'`. | ||
* | ||
* You can use regexes, so for example `span: [['className', /^hljs-/]]` | ||
* allows any class that starts with `hljs-` on `span`s. | ||
* | ||
* When comma- or space-separated values are used (such as `className`), each | ||
* value is checked individually. | ||
* For example, to allow certain classes on `span`s for syntax highlighting, | ||
* use `span: [['className', 'number', 'operator', 'token']]`. | ||
* This will allow `'number'`, `'operator'`, and `'token'` classes, but drop | ||
* others. | ||
*/ | ||
attributes?: Record<string, Array<PropertyDefinition>> | null | undefined; | ||
/** | ||
* List of property names that clobber (default: `defaultSchema.clobber`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name'] | ||
* ``` | ||
*/ | ||
clobber?: Array<string> | null | undefined; | ||
/** | ||
* Prefix to use before clobbering properties (default: | ||
* `defaultSchema.clobberPrefix`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* clobberPrefix: 'user-content-' | ||
* ``` | ||
*/ | ||
clobberPrefix?: string | null | undefined; | ||
/** | ||
* Map of *property names* to allowed protocols (default: | ||
* `defaultSchema.protocols`). | ||
* | ||
* Properties listed here hold URLs: local URLs (relative to the current | ||
* website, such as `this`, `#this`, `/this`, or `?this`) are always allowed, | ||
* while remote URLs (such as `https://example.com`) are allowed only if they | ||
* use one of the listed protocols. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* protocols: { | ||
* cite: ['http', 'https'], | ||
* // … | ||
* src: ['http', 'https'] | ||
* } | ||
* ``` | ||
*/ | ||
protocols?: Record<string, Array<string> | null | undefined> | null | undefined; | ||
/** | ||
* Map of tag names to required property names with a default value | ||
* (default: `defaultSchema.required`). | ||
* | ||
* This defines properties that must be set. | ||
* If a field does not exist (after the element was made safe), these will be | ||
* added with the given value. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* required: { | ||
* input: {disabled: true, type: 'checkbox'} | ||
* } | ||
* ``` | ||
* | ||
* > 👉 **Note**: properties are first checked based on `schema.attributes`, | ||
* > then on `schema.required`. | ||
* > That means properties could be removed by `attributes` and then added | ||
* > again with `required`. | ||
*/ | ||
required?: Record<string, Record<string, Properties[keyof Properties]>> | null | undefined; | ||
/** | ||
* List of tag names to strip from the tree (default: `defaultSchema.strip`). | ||
* | ||
* By default, unsafe elements (those not in `schema.tagNames`) are replaced | ||
* by what they contain. | ||
* This option can drop their contents. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* strip: ['script'] | ||
* ``` | ||
*/ | ||
strip?: Array<string> | null | undefined; | ||
/** | ||
* List of allowed tag names (default: `defaultSchema.tagNames`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* tagNames: [ | ||
* 'a', | ||
* 'b', | ||
* // … | ||
* 'ul', | ||
* 'var' | ||
* ] | ||
* ``` | ||
*/ | ||
tagNames?: Array<string> | null | undefined; | ||
}; | ||
/** | ||
* Map of tag names to allow lists for each property. | ||
* Info passed around. | ||
*/ | ||
export type Attributes = Record< | ||
string, | ||
Array<string | [string, ...Array<PrimitivePropertyValue | RegExp>]> | ||
> | ||
/** | ||
* Normalized input. | ||
*/ | ||
export type AttributeClean = Record< | ||
string, | ||
Array<PrimitivePropertyValue | RegExp> | ||
> | ||
/** | ||
* Sanitization configuration. | ||
*/ | ||
export type Schema = { | ||
/** | ||
* Map of tag names to allowed properties. | ||
* | ||
* The special `'*'` key defines property names allowed on all elements. | ||
*/ | ||
attributes?: Attributes | undefined | ||
/** | ||
* Map of tag names to required property names and their default property value. | ||
*/ | ||
required?: Record<string, Record<string, PropertyValue>> | undefined | ||
/** | ||
* List of allowed tag names. | ||
*/ | ||
tagNames?: Array<string> | undefined | ||
/** | ||
* Map of protocols to allow in property values. | ||
*/ | ||
protocols?: Record<string, Array<string>> | undefined | ||
/** | ||
* Map of tag names to their required ancestor elements. | ||
*/ | ||
ancestors?: Record<string, Array<string>> | undefined | ||
/** | ||
* List of allowed property names which can clobber. | ||
*/ | ||
clobber?: Array<string> | undefined | ||
/** | ||
* Prefix to use before potentially clobbering property names. | ||
*/ | ||
clobberPrefix?: string | undefined | ||
/** | ||
* Names of elements to strip from the tree. | ||
*/ | ||
strip?: Array<string> | undefined | ||
/** | ||
* Whether to allow comments. | ||
*/ | ||
allowComments?: boolean | undefined | ||
/** | ||
* Whether to allow doctypes. | ||
*/ | ||
allowDoctypes?: boolean | undefined | ||
} | ||
export type Handler = ( | ||
schema: Schema, | ||
value: any, | ||
node: any, | ||
stack: Array<string> | ||
) => unknown | ||
export type NodeDefinition = Record<string, Handler> | ||
export type NodeDefinitionGetter = ( | ||
schema: Schema, | ||
node: Node | ||
) => NodeDefinition | undefined | ||
export type NodeSchema = Record<string, NodeDefinition | NodeDefinitionGetter> | ||
export type State = { | ||
/** | ||
* Schema. | ||
*/ | ||
schema: Readonly<Schema>; | ||
/** | ||
* Tag names of ancestors. | ||
*/ | ||
stack: Array<string>; | ||
}; |
898
lib/index.js
/** | ||
* @typedef {import('hast').Root} Root | ||
* @typedef {import('hast').Content} Content | ||
* @typedef {import('hast').Comment} Comment | ||
* @typedef {import('hast').Doctype} Doctype | ||
* @typedef {import('hast').Element} Element | ||
* @typedef {import('hast').ElementContent} ElementContent | ||
* @typedef {import('hast').Nodes} Nodes | ||
* @typedef {import('hast').Properties} Properties | ||
* @typedef {Content | Root} Node | ||
* @typedef {import('hast').Root} Root | ||
* @typedef {import('hast').RootContent} RootContent | ||
* @typedef {import('hast').Text} Text | ||
*/ | ||
/** | ||
* @typedef {[string, ...Array<Exclude<Properties[keyof Properties], Array<any>> | RegExp>] | string} PropertyDefinition | ||
* Definition for a property. | ||
* | ||
* @typedef {Properties[string]} PropertyValue | ||
* Possible property values. | ||
* @typedef {string | number | boolean} PrimitivePropertyValue | ||
* Possible primitive HTML attribute values. | ||
* @typedef Schema | ||
* Schema that defines what nodes and properties are allowed. | ||
* | ||
* @typedef {Record<string, Array<string | [string, ...Array<PrimitivePropertyValue | RegExp>]>>} Attributes | ||
* Map of tag names to allow lists for each property. | ||
* @typedef {Record<string, Array<PrimitivePropertyValue | RegExp>>} AttributeClean | ||
* Normalized input. | ||
* The default schema is `defaultSchema`, which follows how GitHub cleans. | ||
* If any top-level key is missing in the given schema, the corresponding | ||
* value of the default schema is used. | ||
* | ||
* @typedef Schema | ||
* Sanitization configuration. | ||
* @property {Attributes | undefined} [attributes] | ||
* Map of tag names to allowed properties. | ||
* To extend the standard schema with a few changes, clone `defaultSchema` | ||
* like so: | ||
* | ||
* The special `'*'` key defines property names allowed on all elements. | ||
* @property {Record<string, Record<string, PropertyValue>> | undefined} [required] | ||
* Map of tag names to required property names and their default property value. | ||
* @property {Array<string> | undefined} [tagNames] | ||
* List of allowed tag names. | ||
* @property {Record<string, Array<string>> | undefined} [protocols] | ||
* Map of protocols to allow in property values. | ||
* @property {Record<string, Array<string>> | undefined} [ancestors] | ||
* Map of tag names to their required ancestor elements. | ||
* @property {Array<string> | undefined} [clobber] | ||
* List of allowed property names which can clobber. | ||
* @property {string | undefined} [clobberPrefix] | ||
* Prefix to use before potentially clobbering property names. | ||
* @property {Array<string> | undefined} [strip] | ||
* Names of elements to strip from the tree. | ||
* @property {boolean | undefined} [allowComments] | ||
* Whether to allow comments. | ||
* @property {boolean | undefined} [allowDoctypes] | ||
* Whether to allow doctypes. | ||
* ```js | ||
* import deepmerge from 'deepmerge' | ||
* import {h} from 'hastscript' | ||
* import {defaultSchema, sanitize} from 'hast-util-sanitize' | ||
* | ||
* @typedef {(schema: Schema, value: any, node: any, stack: Array<string>) => unknown} Handler | ||
* @typedef {Record<string, Handler>} NodeDefinition | ||
* @typedef {((schema: Schema, node: Node) => NodeDefinition | undefined)} NodeDefinitionGetter | ||
* @typedef {Record<string, NodeDefinition | NodeDefinitionGetter>} NodeSchema | ||
* // This allows `className` on all elements. | ||
* const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}}) | ||
* | ||
* const tree = sanitize(h('div', {className: ['foo']}), schema) | ||
* | ||
* // `tree` still has `className`. | ||
* console.log(tree) | ||
* // { | ||
* // type: 'element', | ||
* // tagName: 'div', | ||
* // properties: {className: ['foo']}, | ||
* // children: [] | ||
* // } | ||
* ``` | ||
* @property {boolean | null | undefined} [allowComments=false] | ||
* Whether to allow comment nodes (default: `false`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* allowComments: true | ||
* ``` | ||
* @property {boolean | null | undefined} [allowDoctypes=false] | ||
* Whether to allow doctype nodes (default: `false`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* allowDoctypes: true | ||
* ``` | ||
* @property {Record<string, Array<string>> | null | undefined} [ancestors] | ||
* Map of tag names to a list of tag names which are required ancestors | ||
* (default: `defaultSchema.ancestors`). | ||
* | ||
* Elements with these tag names will be ignored if they occur outside of one | ||
* of their allowed ancestors. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* ancestors: { | ||
* tbody: ['table'], | ||
* // … | ||
* tr: ['table'] | ||
* } | ||
* ``` | ||
* @property {Record<string, Array<PropertyDefinition>> | null | undefined} [attributes] | ||
* Map of tag names to allowed property names (default: | ||
* `defaultSchema.attributes`). | ||
* | ||
* The special key `'*'` as a tag name defines property names allowed on all | ||
* elements. | ||
* | ||
* The special value `'data*'` as a property name can be used to allow all | ||
* `data` properties. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* attributes: { | ||
* a: ['ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy', …, 'href'], | ||
* // … | ||
* '*': [ | ||
* 'abbr', | ||
* 'accept', | ||
* 'acceptCharset', | ||
* // … | ||
* 'vAlign', | ||
* 'value', | ||
* 'width' | ||
* ] | ||
* } | ||
* ``` | ||
* | ||
* Instead of a single string in the array, which allows any property value | ||
* for the field, you can use an array to allow several values. | ||
* For example, `input: ['type']` allows `type` set to any value on `input`s. | ||
* But `input: [['type', 'checkbox', 'radio']]` allows `type` when set to | ||
* `'checkbox'` or `'radio'`. | ||
* | ||
* You can use regexes, so for example `span: [['className', /^hljs-/]]` | ||
* allows any class that starts with `hljs-` on `span`s. | ||
* | ||
* When comma- or space-separated values are used (such as `className`), each | ||
* value is checked individually. | ||
* For example, to allow certain classes on `span`s for syntax highlighting, | ||
* use `span: [['className', 'number', 'operator', 'token']]`. | ||
* This will allow `'number'`, `'operator'`, and `'token'` classes, but drop | ||
* others. | ||
* @property {Array<string> | null | undefined} [clobber] | ||
* List of property names that clobber (default: `defaultSchema.clobber`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name'] | ||
* ``` | ||
* @property {string | null | undefined} [clobberPrefix] | ||
* Prefix to use before clobbering properties (default: | ||
* `defaultSchema.clobberPrefix`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* clobberPrefix: 'user-content-' | ||
* ``` | ||
* @property {Record<string, Array<string> | null | undefined> | null | undefined} [protocols] | ||
* Map of *property names* to allowed protocols (default: | ||
* `defaultSchema.protocols`). | ||
* | ||
* Properties listed here hold URLs: local URLs (relative to the current | ||
* website, such as `this`, `#this`, `/this`, or `?this`) are always allowed, | ||
* while remote URLs (such as `https://example.com`) are allowed only if they | ||
* use one of the listed protocols. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* protocols: { | ||
* cite: ['http', 'https'], | ||
* // … | ||
* src: ['http', 'https'] | ||
* } | ||
* ``` | ||
* @property {Record<string, Record<string, Properties[keyof Properties]>> | null | undefined} [required] | ||
* Map of tag names to required property names with a default value | ||
* (default: `defaultSchema.required`). | ||
* | ||
* This defines properties that must be set. | ||
* If a field does not exist (after the element was made safe), these will be | ||
* added with the given value. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* required: { | ||
* input: {disabled: true, type: 'checkbox'} | ||
* } | ||
* ``` | ||
* | ||
* > 👉 **Note**: properties are first checked based on `schema.attributes`, | ||
* > then on `schema.required`. | ||
* > That means properties could be removed by `attributes` and then added | ||
* > again with `required`. | ||
* @property {Array<string> | null | undefined} [strip] | ||
* List of tag names to strip from the tree (default: `defaultSchema.strip`). | ||
* | ||
* By default, unsafe elements (those not in `schema.tagNames`) are replaced | ||
* by what they contain. | ||
* This option can drop their contents. | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* strip: ['script'] | ||
* ``` | ||
* @property {Array<string> | null | undefined} [tagNames] | ||
* List of allowed tag names (default: `defaultSchema.tagNames`). | ||
* | ||
* For example: | ||
* | ||
* ```js | ||
* tagNames: [ | ||
* 'a', | ||
* 'b', | ||
* // … | ||
* 'ul', | ||
* 'var' | ||
* ] | ||
* ``` | ||
* | ||
* @typedef State | ||
* Info passed around. | ||
* @property {Readonly<Schema>} schema | ||
* Schema. | ||
* @property {Array<string>} stack | ||
* Tag names of ancestors. | ||
*/ | ||
import structuredClone from '@ungap/structured-clone' | ||
import {position} from 'unist-util-position' | ||
import {defaultSchema} from './schema.js' | ||
@@ -53,46 +217,32 @@ | ||
/** @type {NodeSchema} */ | ||
const nodeSchema = { | ||
root: {children: all}, | ||
doctype: handleDoctype, | ||
comment: handleComment, | ||
element: { | ||
tagName: handleTagName, | ||
properties: handleProperties, | ||
children: all | ||
}, | ||
text: {value: handleValue}, | ||
'*': {data: allow, position: allow} | ||
} | ||
/** | ||
* Utility to sanitize a tree | ||
* Sanitize a tree. | ||
* | ||
* @param {Node} node | ||
* Hast tree to sanitize | ||
* @param {Schema} [schema] | ||
* Schema defining how to sanitize - defaults to Github style sanitation | ||
* @param {Readonly<Nodes>} node | ||
* Unsafe tree. | ||
* @param {Readonly<Schema> | null | undefined} [options] | ||
* Configuration (default: `defaultSchema`). | ||
* @returns {Nodes} | ||
* New, safe tree. | ||
*/ | ||
export function sanitize(node, schema) { | ||
/** @type {Node} */ | ||
export function sanitize(node, options) { | ||
/** @type {Nodes} */ | ||
let ctx = {type: 'root', children: []} | ||
if (node && typeof node === 'object' && node.type) { | ||
const replace = one( | ||
Object.assign({}, defaultSchema, schema || {}), | ||
node, | ||
[] | ||
) | ||
/** @type {State} */ | ||
const state = { | ||
schema: options ? {...defaultSchema, ...options} : defaultSchema, | ||
stack: [] | ||
} | ||
const replace = transform(state, node) | ||
if (replace) { | ||
if (Array.isArray(replace)) { | ||
if (replace.length === 1) { | ||
ctx = replace[0] | ||
} else { | ||
// @ts-expect-error Assume `root` is not a child. | ||
ctx.children = replace | ||
} | ||
if (replace) { | ||
if (Array.isArray(replace)) { | ||
if (replace.length === 1) { | ||
ctx = replace[0] | ||
} else { | ||
ctx = replace | ||
ctx.children = replace | ||
} | ||
} else { | ||
ctx = replace | ||
} | ||
@@ -107,48 +257,129 @@ } | ||
* | ||
* @param {Schema} schema | ||
* @param {Node} node | ||
* @param {Array<string>} stack | ||
* @returns {Node | Array<Node> | undefined} | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<unknown>} node | ||
* Unsafe node. | ||
* @returns {Array<ElementContent> | Nodes | undefined} | ||
* Safe result. | ||
*/ | ||
function one(schema, node, stack) { | ||
const type = node && node.type | ||
/** @type {Node} */ | ||
// @ts-expect-error rest of props added later. | ||
const replacement = {type: node.type} | ||
/** @type {boolean | undefined} */ | ||
let replace | ||
function transform(state, node) { | ||
if (node && typeof node === 'object') { | ||
const unsafe = /** @type {Record<string, Readonly<unknown>>} */ (node) | ||
const type = typeof unsafe.type === 'string' ? unsafe.type : '' | ||
if (own.call(nodeSchema, type)) { | ||
/** @type {NodeDefinition | NodeDefinitionGetter | undefined} */ | ||
let definition = nodeSchema[type] | ||
switch (type) { | ||
case 'comment': { | ||
return comment(state, unsafe) | ||
} | ||
if (typeof definition === 'function') { | ||
definition = definition(schema, node) | ||
case 'doctype': { | ||
return doctype(state, unsafe) | ||
} | ||
case 'element': { | ||
return element(state, unsafe) | ||
} | ||
case 'root': { | ||
return root(state, unsafe) | ||
} | ||
case 'text': { | ||
return text(state, unsafe) | ||
} | ||
default: | ||
} | ||
} | ||
} | ||
if (definition) { | ||
const allowed = Object.assign({}, definition, nodeSchema['*']) | ||
/** @type {string} */ | ||
let key | ||
/** | ||
* Make a safe comment. | ||
* | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe | ||
* Unsafe comment-like value. | ||
* @returns {Comment | undefined} | ||
* Safe comment (if with `allowComments`). | ||
*/ | ||
function comment(state, unsafe) { | ||
if (state.schema.allowComments) { | ||
// See <https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments> | ||
const result = typeof unsafe.value === 'string' ? unsafe.value : '' | ||
const index = result.indexOf('-->') | ||
const value = index < 0 ? result : result.slice(0, index) | ||
replace = true | ||
/** @type {Comment} */ | ||
const node = {type: 'comment', value} | ||
for (key in allowed) { | ||
if (own.call(allowed, key)) { | ||
// @ts-expect-error: fine. | ||
// type-coverage:ignore-next-line | ||
const result = allowed[key](schema, node[key], node, stack) | ||
patch(node, unsafe) | ||
// eslint-disable-next-line max-depth | ||
if (result === false) { | ||
replace = undefined | ||
// Set the non-safe value. | ||
// @ts-expect-error: fine. | ||
// type-coverage:ignore-next-line | ||
replacement[key] = node[key] | ||
} else if (result !== undefined && result !== null) { | ||
// @ts-expect-error: fine. | ||
// type-coverage:ignore-next-line | ||
replacement[key] = result | ||
} | ||
return node | ||
} | ||
} | ||
/** | ||
* Make a safe doctype. | ||
* | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe | ||
* Unsafe doctype-like value. | ||
* @returns {Doctype | undefined} | ||
* Safe doctype (if with `allowDoctypes`). | ||
*/ | ||
function doctype(state, unsafe) { | ||
if (state.schema.allowDoctypes) { | ||
/** @type {Doctype} */ | ||
const node = {type: 'doctype'} | ||
patch(node, unsafe) | ||
return node | ||
} | ||
} | ||
/** | ||
* Make a safe element. | ||
* | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe | ||
* Unsafe element-like value. | ||
* @returns {Array<ElementContent> | Element | undefined} | ||
* Safe element. | ||
*/ | ||
function element(state, unsafe) { | ||
const name = typeof unsafe.tagName === 'string' ? unsafe.tagName : '' | ||
state.stack.push(name) | ||
const content = /** @type {Array<ElementContent>} */ ( | ||
children(state, unsafe.children) | ||
) | ||
const props = properties(state, unsafe.properties) | ||
state.stack.pop() | ||
let safeElement = false | ||
if ( | ||
name.length > 0 && | ||
name !== '*' && | ||
(!state.schema.tagNames || state.schema.tagNames.includes(name)) | ||
) { | ||
safeElement = true | ||
// Some nodes can break out of their context if they don’t have a certain | ||
// ancestor. | ||
if (state.schema.ancestors && own.call(state.schema.ancestors, name)) { | ||
const ancestors = state.schema.ancestors[name] | ||
let index = -1 | ||
safeElement = false | ||
while (++index < ancestors.length) { | ||
if (state.stack.includes(ancestors[index])) { | ||
safeElement = true | ||
} | ||
@@ -159,35 +390,85 @@ } | ||
if (replace) { | ||
return replacement | ||
if (!safeElement) { | ||
return state.schema.strip && !state.schema.strip.includes(name) | ||
? content | ||
: undefined | ||
} | ||
return replacement.type === 'element' && | ||
schema.strip && | ||
!schema.strip.includes(replacement.tagName) | ||
? replacement.children | ||
: undefined | ||
/** @type {Element} */ | ||
const node = { | ||
type: 'element', | ||
tagName: name, | ||
properties: props, | ||
children: content | ||
} | ||
patch(node, unsafe) | ||
return node | ||
} | ||
/** | ||
* Sanitize `children`. | ||
* Make a safe root. | ||
* | ||
* @type {Handler} | ||
* @param {Array<Node>} children | ||
* @param {Node} node | ||
* @returns {Array<Node>} | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe | ||
* Unsafe root-like value. | ||
* @returns {Root} | ||
* Safe root. | ||
*/ | ||
function all(schema, children, node, stack) { | ||
/** @type {Array<Node>} */ | ||
function root(state, unsafe) { | ||
const content = /** @type {Array<RootContent>} */ ( | ||
children(state, unsafe.children) | ||
) | ||
/** @type {Root} */ | ||
const node = {type: 'root', children: content} | ||
patch(node, unsafe) | ||
return node | ||
} | ||
/** | ||
* Make a safe text. | ||
* | ||
* @param {State} _ | ||
* Info passed around. | ||
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe | ||
* Unsafe text-like value. | ||
* @returns {Text} | ||
* Safe text. | ||
*/ | ||
function text(_, unsafe) { | ||
const value = typeof unsafe.value === 'string' ? unsafe.value : '' | ||
/** @type {Text} */ | ||
const node = {type: 'text', value} | ||
patch(node, unsafe) | ||
return node | ||
} | ||
/** | ||
* Make children safe. | ||
* | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<unknown>} children | ||
* Unsafe value. | ||
* @returns {Array<Nodes>} | ||
* Safe children. | ||
*/ | ||
function children(state, children) { | ||
/** @type {Array<Nodes>} */ | ||
const results = [] | ||
if (Array.isArray(children)) { | ||
const childrenUnknown = /** @type {Array<Readonly<unknown>>} */ (children) | ||
let index = -1 | ||
if (node.type === 'element') { | ||
stack.push(node.tagName) | ||
} | ||
while (++index < childrenUnknown.length) { | ||
const value = transform(state, childrenUnknown[index]) | ||
while (++index < children.length) { | ||
const value = one(schema, children[index], stack) | ||
if (value) { | ||
@@ -201,6 +482,2 @@ if (Array.isArray(value)) { | ||
} | ||
if (node.type === 'element') { | ||
stack.pop() | ||
} | ||
} | ||
@@ -211,31 +488,24 @@ | ||
/** @type {NodeDefinitionGetter} */ | ||
function handleDoctype(schema) { | ||
return schema.allowDoctypes ? {name: handleDoctypeName} : undefined | ||
} | ||
/** @type {NodeDefinitionGetter} */ | ||
function handleComment(schema) { | ||
return schema.allowComments ? {value: handleCommentValue} : undefined | ||
} | ||
/** | ||
* Sanitize `properties`. | ||
* Make element properties safe. | ||
* | ||
* @type {Handler} | ||
* @param {Properties} properties | ||
* @param {Element} node | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<unknown>} properties | ||
* Unsafe value. | ||
* @returns {Properties} | ||
* Safe value. | ||
*/ | ||
function handleProperties(schema, properties, node, stack) { | ||
const name = handleTagName(schema, node.tagName, node, stack) | ||
/* c8 ignore next */ | ||
const attrs = schema.attributes || {} | ||
/* c8 ignore next */ | ||
const reqs = schema.required || {} | ||
const props = properties || {} | ||
const allowed = Object.assign( | ||
{}, | ||
toPropertyValueMap(attrs['*']), | ||
toPropertyValueMap(name && own.call(attrs, name) ? attrs[name] : []) | ||
function properties(state, properties) { | ||
const tagName = state.stack[state.stack.length - 1] | ||
const attributes = state.schema.attributes | ||
const required = state.schema.required | ||
const specific = | ||
attributes && own.call(attributes, tagName) | ||
? attributes[tagName] | ||
: undefined | ||
const defaults = | ||
attributes && own.call(attributes, '*') ? attributes['*'] : undefined | ||
const props = /** @type {Readonly<Record<string, Readonly<unknown>>>} */ ( | ||
properties && typeof properties === 'object' ? properties : {} | ||
) | ||
@@ -249,20 +519,17 @@ /** @type {Properties} */ | ||
if (own.call(props, key)) { | ||
let value = props[key] | ||
/** @type {AttributeClean[string]} */ | ||
/** @type {Readonly<PropertyDefinition> | undefined} */ | ||
let definition | ||
if (own.call(allowed, key)) { | ||
definition = allowed[key] | ||
} else if (data(key) && own.call(allowed, 'data*')) { | ||
definition = allowed['data*'] | ||
} else { | ||
continue | ||
} | ||
if (specific) definition = findDefinition(specific, key) | ||
if (!definition && defaults) definition = findDefinition(defaults, key) | ||
value = Array.isArray(value) | ||
? handlePropertyValues(schema, value, key, definition) | ||
: handlePropertyValue(schema, value, key, definition) | ||
if (definition) { | ||
const unsafe = props[key] | ||
const safe = Array.isArray(unsafe) | ||
? propertyValues(state, definition, key, unsafe) | ||
: propertyValue(state, definition, key, unsafe) | ||
if (value !== undefined && value !== null) { | ||
result[key] = value | ||
if (safe !== null && safe !== undefined) { | ||
result[key] = safe | ||
} | ||
} | ||
@@ -272,6 +539,8 @@ } | ||
if (name && own.call(reqs, name)) { | ||
for (key in reqs[name]) { | ||
if (!own.call(result, key)) { | ||
result[key] = reqs[name][key] | ||
if (required && own.call(required, tagName)) { | ||
const properties = required[tagName] | ||
for (key in properties) { | ||
if (own.call(properties, key) && !own.call(result, key)) { | ||
result[key] = properties[key] | ||
} | ||
@@ -285,101 +554,24 @@ } | ||
/** | ||
* Always return a valid HTML5 doctype. | ||
* | ||
* @type {Handler} | ||
* @returns {string} | ||
*/ | ||
function handleDoctypeName() { | ||
return 'html' | ||
} | ||
/** | ||
* Sanitize `tagName`. | ||
* | ||
* @param {Schema} schema | ||
* @param {string} tagName | ||
* @param {Node} _ | ||
* @param {Array<string>} stack | ||
* @returns {string | false} | ||
*/ | ||
function handleTagName(schema, tagName, _, stack) { | ||
const name = typeof tagName === 'string' ? tagName : '' | ||
let index = -1 | ||
if ( | ||
!name || | ||
name === '*' || | ||
(schema.tagNames && !schema.tagNames.includes(name)) | ||
) { | ||
return false | ||
} | ||
// Some nodes can break out of their context if they don’t have a certain | ||
// ancestor. | ||
if (schema.ancestors && own.call(schema.ancestors, name)) { | ||
while (++index < schema.ancestors[name].length) { | ||
if (stack.includes(schema.ancestors[name][index])) { | ||
return name | ||
} | ||
} | ||
return false | ||
} | ||
return name | ||
} | ||
/** | ||
* See <https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments> | ||
* | ||
* @type {Handler} | ||
* @param {unknown} value | ||
* @returns {string} | ||
*/ | ||
function handleCommentValue(_, value) { | ||
/** @type {string} */ | ||
const result = typeof value === 'string' ? value : '' | ||
const index = result.indexOf('-->') | ||
return index < 0 ? result : result.slice(0, index) | ||
} | ||
/** | ||
* Sanitize `value`. | ||
* | ||
* @type {Handler} | ||
* @param {unknown} value | ||
* @returns {string} | ||
*/ | ||
function handleValue(_, value) { | ||
return typeof value === 'string' ? value : '' | ||
} | ||
/** | ||
* Allow `value`. | ||
* | ||
* @type {Handler} | ||
* @param {unknown} value | ||
*/ | ||
function allow(_, value) { | ||
return value | ||
} | ||
/** | ||
* Sanitize a property value which is a list. | ||
* | ||
* @param {Schema} schema | ||
* @param {Array<unknown>} values | ||
* @param {string} prop | ||
* @param {AttributeClean[string]} definition | ||
* @returns {Array<string | number>} | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<PropertyDefinition>} definition | ||
* Definition. | ||
* @param {string} key | ||
* Field name. | ||
* @param {Readonly<Array<Readonly<unknown>>>} values | ||
* Unsafe value (but an array). | ||
* @returns {Array<number | string>} | ||
* Safe value. | ||
*/ | ||
function handlePropertyValues(schema, values, prop, definition) { | ||
function propertyValues(state, definition, key, values) { | ||
let index = -1 | ||
/** @type {Array<string | number>} */ | ||
/** @type {Array<number | string>} */ | ||
const result = [] | ||
while (++index < values.length) { | ||
const value = handlePropertyValue(schema, values[index], prop, definition) | ||
const value = propertyValue(state, definition, key, values[index]) | ||
if (value !== undefined && value !== null) { | ||
// @ts-expect-error Assume no booleans were in arrays. | ||
if (typeof value === 'number' || typeof value === 'string') { | ||
result.push(value) | ||
@@ -395,27 +587,57 @@ } | ||
* | ||
* @param {Schema} schema | ||
* @param {unknown} value | ||
* @param {string} prop | ||
* @param {AttributeClean[string]} definition | ||
* @returns {PropertyValue} | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {Readonly<PropertyDefinition>} definition | ||
* Definition. | ||
* @param {string} key | ||
* Field name. | ||
* @param {Readonly<unknown>} value | ||
* Unsafe value (but not an array). | ||
* @returns {boolean | number | string | undefined} | ||
* Safe value. | ||
*/ | ||
function handlePropertyValue(schema, value, prop, definition) { | ||
function propertyValue(state, definition, key, value) { | ||
if ( | ||
(typeof value === 'boolean' || | ||
typeof value === 'number' || | ||
typeof value === 'string') && | ||
safeProtocol(schema, value, prop) && | ||
(definition.length === 0 || | ||
definition.some((allowed) => | ||
allowed && typeof allowed === 'object' && 'flags' in allowed | ||
? allowed.test(String(value)) | ||
: allowed === value | ||
)) | ||
typeof value !== 'boolean' && | ||
typeof value !== 'number' && | ||
typeof value !== 'string' | ||
) { | ||
return schema.clobberPrefix && | ||
schema.clobber && | ||
schema.clobber.includes(prop) | ||
? schema.clobberPrefix + value | ||
: value | ||
return | ||
} | ||
if (!safeProtocol(state, key, value)) { | ||
return | ||
} | ||
// Just a string, or only one item in an array, means all values are OK. | ||
// More than one item means an allow list. | ||
if (typeof definition === 'object' && definition.length > 1) { | ||
let ok = false | ||
let index = 0 // Ignore `key`, which is the first item. | ||
while (++index < definition.length) { | ||
const allowed = definition[index] | ||
// Expression. | ||
if (allowed && typeof allowed === 'object' && 'flags' in allowed) { | ||
if (allowed.test(String(value))) { | ||
ok = true | ||
break | ||
} | ||
} | ||
// Primitive. | ||
else if (allowed === value) { | ||
ok = true | ||
break | ||
} | ||
} | ||
if (!ok) return | ||
} | ||
return state.schema.clobber && | ||
state.schema.clobberPrefix && | ||
state.schema.clobber.includes(key) | ||
? state.schema.clobberPrefix + value | ||
: value | ||
} | ||
@@ -426,8 +648,22 @@ | ||
* | ||
* @param {Schema} schema | ||
* @param {unknown} value | ||
* @param {string} prop | ||
* @param {State} state | ||
* Info passed around. | ||
* @param {string} key | ||
* Field name. | ||
* @param {Readonly<unknown>} value | ||
* Unsafe value. | ||
* @returns {boolean} | ||
* Whether it’s a safe value. | ||
*/ | ||
function safeProtocol(schema, value, prop) { | ||
function safeProtocol(state, key, value) { | ||
const protocols = | ||
state.schema.protocols && own.call(state.schema.protocols, key) | ||
? state.schema.protocols[key] | ||
: undefined | ||
// No protocols defined? Then everything is fine. | ||
if (!protocols || protocols.length === 0) { | ||
return true | ||
} | ||
const url = String(value) | ||
@@ -438,10 +674,4 @@ const colon = url.indexOf(':') | ||
const slash = url.indexOf('/') | ||
const protocols = | ||
schema.protocols && own.call(schema.protocols, prop) | ||
? schema.protocols[prop].concat() | ||
: [] | ||
let index = -1 | ||
if ( | ||
protocols.length === 0 || | ||
colon < 0 || | ||
@@ -456,6 +686,10 @@ // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol. | ||
let index = -1 | ||
while (++index < protocols.length) { | ||
const protocol = protocols[index] | ||
if ( | ||
colon === protocols[index].length && | ||
url.slice(0, protocols[index].length) === protocols[index] | ||
colon === protocol.length && | ||
url.slice(0, protocol.length) === protocol | ||
) { | ||
@@ -470,33 +704,49 @@ return true | ||
/** | ||
* Create a map from a list of props or a list of properties and values. | ||
* Add data and position. | ||
* | ||
* @param {Attributes[string]} values | ||
* @returns {AttributeClean} | ||
* @param {Nodes} node | ||
* Node to patch safe data and position on. | ||
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe | ||
* Unsafe node-like value. | ||
* @returns {undefined} | ||
* Nothing. | ||
*/ | ||
function toPropertyValueMap(values) { | ||
/** @type {AttributeClean} */ | ||
const result = {} | ||
let index = -1 | ||
function patch(node, unsafe) { | ||
const cleanPosition = position( | ||
// @ts-expect-error: looks like a node. | ||
unsafe | ||
) | ||
while (++index < values.length) { | ||
const value = values[index] | ||
if (Array.isArray(value)) { | ||
result[value[0]] = value.slice(1) | ||
} else { | ||
result[value] = [] | ||
} | ||
if (unsafe.data) { | ||
node.data = structuredClone(unsafe.data) | ||
} | ||
return result | ||
if (cleanPosition) node.position = cleanPosition | ||
} | ||
/** | ||
* Check if `prop` is a data property. | ||
* | ||
* @param {string} prop | ||
* @returns {boolean} | ||
* @param {Readonly<Array<PropertyDefinition>>} definitions | ||
* @param {string} key | ||
* @returns {Readonly<PropertyDefinition> | undefined} | ||
*/ | ||
function data(prop) { | ||
return prop.length > 4 && prop.slice(0, 4).toLowerCase() === 'data' | ||
function findDefinition(definitions, key) { | ||
/** @type {PropertyDefinition | undefined} */ | ||
let dataDefault | ||
let index = -1 | ||
while (++index < definitions.length) { | ||
const entry = definitions[index] | ||
const name = typeof entry === 'string' ? entry : entry[0] | ||
if (name === key) { | ||
return entry | ||
} | ||
if (name === 'data*') dataDefault = entry | ||
} | ||
if (key.length > 4 && key.slice(0, 4).toLowerCase() === 'data') { | ||
return dataDefault | ||
} | ||
} |
@@ -1,2 +0,8 @@ | ||
/** @type {import('./index.js').Schema} */ | ||
export const defaultSchema: import('./index.js').Schema | ||
/** | ||
* Default schema. | ||
* | ||
* Follows GitHub style sanitation. | ||
* | ||
* @type {import('./index.js').Schema} | ||
*/ | ||
export const defaultSchema: import('./index.js').Schema; |
@@ -1,96 +0,68 @@ | ||
/** @type {import('./index.js').Schema} */ | ||
// Couple of ARIA attributes allowed in several, but not all, places. | ||
const aria = ['ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy'] | ||
/** | ||
* Default schema. | ||
* | ||
* Follows GitHub style sanitation. | ||
* | ||
* @type {import('./index.js').Schema} | ||
*/ | ||
export const defaultSchema = { | ||
strip: ['script'], | ||
clobberPrefix: 'user-content-', | ||
clobber: ['name', 'id'], | ||
ancestors: { | ||
tbody: ['table'], | ||
tfoot: ['table'], | ||
thead: ['table'], | ||
td: ['table'], | ||
th: ['table'], | ||
thead: ['table'], | ||
tfoot: ['table'], | ||
tr: ['table'] | ||
}, | ||
protocols: { | ||
href: ['http', 'https', 'mailto', 'xmpp', 'irc', 'ircs'], | ||
cite: ['http', 'https'], | ||
src: ['http', 'https'], | ||
longDesc: ['http', 'https'] | ||
}, | ||
tagNames: [ | ||
'h1', | ||
'h2', | ||
'h3', | ||
'h4', | ||
'h5', | ||
'h6', | ||
'br', | ||
'b', | ||
'i', | ||
'strong', | ||
'em', | ||
'a', | ||
'pre', | ||
'code', | ||
'img', | ||
'tt', | ||
'div', | ||
'ins', | ||
'del', | ||
'sup', | ||
'sub', | ||
'p', | ||
'ol', | ||
'ul', | ||
'table', | ||
'thead', | ||
'tbody', | ||
'tfoot', | ||
'blockquote', | ||
'dl', | ||
'dt', | ||
'dd', | ||
'kbd', | ||
'q', | ||
'samp', | ||
'var', | ||
'hr', | ||
'ruby', | ||
'rt', | ||
'rp', | ||
'li', | ||
'tr', | ||
'td', | ||
'th', | ||
's', | ||
'strike', | ||
'summary', | ||
'details', | ||
'caption', | ||
'figure', | ||
'figcaption', | ||
'abbr', | ||
'bdo', | ||
'cite', | ||
'dfn', | ||
'mark', | ||
'small', | ||
'span', | ||
'time', | ||
'wbr', | ||
'input' | ||
], | ||
attributes: { | ||
a: ['href'], | ||
img: ['src', 'longDesc'], | ||
input: [ | ||
['type', 'checkbox'], | ||
['disabled', true] | ||
a: [ | ||
...aria, | ||
// Note: these 3 are used by GFM footnotes, they do work on all links. | ||
'dataFootnoteBackref', | ||
'dataFootnoteRef', | ||
['className', 'data-footnote-backref'], | ||
'href' | ||
], | ||
li: [['className', 'task-list-item']], | ||
div: ['itemScope', 'itemType'], | ||
blockquote: ['cite'], | ||
// Note: this class is not normally allowed by GH, when manually writing | ||
// `code` as HTML in markdown, they add it some other way. | |
// We can’t do that, so we have to allow it. | ||
code: [['className', /^language-./]], | ||
del: ['cite'], | ||
div: ['itemScope', 'itemType'], | ||
dl: [...aria], | ||
// Note: these 2 are used by GFM footnotes, they *sometimes* work. | ||
h2: [ | ||
['id', 'footnote-label'], | ||
['className', 'sr-only'] | ||
], | ||
img: [...aria, 'longDesc', 'src'], | ||
// Note: `input` is not normally allowed by GH, when manually writing | ||
// it in markdown, they add it from tasklists some other way. | ||
// We can’t do that, so we have to allow it. | ||
input: [ | ||
['disabled', true], | ||
['type', 'checkbox'] | ||
], | ||
ins: ['cite'], | ||
// Note: this class is not normally allowed by GH, when manually writing | ||
// `li` as HTML in markdown, they add it some other way. | |
// We can’t do that, so we have to allow it. | ||
li: [['className', 'task-list-item']], | ||
// Note: this class is not normally allowed by GH, when manually writing | ||
// `ol` as HTML in markdown, they add it some other way. | |
// We can’t do that, so we have to allow it. | ||
ol: [...aria, ['className', 'contains-task-list']], | ||
q: ['cite'], | ||
section: ['dataFootnotes', ['className', 'footnotes']], | ||
source: ['srcSet'], | ||
summary: [...aria], | ||
table: [...aria], | ||
// Note: this class is not normally allowed by GH, when manually writing | ||
// `ul` as HTML in markdown, they add it some other way. | |
// We can’t do that, so we have to allow it. | ||
ul: [...aria, ['className', 'contains-task-list']], | ||
'*': [ | ||
@@ -104,6 +76,2 @@ 'abbr', | ||
'alt', | ||
'ariaDescribedBy', | ||
'ariaHidden', | ||
'ariaLabel', | ||
'ariaLabelledBy', | ||
'axis', | ||
@@ -118,5 +86,5 @@ 'border', | ||
'clear', | ||
'cols', | ||
'colSpan', | ||
'color', | ||
'cols', | ||
'compact', | ||
@@ -128,10 +96,11 @@ 'coords', | ||
'encType', | ||
'htmlFor', | ||
'frame', | ||
'hSpace', | ||
'headers', | ||
'height', | ||
'hrefLang', | ||
'hSpace', | ||
'htmlFor', | ||
'id', | ||
'isMap', | ||
'id', | ||
'itemProp', | ||
'label', | ||
@@ -152,4 +121,4 @@ 'lang', | ||
'rev', | ||
'rowSpan', | ||
'rows', | ||
'rowSpan', | ||
'rules', | ||
@@ -166,17 +135,78 @@ 'scope', | ||
'title', | ||
'type', | ||
'useMap', | ||
'vAlign', | ||
'value', | ||
'vSpace', | ||
'width', | ||
'itemProp' | ||
'width' | ||
] | ||
}, | ||
clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name'], | ||
clobberPrefix: 'user-content-', | ||
protocols: { | ||
cite: ['http', 'https'], | ||
href: ['http', 'https', 'irc', 'ircs', 'mailto', 'xmpp'], | ||
longDesc: ['http', 'https'], | ||
src: ['http', 'https'] | ||
}, | ||
required: { | ||
input: { | ||
type: 'checkbox', | ||
disabled: true | ||
} | ||
} | ||
input: {disabled: true, type: 'checkbox'} | ||
}, | ||
strip: ['script'], | ||
tagNames: [ | ||
'a', | ||
'b', | ||
'blockquote', | ||
'br', | ||
'code', | ||
'dd', | ||
'del', | ||
'details', | ||
'div', | ||
'dl', | ||
'dt', | ||
'em', | ||
'h1', | ||
'h2', | ||
'h3', | ||
'h4', | ||
'h5', | ||
'h6', | ||
'hr', | ||
'i', | ||
'img', | ||
// Note: `input` is not normally allowed by GH, when manually writing | ||
// it in markdown, they add it from tasklists some other way. | ||
// We can’t do that, so we have to allow it. | ||
'input', | ||
'ins', | ||
'kbd', | ||
'li', | ||
'ol', | ||
'p', | ||
'picture', | ||
'pre', | ||
'q', | ||
'rp', | ||
'rt', | ||
'ruby', | ||
's', | ||
'samp', | ||
'section', | ||
'source', | ||
'span', | ||
'strike', | ||
'strong', | ||
'sub', | ||
'summary', | ||
'sup', | ||
'table', | ||
'tbody', | ||
'td', | ||
'tfoot', | ||
'th', | ||
'thead', | ||
'tr', | ||
'tt', | ||
'ul', | ||
'var' | ||
] | ||
} |
{ | ||
"name": "hast-util-sanitize", | ||
"version": "4.1.0", | ||
"version": "5.0.0", | ||
"description": "hast utility to sanitize nodes", | ||
@@ -31,4 +31,3 @@ "license": "MIT", | ||
"type": "module", | ||
"main": "index.js", | ||
"types": "index.d.ts", | ||
"exports": "./index.js", | ||
"files": [ | ||
@@ -40,17 +39,25 @@ "lib/", | ||
"dependencies": { | ||
"@types/hast": "^2.0.0" | ||
"@types/hast": "^3.0.0", | ||
"@ungap/structured-clone": "^1.2.0", | ||
"unist-util-position": "^5.0.0" | ||
}, | ||
"devDependencies": { | ||
"@types/node": "^18.0.0", | ||
"c8": "^7.0.0", | ||
"@types/node": "^20.0.0", | ||
"@types/ungap__structured-clone": "^0.3.0", | ||
"aria-attributes": "^2.0.0", | ||
"c8": "^8.0.0", | ||
"deepmerge": "^4.0.0", | ||
"hast-util-from-html": "^1.0.0", | ||
"hast-util-to-html": "^8.0.0", | ||
"hastscript": "^7.0.0", | ||
"prettier": "^2.0.0", | ||
"hastscript": "^8.0.0", | ||
"html-element-attributes": "^3.0.0", | ||
"html-tag-names": "^2.0.0", | ||
"prettier": "^3.0.0", | ||
"remark-cli": "^11.0.0", | ||
"remark-preset-wooorm": "^9.0.0", | ||
"type-coverage": "^2.0.0", | ||
"typescript": "^4.0.0", | ||
"unist-builder": "^3.0.0", | ||
"xo": "^0.53.0" | ||
"typescript": "^5.0.0", | ||
"unist-builder": "^4.0.0", | ||
"unist-util-visit": "^5.0.0", | ||
"xo": "^0.55.0" | ||
}, | ||
@@ -60,32 +67,18 @@ "scripts": { | ||
"build": "tsc --build --clean && tsc --build && type-coverage", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"test-api": "node --conditions development test.js", | ||
"test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", | ||
"format": "remark . -qfo && prettier . -w --log-level warn && xo --fix", | ||
"test-api": "node --conditions development test/index.js", | ||
"test-coverage": "c8 --100 --reporter lcov npm run test-api", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
}, | ||
"prettier": { | ||
"tabWidth": 2, | ||
"useTabs": false, | ||
"singleQuote": true, | ||
"bracketSpacing": false, | ||
"semi": false, | ||
"trailingComma": "none" | ||
"singleQuote": true, | ||
"tabWidth": 2, | ||
"trailingComma": "none", | ||
"useTabs": false | ||
}, | ||
"xo": { | ||
"prettier": true, | ||
"overrides": [ | ||
{ | ||
"files": [ | ||
"test.js" | ||
], | ||
"rules": { | ||
"no-await-in-loop": 0, | ||
"no-script-url": 0 | ||
} | ||
} | ||
] | ||
}, | ||
"remarkConfig": { | ||
"plugins": [ | ||
"preset-wooorm" | ||
"remark-preset-wooorm" | ||
] | ||
@@ -96,3 +89,2 @@ }, | ||
"detail": true, | ||
"strict": true, | ||
"ignoreCatch": true, | ||
@@ -102,4 +94,24 @@ "#": "Couple of needed `any`s", | ||
"lib/index.d.ts" | ||
] | ||
], | ||
"strict": true | ||
}, | ||
"xo": { | ||
"overrides": [ | ||
{ | ||
"files": [ | ||
"test/**/*.js" | ||
], | ||
"rules": { | ||
"max-nested-callbacks": "off", | ||
"no-await-in-loop": "off", | ||
"no-script-url": "off" | ||
} | ||
} | ||
], | ||
"prettier": true, | ||
"rules": { | ||
"complexity": "off", | ||
"unicorn/prefer-at": "off" | ||
} | ||
} | ||
} |
329
readme.md
@@ -20,3 +20,4 @@ # hast-util-sanitize | ||
* [API](#api) | ||
* [`sanitize(tree[, schema])`](#sanitizetree-schema) | ||
* [`defaultSchema`](#defaultschema) | ||
* [`sanitize(tree[, options])`](#sanitizetree-options) | ||
* [`Schema`](#schema) | ||
@@ -47,3 +48,3 @@ * [Types](#types) | ||
This package is [ESM only][esm]. | ||
In Node.js (version 14.14+, 16.0+), install with [npm][]: | ||
In Node.js (version 16+), install with [npm][]: | ||
@@ -57,3 +58,3 @@ ```sh | ||
```js | ||
import {sanitize} from 'https://esm.sh/hast-util-sanitize@4' | ||
import {sanitize} from 'https://esm.sh/hast-util-sanitize@5' | ||
``` | ||
@@ -65,3 +66,3 @@ | ||
<script type="module"> | ||
import {sanitize} from 'https://esm.sh/hast-util-sanitize@4?bundle' | ||
import {sanitize} from 'https://esm.sh/hast-util-sanitize@5?bundle' | ||
</script> | ||
@@ -73,8 +74,8 @@ ``` | ||
```js | ||
import {u} from 'unist-builder' | ||
import {h} from 'hastscript' | ||
import {sanitize} from 'hast-util-sanitize' | ||
import {toHtml} from 'hast-util-to-html' | ||
import {u} from 'unist-builder' | ||
const tree = h('div', {onmouseover: 'alert("alpha")'}, [ | ||
const unsafe = h('div', {onmouseover: 'alert("alpha")'}, [ | ||
h( | ||
@@ -95,10 +96,9 @@ 'a', | ||
const unsanitized = toHtml(tree) | ||
const sanitized = toHtml(sanitize(tree)) | ||
const safe = sanitize(unsafe) | ||
console.log(unsanitized) | ||
console.log(sanitized) | ||
console.log(toHtml(unsafe)) | ||
console.log(toHtml(safe)) | ||
``` | ||
Unsanitized: | ||
Unsafe: | ||
@@ -113,3 +113,3 @@ ```html | ||
Sanitized: | ||
Safe: | ||
@@ -126,7 +126,14 @@ ```html | ||
This package exports the identifiers `sanitize` and `defaultSchema`. | ||
This package exports the identifiers [`defaultSchema`][api-default-schema] and | ||
[`sanitize`][api-sanitize]. | ||
There is no default export. | ||
### `sanitize(tree[, schema])` | ||
### `defaultSchema` | ||
Default schema ([`Schema`][api-schema]). | ||
Follows [GitHub][] style sanitation. | ||
### `sanitize(tree[, options])` | ||
Sanitize a tree. | ||
@@ -136,26 +143,30 @@ | ||
* `tree` ([`Node`][node]) — [*tree*][tree] to sanitize | ||
* `schema` ([`Schema`][schema], optional) — schema defining how to sanitize | ||
* `tree` ([`Node`][node]) | ||
— unsafe tree | ||
* `options` ([`Schema`][api-schema], default: | ||
[`defaultSchema`][api-default-schema]) | ||
— configuration | ||
###### Returns | ||
A new, sanitized, tree ([`Node`][node]). | ||
New, safe tree ([`Node`][node]). | ||
### `Schema` | ||
Sanitation schema that defines if and how nodes and properties should be | ||
cleaned. | ||
The default schema is exported as `defaultSchema`, which defaults to [GitHub][] | ||
style sanitation. | ||
If any top-level key isn’t given, it defaults to GitHub’s style too. | ||
Schema that defines what nodes and properties are allowed. | ||
For a thorough sample, see the code for [`defaultSchema`][default-schema]. | ||
The default schema is [`defaultSchema`][api-default-schema], which follows how | ||
GitHub cleans. | ||
If any top-level key is missing in the given schema, the corresponding | ||
value of the default schema is used. | ||
To extend the standard schema with a few changes, clone `defaultSchema` like so: | ||
To extend the standard schema with a few changes, clone `defaultSchema` | ||
like so: | ||
```js | ||
import deepmerge from 'deepmerge' | ||
import {h} from 'hastscript' | ||
import deepmerge from 'deepmerge' // You can use `structuredClone` in modern JS. | ||
import {sanitize, defaultSchema} from 'hast-util-sanitize' | ||
import {defaultSchema, sanitize} from 'hast-util-sanitize' | ||
// This allows `className` on all elements. | ||
const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}}) | ||
@@ -175,125 +186,99 @@ | ||
###### `attributes` | ||
##### Fields | ||
Map of tag names to allowed [*property names*][name] | ||
(`Record<string, Array<string>>`). | ||
###### `allowComments` | ||
The special `'*'` key defines [*property names*][name] allowed on all | ||
[*elements*][element]. | ||
Whether to allow comment nodes (`boolean`, default: `false`). | ||
One special value, `'data*'`, can be used to allow all `data` properties. | ||
For example: | ||
```js | ||
attributes: { | ||
a: ['href'], | ||
img: ['src', 'longDesc'], | ||
// … | ||
'*': [ | ||
'abbr', | ||
'accept', | ||
'acceptCharset', | ||
// … | ||
'vSpace', | ||
'width', | ||
'itemProp' | ||
] | ||
} | ||
allowComments: true | ||
``` | ||
Instead of a single string (such as `type`), which allows any [*property | ||
value*][value] of that [*property name*][name], it’s also possible to provide | ||
an array (such as `['type', 'checkbox']` or `['className', /^hljs-/]`), | ||
where the first entry is the *property name*, and all other entries are | ||
*property values* allowed (or regular expressions that are tested with values). | ||
This is how the default GitHub schema allows only disabled checkbox inputs: | ||
###### `allowDoctypes` | ||
```js | ||
attributes: { | ||
// … | ||
input: [ | ||
['type', 'checkbox'], | ||
['disabled', true] | ||
] | ||
// … | ||
} | ||
``` | ||
Whether to allow doctype nodes (`boolean`, default: `false`). | ||
This also plays well with properties that accept space- or comma-separated | ||
values, such as `class`. | ||
Say you wanted to allow certain classes on `span` elements for syntax | ||
highlighting, that can be done like this: | ||
For example: | ||
```js | ||
// … | ||
span: [ | ||
['className', 'token', 'number', 'operator'] | ||
] | ||
// … | ||
allowDoctypes: true | ||
``` | ||
###### `required` | ||
###### `ancestors` | ||
Map of tag names to required [*property names*][name] and their default | ||
[*property value*][value] (`Record<string, Record<string, *>>`). | ||
If the defined keys do not exist in an [*element*][element]’s | ||
[*properties*][properties], they are added and set to the specified value. | ||
Map of tag names to a list of tag names which are required ancestors | ||
(`Record<string, Array<string>>`, default: `defaultSchema.ancestors`). | ||
Note that properties are first checked based on the schema at `attributes`, | ||
so *properties* could be removed by that step and then added again through | ||
`required`. | ||
Elements with these tag names will be ignored if they occur outside of one | ||
of their allowed parents. | ||
For example: | ||
```js | ||
required: { | ||
input: {type: 'checkbox', disabled: true} | ||
ancestors: { | ||
tbody: ['table'], | ||
// … | ||
tr: ['table'] | ||
} | ||
``` | ||
###### `tagNames` | ||
###### `attributes` | ||
List of allowed tag names (`Array<string>`). | ||
Map of tag names to allowed [property names][name] | ||
(`Record<string, Array<[string, ...Array<RegExp | boolean | number | string>] | string>>`, | |
default: `defaultSchema.attributes`). | ||
```js | ||
tagNames: [ | ||
'h1', | ||
'h2', | ||
'h3', | ||
// … | ||
'strike', | ||
'summary', | ||
'details' | ||
] | ||
``` | ||
The special key `'*'` as a tag name defines property names allowed on all | ||
elements. | ||
###### `protocols` | ||
The special value `'data*'` as a property name can be used to allow all `data` | ||
properties. | ||
Map of protocols to allow in [*property values*][value] | ||
(`Record<string, Array<string>>`). | ||
For example: | ||
```js | ||
protocols: { | ||
href: ['http', 'https', 'mailto'], | ||
attributes: { | ||
a: [ | ||
'ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy', /* … */, 'href' | ||
], | ||
// … | ||
longDesc: ['http', 'https'] | ||
'*': [ | ||
'abbr', | ||
'accept', | ||
'acceptCharset', | ||
// … | ||
'vAlign', | ||
'value', | ||
'width' | ||
] | ||
} | ||
``` | ||
###### `ancestors` | ||
Instead of a single string in the array, which allows any property value for | ||
the field, you can use an array to allow several values. | ||
For example, `input: ['type']` allows `type` set to any value on `input`s. | ||
But `input: [['type', 'checkbox', 'radio']]` allows `type` when set to | ||
`'checkbox'` or `'radio'`. | ||
Map of tag names to their required [*ancestor*][ancestor] [*elements*][element] | ||
(`Record<string, Array<string>>`). | ||
You can use regexes, so for example `span: [['className', /^hljs-/]]` allows | ||
any class that starts with `hljs-` on `span`s. | ||
```js | ||
ancestors: { | ||
li: ['ol', 'ul'], | ||
// … | ||
tr: ['table'] | ||
} | ||
``` | ||
When comma- or space-separated values are used (such as `className`), each | ||
value is checked individually. | |
For example, to allow certain classes on `span`s for syntax highlighting, use | ||
`span: [['className', 'number', 'operator', 'token']]`. | ||
This will allow `'number'`, `'operator'`, and `'token'` classes, but drop | ||
others. | ||
###### `clobber` | ||
List of allowed [*property names*][name] which can clobber (`Array<string>`). | ||
List of [*property names*][name] that clobber (`Array<string>`, default: | ||
`defaultSchema.clobber`). | ||
For example: | ||
```js | ||
clobber: ['name', 'id'] | ||
clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name'] | ||
``` | ||
@@ -303,4 +288,7 @@ | ||
Prefix to use before potentially clobbering [*property names*][name] (`string`). | ||
Prefix to use before clobbering properties (`string`, default: | ||
`defaultSchema.clobberPrefix`). | ||
For example: | ||
```js | ||
@@ -310,46 +298,97 @@ clobberPrefix: 'user-content-' | ||
###### `strip` | ||
###### `protocols` | ||
Names of [*elements*][element] to strip from the [*tree*][tree] | ||
(`Array<string>`). | ||
Map of [*property names*][name] to allowed protocols | ||
(`Record<string, Array<string>>`, default: `defaultSchema.protocols`). | ||
By default, unsafe *elements* are replaced by their [*children*][child]. | ||
Some *elements*, should however be entirely stripped from the *tree*. | ||
This defines URLs that are always allowed to have local URLs (relative to | ||
the current website, such as `this`, `#this`, `/this`, or `?this`), and | ||
only allowed to have remote URLs (such as `https://example.com`) if they | ||
use a known protocol. | ||
For example: | ||
```js | ||
strip: ['script'] | ||
protocols: { | ||
cite: ['http', 'https'], | ||
// … | ||
src: ['http', 'https'] | ||
} | ||
``` | ||
###### `allowComments` | ||
###### `required` | ||
Whether to allow [*comments*][comment] (`boolean`, default: `false`). | ||
Map of tag names to required [*property names*][name] with a default value | ||
(`Record<string, Record<string, unknown>>`, default: `defaultSchema.required`). | ||
This defines properties that must be set. | ||
If a field does not exist (after the element was made safe), these will be | ||
added with the given value. | ||
For example: | ||
```js | ||
allowComments: true | ||
required: { | ||
input: {disabled: true, type: 'checkbox'} | ||
} | ||
``` | ||
###### `allowDoctypes` | ||
> 👉 **Note**: properties are first checked based on `schema.attributes`, | ||
> then on `schema.required`. | ||
> That means properties could be removed by `attributes` and then added | ||
> again with `required`. | ||
Whether to allow [*doctypes*][doctype] (`boolean`, default: `false`). | ||
###### `strip` | ||
List of tag names to strip from the tree (`Array<string>`, default: | ||
`defaultSchema.strip`). | ||
By default, unsafe elements (those not in `schema.tagNames`) are replaced by | ||
what they contain. | ||
This option can drop their contents. | ||
For example: | ||
```js | ||
allowDoctypes: true | ||
strip: ['script'] | ||
``` | ||
###### `tagNames` | ||
List of allowed tag names (`Array<string>`, default: `defaultSchema.tagNames`). | ||
For example: | ||
```js | ||
tagNames: [ | ||
'a', | ||
'b', | ||
// … | ||
'ul', | ||
'var' | ||
] | ||
``` | ||
## Types | ||
This package is fully typed with [TypeScript][]. | ||
It exports the additional type `Schema`. | ||
It exports the additional type [`Schema`][api-schema]. | ||
## Compatibility | ||
Projects maintained by the unified collective are compatible with all maintained | ||
Projects maintained by the unified collective are compatible with maintained | ||
versions of Node.js. | ||
As of now, that is Node.js 14.14+ and 16.0+. | ||
Our projects sometimes work with older versions, but this is not guaranteed. | ||
When we cut a new major release, we drop support for unmaintained versions of | ||
Node. | ||
This means we try to keep the current release line, `hast-util-sanitize@^5`, | ||
compatible with Node.js 16. | ||
## Security | ||
By default, `hast-util-sanitize` will make everything safe to use. | ||
But when used incorrectly, deviating from the defaults can open you up to a | ||
This assumes you understand that certain attributes (including a limited set of | |
classes) can be generated by users, and that you write your CSS (and JS) | |
accordingly. | |
When used incorrectly, deviating from the defaults can open you up to a | ||
[cross-site scripting (XSS)][xss] attack. | ||
@@ -393,5 +432,5 @@ | ||
[size-badge]: https://img.shields.io/bundlephobia/minzip/hast-util-sanitize.svg | ||
[size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=hast-util-sanitize | ||
[size]: https://bundlephobia.com/result?p=hast-util-sanitize | ||
[size]: https://bundlejs.com/?q=hast-util-sanitize | ||
@@ -428,8 +467,2 @@ [sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg | ||
[tree]: https://github.com/syntax-tree/unist#tree | ||
[child]: https://github.com/syntax-tree/unist#child | ||
[ancestor]: https://github.com/syntax-tree/unist#ancestor | ||
[hast]: https://github.com/syntax-tree/hast | ||
@@ -439,22 +472,14 @@ | ||
[element]: https://github.com/syntax-tree/hast#element | ||
[doctype]: https://github.com/syntax-tree/hast#doctype | ||
[comment]: https://github.com/syntax-tree/hast#comment | ||
[properties]: https://github.com/syntax-tree/hast#properties | ||
[name]: https://github.com/syntax-tree/hast#propertyname | ||
[value]: https://github.com/syntax-tree/hast#propertyvalue | ||
[github]: https://github.com/gjtorikian/html-pipeline/blob/a2e02ac/lib/html_pipeline/sanitization_filter.rb | ||
[github]: https://github.com/jch/html-pipeline/blob/HEAD/lib/html/pipeline/sanitization_filter.rb | ||
[xss]: https://en.wikipedia.org/wiki/Cross-site_scripting | ||
[default-schema]: lib/schema.js | ||
[rehype-sanitize]: https://github.com/rehypejs/rehype-sanitize | ||
[schema]: #schema | ||
[api-default-schema]: #defaultschema | ||
[rehype-sanitize]: https://github.com/rehypejs/rehype-sanitize | ||
[api-sanitize]: #sanitizetree-options | ||
[api-schema]: #schema |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
45962
1125
471
3
18
1
+ Added unist-util-position@^5.0.0
+ Added @types/hast@3.0.4 (transitive)
+ Added @types/unist@3.0.3 (transitive)
+ Added @ungap/structured-clone@1.2.0 (transitive)
+ Added unist-util-position@5.0.0 (transitive)
- Removed @types/hast@2.3.10 (transitive)
- Removed @types/unist@2.0.11 (transitive)
Updated @types/hast@^3.0.0