Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

hast-util-sanitize

Package Overview
Dependencies
Maintainers
2
Versions
20
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hast-util-sanitize - npm Package Compare versions

Comparing version 4.1.0 to 5.0.0

6

index.d.ts

@@ -1,3 +0,3 @@

export {sanitize} from './lib/index.js'
export {defaultSchema} from './lib/schema.js'
export type Schema = import('./lib/index.js').Schema
export { sanitize } from "./lib/index.js";
export { defaultSchema } from "./lib/schema.js";
export type Schema = import('./lib/index.js').Schema;
/**
* Utility to sanitize a tree
* Sanitize a tree.
*
* @param {Node} node
* Hast tree to sanitize
* @param {Schema} [schema]
* Schema defining how to sanitize - defaults to GitHub-style sanitization
* @param {Readonly<Nodes>} node
* Unsafe tree.
* @param {Readonly<Schema> | null | undefined} [options]
* Configuration (default: `defaultSchema`).
* @returns {Nodes}
* New, safe tree.
*/
export function sanitize(node: Node, schema?: Schema | undefined): Node
export type Root = import('hast').Root
export type Content = import('hast').Content
export type Element = import('hast').Element
export type Properties = import('hast').Properties
export type Node = Content | Root
export function sanitize(node: Readonly<Nodes>, options?: Readonly<Schema> | null | undefined): Nodes;
export type Comment = import('hast').Comment;
export type Doctype = import('hast').Doctype;
export type Element = import('hast').Element;
export type ElementContent = import('hast').ElementContent;
export type Nodes = import('hast').Nodes;
export type Properties = import('hast').Properties;
export type Root = import('hast').Root;
export type RootContent = import('hast').RootContent;
export type Text = import('hast').Text;
/**
* Possible property values.
* Definition for a property.
*/
export type PropertyValue = Properties[string]
export type PropertyDefinition = [string, ...Array<Exclude<Properties[keyof Properties], Array<any>> | RegExp>] | string;
/**
* Possible primitive HTML attribute values.
* Schema that defines what nodes and properties are allowed.
*
* The default schema is `defaultSchema`, which follows how GitHub cleans.
* If any top-level key is missing in the given schema, the corresponding
* value of the default schema is used.
*
* To extend the standard schema with a few changes, clone `defaultSchema`
* like so:
*
* ```js
* import deepmerge from 'deepmerge'
* import {h} from 'hastscript'
* import {defaultSchema, sanitize} from 'hast-util-sanitize'
*
* // This allows `className` on all elements.
* const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}})
*
* const tree = sanitize(h('div', {className: ['foo']}), schema)
*
* // `tree` still has `className`.
* console.log(tree)
* // {
* // type: 'element',
* // tagName: 'div',
* // properties: {className: ['foo']},
* // children: []
* // }
* ```
*/
export type PrimitivePropertyValue = string | number | boolean
export type Schema = {
/**
* Whether to allow comment nodes (default: `false`).
*
* For example:
*
* ```js
* allowComments: true
* ```
*/
allowComments?: boolean | null | undefined;
/**
* Whether to allow doctype nodes (default: `false`).
*
* For example:
*
* ```js
* allowDoctypes: true
* ```
*/
allowDoctypes?: boolean | null | undefined;
/**
* Map of tag names to a list of tag names which are required ancestors
* (default: `defaultSchema.ancestors`).
*
* Elements with these tag names will be ignored if they occur outside of one
* of their allowed parents.
*
* For example:
*
* ```js
* ancestors: {
* tbody: ['table'],
* // …
* tr: ['table']
* }
* ```
*/
ancestors?: Record<string, Array<string>> | null | undefined;
/**
* Map of tag names to allowed property names (default:
* `defaultSchema.attributes`).
*
* The special key `'*'` as a tag name defines property names allowed on all
* elements.
*
* The special value `'data*'` as a property name can be used to allow all
* `data` properties.
*
* For example:
*
* ```js
* attributes: {
* a: [
* 'ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy', …, 'href'
* ],
* // …
* '*': [
* 'abbr',
* 'accept',
* 'acceptCharset',
* // …
* 'vAlign',
* 'value',
* 'width'
* ]
* }
* ```
*
* Instead of a single string in the array, which allows any property value
* for the field, you can use an array to allow several values.
* For example, `input: ['type']` allows `type` set to any value on `input`s.
* But `input: [['type', 'checkbox', 'radio']]` allows `type` when set to
* `'checkbox'` or `'radio'`.
*
* You can use regexes, so for example `span: [['className', /^hljs-/]]`
* allows any class that starts with `hljs-` on `span`s.
*
* When comma- or space-separated values are used (such as `className`), each
* value is checked individually.
* For example, to allow certain classes on `span`s for syntax highlighting,
* use `span: [['className', 'number', 'operator', 'token']]`.
* This will allow `'number'`, `'operator'`, and `'token'` classes, but drop
* others.
*/
attributes?: Record<string, Array<PropertyDefinition>> | null | undefined;
/**
* List of property names that clobber (default: `defaultSchema.clobber`).
*
* For example:
*
* ```js
* clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name']
* ```
*/
clobber?: Array<string> | null | undefined;
/**
* Prefix to use before clobbering properties (default:
* `defaultSchema.clobberPrefix`).
*
* For example:
*
* ```js
* clobberPrefix: 'user-content-'
* ```
*/
clobberPrefix?: string | null | undefined;
/**
* Map of *property names* to allowed protocols (default:
* `defaultSchema.protocols`).
*
* This defines URLs that are always allowed to have local URLs (relative to
* the current website, such as `this`, `#this`, `/this`, or `?this`), and
* only allowed to have remote URLs (such as `https://example.com`) if they
* use a known protocol.
*
* For example:
*
* ```js
* protocols: {
* cite: ['http', 'https'],
* // …
* src: ['http', 'https']
* }
* ```
*/
protocols?: Record<string, Array<string> | null | undefined> | null | undefined;
/**
* Map of tag names to required property names with a default value
* (default: `defaultSchema.required`).
*
* This defines properties that must be set.
* If a field does not exist (after the element was made safe), these will be
* added with the given value.
*
* For example:
*
* ```js
* required: {
* input: {disabled: true, type: 'checkbox'}
* }
* ```
*
* > 👉 **Note**: properties are first checked based on `schema.attributes`,
* > then on `schema.required`.
* > That means properties could be removed by `attributes` and then added
* > again with `required`.
*/
required?: Record<string, Record<string, Properties[keyof Properties]>> | null | undefined;
/**
* List of tag names to strip from the tree (default: `defaultSchema.strip`).
*
* By default, unsafe elements (those not in `schema.tagNames`) are replaced
* by what they contain.
* This option can drop their contents.
*
* For example:
*
* ```js
* strip: ['script']
* ```
*/
strip?: Array<string> | null | undefined;
/**
* List of allowed tag names (default: `defaultSchema.tagNames`).
*
* For example:
*
* ```js
* tagNames: [
* 'a',
* 'b',
* // …
* 'ul',
* 'var'
* ]
* ```
*/
tagNames?: Array<string> | null | undefined;
};
/**
* Map of tag names to allow lists for each property.
* Info passed around.
*/
export type Attributes = Record<
string,
Array<string | [string, ...Array<PrimitivePropertyValue | RegExp>]>
>
/**
* Normalized input.
*/
export type AttributeClean = Record<
string,
Array<PrimitivePropertyValue | RegExp>
>
/**
* Sanitization configuration.
*/
export type Schema = {
/**
* Map of tag names to allowed properties.
*
* The special `'*'` key defines property names allowed on all elements.
*/
attributes?: Attributes | undefined
/**
* Map of tag names to required property names and their default property value.
*/
required?: Record<string, Record<string, PropertyValue>> | undefined
/**
* List of allowed tag names.
*/
tagNames?: Array<string> | undefined
/**
* Map of protocols to allow in property values.
*/
protocols?: Record<string, Array<string>> | undefined
/**
* Map of tag names to their required ancestor elements.
*/
ancestors?: Record<string, Array<string>> | undefined
/**
* List of allowed property names which can clobber.
*/
clobber?: Array<string> | undefined
/**
* Prefix to use before potentially clobbering property names.
*/
clobberPrefix?: string | undefined
/**
* Names of elements to strip from the tree.
*/
strip?: Array<string> | undefined
/**
* Whether to allow comments.
*/
allowComments?: boolean | undefined
/**
* Whether to allow doctypes.
*/
allowDoctypes?: boolean | undefined
}
/**
 * Function that handles a single field of a node.
 *
 * Receives the schema, the value of the field, the node the field is on, and
 * a stack of tag names.
 */
export type Handler = (
schema: Schema,
value: any,
node: any,
stack: Array<string>
) => unknown
/**
 * Map of field names to the handler for that field.
 */
export type NodeDefinition = Record<string, Handler>
/**
 * Function that produces the definition to use for a node under a given
 * schema, or `undefined` when there is none.
 */
export type NodeDefinitionGetter = (
schema: Schema,
node: Node
) => NodeDefinition | undefined
/**
 * Map of keys to either a definition or a getter that yields one.
 */
export type NodeSchema = Record<string, NodeDefinition | NodeDefinitionGetter>
export type State = {
/**
* Schema.
*/
schema: Readonly<Schema>;
/**
* Tag names of ancestors.
*/
stack: Array<string>;
};
/**
* @typedef {import('hast').Root} Root
* @typedef {import('hast').Content} Content
* @typedef {import('hast').Comment} Comment
* @typedef {import('hast').Doctype} Doctype
* @typedef {import('hast').Element} Element
* @typedef {import('hast').ElementContent} ElementContent
* @typedef {import('hast').Nodes} Nodes
* @typedef {import('hast').Properties} Properties
* @typedef {Content | Root} Node
* @typedef {import('hast').Root} Root
* @typedef {import('hast').RootContent} RootContent
* @typedef {import('hast').Text} Text
*/
/**
* @typedef {[string, ...Array<Exclude<Properties[keyof Properties], Array<any>> | RegExp>] | string} PropertyDefinition
* Definition for a property.
*
* @typedef {Properties[string]} PropertyValue
* Possible property values.
* @typedef {string | number | boolean} PrimitivePropertyValue
* Possible primitive HTML attribute values.
* @typedef Schema
* Schema that defines what nodes and properties are allowed.
*
* @typedef {Record<string, Array<string | [string, ...Array<PrimitivePropertyValue | RegExp>]>>} Attributes
* Map of tag names to allow lists for each property.
* @typedef {Record<string, Array<PrimitivePropertyValue | RegExp>>} AttributeClean
* Normalized input.
* The default schema is `defaultSchema`, which follows how GitHub cleans.
* If any top-level key is missing in the given schema, the corresponding
* value of the default schema is used.
*
* @typedef Schema
* Sanitization configuration.
* @property {Attributes | undefined} [attributes]
* Map of tag names to allowed properties.
* To extend the standard schema with a few changes, clone `defaultSchema`
* like so:
*
* The special `'*'` key defines property names allowed on all elements.
* @property {Record<string, Record<string, PropertyValue>> | undefined} [required]
* Map of tag names to required property names and their default property value.
* @property {Array<string> | undefined} [tagNames]
* List of allowed tag names.
* @property {Record<string, Array<string>> | undefined} [protocols]
* Map of protocols to allow in property values.
* @property {Record<string, Array<string>> | undefined} [ancestors]
* Map of tag names to their required ancestor elements.
* @property {Array<string> | undefined} [clobber]
* List of allowed property names which can clobber.
* @property {string | undefined} [clobberPrefix]
* Prefix to use before potentially clobbering property names.
* @property {Array<string> | undefined} [strip]
* Names of elements to strip from the tree.
* @property {boolean | undefined} [allowComments]
* Whether to allow comments.
* @property {boolean | undefined} [allowDoctypes]
* Whether to allow doctypes.
* ```js
* import deepmerge from 'deepmerge'
* import {h} from 'hastscript'
* import {defaultSchema, sanitize} from 'hast-util-sanitize'
*
* @typedef {(schema: Schema, value: any, node: any, stack: Array<string>) => unknown} Handler
* @typedef {Record<string, Handler>} NodeDefinition
* @typedef {((schema: Schema, node: Node) => NodeDefinition | undefined)} NodeDefinitionGetter
* @typedef {Record<string, NodeDefinition | NodeDefinitionGetter>} NodeSchema
* // This allows `className` on all elements.
* const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}})
*
* const tree = sanitize(h('div', {className: ['foo']}), schema)
*
* // `tree` still has `className`.
* console.log(tree)
* // {
* // type: 'element',
* // tagName: 'div',
* // properties: {className: ['foo']},
* // children: []
* // }
* ```
* @property {boolean | null | undefined} [allowComments=false]
* Whether to allow comment nodes (default: `false`).
*
* For example:
*
* ```js
* allowComments: true
* ```
* @property {boolean | null | undefined} [allowDoctypes=false]
* Whether to allow doctype nodes (default: `false`).
*
* For example:
*
* ```js
* allowDoctypes: true
* ```
* @property {Record<string, Array<string>> | null | undefined} [ancestors]
* Map of tag names to a list of tag names which are required ancestors
* (default: `defaultSchema.ancestors`).
*
* Elements with these tag names will be ignored if they occur outside of one
* of their allowed parents.
*
* For example:
*
* ```js
* ancestors: {
* tbody: ['table'],
* // …
* tr: ['table']
* }
* ```
* @property {Record<string, Array<PropertyDefinition>> | null | undefined} [attributes]
* Map of tag names to allowed property names (default:
* `defaultSchema.attributes`).
*
* The special key `'*'` as a tag name defines property names allowed on all
* elements.
*
* The special value `'data*'` as a property name can be used to allow all
* `data` properties.
*
* For example:
*
* ```js
* attributes: {
* a: [
* 'ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy', …, 'href'
* ],
* // …
* '*': [
* 'abbr',
* 'accept',
* 'acceptCharset',
* // …
* 'vAlign',
* 'value',
* 'width'
* ]
* }
* ```
*
* Instead of a single string in the array, which allows any property value
* for the field, you can use an array to allow several values.
* For example, `input: ['type']` allows `type` set to any value on `input`s.
* But `input: [['type', 'checkbox', 'radio']]` allows `type` when set to
* `'checkbox'` or `'radio'`.
*
* You can use regexes, so for example `span: [['className', /^hljs-/]]`
* allows any class that starts with `hljs-` on `span`s.
*
* When comma- or space-separated values are used (such as `className`), each
* value is checked individually.
* For example, to allow certain classes on `span`s for syntax highlighting,
* use `span: [['className', 'number', 'operator', 'token']]`.
* This will allow `'number'`, `'operator'`, and `'token'` classes, but drop
* others.
* @property {Array<string> | null | undefined} [clobber]
* List of property names that clobber (default: `defaultSchema.clobber`).
*
* For example:
*
* ```js
* clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name']
* ```
* @property {string | null | undefined} [clobberPrefix]
* Prefix to use before clobbering properties (default:
* `defaultSchema.clobberPrefix`).
*
* For example:
*
* ```js
* clobberPrefix: 'user-content-'
* ```
* @property {Record<string, Array<string> | null | undefined> | null | undefined} [protocols]
* Map of *property names* to allowed protocols (default:
* `defaultSchema.protocols`).
*
* This defines URLs that are always allowed to have local URLs (relative to
* the current website, such as `this`, `#this`, `/this`, or `?this`), and
* only allowed to have remote URLs (such as `https://example.com`) if they
* use a known protocol.
*
* For example:
*
* ```js
* protocols: {
* cite: ['http', 'https'],
* // …
* src: ['http', 'https']
* }
* ```
* @property {Record<string, Record<string, Properties[keyof Properties]>> | null | undefined} [required]
* Map of tag names to required property names with a default value
* (default: `defaultSchema.required`).
*
* This defines properties that must be set.
* If a field does not exist (after the element was made safe), these will be
* added with the given value.
*
* For example:
*
* ```js
* required: {
* input: {disabled: true, type: 'checkbox'}
* }
* ```
*
* > 👉 **Note**: properties are first checked based on `schema.attributes`,
* > then on `schema.required`.
* > That means properties could be removed by `attributes` and then added
* > again with `required`.
* @property {Array<string> | null | undefined} [strip]
* List of tag names to strip from the tree (default: `defaultSchema.strip`).
*
* By default, unsafe elements (those not in `schema.tagNames`) are replaced
* by what they contain.
* This option can drop their contents.
*
* For example:
*
* ```js
* strip: ['script']
* ```
* @property {Array<string> | null | undefined} [tagNames]
* List of allowed tag names (default: `defaultSchema.tagNames`).
*
* For example:
*
* ```js
* tagNames: [
* 'a',
* 'b',
* // …
* 'ul',
* 'var'
* ]
* ```
*
* @typedef State
* Info passed around.
* @property {Readonly<Schema>} schema
* Schema.
* @property {Array<string>} stack
* Tag names of ancestors.
*/
import structuredClone from '@ungap/structured-clone'
import {position} from 'unist-util-position'
import {defaultSchema} from './schema.js'

@@ -53,46 +217,32 @@

/**
 * Table of field handlers, keyed by node `type`.
 *
 * Each entry is either a map of field names to handlers, or a function that
 * takes the schema and returns such a map (or `undefined`).
 *
 * @type {NodeSchema}
 */
const nodeSchema = {
root: {children: all},
// These two are functions of the schema rather than fixed maps: they read
// `allowDoctypes` / `allowComments`.
doctype: handleDoctype,
comment: handleComment,
element: {
tagName: handleTagName,
properties: handleProperties,
children: all
},
text: {value: handleValue},
// Fields passed through unchanged by `allow`; this entry is keyed `'*'`
// instead of a concrete node type.
'*': {data: allow, position: allow}
}
/**
* Utility to sanitize a tree
* Sanitize a tree.
*
* @param {Node} node
* Hast tree to sanitize
* @param {Schema} [schema]
* Schema defining how to sanitize - defaults to GitHub-style sanitization
* @param {Readonly<Nodes>} node
* Unsafe tree.
* @param {Readonly<Schema> | null | undefined} [options]
* Configuration (default: `defaultSchema`).
* @returns {Nodes}
* New, safe tree.
*/
export function sanitize(node, schema) {
/** @type {Node} */
export function sanitize(node, options) {
/** @type {Nodes} */
let ctx = {type: 'root', children: []}
if (node && typeof node === 'object' && node.type) {
const replace = one(
Object.assign({}, defaultSchema, schema || {}),
node,
[]
)
/** @type {State} */
const state = {
schema: options ? {...defaultSchema, ...options} : defaultSchema,
stack: []
}
const replace = transform(state, node)
if (replace) {
if (Array.isArray(replace)) {
if (replace.length === 1) {
ctx = replace[0]
} else {
// @ts-expect-error Assume `root` is not a child.
ctx.children = replace
}
if (replace) {
if (Array.isArray(replace)) {
if (replace.length === 1) {
ctx = replace[0]
} else {
ctx = replace
ctx.children = replace
}
} else {
ctx = replace
}

@@ -107,48 +257,129 @@ }

*
* @param {Schema} schema
* @param {Node} node
* @param {Array<string>} stack
* @returns {Node | Array<Node> | undefined}
* @param {State} state
* Info passed around.
* @param {Readonly<unknown>} node
* Unsafe node.
* @returns {Array<ElementContent> | Nodes | undefined}
* Safe result.
*/
function one(schema, node, stack) {
const type = node && node.type
/** @type {Node} */
// @ts-expect-error rest of props added later.
const replacement = {type: node.type}
/** @type {boolean | undefined} */
let replace
function transform(state, node) {
if (node && typeof node === 'object') {
const unsafe = /** @type {Record<string, Readonly<unknown>>} */ (node)
const type = typeof unsafe.type === 'string' ? unsafe.type : ''
if (own.call(nodeSchema, type)) {
/** @type {NodeDefinition | NodeDefinitionGetter | undefined} */
let definition = nodeSchema[type]
switch (type) {
case 'comment': {
return comment(state, unsafe)
}
if (typeof definition === 'function') {
definition = definition(schema, node)
case 'doctype': {
return doctype(state, unsafe)
}
case 'element': {
return element(state, unsafe)
}
case 'root': {
return root(state, unsafe)
}
case 'text': {
return text(state, unsafe)
}
default:
}
}
}
if (definition) {
const allowed = Object.assign({}, definition, nodeSchema['*'])
/** @type {string} */
let key
/**
* Make a safe comment.
*
* @param {State} state
* Info passed around.
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe
* Unsafe comment-like value.
* @returns {Comment | undefined}
* Safe comment (if with `allowComments`).
*/
function comment(state, unsafe) {
if (state.schema.allowComments) {
// See <https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments>
const result = typeof unsafe.value === 'string' ? unsafe.value : ''
const index = result.indexOf('-->')
const value = index < 0 ? result : result.slice(0, index)
replace = true
/** @type {Comment} */
const node = {type: 'comment', value}
for (key in allowed) {
if (own.call(allowed, key)) {
// @ts-expect-error: fine.
// type-coverage:ignore-next-line
const result = allowed[key](schema, node[key], node, stack)
patch(node, unsafe)
// eslint-disable-next-line max-depth
if (result === false) {
replace = undefined
// Set the non-safe value.
// @ts-expect-error: fine.
// type-coverage:ignore-next-line
replacement[key] = node[key]
} else if (result !== undefined && result !== null) {
// @ts-expect-error: fine.
// type-coverage:ignore-next-line
replacement[key] = result
}
return node
}
}
/**
 * Build a clean doctype node, when the schema permits doctypes.
 *
 * @param {State} state
 *   Info passed around; only `state.schema.allowDoctypes` is read here.
 * @param {Readonly<Record<string, Readonly<unknown>>>} unsafe
 *   Unsafe doctype-like value; only handed on to `patch`.
 * @returns {Doctype | undefined}
 *   Fresh doctype node, or `undefined` when `allowDoctypes` is falsy.
 */
function doctype(state, unsafe) {
  // Doctypes are opt-in: without the flag, nothing is produced.
  if (!state.schema.allowDoctypes) {
    return undefined
  }

  /** @type {Doctype} */
  const safe = {type: 'doctype'}
  patch(safe, unsafe)
  return safe
}
/**
* Make a safe element.
*
* @param {State} state
* Info passed around.
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe
* Unsafe element-like value.
* @returns {Array<ElementContent> | Element | undefined}
* Safe element.
*/
function element(state, unsafe) {
const name = typeof unsafe.tagName === 'string' ? unsafe.tagName : ''
state.stack.push(name)
const content = /** @type {Array<ElementContent>} */ (
children(state, unsafe.children)
)
const props = properties(state, unsafe.properties)
state.stack.pop()
let safeElement = false
if (
name.length > 0 &&
name !== '*' &&
(!state.schema.tagNames || state.schema.tagNames.includes(name))
) {
safeElement = true
// Some nodes can break out of their context if they don’t have a certain
// ancestor.
if (state.schema.ancestors && own.call(state.schema.ancestors, name)) {
const ancestors = state.schema.ancestors[name]
let index = -1
safeElement = false
while (++index < ancestors.length) {
if (state.stack.includes(ancestors[index])) {
safeElement = true
}

@@ -159,35 +390,85 @@ }

if (replace) {
return replacement
if (!safeElement) {
return state.schema.strip && !state.schema.strip.includes(name)
? content
: undefined
}
return replacement.type === 'element' &&
schema.strip &&
!schema.strip.includes(replacement.tagName)
? replacement.children
: undefined
/** @type {Element} */
const node = {
type: 'element',
tagName: name,
properties: props,
children: content
}
patch(node, unsafe)
return node
}
/**
* Sanitize `children`.
* Make a safe root.
*
* @type {Handler}
* @param {Array<Node>} children
* @param {Node} node
* @returns {Array<Node>}
* @param {State} state
* Info passed around.
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe
* Unsafe root-like value.
* @returns {Root}
* Safe root.
*/
function all(schema, children, node, stack) {
/** @type {Array<Node>} */
function root(state, unsafe) {
const content = /** @type {Array<RootContent>} */ (
children(state, unsafe.children)
)
/** @type {Root} */
const node = {type: 'root', children: content}
patch(node, unsafe)
return node
}
/**
 * Build a clean text node.
 *
 * @param {State} _
 *   Info passed around (unused).
 * @param {Readonly<Record<string, Readonly<unknown>>>} unsafe
 *   Unsafe text-like value.
 * @returns {Text}
 *   Fresh text node; its `value` is `unsafe.value` when that is a string and
 *   the empty string otherwise.
 */
function text(_, unsafe) {
  /** @type {Text} */
  const node = {
    type: 'text',
    // Anything that is not a string is replaced by an empty string.
    value: typeof unsafe.value === 'string' ? unsafe.value : ''
  }

  patch(node, unsafe)
  return node
}
/**
* Make children safe.
*
* @param {State} state
* Info passed around.
* @param {Readonly<unknown>} children
* Unsafe value.
* @returns {Array<Nodes>}
* Safe children.
*/
function children(state, children) {
/** @type {Array<Nodes>} */
const results = []
if (Array.isArray(children)) {
const childrenUnknown = /** @type {Array<Readonly<unknown>>} */ (children)
let index = -1
if (node.type === 'element') {
stack.push(node.tagName)
}
while (++index < childrenUnknown.length) {
const value = transform(state, childrenUnknown[index])
while (++index < children.length) {
const value = one(schema, children[index], stack)
if (value) {

@@ -201,6 +482,2 @@ if (Array.isArray(value)) {

}
if (node.type === 'element') {
stack.pop()
}
}

@@ -211,31 +488,24 @@

/**
 * Get the field handlers for a doctype node.
 *
 * Yields a definition that handles `name` with `handleDoctypeName` when
 * `schema.allowDoctypes` is truthy, and `undefined` otherwise.
 *
 * @type {NodeDefinitionGetter}
 */
function handleDoctype(schema) {
  if (schema.allowDoctypes) {
    return {name: handleDoctypeName}
  }

  return undefined
}
/**
 * Get the field handlers for a comment node.
 *
 * Yields a definition that handles `value` with `handleCommentValue` when
 * `schema.allowComments` is truthy, and `undefined` otherwise.
 *
 * @type {NodeDefinitionGetter}
 */
function handleComment(schema) {
  if (schema.allowComments) {
    return {value: handleCommentValue}
  }

  return undefined
}
/**
* Sanitize `properties`.
* Make element properties safe.
*
* @type {Handler}
* @param {Properties} properties
* @param {Element} node
* @param {State} state
* Info passed around.
* @param {Readonly<unknown>} properties
* Unsafe value.
* @returns {Properties}
* Safe value.
*/
function handleProperties(schema, properties, node, stack) {
const name = handleTagName(schema, node.tagName, node, stack)
/* c8 ignore next */
const attrs = schema.attributes || {}
/* c8 ignore next */
const reqs = schema.required || {}
const props = properties || {}
const allowed = Object.assign(
{},
toPropertyValueMap(attrs['*']),
toPropertyValueMap(name && own.call(attrs, name) ? attrs[name] : [])
function properties(state, properties) {
const tagName = state.stack[state.stack.length - 1]
const attributes = state.schema.attributes
const required = state.schema.required
const specific =
attributes && own.call(attributes, tagName)
? attributes[tagName]
: undefined
const defaults =
attributes && own.call(attributes, '*') ? attributes['*'] : undefined
const props = /** @type {Readonly<Record<string, Readonly<unknown>>>} */ (
properties && typeof properties === 'object' ? properties : {}
)

@@ -249,20 +519,17 @@ /** @type {Properties} */

if (own.call(props, key)) {
let value = props[key]
/** @type {AttributeClean[string]} */
/** @type {Readonly<PropertyDefinition> | undefined} */
let definition
if (own.call(allowed, key)) {
definition = allowed[key]
} else if (data(key) && own.call(allowed, 'data*')) {
definition = allowed['data*']
} else {
continue
}
if (specific) definition = findDefinition(specific, key)
if (!definition && defaults) definition = findDefinition(defaults, key)
value = Array.isArray(value)
? handlePropertyValues(schema, value, key, definition)
: handlePropertyValue(schema, value, key, definition)
if (definition) {
const unsafe = props[key]
const safe = Array.isArray(unsafe)
? propertyValues(state, definition, key, unsafe)
: propertyValue(state, definition, key, unsafe)
if (value !== undefined && value !== null) {
result[key] = value
if (safe !== null && safe !== undefined) {
result[key] = safe
}
}

@@ -272,6 +539,8 @@ }

if (name && own.call(reqs, name)) {
for (key in reqs[name]) {
if (!own.call(result, key)) {
result[key] = reqs[name][key]
if (required && own.call(required, tagName)) {
const properties = required[tagName]
for (key in properties) {
if (own.call(properties, key) && !own.call(result, key)) {
result[key] = properties[key]
}

@@ -285,101 +554,24 @@ }

/**
 * Produce the doctype name.
 *
 * Takes no input into account: the result is always the HTML5 doctype name.
 *
 * @type {Handler}
 * @returns {string}
 *   The string `'html'`.
 */
function handleDoctypeName() {
  const name = 'html'
  return name
}
/**
 * Check whether a tag name is allowed.
 *
 * @param {Schema} schema
 *   Schema; `tagNames` and `ancestors` are read.
 * @param {string} tagName
 *   Tag name to check (anything that is not a string is rejected).
 * @param {Node} _
 *   Node the tag name belongs to (unused).
 * @param {Array<string>} stack
 *   Tag names to look for required ancestors in.
 * @returns {string | false}
 *   The tag name when allowed, `false` otherwise.
 */
function handleTagName(schema, tagName, _, stack) {
  // Non-strings, the empty string, and the `*` wildcard are never valid names.
  if (typeof tagName !== 'string' || tagName === '' || tagName === '*') {
    return false
  }

  // With an allow list present, the name has to be on it.
  if (schema.tagNames && !schema.tagNames.includes(tagName)) {
    return false
  }

  // No ancestor requirement registered for this name: accept it.
  if (!schema.ancestors || !own.call(schema.ancestors, tagName)) {
    return tagName
  }

  // Some nodes can break out of their context if they don’t have a certain
  // ancestor: one of the listed names has to be on the stack.
  for (const ancestor of schema.ancestors[tagName]) {
    if (stack.includes(ancestor)) {
      return tagName
    }
  }

  return false
}
/**
 * Clean the value of a comment.
 *
 * Everything from the first `-->` onwards is cut off; anything that is not a
 * string becomes an empty string.
 * See <https://html.spec.whatwg.org/multipage/parsing.html#serialising-html-fragments>
 *
 * @type {Handler}
 * @param {unknown} value
 *   Unsafe comment value.
 * @returns {string}
 *   Safe comment value.
 */
function handleCommentValue(_, value) {
  if (typeof value !== 'string') {
    return ''
  }

  const end = value.indexOf('-->')
  return end === -1 ? value : value.slice(0, end)
}
/**
 * Clean a `value` field.
 *
 * @type {Handler}
 * @param {unknown} value
 *   Unsafe value.
 * @returns {string}
 *   The value itself when it is a string, an empty string otherwise.
 */
function handleValue(_, value) {
  if (typeof value === 'string') {
    return value
  }

  return ''
}
/**
 * Pass a field through untouched.
 *
 * @type {Handler}
 * @param {unknown} value
 *   Value of the field.
 * @returns {unknown}
 *   The same value, unchanged.
 */
function allow(_, value) {
  const passedThrough = value
  return passedThrough
}
/**
* Sanitize a property value which is a list.
*
* @param {Schema} schema
* @param {Array<unknown>} values
* @param {string} prop
* @param {AttributeClean[string]} definition
* @returns {Array<string | number>}
* @param {State} state
* Info passed around.
* @param {Readonly<PropertyDefinition>} definition
* Definition.
* @param {string} key
* Field name.
* @param {Readonly<Array<Readonly<unknown>>>} values
* Unsafe value (but an array).
* @returns {Array<number | string>}
* Safe value.
*/
function handlePropertyValues(schema, values, prop, definition) {
function propertyValues(state, definition, key, values) {
let index = -1
/** @type {Array<string | number>} */
/** @type {Array<number | string>} */
const result = []
while (++index < values.length) {
const value = handlePropertyValue(schema, values[index], prop, definition)
const value = propertyValue(state, definition, key, values[index])
if (value !== undefined && value !== null) {
// @ts-expect-error Assume no booleans were in arrays.
if (typeof value === 'number' || typeof value === 'string') {
result.push(value)

@@ -395,27 +587,57 @@ }

*
* @param {Schema} schema
* @param {unknown} value
* @param {string} prop
* @param {AttributeClean[string]} definition
* @returns {PropertyValue}
* @param {State} state
* Info passed around.
* @param {Readonly<PropertyDefinition>} definition
* Definition.
* @param {string} key
* Field name.
* @param {Readonly<unknown>} value
* Unsafe value (but not an array).
* @returns {boolean | number | string | undefined}
* Safe value.
*/
function handlePropertyValue(schema, value, prop, definition) {
function propertyValue(state, definition, key, value) {
if (
(typeof value === 'boolean' ||
typeof value === 'number' ||
typeof value === 'string') &&
safeProtocol(schema, value, prop) &&
(definition.length === 0 ||
definition.some((allowed) =>
allowed && typeof allowed === 'object' && 'flags' in allowed
? allowed.test(String(value))
: allowed === value
))
typeof value !== 'boolean' &&
typeof value !== 'number' &&
typeof value !== 'string'
) {
return schema.clobberPrefix &&
schema.clobber &&
schema.clobber.includes(prop)
? schema.clobberPrefix + value
: value
return
}
if (!safeProtocol(state, key, value)) {
return
}
// Just a string, or only one item in an array, means all values are OK.
// More than one item means an allow list.
if (typeof definition === 'object' && definition.length > 1) {
let ok = false
let index = 0 // Ignore `key`, which is the first item.
while (++index < definition.length) {
const allowed = definition[index]
// Expression.
if (allowed && typeof allowed === 'object' && 'flags' in allowed) {
if (allowed.test(String(value))) {
ok = true
break
}
}
// Primitive.
else if (allowed === value) {
ok = true
break
}
}
if (!ok) return
}
return state.schema.clobber &&
state.schema.clobberPrefix &&
state.schema.clobber.includes(key)
? state.schema.clobberPrefix + value
: value
}

@@ -426,8 +648,22 @@

*
* @param {Schema} schema
* @param {unknown} value
* @param {string} prop
* @param {State} state
* Info passed around.
* @param {string} key
* Field name.
* @param {Readonly<unknown>} value
* Unsafe value.
* @returns {boolean}
* Whether it’s a safe value.
*/
function safeProtocol(schema, value, prop) {
function safeProtocol(state, key, value) {
const protocols =
state.schema.protocols && own.call(state.schema.protocols, key)
? state.schema.protocols[key]
: undefined
// No protocols defined? Then everything is fine.
if (!protocols || protocols.length === 0) {
return true
}
const url = String(value)

@@ -438,10 +674,4 @@ const colon = url.indexOf(':')

const slash = url.indexOf('/')
const protocols =
schema.protocols && own.call(schema.protocols, prop)
? schema.protocols[prop].concat()
: []
let index = -1
if (
protocols.length === 0 ||
colon < 0 ||

@@ -456,6 +686,10 @@ // If the first colon is after a `?`, `#`, or `/`, it’s not a protocol.

let index = -1
while (++index < protocols.length) {
const protocol = protocols[index]
if (
colon === protocols[index].length &&
url.slice(0, protocols[index].length) === protocols[index]
colon === protocol.length &&
url.slice(0, protocol.length) === protocol
) {

@@ -470,33 +704,49 @@ return true

/**
* Create a map from a list of props or a list of properties and values.
* Add data and position.
*
* @param {Attributes[string]} values
* @returns {AttributeClean}
* @param {Nodes} node
* Node to patch safe data and position on.
* @param {Readonly<Record<string, Readonly<unknown>>>} unsafe
* Unsafe node-like value.
* @returns {undefined}
* Nothing.
*/
function toPropertyValueMap(values) {
/** @type {AttributeClean} */
const result = {}
let index = -1
function patch(node, unsafe) {
const cleanPosition = position(
// @ts-expect-error: looks like a node.
unsafe
)
while (++index < values.length) {
const value = values[index]
if (Array.isArray(value)) {
result[value[0]] = value.slice(1)
} else {
result[value] = []
}
if (unsafe.data) {
node.data = structuredClone(unsafe.data)
}
return result
if (cleanPosition) node.position = cleanPosition
}
/**
* Check if `prop` is a data property.
*
* @param {string} prop
* @returns {boolean}
* @param {Readonly<Array<PropertyDefinition>>} definitions
* @param {string} key
* @returns {Readonly<PropertyDefinition> | undefined}
*/
function data(prop) {
return prop.length > 4 && prop.slice(0, 4).toLowerCase() === 'data'
function findDefinition(definitions, key) {
/** @type {PropertyDefinition | undefined} */
let dataDefault
let index = -1
while (++index < definitions.length) {
const entry = definitions[index]
const name = typeof entry === 'string' ? entry : entry[0]
if (name === key) {
return entry
}
if (name === 'data*') dataDefault = entry
}
if (key.length > 4 && key.slice(0, 4).toLowerCase() === 'data') {
return dataDefault
}
}

@@ -1,2 +0,8 @@

/** @type {import('./index.js').Schema} */
export const defaultSchema: import('./index.js').Schema
/**
* Default schema.
*
* Follows GitHub style sanitation.
*
* @type {import('./index.js').Schema}
*/
export const defaultSchema: import('./index.js').Schema;

@@ -1,96 +0,68 @@

/** @type {import('./index.js').Schema} */
// Couple of ARIA attributes allowed in several, but not all, places.
const aria = ['ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy']
/**
* Default schema.
*
* Follows GitHub style sanitation.
*
* @type {import('./index.js').Schema}
*/
export const defaultSchema = {
strip: ['script'],
clobberPrefix: 'user-content-',
clobber: ['name', 'id'],
ancestors: {
tbody: ['table'],
tfoot: ['table'],
thead: ['table'],
td: ['table'],
th: ['table'],
thead: ['table'],
tfoot: ['table'],
tr: ['table']
},
protocols: {
href: ['http', 'https', 'mailto', 'xmpp', 'irc', 'ircs'],
cite: ['http', 'https'],
src: ['http', 'https'],
longDesc: ['http', 'https']
},
tagNames: [
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'br',
'b',
'i',
'strong',
'em',
'a',
'pre',
'code',
'img',
'tt',
'div',
'ins',
'del',
'sup',
'sub',
'p',
'ol',
'ul',
'table',
'thead',
'tbody',
'tfoot',
'blockquote',
'dl',
'dt',
'dd',
'kbd',
'q',
'samp',
'var',
'hr',
'ruby',
'rt',
'rp',
'li',
'tr',
'td',
'th',
's',
'strike',
'summary',
'details',
'caption',
'figure',
'figcaption',
'abbr',
'bdo',
'cite',
'dfn',
'mark',
'small',
'span',
'time',
'wbr',
'input'
],
attributes: {
a: ['href'],
img: ['src', 'longDesc'],
input: [
['type', 'checkbox'],
['disabled', true]
a: [
...aria,
// Note: these 3 are used by GFM footnotes, they do work on all links.
'dataFootnoteBackref',
'dataFootnoteRef',
['className', 'data-footnote-backref'],
'href'
],
li: [['className', 'task-list-item']],
div: ['itemScope', 'itemType'],
blockquote: ['cite'],
// Note: this class is not normally allowed by GH, when manually writing
// `code` as HTML in markdown, they add it some other way.
// We can’t do that, so we have to allow it.
code: [['className', /^language-./]],
del: ['cite'],
div: ['itemScope', 'itemType'],
dl: [...aria],
// Note: these 2 are used by GFM footnotes, they *sometimes* work.
h2: [
['id', 'footnote-label'],
['className', 'sr-only']
],
img: [...aria, 'longDesc', 'src'],
// Note: `input` is not normally allowed by GH, when manually writing
// it in markdown, they add it from tasklists some other way.
// We can’t do that, so we have to allow it.
input: [
['disabled', true],
['type', 'checkbox']
],
ins: ['cite'],
// Note: this class is not normally allowed by GH, when manually writing
// `li` as HTML in markdown, they add it some other way.
// We can’t do that, so we have to allow it.
li: [['className', 'task-list-item']],
// Note: this class is not normally allowed by GH, when manually writing
// `ol` as HTML in markdown, they add it some other way.
// We can’t do that, so we have to allow it.
ol: [...aria, ['className', 'contains-task-list']],
q: ['cite'],
section: ['dataFootnotes', ['className', 'footnotes']],
source: ['srcSet'],
summary: [...aria],
table: [...aria],
// Note: this class is not normally allowed by GH, when manually writing
// `ul` as HTML in markdown, they add it some other way.
// We can’t do that, so we have to allow it.
ul: [...aria, ['className', 'contains-task-list']],
'*': [

@@ -104,6 +76,2 @@ 'abbr',

'alt',
'ariaDescribedBy',
'ariaHidden',
'ariaLabel',
'ariaLabelledBy',
'axis',

@@ -118,5 +86,5 @@ 'border',

'clear',
'cols',
'colSpan',
'color',
'cols',
'compact',

@@ -128,10 +96,11 @@ 'coords',

'encType',
'htmlFor',
'frame',
'hSpace',
'headers',
'height',
'hrefLang',
'hSpace',
'htmlFor',
'id',
'isMap',
'id',
'itemProp',
'label',

@@ -152,4 +121,4 @@ 'lang',

'rev',
'rowSpan',
'rows',
'rowSpan',
'rules',

@@ -166,17 +135,78 @@ 'scope',

'title',
'type',
'useMap',
'vAlign',
'value',
'vSpace',
'width',
'itemProp'
'width'
]
},
clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name'],
clobberPrefix: 'user-content-',
protocols: {
cite: ['http', 'https'],
href: ['http', 'https', 'irc', 'ircs', 'mailto', 'xmpp'],
longDesc: ['http', 'https'],
src: ['http', 'https']
},
required: {
input: {
type: 'checkbox',
disabled: true
}
}
input: {disabled: true, type: 'checkbox'}
},
strip: ['script'],
tagNames: [
'a',
'b',
'blockquote',
'br',
'code',
'dd',
'del',
'details',
'div',
'dl',
'dt',
'em',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'hr',
'i',
'img',
// Note: `input` is not normally allowed by GH, when manually writing
// it in markdown, they add it from tasklists some other way.
// We can’t do that, so we have to allow it.
'input',
'ins',
'kbd',
'li',
'ol',
'p',
'picture',
'pre',
'q',
'rp',
'rt',
'ruby',
's',
'samp',
'section',
'source',
'span',
'strike',
'strong',
'sub',
'summary',
'sup',
'table',
'tbody',
'td',
'tfoot',
'th',
'thead',
'tr',
'tt',
'ul',
'var'
]
}
{
"name": "hast-util-sanitize",
"version": "4.1.0",
"version": "5.0.0",
"description": "hast utility to sanitize nodes",

@@ -31,4 +31,3 @@ "license": "MIT",

"type": "module",
"main": "index.js",
"types": "index.d.ts",
"exports": "./index.js",
"files": [

@@ -40,17 +39,25 @@ "lib/",

"dependencies": {
"@types/hast": "^2.0.0"
"@types/hast": "^3.0.0",
"@ungap/structured-clone": "^1.2.0",
"unist-util-position": "^5.0.0"
},
"devDependencies": {
"@types/node": "^18.0.0",
"c8": "^7.0.0",
"@types/node": "^20.0.0",
"@types/ungap__structured-clone": "^0.3.0",
"aria-attributes": "^2.0.0",
"c8": "^8.0.0",
"deepmerge": "^4.0.0",
"hast-util-from-html": "^1.0.0",
"hast-util-to-html": "^8.0.0",
"hastscript": "^7.0.0",
"prettier": "^2.0.0",
"hastscript": "^8.0.0",
"html-element-attributes": "^3.0.0",
"html-tag-names": "^2.0.0",
"prettier": "^3.0.0",
"remark-cli": "^11.0.0",
"remark-preset-wooorm": "^9.0.0",
"type-coverage": "^2.0.0",
"typescript": "^4.0.0",
"unist-builder": "^3.0.0",
"xo": "^0.53.0"
"typescript": "^5.0.0",
"unist-builder": "^4.0.0",
"unist-util-visit": "^5.0.0",
"xo": "^0.55.0"
},

@@ -60,32 +67,18 @@ "scripts": {

"build": "tsc --build --clean && tsc --build && type-coverage",
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
"test-api": "node --conditions development test.js",
"test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api",
"format": "remark . -qfo && prettier . -w --log-level warn && xo --fix",
"test-api": "node --conditions development test/index.js",
"test-coverage": "c8 --100 --reporter lcov npm run test-api",
"test": "npm run build && npm run format && npm run test-coverage"
},
"prettier": {
"tabWidth": 2,
"useTabs": false,
"singleQuote": true,
"bracketSpacing": false,
"semi": false,
"trailingComma": "none"
"singleQuote": true,
"tabWidth": 2,
"trailingComma": "none",
"useTabs": false
},
"xo": {
"prettier": true,
"overrides": [
{
"files": [
"test.js"
],
"rules": {
"no-await-in-loop": 0,
"no-script-url": 0
}
}
]
},
"remarkConfig": {
"plugins": [
"preset-wooorm"
"remark-preset-wooorm"
]

@@ -96,3 +89,2 @@ },

"detail": true,
"strict": true,
"ignoreCatch": true,

@@ -102,4 +94,24 @@ "#": "Couple of needed `any`s",

"lib/index.d.ts"
]
],
"strict": true
},
"xo": {
"overrides": [
{
"files": [
"test/**/*.js"
],
"rules": {
"max-nested-callbacks": "off",
"no-await-in-loop": "off",
"no-script-url": "off"
}
}
],
"prettier": true,
"rules": {
"complexity": "off",
"unicorn/prefer-at": "off"
}
}
}

@@ -20,3 +20,4 @@ # hast-util-sanitize

* [API](#api)
* [`sanitize(tree[, schema])`](#sanitizetree-schema)
* [`defaultSchema`](#defaultschema)
* [`sanitize(tree[, options])`](#sanitizetree-options)
* [`Schema`](#schema)

@@ -47,3 +48,3 @@ * [Types](#types)

This package is [ESM only][esm].
In Node.js (version 14.14+, 16.0+), install with [npm][]:
In Node.js (version 16+), install with [npm][]:

@@ -57,3 +58,3 @@ ```sh

```js
import {sanitize} from 'https://esm.sh/hast-util-sanitize@4'
import {sanitize} from 'https://esm.sh/hast-util-sanitize@5'
```

@@ -65,3 +66,3 @@

<script type="module">
import {sanitize} from 'https://esm.sh/hast-util-sanitize@4?bundle'
import {sanitize} from 'https://esm.sh/hast-util-sanitize@5?bundle'
</script>

@@ -73,8 +74,8 @@ ```

```js
import {u} from 'unist-builder'
import {h} from 'hastscript'
import {sanitize} from 'hast-util-sanitize'
import {toHtml} from 'hast-util-to-html'
import {u} from 'unist-builder'
const tree = h('div', {onmouseover: 'alert("alpha")'}, [
const unsafe = h('div', {onmouseover: 'alert("alpha")'}, [
h(

@@ -95,10 +96,9 @@ 'a',

const unsanitized = toHtml(tree)
const sanitized = toHtml(sanitize(tree))
const safe = sanitize(unsafe)
console.log(unsanitized)
console.log(sanitized)
console.log(toHtml(unsafe))
console.log(toHtml(safe))
```
Unsanitized:
Unsafe:

@@ -113,3 +113,3 @@ ```html

Sanitized:
Safe:

@@ -126,7 +126,14 @@ ```html

This package exports the identifiers `sanitize` and `defaultSchema`.
This package exports the identifiers [`defaultSchema`][api-default-schema] and
[`sanitize`][api-sanitize].
There is no default export.
### `sanitize(tree[, schema])`
### `defaultSchema`
Default schema ([`Schema`][api-schema]).
Follows [GitHub][] style sanitation.
### `sanitize(tree[, options])`
Sanitize a tree.

@@ -136,26 +143,30 @@

* `tree` ([`Node`][node]) — [*tree*][tree] to sanitize
* `schema` ([`Schema`][schema], optional) — schema defining how to sanitize
* `tree` ([`Node`][node])
— unsafe tree
* `options` ([`Schema`][api-schema], default:
[`defaultSchema`][api-default-schema])
— configuration
###### Returns
A new, sanitized, tree ([`Node`][node]).
New, safe tree ([`Node`][node]).
### `Schema`
Sanitation schema that defines if and how nodes and properties should be
cleaned.
The default schema is exported as `defaultSchema`, which defaults to [GitHub][]
style sanitation.
If any top-level key isn’t given, it defaults to GitHub’s style too.
Schema that defines what nodes and properties are allowed.
For a thorough sample, see the code for [`defaultSchema`][default-schema].
The default schema is [`defaultSchema`][api-default-schema], which follows how
GitHub cleans.
If any top-level key is missing in the given schema, the corresponding
value of the default schema is used.
To extend the standard schema with a few changes, clone `defaultSchema` like so:
To extend the standard schema with a few changes, clone `defaultSchema`
like so:
```js
import deepmerge from 'deepmerge'
import {h} from 'hastscript'
import deepmerge from 'deepmerge' // You can use `structuredClone` in modern JS.
import {sanitize, defaultSchema} from 'hast-util-sanitize'
import {defaultSchema, sanitize} from 'hast-util-sanitize'
// This allows `className` on all elements.
const schema = deepmerge(defaultSchema, {attributes: {'*': ['className']}})

@@ -175,125 +186,99 @@

###### `attributes`
##### Fields
Map of tag names to allowed [*property names*][name]
(`Record<string, Array<string>>`).
###### `allowComments`
The special `'*'` key defines [*property names*][name] allowed on all
[*elements*][element].
Whether to allow comment nodes (`boolean`, default: `false`).
One special value, `'data*'`, can be used to allow all `data` properties.
For example:
```js
attributes: {
a: ['href'],
img: ['src', 'longDesc'],
// …
'*': [
'abbr',
'accept',
'acceptCharset',
// …
'vSpace',
'width',
'itemProp'
]
}
allowComments: true
```
Instead of a single string (such as `type`), which allows any [*property
value*][value] of that [*property name*][name], it’s also possible to provide
an array (such as `['type', 'checkbox']` or `['className', /^hljs-/]`),
where the first entry is the *property name*, and all other entries are
*property values* allowed (or regular expressions that are tested with values).
This is how the default GitHub schema allows only disabled checkbox inputs:
###### `allowDoctypes`
```js
attributes: {
// …
input: [
['type', 'checkbox'],
['disabled', true]
]
// …
}
```
Whether to allow doctype nodes (`boolean`, default: `false`).
This also plays well with properties that accept space- or comma-separated
values, such as `class`.
Say you wanted to allow certain classes on `span` elements for syntax
highlighting, that can be done like this:
For example:
```js
// …
span: [
['className', 'token', 'number', 'operator']
]
// …
allowDoctypes: true
```
###### `required`
###### `ancestors`
Map of tag names to required [*property names*][name] and their default
[*property value*][value] (`Record<string, Record<string, *>>`).
If the defined keys do not exist in an [*element*][element]’s
[*properties*][properties], they are added and set to the specified value.
Map of tag names to a list of tag names which are required ancestors
(`Record<string, Array<string>>`, default: `defaultSchema.ancestors`).
Note that properties are first checked based on the schema at `attributes`,
so *properties* could be removed by that step and then added again through
`required`.
Elements with these tag names will be ignored if they occur outside of one
of their allowed parents.
For example:
```js
required: {
input: {type: 'checkbox', disabled: true}
ancestors: {
tbody: ['table'],
// …
tr: ['table']
}
```
###### `tagNames`
###### `attributes`
List of allowed tag names (`Array<string>`).
Map of tag names to allowed [property names][name]
(`Record<string, Array<[string, ...Array<RegExp | boolean | number | string>] | string>>`,
default: `defaultSchema.attributes`).
```js
tagNames: [
'h1',
'h2',
'h3',
// …
'strike',
'summary',
'details'
]
```
The special key `'*'` as a tag name defines property names allowed on all
elements.
###### `protocols`
The special value `'data*'` as a property name can be used to allow all `data`
properties.
Map of protocols to allow in [*property values*][value]
(`Record<string, Array<string>>`).
For example:
```js
protocols: {
href: ['http', 'https', 'mailto'],
attributes: {
a: [
'ariaDescribedBy', 'ariaLabel', 'ariaLabelledBy', /* … */, 'href'
],
// …
longDesc: ['http', 'https']
'*': [
'abbr',
'accept',
'acceptCharset',
// …
'vAlign',
'value',
'width'
]
}
```
###### `ancestors`
Instead of a single string in the array, which allows any property value for
the field, you can use an array to allow several values.
For example, `input: ['type']` allows `type` set to any value on `input`s.
But `input: [['type', 'checkbox', 'radio']]` allows `type` when set to
`'checkbox'` or `'radio'`.
Map of tag names to their required [*ancestor*][ancestor] [*elements*][element]
(`Record<string, Array<string>>`).
You can use regexes, so for example `span: [['className', /^hljs-/]]` allows
any class that starts with `hljs-` on `span`s.
```js
ancestors: {
li: ['ol', 'ul'],
// …
tr: ['table']
}
```
When comma- or space-separated values are used (such as `className`), each
value is checked individually.
For example, to allow certain classes on `span`s for syntax highlighting, use
`span: [['className', 'number', 'operator', 'token']]`.
This will allow `'number'`, `'operator'`, and `'token'` classes, but drop
others.
###### `clobber`
List of allowed [*property names*][name] which can clobber (`Array<string>`).
List of [*property names*][name] that clobber (`Array<string>`, default:
`defaultSchema.clobber`).
For example:
```js
clobber: ['name', 'id']
clobber: ['ariaDescribedBy', 'ariaLabelledBy', 'id', 'name']
```

@@ -303,4 +288,7 @@

Prefix to use before potentially clobbering [*property names*][name] (`string`).
Prefix to use before clobbering properties (`string`, default:
`defaultSchema.clobberPrefix`).
For example:
```js

@@ -310,46 +298,97 @@ clobberPrefix: 'user-content-'

###### `strip`
###### `protocols`
Names of [*elements*][element] to strip from the [*tree*][tree]
(`Array<string>`).
Map of [*property names*][name] to allowed protocols
(`Record<string, Array<string>>`, default: `defaultSchema.protocols`).
By default, unsafe *elements* are replaced by their [*children*][child].
Some *elements*, should however be entirely stripped from the *tree*.
This defines properties whose values are always allowed to be local URLs
(relative to the current website, such as `this`, `#this`, `/this`, or
`?this`), but are only allowed to be remote URLs (such as
`https://example.com`) if they use a known protocol.
For example:
```js
strip: ['script']
protocols: {
cite: ['http', 'https'],
// …
src: ['http', 'https']
}
```
###### `allowComments`
###### `required`
Whether to allow [*comments*][comment] (`boolean`, default: `false`).
Map of tag names to required [*property names*][name] with a default value
(`Record<string, Record<string, unknown>>`, default: `defaultSchema.required`).
This defines properties that must be set.
If such a field does not exist (after the element was made safe), it will be
added with the given value.
For example:
```js
allowComments: true
required: {
input: {disabled: true, type: 'checkbox'}
}
```
###### `allowDoctypes`
> 👉 **Note**: properties are first checked based on `schema.attributes`,
> then on `schema.required`.
> That means properties could be removed by `attributes` and then added
> again with `required`.
Whether to allow [*doctypes*][doctype] (`boolean`, default: `false`).
###### `strip`
List of tag names to strip from the tree (`Array<string>`, default:
`defaultSchema.strip`).
By default, unsafe elements (those not in `schema.tagNames`) are replaced by
what they contain.
This option can drop their contents.
For example:
```js
allowDoctypes: true
strip: ['script']
```
###### `tagNames`
List of allowed tag names (`Array<string>`, default: `defaultSchema.tagNames`).
For example:
```js
tagNames: [
'a',
'b',
// …
'ul',
'var'
]
```
## Types
This package is fully typed with [TypeScript][].
It exports the additional type `Schema`.
It exports the additional type [`Schema`][api-schema].
## Compatibility
Projects maintained by the unified collective are compatible with all maintained
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
As of now, that is Node.js 14.14+ and 16.0+.
Our projects sometimes work with older versions, but this is not guaranteed.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line, `hast-util-sanitize@^5`,
compatible with Node.js 16.
## Security
By default, `hast-util-sanitize` will make everything safe to use.
But when used incorrectly, deviating from the defaults can open you up to a
Assuming you understand that certain attributes (including a limited set of
classes) can be generated by users, and you write your CSS (and JS)
accordingly.
When used incorrectly, deviating from the defaults can open you up to a
[cross-site scripting (XSS)][xss] attack.

@@ -393,5 +432,5 @@

[size-badge]: https://img.shields.io/bundlephobia/minzip/hast-util-sanitize.svg
[size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=hast-util-sanitize
[size]: https://bundlephobia.com/result?p=hast-util-sanitize
[size]: https://bundlejs.com/?q=hast-util-sanitize

@@ -428,8 +467,2 @@ [sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg

[tree]: https://github.com/syntax-tree/unist#tree
[child]: https://github.com/syntax-tree/unist#child
[ancestor]: https://github.com/syntax-tree/unist#ancestor
[hast]: https://github.com/syntax-tree/hast

@@ -439,22 +472,14 @@

[element]: https://github.com/syntax-tree/hast#element
[doctype]: https://github.com/syntax-tree/hast#doctype
[comment]: https://github.com/syntax-tree/hast#comment
[properties]: https://github.com/syntax-tree/hast#properties
[name]: https://github.com/syntax-tree/hast#propertyname
[value]: https://github.com/syntax-tree/hast#propertyvalue
[github]: https://github.com/gjtorikian/html-pipeline/blob/a2e02ac/lib/html_pipeline/sanitization_filter.rb
[github]: https://github.com/jch/html-pipeline/blob/HEAD/lib/html/pipeline/sanitization_filter.rb
[xss]: https://en.wikipedia.org/wiki/Cross-site_scripting
[default-schema]: lib/schema.js
[rehype-sanitize]: https://github.com/rehypejs/rehype-sanitize
[schema]: #schema
[api-default-schema]: #defaultschema
[rehype-sanitize]: https://github.com/rehypejs/rehype-sanitize
[api-sanitize]: #sanitizetree-options
[api-schema]: #schema
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc