hast-util-to-text
Advanced tools
Comparing version 3.1.1 to 3.1.2
@@ -1,41 +0,3 @@ | ||
/** | ||
* Implementation of the `innerText` getter: | ||
* <https://html.spec.whatwg.org/#the-innertext-idl-attribute> | ||
* Note that we act as if `node` is being rendered, and as if we’re a | ||
* CSS-supporting user agent. | ||
* | ||
* @param {HastNode} node | ||
* @param {Options} [options={}] | ||
* @returns {string} | ||
*/ | ||
export function toText(node: HastNode, options?: Options | undefined): string | ||
export type TestFunctionAnything = | ||
import('hast-util-is-element').TestFunctionAnything | ||
export type HastChild = import('hast').Parent['children'][number] | ||
export type HastText = import('hast').Text | ||
export type HastComment = import('hast').Comment | ||
export type HastRoot = import('hast').Root | ||
export type HastElement = import('hast').Element | ||
export type HastProperties = import('hast').Properties | ||
export type HastNode = HastChild | HastRoot | ||
export type HastParent = HastRoot | HastElement | ||
export type Whitespace = 'normal' | 'pre' | 'nowrap' | 'pre-wrap' | ||
export type BreakValue = boolean | ||
export type BreakNumber = 1 | 2 | ||
export type BreakForce = '\n' | ||
export type BreakBefore = BreakValue | BreakNumber | undefined | ||
export type BreakAfter = BreakValue | BreakNumber | BreakForce | undefined | ||
export type CollectionOptions = { | ||
whitespace: Whitespace | ||
breakBefore: BreakBefore | ||
breakAfter: BreakAfter | ||
} | ||
/** | ||
* Configuration. | ||
*/ | ||
export type Options = { | ||
/** | ||
* Initial CSS whitespace setting to use. | ||
*/ | ||
whitespace?: Whitespace | undefined | ||
} | ||
export {toText} from './lib/index.js' | ||
export type Options = import('./lib/index.js').Options | ||
export type Whitespace = import('./lib/index.js').Whitespace |
535
index.js
/** | ||
* @typedef {import('hast-util-is-element').TestFunctionAnything} TestFunctionAnything | ||
* @typedef {import('hast').Parent['children'][number]} HastChild | ||
* @typedef {import('hast').Text} HastText | ||
* @typedef {import('hast').Comment} HastComment | ||
* @typedef {import('hast').Root} HastRoot | ||
* @typedef {import('hast').Element} HastElement | ||
* @typedef {import('hast').Properties} HastProperties | ||
* @typedef {HastChild|HastRoot} HastNode | ||
* @typedef {HastRoot|HastElement} HastParent | ||
* | ||
* @typedef {'normal'|'pre'|'nowrap'|'pre-wrap'} Whitespace | ||
* @typedef {boolean} BreakValue | ||
* @typedef {1|2} BreakNumber | ||
* @typedef {'\n'} BreakForce | ||
* @typedef {BreakValue|BreakNumber|undefined} BreakBefore | ||
* @typedef {BreakValue|BreakNumber|BreakForce|undefined} BreakAfter | ||
* | ||
* @typedef CollectionOptions | ||
* @property {Whitespace} whitespace | ||
* @property {BreakBefore} breakBefore | ||
* @property {BreakAfter} breakAfter | ||
* | ||
* @typedef Options | ||
* Configuration. | ||
* @property {Whitespace} [whitespace='normal'] | ||
* Initial CSS whitespace setting to use. | ||
* @typedef {import('./lib/index.js').Options} Options | ||
* @typedef {import('./lib/index.js').Whitespace} Whitespace | ||
*/ | ||
import {convertElement} from 'hast-util-is-element' | ||
import {findAfter} from 'unist-util-find-after' | ||
// Matches each line feed; `collectText` uses it (via `lastIndex`) to split a
// text value into lines.
const searchLineFeeds = /\n/g
// Matches each run of tabs and/or spaces; `trimAndCollapseSpacesAndTabs` uses
// it (via `lastIndex`) to find collapsible white space.
const searchTabOrSpaces = /[\t ]+/g

// Element tests, built once at module level and reused by the collection
// steps below.
const br = convertElement('br')
const p = convertElement('p')
const cell = convertElement(['th', 'td'])
const row = convertElement('tr')
// Test for elements whose contents are skipped entirely by `collectElement`.
//
// Note that we don’t need to include void elements here as they don’t have text.
// See: <https://github.com/wooorm/html-void-elements>
const notRendered = convertElement([
  // List from: <https://html.spec.whatwg.org/#hidden-elements>
  'datalist',
  'head',
  'noembed',
  'noframes',
  'noscript', // Act as if we support scripting.
  'rp',
  'script',
  'style',
  'template',
  'title',
  // Hidden attribute.
  hidden,
  // From: <https://html.spec.whatwg.org/#flow-content-3>
  closedDialog
])
// Test for elements that get a required line break before and after their
// contents (block-level or `table-caption` in the default user agent style
// sheet).
//
// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
const blockOrCaption = convertElement([
  'address', // Flow content
  'article', // Sections and headings
  'aside', // Sections and headings
  'blockquote', // Flow content
  'body', // Page
  'caption', // `table-caption`
  'center', // Flow content (legacy)
  'dd', // Lists
  'dialog', // Flow content
  'dir', // Lists (legacy)
  'dl', // Lists
  'dt', // Lists
  'div', // Flow content
  'figure', // Flow content
  'figcaption', // Flow content
  'footer', // Flow content
  // Note: this used to be `'form,'` (comma inside the string), which never
  // matched a `<form>` element.
  'form', // Flow content
  'h1', // Sections and headings
  'h2', // Sections and headings
  'h3', // Sections and headings
  'h4', // Sections and headings
  'h5', // Sections and headings
  'h6', // Sections and headings
  'header', // Flow content
  'hgroup', // Sections and headings
  'hr', // Flow content
  'html', // Page
  'legend', // Flow content
  'listing', // Flow content (legacy)
  'main', // Flow content
  'menu', // Lists
  'nav', // Sections and headings
  'ol', // Lists
  'p', // Flow content
  'plaintext', // Flow content (legacy)
  'pre', // Flow content
  'section', // Sections and headings
  'ul', // Lists
  'xmp' // Flow content (legacy)
])
/**
 * Implementation of the `innerText` getter:
 * <https://html.spec.whatwg.org/#the-innertext-idl-attribute>
 * Note that we act as if `node` is being rendered, and as if we’re a
 * CSS-supporting user agent.
 *
 * @param {HastNode} node
 *   Node to get the text of.
 * @param {Options} [options={}]
 *   Configuration; `options.whitespace` is the initial white-space setting
 *   and falls back to `'normal'`.
 * @returns {string}
 *   Text of `node`.
 */
export function toText(node, options = {}) {
  /** @type {Array.<HastChild>} */
  // @ts-ignore looks like a parent.
  const children = node.children || []
  const block = blockOrCaption(node)
  const whitespace = inferWhitespace(node, {
    whitespace: options.whitespace || 'normal',
    breakBefore: false,
    breakAfter: false
  })

  // Treat `text` and `comment` as having normal white-space.
  // This deviates from the spec as in the DOM the node’s `.data` has to be
  // returned.
  // If you want that behavior use `hast-util-to-string`.
  // All other nodes are later handled as if they are `element`s (so the
  // algorithm also works on a `root`).
  // Nodes without children are treated as a void element, so `doctype` is thus
  // ignored.
  if (node.type === 'text' || node.type === 'comment') {
    return collectText(node, {whitespace, breakBefore: true, breakAfter: true})
  }

  // 1.  If this element is not being rendered, or if the user agent is a
  //     non-CSS user agent, then return the same value as the textContent IDL
  //     attribute on this element.
  //
  //     Note: we’re not supporting stylesheets so we’re acting as if the node
  //     is rendered.
  //
  //     If you want that behavior use `hast-util-to-string`.
  //     Important: we’ll have to account for this later though.

  // 2.  Let results be a new empty list.
  /** @type {Array.<string|BreakNumber>} */
  const results = []
  let index = -1

  // 3.  For each child node node of this element:
  while (++index < children.length) {
    // 3.1. Let current be the list resulting in running the inner text
    //      collection steps with node.
    //      Each item in results will either be a JavaScript string or a
    //      positive integer (a required line break count).
    // 3.2. For each item item in current, append item to results.
    results.push(
      // @ts-ignore Looks like a parent.
      ...innerTextCollection(children[index], node, {
        whitespace,
        // Only the first child can sit directly after the block’s own break.
        breakBefore: index ? undefined : block,
        breakAfter:
          index < children.length - 1 ? br(children[index + 1]) : block
      })
    )
  }

  // 4.  Remove any items from results that are the empty string.
  // 5.  Remove any runs of consecutive required line break count items at the
  //     start or end of results.
  // 6.  Replace each remaining run of consecutive required line break count
  //     items with a string consisting of as many U+000A LINE FEED (LF)
  //     characters as the maximum of the values in the required line break
  //     count items.
  /** @type {Array.<string>} */
  const result = []
  // `undefined` until the first non-empty string is seen, so that leading
  // break counts are dropped; trailing ones are never flushed.
  /** @type {number|undefined} */
  let count

  for (const value of results) {
    if (typeof value === 'number') {
      if (count !== undefined && value > count) count = value
    } else if (value) {
      if (count) result.push('\n'.repeat(count))
      count = 0
      result.push(value)
    }
  }

  // 7.  Return the concatenation of the string items in results.
  return result.join('')
}
/**
 * <https://html.spec.whatwg.org/#inner-text-collection-steps>
 *
 * Dispatches on the node type: elements go through `collectElement`, text
 * goes through `collectText` (for `normal` white-space) or `collectPreText`
 * (for every other setting), and anything else yields no items.
 *
 * @param {HastNode} node
 *   Node to collect.
 * @param {HastParent} parent
 *   Parent of `node`.
 * @param {CollectionOptions} options
 *   White-space setting and break info for `node`.
 * @returns {Array.<string|BreakNumber>}
 *   Strings and required line break counts.
 */
function innerTextCollection(node, parent, options) {
  if (node.type === 'element') {
    return collectElement(node, parent, options)
  }

  if (node.type === 'text') {
    return [
      options.whitespace === 'normal'
        ? collectText(node, options)
        : collectPreText(node)
    ]
  }

  return []
}
/**
 * Collect an element.
 *
 * @param {HastElement} node
 *   Element to collect.
 * @param {HastParent} parent
 *   Parent of `node`; searched for later sibling rows and cells.
 * @param {CollectionOptions} options
 *   Inherited white-space setting and break info.
 * @returns {Array.<string|BreakNumber>}
 *   Strings and required line break counts.
 */
function collectElement(node, parent, options) {
  // First we infer the `white-space` property.
  const whitespace = inferWhitespace(node, options)
  const children = node.children || []
  let index = -1
  /** @type {Array.<string|BreakNumber>} */
  const items = []
  /** @type {BreakNumber|undefined} */
  let prefix
  /** @type {BreakNumber|BreakForce|undefined} */
  let suffix

  // We’re ignoring point 3, and exiting without any content here, because we
  // deviated from the spec in `toText` at step 3.
  if (notRendered(node)) {
    return items
  }

  // Note: we first detect if there is going to be a break before or after the
  // contents, as that changes the white-space handling.

  // 2.  If node’s computed value of `visibility` is not `visible`, then return
  //     items.
  //
  //     Note: Ignored, as everything is visible by default user agent styles.

  // 3.  If node is not being rendered, then return items. [...]
  //
  //     Note: We already did this above.

  // See `collectText` for step 4.

  // 5.  If node is a `<br>` element, then append a string containing a single
  //     U+000A LINE FEED (LF) character to items.
  if (br(node)) {
    suffix = '\n'
  }
  // 7.  If node’s computed value of `display` is `table-row`, and node’s CSS
  //     box is not the last `table-row` box of the nearest ancestor `table`
  //     box, then append a string containing a single U+000A LINE FEED (LF)
  //     character to items.
  //
  //     See: <https://html.spec.whatwg.org/#tables-2>
  //     Note: needs further investigation as this does not account for implicit
  //     rows.
  else if (row(node) && findAfter(parent, node, row)) {
    suffix = '\n'
  }
  // 8.  If node is a `<p>` element, then append 2 (a required line break count)
  //     at the beginning and end of items.
  else if (p(node)) {
    prefix = 2
    suffix = 2
  }
  // 9.  If node’s used value of `display` is block-level or `table-caption`,
  //     then append 1 (a required line break count) at the beginning and end of
  //     items.
  else if (blockOrCaption(node)) {
    prefix = 1
    suffix = 1
  }

  // 1.  Let items be the result of running the inner text collection steps with
  //     each child node of node in tree order, and then concatenating the
  //     results to a single list.
  while (++index < children.length) {
    items.push(
      ...innerTextCollection(children[index], node, {
        whitespace,
        breakBefore: index ? undefined : prefix,
        breakAfter:
          index < children.length - 1 ? br(children[index + 1]) : suffix
      })
    )
  }

  // 6.  If node’s computed value of `display` is `table-cell`, and node’s CSS
  //     box is not the last `table-cell` box of its enclosing `table-row` box,
  //     then append a string containing a single U+0009 CHARACTER TABULATION
  //     (tab) character to items.
  //
  //     See: <https://html.spec.whatwg.org/#tables-2>
  if (cell(node) && findAfter(parent, node, cell)) {
    items.push('\t')
  }

  // Add the pre- and suffix.
  if (prefix) items.unshift(prefix)
  if (suffix) items.push(suffix)

  return items
}
/**
 * 4.  If node is a Text node, then for each CSS text box produced by node,
 *     in content order, compute the text of the box after application of the
 *     CSS `white-space` processing rules and `text-transform` rules, set
 *     items to the list of the resulting strings, and return items.
 *     The CSS `white-space` processing rules are slightly modified:
 *     collapsible spaces at the end of lines are always collapsed, but they
 *     are only removed if the line is the last line of the block, or it ends
 *     with a br element.
 *     Soft hyphens should be preserved.
 *
 *     Note: See `collectText` and `collectPreText`.
 *     Note: we don’t deal with `text-transform`, no element has that by
 *     default.
 *
 * See: <https://drafts.csswg.org/css-text/#white-space-phase-1>
 *
 * @param {HastText|HastComment} node
 *   Node whose `value` is collapsed.
 * @param {CollectionOptions} options
 *   Break info (`breakBefore`, `breakAfter`) passed on to the trimming step.
 * @returns {string}
 *   Collapsed text.
 */
function collectText(node, options) {
  const value = String(node.value)
  // Characters with the Bidi_Control property; built once per call instead of
  // once per line.
  const bidiControls = /[\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g
  /** @type {Array.<string>} */
  const lines = []
  /** @type {Array.<string>} */
  const result = []
  let start = 0
  let index = -1
  /** @type {string|undefined} */
  let join

  while (start < value.length) {
    searchLineFeeds.lastIndex = start
    // `exec` yields a `RegExpExecArray`, on which `index` is always set.
    /** @type {RegExpExecArray|null} */
    const match = searchLineFeeds.exec(value)
    const end = match ? match.index : value.length

    lines.push(
      // Any sequence of collapsible spaces and tabs immediately preceding or
      // following a segment break is removed.
      trimAndCollapseSpacesAndTabs(
        // [...] ignoring bidi formatting characters (characters with the
        // Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if
        // they were not there.
        value.slice(start, end).replace(bidiControls, ''),
        options.breakBefore,
        options.breakAfter
      )
    )

    start = end + 1
  }

  // Collapsible segment breaks are transformed for rendering according to the
  // segment break transformation rules.
  // So here we jump to 4.1.2 of [CSSTEXT]:
  // Any collapsible segment break immediately following another collapsible
  // segment break is removed
  while (++index < lines.length) {
    // *   If the character immediately before or immediately after the segment
    //     break is the zero-width space character (U+200B), then the break is
    //     removed, leaving behind the zero-width space.
    if (
      lines[index].charCodeAt(lines[index].length - 1) === 0x200b /* ZWSP */ ||
      (index < lines.length - 1 &&
        lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */
    ) {
      result.push(lines[index])
      join = ''
    }
    // *   Otherwise, if the East Asian Width property [UAX11] of both the
    //     character before and after the segment break is Fullwidth, Wide, or
    //     Halfwidth (not Ambiguous), and neither side is Hangul, then the
    //     segment break is removed.
    //
    //     Note: ignored.
    // *   Otherwise, if the writing system of the segment break is Chinese,
    //     Japanese, or Yi, and the character before or after the segment break
    //     is punctuation or a symbol (Unicode general category P* or S*) and
    //     has an East Asian Width property of Ambiguous, and the character on
    //     the other side of the segment break is Fullwidth, Wide, or Halfwidth,
    //     and not Hangul, then the segment break is removed.
    //
    //     Note: ignored.
    // *   Otherwise, the segment break is converted to a space (U+0020).
    else if (lines[index]) {
      if (join) result.push(join)
      result.push(lines[index])
      join = ' '
    }
  }

  return result.join('')
}
/**
 * Collect a text node under a non-`normal` white-space setting: the value is
 * returned as is, without collapsing.
 *
 * @param {HastText|HastComment} node
 *   Node whose `value` is returned.
 * @returns {string}
 *   `node.value`, coerced to a string.
 */
function collectPreText(node) {
  return String(node.value)
}
/**
 * 3.  Every collapsible tab is converted to a collapsible space (U+0020).
 * 4.  Any collapsible space immediately following another collapsible
 *     space—even one outside the boundary of the inline containing that
 *     space, provided both spaces are within the same inline formatting
 *     context—is collapsed to have zero advance width. (It is invisible,
 *     but retains its soft wrap opportunity, if any.)
 *
 * @param {string} value
 *   One line of text (no line feeds).
 * @param {BreakBefore} breakBefore
 *   Whether there is a break before this line; if so, leading white space is
 *   dropped instead of kept as a single space.
 * @param {BreakAfter} breakAfter
 *   Whether there is a break after this line; if so, trailing white space is
 *   dropped instead of kept as a single space.
 * @returns {string}
 *   Line with each run of tabs/spaces collapsed to one space.
 */
function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) {
  /** @type {Array.<string>} */
  const result = []
  let start = 0
  // Stays `undefined` when `value` is empty (the loop never runs).
  /** @type {number|undefined} */
  let end

  while (start < value.length) {
    searchTabOrSpaces.lastIndex = start
    // `exec` yields a `RegExpExecArray`, on which `index` is always set.
    /** @type {RegExpExecArray|null} */
    const match = searchTabOrSpaces.exec(value)
    end = match ? match.index : value.length

    // If we’re not directly after a segment break, but there was white space,
    // add an empty value that will be turned into a space.
    if (!start && !end && match && !breakBefore) {
      result.push('')
    }

    if (start !== end) {
      result.push(value.slice(start, end))
    }

    start = match ? end + match[0].length : end
  }

  // If we reached the end, there was trailing white space, and there’s no
  // segment break after this node, add an empty value that will be turned
  // into a space.
  if (start !== end && !breakAfter) {
    result.push('')
  }

  return result.join(' ')
}
/**
 * Infer the white-space setting of `node` from its tag name and properties,
 * falling back to the inherited setting.
 *
 * We don’t support void elements here (so `nobr wbr` -> `normal` is ignored).
 *
 * @param {HastNode} node
 *   Node to inspect; only elements can change the setting.
 * @param {CollectionOptions} options
 *   Inherited options; `options.whitespace` is the fallback.
 * @returns {Whitespace}
 *   White-space setting for the contents of `node`.
 */
function inferWhitespace(node, options) {
  if (node.type !== 'element') {
    return options.whitespace
  }

  /** @type {HastProperties} */
  const props = node.properties || {}

  switch (node.tagName) {
    case 'listing':
    case 'plaintext':
    case 'xmp':
      return 'pre'
    case 'nobr':
      return 'nowrap'
    case 'pre':
      return props.wrap ? 'pre-wrap' : 'pre'
    case 'td':
    case 'th':
      return props.noWrap ? 'nowrap' : options.whitespace
    case 'textarea':
      return 'pre-wrap'
    default:
      return options.whitespace
  }
}
/**
 * Check whether `node` has a truthy `hidden` property.
 *
 * @type {TestFunctionAnything}
 */
function hidden(node) {
  return Boolean((node.properties || {}).hidden)
}
/**
 * Check whether `node` is a `dialog` element without a truthy `open`
 * property.
 *
 * @type {TestFunctionAnything}
 */
function closedDialog(node) {
  return node.tagName === 'dialog' && !(node.properties || {}).open
}
export {toText} from './lib/index.js' |
{ | ||
"name": "hast-util-to-text", | ||
"version": "3.1.1", | ||
"version": "3.1.2", | ||
"description": "hast utility to get the plain-text value of a node according to the `innerText` algorithm", | ||
@@ -33,2 +33,3 @@ "license": "MIT", | ||
"files": [ | ||
"lib/", | ||
"index.d.ts", | ||
@@ -39,2 +40,3 @@ "index.js" | ||
"@types/hast": "^2.0.0", | ||
"@types/unist": "^2.0.0", | ||
"hast-util-is-element": "^2.0.0", | ||
@@ -44,21 +46,19 @@ "unist-util-find-after": "^4.0.0" | ||
"devDependencies": { | ||
"@types/tape": "^4.0.0", | ||
"@types/node": "^18.0.0", | ||
"c8": "^7.0.0", | ||
"hastscript": "^7.0.0", | ||
"prettier": "^2.0.0", | ||
"remark-cli": "^10.0.0", | ||
"remark-cli": "^11.0.0", | ||
"remark-preset-wooorm": "^9.0.0", | ||
"rimraf": "^3.0.0", | ||
"tape": "^5.0.0", | ||
"type-coverage": "^2.0.0", | ||
"typescript": "^4.0.0", | ||
"unist-builder": "^3.0.0", | ||
"xo": "^0.45.0" | ||
"xo": "^0.53.0" | ||
}, | ||
"scripts": { | ||
"prepack": "npm run build && npm run format", | ||
"build": "rimraf \"*.d.ts\" && tsc && type-coverage", | ||
"build": "tsc --build --clean && tsc --build && type-coverage", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"test-api": "node test.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov node test.js", | ||
"test-api": "node --conditions development test.js", | ||
"test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
@@ -77,3 +77,4 @@ }, | ||
"rules": { | ||
"unicorn/numeric-separators-style": "off" | ||
"unicorn/numeric-separators-style": "off", | ||
"unicorn/prefer-code-point": "off" | ||
} | ||
@@ -80,0 +81,0 @@ }, |
185
readme.md
@@ -11,19 +11,50 @@ # hast-util-to-text | ||
[**hast**][hast] utility to get the plain-text value of a [*node*][node]. | ||
[hast][] utility to get the plain-text value of a node. | ||
This is like the DOMs `Node#innerText` getter but there are some deviations from | ||
the spec. | ||
The resulting text is returned. | ||
## Contents | ||
You’d typically want to use [`hast-util-to-string`][to-string] | ||
(`textContent`), but `hast-util-to-text` (`innerText`) adds for example line | ||
breaks where `<br>` elements are used. | ||
* [What is this?](#what-is-this) | ||
* [When should I use this?](#when-should-i-use-this) | ||
* [Install](#install) | ||
* [Use](#use) | ||
* [API](#api) | ||
* [`toText(tree[, options])`](#totexttree-options) | ||
* [`Options`](#options) | ||
* [`Whitespace`](#whitespace) | ||
* [Types](#types) | ||
* [Compatibility](#compatibility) | ||
* [Security](#security) | ||
* [Related](#related) | ||
* [Contribute](#contribute) | ||
* [License](#license) | ||
## What is this? | ||
This package is a utility that takes a [hast][] node and gets its plain-text | ||
value. | ||
It is like the DOM’s `Node#innerText`, which is a bit nicer than | ||
`Node#textContent`, because this turns `<br>` elements into line breaks and | ||
uses `'\t'` (tabs) between table cells. | ||
There are some small deviations from the spec, because the DOM has knowledge of | ||
associated CSS, and can take into account that elements have `display: none` or | ||
`text-transform` associated with them, and this utility can’t do that. | ||
## When should I use this? | ||
This is a small utility that is useful when you want a plain-text version of a | ||
node that is close to how it’s “visible” to users. | ||
This utility is similar to [`hast-util-to-string`][hast-util-to-string], which | ||
is simpler, and more like the `Node#textContent` algorithm discussed above. | ||
There is also a package [`hast-util-from-text`][hast-util-from-text], which sort | ||
of does the inverse: it takes a string, sets that as text on the node, while | ||
turning line endings into `<br>`s. | ||
## Install | ||
This package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c): | ||
Node 12+ is needed to use it and it must be `import`ed instead of `require`d. | ||
This package is [ESM only][esm]. | ||
In Node.js (version 14.14+ or 16.0+), install with [npm][]: | ||
[npm][]: | ||
```sh | ||
@@ -33,2 +64,16 @@ npm install hast-util-to-text | ||
In Deno with [`esm.sh`][esmsh]: | ||
```js | ||
import {toText} from "https://esm.sh/hast-util-to-text@3" | ||
``` | ||
In browsers with [`esm.sh`][esmsh]: | ||
```html | ||
<script type="module"> | ||
import {toText} from "https://esm.sh/hast-util-to-text@3?bundle" | ||
</script> | ||
``` | ||
## Use | ||
@@ -62,37 +107,75 @@ | ||
This package exports the following identifiers: `toText`. | ||
This package exports the identifier [`toText`][totext]. | ||
There is no default export. | ||
### `toText(node, options?)` | ||
### `toText(tree[, options])` | ||
Utility to get the plain-text value of a [*node*][node]. | ||
Get the plain-text value of a node. | ||
* If `node` is a [*comment*][comment], returns its `value` | ||
* If `node` is a [*text*][text], applies normal white-space collapsing to its | ||
`value`, as defined by the [CSS Text][css] spec | ||
* If `node` is a [*root*][root] or [*element*][element], applies an algorithm | ||
similar to the `innerText` getter as defined by [HTML][] | ||
###### Parameters | ||
###### `options.whitespace` | ||
* `tree` ([`Node`][node]) | ||
— tree to turn into text | ||
* `options` ([`Options`][options], optional) | ||
— configuration | ||
Default whitespace setting to use (`'normal'` or `'pre'`, default: `'normal'`). | ||
###### Returns | ||
`string` — Stringified `node`. | ||
Serialized `tree` (`string`). | ||
###### Algorithm | ||
* if `tree` is a [comment][], returns its `value` | ||
* if `tree` is a [text][], applies normal whitespace collapsing to its | ||
`value`, as defined by the [CSS Text][css] spec | ||
* if `tree` is a [root][] or [element][], applies an algorithm similar to the | ||
`innerText` getter as defined by [HTML][] | ||
###### Notes | ||
* If `node` is an [*element*][element] that is not displayed (such as a | ||
`head`), we’ll still use the `innerText` algorithm instead of switching to | ||
`textContent` | ||
* If [*descendants*][descendant] of `node` are [*elements*][element] that are | ||
not displayed, they are ignored | ||
> 👉 **Note**: the algorithm acts as if `tree` is being rendered, and as if | ||
> we’re a CSS-supporting user agent, with scripting enabled. | ||
* if `tree` is an element that is not displayed (such as a `head`), we’ll | ||
still use the `innerText` algorithm instead of switching to `textContent` | ||
* if descendants of `tree` are elements that are not displayed, they are | ||
ignored | ||
* CSS is not considered, except for the default user agent style sheet | ||
* A line feed is collapsed instead of ignored in cases where Fullwidth, Wide, | ||
* a line feed is collapsed instead of ignored in cases where Fullwidth, Wide, | ||
or Halfwidth East Asian Width characters are used, the same goes for a case | ||
with Chinese, Japanese, or Yi writing systems | ||
* Replaced [*elements*][element] (such as `audio`) are treated like | ||
non-replaced *elements* | ||
* replaced elements (such as `audio`) are treated like non-replaced elements | ||
### `Options` | ||
Configuration (TypeScript type). | ||
##### Fields | ||
* `whitespace` ([`Whitespace`][whitespace], default: `'normal'`) | ||
— default whitespace setting to use | ||
### `Whitespace` | ||
Valid and useful whitespace values (from [CSS][]) (TypeScript type). | ||
##### Type | ||
```ts | ||
type Whitespace = 'normal' | 'pre' | 'nowrap' | 'pre-wrap' | ||
``` | ||
## Types | ||
This package is fully typed with [TypeScript][]. | ||
It exports the additional types [`Options`][options] and | ||
[`Whitespace`][whitespace]. | ||
## Compatibility | ||
Projects maintained by the unified collective are compatible with all maintained | ||
versions of Node.js. | ||
As of now, that is Node.js 14.14+ and 16.0+. | ||
Our projects sometimes work with older versions, but this is not guaranteed. | ||
## Security | ||
@@ -105,13 +188,13 @@ | ||
* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string) | ||
— Get the plain-text value (`textContent`) | ||
* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/main/packages/hast-util-to-string) | ||
— get the plain-text value (`textContent`) | ||
* [`hast-util-from-text`](https://github.com/syntax-tree/hast-util-from-text) | ||
— Set the plain-text value (`innerText`) | ||
* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-from-string) | ||
— Set the plain-text value (`textContent`) | ||
— set the plain-text value (`innerText`) | ||
* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/main/packages/hast-util-from-string) | ||
— set the plain-text value (`textContent`) | ||
## Contribute | ||
See [`contributing.md` in `syntax-tree/.github`][contributing] for ways to get | ||
started. | ||
See [`contributing.md`][contributing] in [`syntax-tree/.github`][health] for | ||
ways to get started. | ||
See [`support.md`][support] for ways to get help. | ||
@@ -157,2 +240,8 @@ | ||
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c | ||
[esmsh]: https://esm.sh | ||
[typescript]: https://www.typescriptlang.org | ||
[license]: license | ||
@@ -162,15 +251,17 @@ | ||
[contributing]: https://github.com/syntax-tree/.github/blob/HEAD/contributing.md | ||
[health]: https://github.com/syntax-tree/.github | ||
[support]: https://github.com/syntax-tree/.github/blob/HEAD/support.md | ||
[contributing]: https://github.com/syntax-tree/.github/blob/main/contributing.md | ||
[coc]: https://github.com/syntax-tree/.github/blob/HEAD/code-of-conduct.md | ||
[support]: https://github.com/syntax-tree/.github/blob/main/support.md | ||
[html]: https://html.spec.whatwg.org/#the-innertext-idl-attribute | ||
[coc]: https://github.com/syntax-tree/.github/blob/main/code-of-conduct.md | ||
[html]: https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute | ||
[css]: https://drafts.csswg.org/css-text/#white-space-phase-1 | ||
[to-string]: https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string | ||
[hast-util-to-string]: https://github.com/rehypejs/rehype-minify/tree/main/packages/hast-util-to-string | ||
[descendant]: https://github.com/syntax-tree/unist#descendant | ||
[hast-util-from-text]: https://github.com/syntax-tree/hast-util-from-text | ||
@@ -190,1 +281,7 @@ [hast]: https://github.com/syntax-tree/hast | ||
[xss]: https://en.wikipedia.org/wiki/Cross-site_scripting | ||
[totext]: #totexttree-options | ||
[options]: #options | ||
[whitespace]: #whitespace |
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Major refactor
Supply chain riskPackage has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
33437
10
7
652
281
4
1
+ Added@types/unist@^2.0.0