hast-util-to-text
Advanced tools
Comparing version 2.0.1 to 3.0.0
330
index.js
@@ -1,20 +0,40 @@ | ||
'use strict' | ||
/** | ||
* @typedef {import('hast-util-is-element').TestFunctionAnything} TestFunctionAnything | ||
* @typedef {import('hast').Parent['children'][number]} HastChild | ||
* @typedef {import('hast').Text} HastText | ||
* @typedef {import('hast').Comment} HastComment | ||
* @typedef {import('hast').Root} HastRoot | ||
* @typedef {import('hast').Element} HastElement | ||
* @typedef {import('hast').Properties} HastProperties | ||
* @typedef {HastChild|HastRoot} HastNode | ||
* @typedef {HastRoot|HastElement} HastParent | ||
* | ||
* @typedef {'normal'|'pre'|'nowrap'|'pre-wrap'} Whitespace | ||
* @typedef {boolean} BreakValue | ||
* @typedef {1|2} BreakNumber | ||
* @typedef {'\n'} BreakForce | ||
* @typedef {BreakValue|BreakNumber} BreakBefore | ||
* @typedef {BreakValue|BreakNumber|BreakForce} BreakAfter | ||
* | ||
* @typedef CollectionOptions | ||
* @property {Whitespace} whitespace | ||
* @property {BreakBefore} breakBefore | ||
* @property {BreakAfter} breakAfter | ||
*/ | ||
var repeat = require('repeat-string') | ||
var convert = require('hast-util-is-element/convert') | ||
var findAfter = require('unist-util-find-after') | ||
import repeat from 'repeat-string' | ||
import {convertElement} from 'hast-util-is-element' | ||
import {findAfter} from 'unist-util-find-after' | ||
module.exports = toText | ||
var searchLineFeeds = /\n/g | ||
var searchTabOrSpaces = /[\t ]+/g | ||
var br = convert('br') | ||
var p = convert('p') | ||
var cell = convert(['th', 'td']) | ||
var row = convert('tr') | ||
var br = convertElement('br') | ||
var p = convertElement('p') | ||
var cell = convertElement(['th', 'td']) | ||
var row = convertElement('tr') | ||
// Note that we don’t need to include void elements here as they don’t have text. | ||
// See: <https://github.com/wooorm/html-void-elements> | ||
var notRendered = convert([ | ||
var notRendered = convertElement([ | ||
// List from: <https://html.spec.whatwg.org/#hidden-elements> | ||
@@ -25,2 +45,3 @@ 'datalist', | ||
'noframes', | ||
'noscript', // Act as if we support scripting. | ||
'rp', | ||
@@ -31,4 +52,2 @@ 'script', | ||
'title', | ||
// Act as if we support scripting. | ||
'noscript', | ||
// Hidden attribute. | ||
@@ -41,60 +60,71 @@ hidden, | ||
// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints> | ||
var blockOrCaption = convert([ | ||
var blockOrCaption = convertElement([ | ||
'address', // Flow content | ||
'article', // Sections and headings | ||
'aside', // Sections and headings | ||
'blockquote', // Flow content | ||
'body', // Page | ||
'caption', // `table-caption` | ||
// Page | ||
'html', | ||
'body', | ||
// Flow content | ||
'address', | ||
'blockquote', | ||
'center', // Legacy | ||
'dialog', | ||
'div', | ||
'figure', | ||
'figcaption', | ||
'footer', | ||
'form,', | ||
'header', | ||
'hr', | ||
'legend', | ||
'listing', // Legacy | ||
'main', | ||
'p', | ||
'plaintext', // Legacy | ||
'pre', | ||
'xmp', // Legacy | ||
// Sections and headings | ||
'article', | ||
'aside', | ||
'h1', | ||
'h2', | ||
'h3', | ||
'h4', | ||
'h5', | ||
'h6', | ||
'hgroup', | ||
'nav', | ||
'section', | ||
// Lists | ||
'dir', // Legacy | ||
'dd', | ||
'dl', | ||
'dt', | ||
'menu', | ||
'ol', | ||
'ul' | ||
'center', // Flow content (legacy) | ||
'dd', // Lists | ||
'dialog', // Flow content | ||
'dir', // Lists (legacy) | ||
'dl', // Lists | ||
'dt', // Lists | ||
'div', // Flow content | ||
'figure', // Flow content | ||
'figcaption', // Flow content | ||
'footer', // Flow content | ||
'form,', // Flow content | ||
'h1', // Sections and headings | ||
'h2', // Sections and headings | ||
'h3', // Sections and headings | ||
'h4', // Sections and headings | ||
'h5', // Sections and headings | ||
'h6', // Sections and headings | ||
'header', // Flow content | ||
'hgroup', // Sections and headings | ||
'hr', // Flow content | ||
'html', // Page | ||
'legend', // Flow content | ||
'listing', // Flow content (legacy) | ||
'main', // Flow content | ||
'menu', // Lists | ||
'nav', // Sections and headings | ||
'ol', // Lists | ||
'p', // Flow content | ||
'plaintext', // Flow content (legacy) | ||
'pre', // Flow content | ||
'section', // Sections and headings | ||
'ul', // Lists | ||
'xmp' // Flow content (legacy) | ||
]) | ||
// Implementation of the `innerText` getter: | ||
// <https://html.spec.whatwg.org/#the-innertext-idl-attribute> | ||
// Note that we act as if `node` is being rendered, and as if we’re a | ||
// CSS-supporting user agent. | ||
function toText(node) { | ||
/** | ||
* Implementation of the `innerText` getter: | ||
* <https://html.spec.whatwg.org/#the-innertext-idl-attribute> | ||
* Note that we act as if `node` is being rendered, and as if we’re a | ||
* CSS-supporting user agent. | ||
* | ||
* @param {HastNode} node | ||
* @returns {string} | ||
*/ | ||
export function toText(node) { | ||
/** @type {Array.<HastChild>} */ | ||
// @ts-ignore looks like a parent. | ||
var children = node.children || [] | ||
var block = blockOrCaption(node) | ||
var whiteSpace = inferWhiteSpace(node, {}) | ||
var whitespace = inferWhitespace(node, { | ||
whitespace: 'normal', | ||
breakBefore: false, | ||
breakAfter: false | ||
}) | ||
var index = -1 | ||
/** @type {Array.<string|BreakNumber>} */ | ||
var results | ||
/** @type {Array.<string>} */ | ||
var result | ||
/** @type {string|BreakNumber} */ | ||
var value | ||
/** @type {number} */ | ||
var count | ||
@@ -111,7 +141,3 @@ | ||
if (node.type === 'text' || node.type === 'comment') { | ||
return collectText(node, { | ||
whiteSpace: whiteSpace, | ||
breakBefore: true, | ||
breakAfter: true | ||
}) | ||
return collectText(node, {whitespace, breakBefore: true, breakAfter: true}) | ||
} | ||
@@ -140,4 +166,5 @@ | ||
results = results.concat( | ||
innerTextCollection(children[index], index, node, { | ||
whiteSpace: whiteSpace, | ||
// @ts-ignore Looks like a parent. | ||
innerTextCollection(children[index], node, { | ||
whitespace, | ||
breakBefore: index ? null : block, | ||
@@ -176,6 +203,13 @@ breakAfter: | ||
// <https://html.spec.whatwg.org/#inner-text-collection-steps> | ||
function innerTextCollection(node, index, parent, options) { | ||
/** | ||
* <https://html.spec.whatwg.org/#inner-text-collection-steps> | ||
* | ||
* @param {HastNode} node | ||
* @param {HastParent} parent | ||
* @param {CollectionOptions} options | ||
* @returns {Array.<string|BreakNumber>} | ||
*/ | ||
function innerTextCollection(node, parent, options) { | ||
if (node.type === 'element') { | ||
return collectElement(node, index, parent, options) | ||
return collectElement(node, parent, options) | ||
} | ||
@@ -185,5 +219,5 @@ | ||
return [ | ||
options.whiteSpace === 'normal' | ||
options.whitespace === 'normal' | ||
? collectText(node, options) | ||
: collectPreText(node, options) | ||
: collectPreText(node) | ||
] | ||
@@ -195,10 +229,19 @@ } | ||
// Collect an element. | ||
function collectElement(node, _, parent, options) { | ||
/** | ||
* Collect an element. | ||
* | ||
* @param {HastElement} node | ||
* @param {HastParent} parent | ||
* @param {CollectionOptions} options | ||
*/ | ||
function collectElement(node, parent, options) { | ||
// First we infer the `white-space` property. | ||
var whiteSpace = inferWhiteSpace(node, options) | ||
var whitespace = inferWhitespace(node, options) | ||
var children = node.children || [] | ||
var index = -1 | ||
/** @type {Array.<string|BreakNumber>} */ | ||
var items = [] | ||
/** @type {BreakNumber} */ | ||
var prefix | ||
/** @type {BreakNumber|BreakForce} */ | ||
var suffix | ||
@@ -264,5 +307,5 @@ | ||
items = items.concat( | ||
innerTextCollection(children[index], index, node, { | ||
whiteSpace: whiteSpace, | ||
breakBefore: index ? null : prefix, | ||
innerTextCollection(children[index], node, { | ||
whitespace, | ||
breakBefore: index ? undefined : prefix, | ||
breakAfter: | ||
@@ -291,25 +334,36 @@ index < children.length - 1 ? br(children[index + 1]) : suffix | ||
// 4. If node is a Text node, then for each CSS text box produced by node, | ||
// in content order, compute the text of the box after application of the | ||
// CSS `white-space` processing rules and `text-transform` rules, set | ||
// items to the list of the resulting strings, and return items. | ||
// The CSS `white-space` processing rules are slightly modified: | ||
// collapsible spaces at the end of lines are always collapsed, but they | ||
// are only removed if the line is the last line of the block, or it ends | ||
// with a br element. | ||
// Soft hyphens should be preserved. | ||
// | ||
// Note: See `collectText` and `collectPreText`. | ||
// Note: we don’t deal with `text-transform`, no element has that by | ||
// default. | ||
// | ||
// See: <https://drafts.csswg.org/css-text/#white-space-phase-1> | ||
/** | ||
* 4. If node is a Text node, then for each CSS text box produced by node, | ||
* in content order, compute the text of the box after application of the | ||
* CSS `white-space` processing rules and `text-transform` rules, set | ||
* items to the list of the resulting strings, and return items. | ||
* The CSS `white-space` processing rules are slightly modified: | ||
* collapsible spaces at the end of lines are always collapsed, but they | ||
* are only removed if the line is the last line of the block, or it ends | ||
* with a br element. | ||
* Soft hyphens should be preserved. | ||
* | ||
* Note: See `collectText` and `collectPreText`. | ||
* Note: we don’t deal with `text-transform`, no element has that by | ||
* default. | ||
* | ||
* See: <https://drafts.csswg.org/css-text/#white-space-phase-1> | ||
* | ||
* @param {HastText|HastComment} node | ||
* @param {CollectionOptions} options | ||
* @returns {string} | ||
*/ | ||
function collectText(node, options) { | ||
var value = String(node.value) | ||
/** @type {Array.<string>} */ | ||
var lines = [] | ||
/** @type {Array.<string>} */ | ||
var result = [] | ||
var start = 0 | ||
var index = -1 | ||
/** @type {RegExpMatchArray} */ | ||
var match | ||
/** @type {number} */ | ||
var end | ||
/** @type {string} */ | ||
var join | ||
@@ -325,3 +379,3 @@ | ||
// following a segment break is removed. | ||
trimAndcollapseSpacesAndTabs( | ||
trimAndCollapseSpacesAndTabs( | ||
// [...] ignoring bidi formatting characters (characters with the | ||
@@ -332,3 +386,3 @@ // Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if | ||
.slice(start, end) | ||
.replace(/[\u061c\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ''), | ||
.replace(/[\u061C\u200E\u200F\u202A-\u202E\u2066-\u2069]/g, ''), | ||
options.breakBefore, | ||
@@ -386,2 +440,6 @@ options.breakAfter | ||
/** | ||
* @param {HastText|HastComment} node | ||
* @returns {string} | ||
*/ | ||
function collectPreText(node) { | ||
@@ -391,12 +449,22 @@ return String(node.value) | ||
// 3. Every collapsible tab is converted to a collapsible space (U+0020). | ||
// 4. Any collapsible space immediately following another collapsible | ||
// space—even one outside the boundary of the inline containing that | ||
// space, provided both spaces are within the same inline formatting | ||
// context—is collapsed to have zero advance width. (It is invisible, | ||
// but retains its soft wrap opportunity, if any.) | ||
function trimAndcollapseSpacesAndTabs(value, breakBefore, breakAfter) { | ||
/** | ||
* 3. Every collapsible tab is converted to a collapsible space (U+0020). | ||
* 4. Any collapsible space immediately following another collapsible | ||
* space—even one outside the boundary of the inline containing that | ||
* space, provided both spaces are within the same inline formatting | ||
* context—is collapsed to have zero advance width. (It is invisible, | ||
* but retains its soft wrap opportunity, if any.) | ||
* | ||
* @param {string} value | ||
* @param {BreakBefore} breakBefore | ||
* @param {BreakAfter} breakAfter | ||
* @returns {string} | ||
*/ | ||
function trimAndCollapseSpacesAndTabs(value, breakBefore, breakAfter) { | ||
/** @type {Array.<string>} */ | ||
var result = [] | ||
var start = 0 | ||
/** @type {RegExpMatchArray} */ | ||
var match | ||
/** @type {number} */ | ||
var end | ||
@@ -432,32 +500,44 @@ | ||
// We don’t support void elements here (so `nobr wbr` -> `normal` is ignored). | ||
function inferWhiteSpace(node, options) { | ||
var props = node.properties || {} | ||
var inherit = options.whiteSpace || 'normal' | ||
/** | ||
* We don’t support void elements here (so `nobr wbr` -> `normal` is ignored). | ||
* | ||
* @param {HastNode} node | ||
* @param {CollectionOptions} options | ||
* @returns {Whitespace} | ||
*/ | ||
function inferWhitespace(node, options) { | ||
/** @type {HastProperties} */ | ||
var props | ||
switch (node.tagName) { | ||
case 'listing': | ||
case 'plaintext': | ||
case 'xmp': | ||
return 'pre' | ||
case 'nobr': | ||
return 'nowrap' | ||
case 'pre': | ||
return props.wrap ? 'pre-wrap' : 'pre' | ||
case 'td': | ||
case 'th': | ||
return props.noWrap ? 'nowrap' : inherit | ||
case 'textarea': | ||
return 'pre-wrap' | ||
default: | ||
return inherit | ||
if (node.type === 'element') { | ||
props = node.properties || {} | ||
switch (node.tagName) { | ||
case 'listing': | ||
case 'plaintext': | ||
case 'xmp': | ||
return 'pre' | ||
case 'nobr': | ||
return 'nowrap' | ||
case 'pre': | ||
return props.wrap ? 'pre-wrap' : 'pre' | ||
case 'td': | ||
case 'th': | ||
return props.noWrap ? 'nowrap' : options.whitespace | ||
case 'textarea': | ||
return 'pre-wrap' | ||
default: | ||
} | ||
} | ||
return options.whitespace | ||
} | ||
/** @type {TestFunctionAnything} */ | ||
function hidden(node) { | ||
return (node.properties || {}).hidden | ||
return Boolean((node.properties || {}).hidden) | ||
} | ||
/** @type {TestFunctionAnything} */ | ||
function closedDialog(node) { | ||
return node.tagName === 'dialog' && !(node.properties || {}).open | ||
} |
{ | ||
"name": "hast-util-to-text", | ||
"version": "2.0.1", | ||
"version": "3.0.0", | ||
"description": "hast utility to get the plain-text value of a node according to the `innerText` algorithm", | ||
@@ -28,37 +28,39 @@ "license": "MIT", | ||
], | ||
"sideEffects": false, | ||
"type": "module", | ||
"main": "index.js", | ||
"types": "index.d.ts", | ||
"files": [ | ||
"index.d.ts", | ||
"index.js" | ||
], | ||
"dependencies": { | ||
"hast-util-is-element": "^1.0.0", | ||
"@types/hast": "^2.0.0", | ||
"@types/repeat-string": "^1.0.0", | ||
"hast-util-is-element": "^2.0.0", | ||
"repeat-string": "^1.0.0", | ||
"unist-util-find-after": "^3.0.0" | ||
"unist-util-find-after": "^4.0.0" | ||
}, | ||
"devDependencies": { | ||
"browserify": "^17.0.0", | ||
"hastscript": "^6.0.0", | ||
"nyc": "^15.0.0", | ||
"@types/tape": "^4.0.0", | ||
"c8": "^7.0.0", | ||
"hastscript": "^7.0.0", | ||
"prettier": "^2.0.0", | ||
"remark-cli": "^9.0.0", | ||
"remark-preset-wooorm": "^8.0.0", | ||
"rimraf": "^3.0.0", | ||
"tape": "^5.0.0", | ||
"tinyify": "^3.0.0", | ||
"unist-builder": "^2.0.0", | ||
"xo": "^0.34.0" | ||
"type-coverage": "^2.0.0", | ||
"typescript": "^4.0.0", | ||
"unist-builder": "^3.0.0", | ||
"xo": "^0.39.0" | ||
}, | ||
"scripts": { | ||
"prepack": "npm run build && npm run format", | ||
"build": "rimraf \"*.d.ts\" && tsc && type-coverage", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"build-bundle": "browserify . -s hastUtilToText -o hast-util-to-text.js", | ||
"build-mangle": "browserify . -s hastUtilToText -p tinyify -o hast-util-to-text.min.js", | ||
"build": "npm run build-bundle && npm run build-mangle", | ||
"test-api": "node test", | ||
"test-coverage": "nyc --reporter lcov tape test.js", | ||
"test": "npm run format && npm run build && npm run test-coverage" | ||
"test-api": "node test.js", | ||
"test-coverage": "c8 --check-coverage --branches 100 --functions 100 --lines 100 --statements 100 --reporter lcov node test.js", | ||
"test": "npm run build && npm run format && npm run test-coverage" | ||
}, | ||
"nyc": { | ||
"check-coverage": true, | ||
"lines": 100, | ||
"functions": 100, | ||
"branches": 100 | ||
}, | ||
"prettier": { | ||
@@ -74,9 +76,5 @@ "tabWidth": 2, | ||
"prettier": true, | ||
"esnext": false, | ||
"ignores": [ | ||
"hast-util-to-text.js" | ||
], | ||
"rules": { | ||
"unicorn/escape-case": "off", | ||
"no-constant-condition": "off" | ||
"no-var": "off", | ||
"prefer-arrow-callback": "off" | ||
} | ||
@@ -88,3 +86,8 @@ }, | ||
] | ||
}, | ||
"typeCoverage": { | ||
"atLeast": 100, | ||
"detail": true, | ||
"strict": true | ||
} | ||
} |
@@ -23,2 +23,5 @@ # hast-util-to-text | ||
This package is [ESM only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c): | ||
Node 12+ is needed to use it and it must be `import`ed instead of `require`d. | ||
[npm][]: | ||
@@ -33,4 +36,4 @@ | ||
```js | ||
var h = require('hastscript') | ||
var toText = require('hast-util-to-text') | ||
import {h} from 'hastscript' | ||
import {toText} from 'hast-util-to-text' | ||
@@ -59,2 +62,5 @@ var tree = h('div', [ | ||
This package exports the following identifiers: `toText`. | ||
There is no default export. | ||
### `toText(node)` | ||
@@ -122,5 +128,5 @@ | ||
[build-badge]: https://img.shields.io/travis/syntax-tree/hast-util-to-text.svg | ||
[build-badge]: https://github.com/syntax-tree/hast-util-to-text/workflows/main/badge.svg | ||
[build]: https://travis-ci.org/syntax-tree/hast-util-to-text | ||
[build]: https://github.com/syntax-tree/hast-util-to-text/actions | ||
@@ -127,0 +133,0 @@ [coverage-badge]: https://img.shields.io/codecov/c/github/syntax-tree/hast-util-to-text.svg |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
26548
5
506
184
Yes
5
12
+ Added @types/hast@^2.0.0
+ Added @types/repeat-string@^1.0.0
+ Added @types/hast@2.3.10 (transitive)
+ Added @types/repeat-string@1.6.5 (transitive)
+ Added @types/unist@2.0.11 (transitive)
+ Added hast-util-is-element@2.1.3 (transitive)
+ Added unist-util-find-after@4.0.1 (transitive)
+ Added unist-util-is@5.2.1 (transitive)
- Removed hast-util-is-element@1.1.0 (transitive)
- Removed unist-util-find-after@3.0.0 (transitive)
- Removed unist-util-is@4.1.0 (transitive)
Updated hast-util-is-element@^2.0.0
Updated unist-util-find-after@^4.0.0