hast-util-to-text
Advanced tools
Comparing version 2.0.0 to 2.0.1
448
index.js
'use strict' | ||
var repeat = require('repeat-string') | ||
var is = require('hast-util-is-element') | ||
var convert = require('hast-util-is-element/convert') | ||
var findAfter = require('unist-util-find-after') | ||
@@ -9,29 +9,77 @@ | ||
// Methods. | ||
var min = Math.min | ||
var max = Math.max | ||
var searchLineFeeds = /\n/g | ||
var searchTabOrSpaces = /[\t ]+/g | ||
// White space codes. | ||
var tab = 0x9 | ||
var space = 0x20 | ||
var zeroWidthSpace = 0x200b | ||
var br = convert('br') | ||
var p = convert('p') | ||
var cell = convert(['th', 'td']) | ||
var row = convert('tr') | ||
// Bidi control characters codes. | ||
var alm = 0x61c | ||
var ltr = 0x200e | ||
var rtl = 0x200f | ||
var lre = 0x202a | ||
var rle = 0x202b | ||
var pdf = 0x202c | ||
var lro = 0x202d | ||
var rlo = 0x202e | ||
var lri = 0x2066 | ||
var rli = 0x2067 | ||
var fsi = 0x2068 | ||
var pdi = 0x2069 | ||
// Check for elements whose contents are not rendered.
// Void elements are left out on purpose: they have no text to begin with.
// See: <https://github.com/wooorm/html-void-elements>
var notRendered = convert(
  [
    // Tag names from: <https://html.spec.whatwg.org/#hidden-elements>
    'datalist',
    'head',
    'noembed',
    'noframes',
    'rp',
    'script',
    'style',
    'template',
    'title',
    // Scripting is treated as supported, so `noscript` is hidden too.
    'noscript'
  ].concat([
    // Elements carrying the `hidden` attribute.
    hidden,
    // Dialogs that are not open.
    // From: <https://html.spec.whatwg.org/#flow-content-3>
    closedDialog
  ])
)
// Characters. | ||
var tabChar = '\t' | ||
var lineFeedChar = '\n' | ||
var spaceChar = ' ' | ||
// Check for elements that are `display: block` or `display: table-caption`
// in the user agent style sheet.
// The list is handed to `convert` (from `hast-util-is-element/convert`), and
// the resulting check is called with a node.
// Every entry must be an exact tag name: a stray character inside the string
// (such as a trailing comma) makes the entry never match.
// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
var blockOrCaption = convert([
  'caption', // `table-caption`
  // Page
  'html',
  'body',
  // Flow content
  'address',
  'blockquote',
  'center', // Legacy
  'dialog',
  'div',
  'figure',
  'figcaption',
  'footer',
  'form',
  'header',
  'hr',
  'legend',
  'listing', // Legacy
  'main',
  'p',
  'plaintext', // Legacy
  'pre',
  'xmp', // Legacy
  // Sections and headings
  'article',
  'aside',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'hgroup',
  'nav',
  'section',
  // Lists
  'dir', // Legacy
  'dd',
  'dl',
  'dt',
  'menu',
  'ol',
  'ul'
])
@@ -44,3 +92,2 @@ // Implementation of the `innerText` getter: | ||
var children = node.children || [] | ||
var length = children.length | ||
var block = blockOrCaption(node) | ||
@@ -50,3 +97,2 @@ var whiteSpace = inferWhiteSpace(node, {}) | ||
var results | ||
var current | ||
var result | ||
@@ -86,3 +132,3 @@ var value | ||
// 3. For each child node node of this element: | ||
while (++index < length) { | ||
while (++index < children.length) { | ||
// 3.1. Let current be the list resulting in running the inner text | ||
@@ -92,10 +138,11 @@ // collection steps with node. | ||
// positive integer (a required line break count). | ||
current = innerTextCollection(children[index], index, node, { | ||
whiteSpace: whiteSpace, | ||
breakBefore: index === 0 ? block : false, | ||
breakAfter: index === length - 1 ? block : is(children[index + 1], 'br') | ||
}) | ||
// 3.2. For each item item in current, append item to results. | ||
results = results.concat(current) | ||
results = results.concat( | ||
innerTextCollection(children[index], index, node, { | ||
whiteSpace: whiteSpace, | ||
breakBefore: index ? null : block, | ||
breakAfter: | ||
index < children.length - 1 ? br(children[index + 1]) : block | ||
}) | ||
) | ||
} | ||
@@ -111,17 +158,11 @@ | ||
index = -1 | ||
length = results.length | ||
result = [] | ||
while (++index < length) { | ||
while (++index < results.length) { | ||
value = results[index] | ||
if (typeof value === 'number') { | ||
if (count !== undefined && value > count) { | ||
count = value | ||
} | ||
} else if (value !== '') { | ||
if (count) { | ||
result.push(repeat(lineFeedChar, count)) | ||
} | ||
if (count !== undefined && value > count) count = value | ||
} else if (value) { | ||
if (count) result.push(repeat('\n', count)) | ||
count = 0 | ||
@@ -154,10 +195,8 @@ result.push(value) | ||
// Collect an element. | ||
function collectElement(node, index, parent, options) { | ||
function collectElement(node, _, parent, options) { | ||
// First we infer the `white-space` property. | ||
var whiteSpace = inferWhiteSpace(node, options) | ||
var children = node.children || [] | ||
var length = children.length | ||
var offset = -1 | ||
var index = -1 | ||
var items = [] | ||
var current | ||
var prefix | ||
@@ -188,4 +227,4 @@ var suffix | ||
// U+000A LINE FEED (LF) character to items. | ||
if (is(node, 'br')) { | ||
suffix = lineFeedChar | ||
if (br(node)) { | ||
suffix = '\n' | ||
} | ||
@@ -202,3 +241,3 @@ | ||
else if (row(node) && findAfter(parent, node, row)) { | ||
suffix = lineFeedChar | ||
suffix = '\n' | ||
} | ||
@@ -208,3 +247,3 @@ | ||
// at the beginning and end of items. | ||
else if (is(node, 'p')) { | ||
else if (p(node)) { | ||
prefix = 2 | ||
@@ -225,11 +264,11 @@ suffix = 2 | ||
// results to a single list. | ||
while (++offset < length) { | ||
current = innerTextCollection(children[offset], offset, node, { | ||
whiteSpace: whiteSpace, | ||
breakBefore: offset === 0 ? prefix : false, | ||
breakAfter: | ||
offset === length - 1 ? suffix : is(children[offset + 1], 'br') | ||
}) | ||
items = items.concat(current) | ||
while (++index < children.length) { | ||
items = items.concat( | ||
innerTextCollection(children[index], index, node, { | ||
whiteSpace: whiteSpace, | ||
breakBefore: index ? null : prefix, | ||
breakAfter: | ||
index < children.length - 1 ? br(children[index + 1]) : suffix | ||
}) | ||
) | ||
} | ||
@@ -244,14 +283,9 @@ | ||
if (cell(node) && findAfter(parent, node, cell)) { | ||
items.push(tabChar) | ||
items.push('\t') | ||
} | ||
// Add the pre- and suffix. | ||
if (prefix) { | ||
items.unshift(prefix) | ||
} | ||
if (prefix) items.unshift(prefix) | ||
if (suffix) items.push(suffix) | ||
if (suffix) { | ||
items.push(suffix) | ||
} | ||
return items | ||
@@ -276,47 +310,34 @@ } | ||
function collectText(node, options) { | ||
var breakBefore = options.breakBefore | ||
var breakAfter = options.breakAfter | ||
var value = String(node.value) | ||
var index = -1 | ||
var length = value.length | ||
var lines = [] | ||
var result = [] | ||
var lineStart | ||
var lineEnd | ||
var line | ||
var nextLine | ||
var queue | ||
var start = 0 | ||
var index = -1 | ||
var match | ||
var end | ||
var join | ||
lineStart = 0 | ||
lineEnd = value.indexOf(lineFeedChar) | ||
lineEnd = lineEnd === -1 ? value.length : lineEnd | ||
while (start < value.length) { | ||
searchLineFeeds.lastIndex = start | ||
match = searchLineFeeds.exec(value) | ||
end = match ? match.index : value.length | ||
while (lineEnd !== -1) { | ||
line = value.slice(lineStart, lineEnd) | ||
lines.push( | ||
// Any sequence of collapsible spaces and tabs immediately preceding or | ||
// following a segment break is removed. | ||
trimAndcollapseSpacesAndTabs( | ||
// [...] ignoring bidi formatting characters (characters with the | ||
// Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if | ||
// they were not there. | ||
value | ||
.slice(start, end) | ||
.replace(/[\u061c\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ''), | ||
options.breakBefore, | ||
options.breakAfter | ||
) | ||
) | ||
// [...] ignoring bidi formatting characters (characters with the | ||
// Bidi_Control property [UAX9]) as if they were not there. | ||
line = removeBidiControlCharacters(line) | ||
// Any sequence of collapsible spaces and tabs immediately preceding or | ||
// following a segment break is removed. | ||
line = trimAndcollapseSpacesAndTabs(line, breakBefore, breakAfter) | ||
// Add the line. | ||
lines.push(line) | ||
// Stop. | ||
if (lineEnd === value.length) { | ||
break | ||
} | ||
lineStart = lineEnd + 1 | ||
lineEnd = value.indexOf(lineFeedChar, lineStart) | ||
lineEnd = lineEnd === -1 ? value.length : lineEnd | ||
start = end + 1 | ||
} | ||
index = -1 | ||
length = lines.length | ||
queue = '' | ||
// Collapsible segment breaks are transformed for rendering according to the | ||
@@ -327,6 +348,3 @@ // segment break transformation rules. | ||
// segment break is removed | ||
while (++index < length) { | ||
line = lines[index] | ||
nextLine = lines[index + 1] || '' | ||
while (++index < lines.length) { | ||
// * If the character immediately before or immediately after the segment | ||
@@ -336,8 +354,8 @@ // break is the zero-width space character (U+200B), then the break is | ||
if ( | ||
line.charCodeAt(line.length - 1) === zeroWidthSpace || | ||
nextLine.charCodeAt(0) === zeroWidthSpace | ||
lines[index].charCodeAt(lines[index].length - 1) === 0x200b /* ZWSP */ || | ||
(index < lines.length - 1 && | ||
lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */ | ||
) { | ||
result.push(line) | ||
queue = '' | ||
continue | ||
result.push(lines[index]) | ||
join = '' | ||
} | ||
@@ -351,3 +369,2 @@ | ||
// Note: ignored. | ||
// * Otherwise, if the writing system of the segment break is Chinese, | ||
@@ -363,9 +380,6 @@ // Japanese, or Yi, and the character before or after the segment break | ||
// * Otherwise, the segment break is converted to a space (U+0020). | ||
if (line) { | ||
if (queue) { | ||
result.push(queue) | ||
} | ||
result.push(line) | ||
queue = spaceChar | ||
else if (lines[index]) { | ||
if (join) result.push(join) | ||
result.push(lines[index]) | ||
join = ' ' | ||
} | ||
@@ -381,18 +395,2 @@ } | ||
// Return `value` without its bidi control characters; all other characters
// are kept in their original order.
function removeBidiControlCharacters(value) {
  var kept = []
  var position

  for (position = 0; position < value.length; position++) {
    // Only characters that are *not* bidi controls make it into the output.
    if (!isBidiControlCharacter(value.charCodeAt(position))) {
      kept.push(value.charAt(position))
    }
  }

  return kept.join('')
}
// 3. Every collapsible tab is converted to a collapsible space (U+0020). | ||
@@ -405,60 +403,33 @@ // 4. Any collapsible space immediately following another collapsible | ||
function trimAndcollapseSpacesAndTabs(value, breakBefore, breakAfter) { | ||
var result = [] | ||
var start = 0 | ||
var match | ||
var end | ||
var length = value.length | ||
var result = [] | ||
var char | ||
// Move forward past initial white space. | ||
while (start <= length) { | ||
char = value.charCodeAt(start) | ||
while (start < value.length) { | ||
searchTabOrSpaces.lastIndex = start | ||
match = searchTabOrSpaces.exec(value) | ||
end = match ? match.index : value.length | ||
if (char !== space && char !== tab) { | ||
break | ||
// If we’re not directly after a segment break, but there was white space, | ||
// add an empty value that will be turned into a space. | ||
if (!start && !end && match && !breakBefore) { | ||
result.push('') | ||
} | ||
start++ | ||
if (start !== end) { | ||
result.push(value.slice(start, end)) | ||
} | ||
start = match ? end + match[0].length : end | ||
} | ||
// If we’re not directly after a segment break, but there was white space, | ||
// add an empty value that will be turned into a space. | ||
if (start !== 0 && !breakBefore) { | ||
// If we reached the end, there was trailing white space, and there’s no | ||
// segment break after this node, add an empty value that will be turned | ||
// into a space. | ||
if (start !== end && !breakAfter) { | ||
result.push('') | ||
} | ||
end = next(start - 1) | ||
while (start < length) { | ||
end = end === -1 ? length : end | ||
result.push(value.slice(start, end)) | ||
start = end | ||
while (start <= length) { | ||
char = value.charCodeAt(start) | ||
if (char !== space && char !== tab) { | ||
break | ||
} | ||
start++ | ||
} | ||
// If we reached the end, there was trailing white space, and there’s no | ||
// segment break after this node, add an empty value that will be turned | ||
// into a space. | ||
if (start === length && start !== end && !breakAfter) { | ||
result.push('') | ||
} | ||
end = next(start) | ||
} | ||
return result.join(' ') | ||
function next(index) { | ||
var spaceIndex = value.indexOf(spaceChar, index + 1) | ||
var tabIndex = value.indexOf(tabChar, index + 1) | ||
var fn = spaceIndex === -1 || tabIndex === -1 ? max : min | ||
return fn(spaceIndex, tabIndex) | ||
} | ||
} | ||
@@ -490,105 +461,8 @@ | ||
function isBidiControlCharacter(char) { | ||
switch (char) { | ||
case alm: | ||
case ltr: | ||
case rtl: | ||
case lre: | ||
case rle: | ||
case pdf: | ||
case lro: | ||
case rlo: | ||
case lri: | ||
case rli: | ||
case fsi: | ||
case pdi: | ||
return true | ||
default: | ||
return false | ||
} | ||
function hidden(node) { | ||
return (node.properties || {}).hidden | ||
} | ||
function cell(node) { | ||
return is(node, ['th', 'td']) | ||
function closedDialog(node) { | ||
return node.tagName === 'dialog' && !(node.properties || {}).open | ||
} | ||
// Check whether `node` is a table row (`tr`) element.
function row(node) {
  var rowNames = ['tr']

  return is(node, rowNames)
}
// Check whether `node` is an element that is `display: block` or
// `display: table-caption` in the user agent style sheet.
// Returns the result of `is(node, names)`.
// Every entry must be an exact tag name: a stray character inside the string
// (such as a trailing comma) makes the entry never match.
// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
function blockOrCaption(node) {
  return is(node, [
    'caption', // `table-caption`
    // Page
    'html',
    'body',
    // Flow content
    'address',
    'blockquote',
    'center', // Legacy
    'dialog',
    'div',
    'figure',
    'figcaption',
    'footer',
    'form',
    'header',
    'hr',
    'legend',
    'listing', // Legacy
    'main',
    'p',
    'plaintext', // Legacy
    'pre',
    'xmp', // Legacy
    // Sections and headings
    'article',
    'aside',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hgroup',
    'nav',
    'section',
    // Lists
    'dir', // Legacy
    'dd',
    'dl',
    'dt',
    'menu',
    'ol',
    'ul'
  ])
}
// Check whether the contents of `node` are not rendered.
// Void elements are left out on purpose: they have no text to begin with.
//
// See: <https://github.com/wooorm/html-void-elements>
function notRendered(node) {
  var props = node.properties || {}
  // Tag names from: <https://html.spec.whatwg.org/#hidden-elements>
  var hiddenElement = is(node, [
    'datalist',
    'head',
    'noembed',
    'noframes',
    'rp',
    'script',
    'style',
    'template',
    'title',
    // Scripting is treated as supported, so `noscript` is hidden too.
    'noscript'
  ])

  if (hiddenElement) {
    return hiddenElement
  }

  // Elements carrying the `hidden` attribute.
  if (props.hidden) {
    return props.hidden
  }

  // Dialogs that are not open.
  // From: <https://html.spec.whatwg.org/#flow-content-3>
  return is(node, 'dialog') && !props.open
}
{ | ||
"name": "hast-util-to-text", | ||
"version": "2.0.0", | ||
"version": "2.0.1", | ||
"description": "hast utility to get the plain-text value of a node according to the `innerText` algorithm", | ||
@@ -37,15 +37,15 @@ "license": "MIT", | ||
"devDependencies": { | ||
"browserify": "^16.0.0", | ||
"hastscript": "^5.0.0", | ||
"browserify": "^17.0.0", | ||
"hastscript": "^6.0.0", | ||
"nyc": "^15.0.0", | ||
"prettier": "^1.0.0", | ||
"remark-cli": "^7.0.0", | ||
"remark-preset-wooorm": "^6.0.0", | ||
"tape": "^4.0.0", | ||
"tinyify": "^2.0.0", | ||
"prettier": "^2.0.0", | ||
"remark-cli": "^9.0.0", | ||
"remark-preset-wooorm": "^8.0.0", | ||
"tape": "^5.0.0", | ||
"tinyify": "^3.0.0", | ||
"unist-builder": "^2.0.0", | ||
"xo": "^0.27.0" | ||
"xo": "^0.34.0" | ||
}, | ||
"scripts": { | ||
"format": "remark . -qfo && prettier --write \"**/*.js\" && xo --fix", | ||
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix", | ||
"build-bundle": "browserify . -s hastUtilToText -o hast-util-to-text.js", | ||
@@ -77,3 +77,7 @@ "build-mangle": "browserify . -s hastUtilToText -p tinyify -o hast-util-to-text.min.js", | ||
"hast-util-to-text.js" | ||
] | ||
], | ||
"rules": { | ||
"unicorn/escape-case": "off", | ||
"no-constant-condition": "off" | ||
} | ||
}, | ||
@@ -80,0 +84,0 @@ "remarkConfig": { |
@@ -96,7 +96,7 @@ # hast-util-to-text | ||
* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-to-string) | ||
* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string) | ||
— Get the plain-text value (`textContent`) | ||
* [`hast-util-from-text`](https://github.com/syntax-tree/hast-util-from-text) | ||
— Set the plain-text value (`innerText`) | ||
* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-from-string) | ||
* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-from-string) | ||
— Set the plain-text value (`textContent`) | ||
@@ -142,5 +142,5 @@ | ||
[chat-badge]: https://img.shields.io/badge/chat-spectrum-7b16ff.svg | ||
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg | ||
[chat]: https://spectrum.chat/unified/rehype | ||
[chat]: https://github.com/syntax-tree/unist/discussions | ||
@@ -153,7 +153,7 @@ [npm]: https://docs.npmjs.com/cli/install | ||
[contributing]: https://github.com/syntax-tree/.github/blob/master/contributing.md | ||
[contributing]: https://github.com/syntax-tree/.github/blob/HEAD/contributing.md | ||
[support]: https://github.com/syntax-tree/.github/blob/master/support.md | ||
[support]: https://github.com/syntax-tree/.github/blob/HEAD/support.md | ||
[coc]: https://github.com/syntax-tree/.github/blob/master/code-of-conduct.md | ||
[coc]: https://github.com/syntax-tree/.github/blob/HEAD/code-of-conduct.md | ||
@@ -164,3 +164,3 @@ [html]: https://html.spec.whatwg.org/#the-innertext-idl-attribute | ||
[to-string]: https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-to-string | ||
[to-string]: https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string | ||
@@ -167,0 +167,0 @@ [descendant]: https://github.com/syntax-tree/unist#descendant |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
22075
396