Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

hast-util-to-text

Package Overview
Dependencies
Maintainers
2
Versions
11
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

hast-util-to-text - npm Package Compare versions

Comparing version 2.0.0 to 2.0.1

448

index.js
'use strict'
var repeat = require('repeat-string')
var is = require('hast-util-is-element')
var convert = require('hast-util-is-element/convert')
var findAfter = require('unist-util-find-after')

@@ -9,29 +9,77 @@

// Methods.
var min = Math.min
var max = Math.max
var searchLineFeeds = /\n/g
var searchTabOrSpaces = /[\t ]+/g
// White space codes.
var tab = 0x9
var space = 0x20
var zeroWidthSpace = 0x200b
var br = convert('br')
var p = convert('p')
var cell = convert(['th', 'td'])
var row = convert('tr')
// Bidi control characters codes.
var alm = 0x61c
var ltr = 0x200e
var rtl = 0x200f
var lre = 0x202a
var rle = 0x202b
var pdf = 0x202c
var lro = 0x202d
var rlo = 0x202e
var lri = 0x2066
var rli = 0x2067
var fsi = 0x2068
var pdi = 0x2069
// Note that we don’t need to include void elements here as they don’t have text.
// See: <https://github.com/wooorm/html-void-elements>
// Check for elements whose content is not rendered.
// The tag names and the two function checks are handed to `convert` as one
// flat list, in the same order as before.
var notRendered = convert(
  [
    // Hidden elements, see: <https://html.spec.whatwg.org/#hidden-elements>
    'datalist',
    'head',
    'noembed',
    'noframes',
    'rp',
    'script',
    'style',
    'template',
    'title',
    // `noscript` is listed as well: behave as though scripting is supported.
    'noscript'
  ].concat(
    // Nodes with a `hidden` property.
    hidden,
    // Dialogs that are not open,
    // see: <https://html.spec.whatwg.org/#flow-content-3>
    closedDialog
  )
)
// Characters.
var tabChar = '\t'
var lineFeedChar = '\n'
var spaceChar = ' '
// Check for elements that are laid out as a block or as a table caption.
// `convert` turns this list of tag names into a check that is later called
// with a node; the result decides whether line breaks are required around the
// element's text.
// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
var blockOrCaption = convert([
  'caption', // `table-caption`
  // Page
  'html',
  'body',
  // Flow content
  'address',
  'blockquote',
  'center', // Legacy
  'dialog',
  'div',
  'figure',
  'figcaption',
  'footer',
  // Note: this used to read `'form,'` (comma inside the string), which can
  // never equal a tag name, so `<form>` was not treated as a block.
  'form',
  'header',
  'hr',
  'legend',
  'listing', // Legacy
  'main',
  'p',
  'plaintext', // Legacy
  'pre',
  'xmp', // Legacy
  // Sections and headings
  'article',
  'aside',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'hgroup',
  'nav',
  'section',
  // Lists
  'dir', // Legacy
  'dd',
  'dl',
  'dt',
  'menu',
  'ol',
  'ul'
])

@@ -44,3 +92,2 @@ // Implementation of the `innerText` getter:

var children = node.children || []
var length = children.length
var block = blockOrCaption(node)

@@ -50,3 +97,2 @@ var whiteSpace = inferWhiteSpace(node, {})

var results
var current
var result

@@ -86,3 +132,3 @@ var value

// 3. For each child node node of this element:
while (++index < length) {
while (++index < children.length) {
// 3.1. Let current be the list resulting in running the inner text

@@ -92,10 +138,11 @@ // collection steps with node.

// positive integer (a required line break count).
current = innerTextCollection(children[index], index, node, {
whiteSpace: whiteSpace,
breakBefore: index === 0 ? block : false,
breakAfter: index === length - 1 ? block : is(children[index + 1], 'br')
})
// 3.2. For each item item in current, append item to results.
results = results.concat(current)
results = results.concat(
innerTextCollection(children[index], index, node, {
whiteSpace: whiteSpace,
breakBefore: index ? null : block,
breakAfter:
index < children.length - 1 ? br(children[index + 1]) : block
})
)
}

@@ -111,17 +158,11 @@

index = -1
length = results.length
result = []
while (++index < length) {
while (++index < results.length) {
value = results[index]
if (typeof value === 'number') {
if (count !== undefined && value > count) {
count = value
}
} else if (value !== '') {
if (count) {
result.push(repeat(lineFeedChar, count))
}
if (count !== undefined && value > count) count = value
} else if (value) {
if (count) result.push(repeat('\n', count))
count = 0

@@ -154,10 +195,8 @@ result.push(value)

// Collect an element.
function collectElement(node, index, parent, options) {
function collectElement(node, _, parent, options) {
// First we infer the `white-space` property.
var whiteSpace = inferWhiteSpace(node, options)
var children = node.children || []
var length = children.length
var offset = -1
var index = -1
var items = []
var current
var prefix

@@ -188,4 +227,4 @@ var suffix

// U+000A LINE FEED (LF) character to items.
if (is(node, 'br')) {
suffix = lineFeedChar
if (br(node)) {
suffix = '\n'
}

@@ -202,3 +241,3 @@

else if (row(node) && findAfter(parent, node, row)) {
suffix = lineFeedChar
suffix = '\n'
}

@@ -208,3 +247,3 @@

// at the beginning and end of items.
else if (is(node, 'p')) {
else if (p(node)) {
prefix = 2

@@ -225,11 +264,11 @@ suffix = 2

// results to a single list.
while (++offset < length) {
current = innerTextCollection(children[offset], offset, node, {
whiteSpace: whiteSpace,
breakBefore: offset === 0 ? prefix : false,
breakAfter:
offset === length - 1 ? suffix : is(children[offset + 1], 'br')
})
items = items.concat(current)
while (++index < children.length) {
items = items.concat(
innerTextCollection(children[index], index, node, {
whiteSpace: whiteSpace,
breakBefore: index ? null : prefix,
breakAfter:
index < children.length - 1 ? br(children[index + 1]) : suffix
})
)
}

@@ -244,14 +283,9 @@

if (cell(node) && findAfter(parent, node, cell)) {
items.push(tabChar)
items.push('\t')
}
// Add the pre- and suffix.
if (prefix) {
items.unshift(prefix)
}
if (prefix) items.unshift(prefix)
if (suffix) items.push(suffix)
if (suffix) {
items.push(suffix)
}
return items

@@ -276,47 +310,34 @@ }

function collectText(node, options) {
var breakBefore = options.breakBefore
var breakAfter = options.breakAfter
var value = String(node.value)
var index = -1
var length = value.length
var lines = []
var result = []
var lineStart
var lineEnd
var line
var nextLine
var queue
var start = 0
var index = -1
var match
var end
var join
lineStart = 0
lineEnd = value.indexOf(lineFeedChar)
lineEnd = lineEnd === -1 ? value.length : lineEnd
while (start < value.length) {
searchLineFeeds.lastIndex = start
match = searchLineFeeds.exec(value)
end = match ? match.index : value.length
while (lineEnd !== -1) {
line = value.slice(lineStart, lineEnd)
lines.push(
// Any sequence of collapsible spaces and tabs immediately preceding or
// following a segment break is removed.
trimAndcollapseSpacesAndTabs(
// [...] ignoring bidi formatting characters (characters with the
// Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if
// they were not there.
value
.slice(start, end)
.replace(/[\u061c\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ''),
options.breakBefore,
options.breakAfter
)
)
// [...] ignoring bidi formatting characters (characters with the
// Bidi_Control property [UAX9]) as if they were not there.
line = removeBidiControlCharacters(line)
// Any sequence of collapsible spaces and tabs immediately preceding or
// following a segment break is removed.
line = trimAndcollapseSpacesAndTabs(line, breakBefore, breakAfter)
// Add the line.
lines.push(line)
// Stop.
if (lineEnd === value.length) {
break
}
lineStart = lineEnd + 1
lineEnd = value.indexOf(lineFeedChar, lineStart)
lineEnd = lineEnd === -1 ? value.length : lineEnd
start = end + 1
}
index = -1
length = lines.length
queue = ''
// Collapsible segment breaks are transformed for rendering according to the

@@ -327,6 +348,3 @@ // segment break transformation rules.

// segment break is removed
while (++index < length) {
line = lines[index]
nextLine = lines[index + 1] || ''
while (++index < lines.length) {
// * If the character immediately before or immediately after the segment

@@ -336,8 +354,8 @@ // break is the zero-width space character (U+200B), then the break is

if (
line.charCodeAt(line.length - 1) === zeroWidthSpace ||
nextLine.charCodeAt(0) === zeroWidthSpace
lines[index].charCodeAt(lines[index].length - 1) === 0x200b /* ZWSP */ ||
(index < lines.length - 1 &&
lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */
) {
result.push(line)
queue = ''
continue
result.push(lines[index])
join = ''
}

@@ -351,3 +369,2 @@

// Note: ignored.
// * Otherwise, if the writing system of the segment break is Chinese,

@@ -363,9 +380,6 @@ // Japanese, or Yi, and the character before or after the segment break

// * Otherwise, the segment break is converted to a space (U+0020).
if (line) {
if (queue) {
result.push(queue)
}
result.push(line)
queue = spaceChar
else if (lines[index]) {
if (join) result.push(join)
result.push(lines[index])
join = ' '
}

@@ -381,18 +395,2 @@ }

// Return a copy of `value` without bidi control characters.
// The string is walked one UTF-16 code unit at a time; a unit is kept when
// `isBidiControlCharacter` returns a falsy value for its character code.
function removeBidiControlCharacters(value) {
  var kept = []
  var position = 0

  for (; position < value.length; position++) {
    if (!isBidiControlCharacter(value.charCodeAt(position))) {
      kept.push(value.charAt(position))
    }
  }

  return kept.join('')
}
// 3. Every collapsible tab is converted to a collapsible space (U+0020).

@@ -405,60 +403,33 @@ // 4. Any collapsible space immediately following another collapsible

function trimAndcollapseSpacesAndTabs(value, breakBefore, breakAfter) {
var result = []
var start = 0
var match
var end
var length = value.length
var result = []
var char
// Move forward past initial white space.
while (start <= length) {
char = value.charCodeAt(start)
while (start < value.length) {
searchTabOrSpaces.lastIndex = start
match = searchTabOrSpaces.exec(value)
end = match ? match.index : value.length
if (char !== space && char !== tab) {
break
// If we’re not directly after a segment break, but there was white space,
// add an empty value that will be turned into a space.
if (!start && !end && match && !breakBefore) {
result.push('')
}
start++
if (start !== end) {
result.push(value.slice(start, end))
}
start = match ? end + match[0].length : end
}
// If we’re not directly after a segment break, but there was white space,
// add an empty value that will be turned into a space.
if (start !== 0 && !breakBefore) {
// If we reached the end, there was trailing white space, and there’s no
// segment break after this node, add an empty value that will be turned
// into a space.
if (start !== end && !breakAfter) {
result.push('')
}
end = next(start - 1)
while (start < length) {
end = end === -1 ? length : end
result.push(value.slice(start, end))
start = end
while (start <= length) {
char = value.charCodeAt(start)
if (char !== space && char !== tab) {
break
}
start++
}
// If we reached the end, there was trailing white space, and there’s no
// segment break after this node, add an empty value that will be turned
// into a space.
if (start === length && start !== end && !breakAfter) {
result.push('')
}
end = next(start)
}
return result.join(' ')
function next(index) {
var spaceIndex = value.indexOf(spaceChar, index + 1)
var tabIndex = value.indexOf(tabChar, index + 1)
var fn = spaceIndex === -1 || tabIndex === -1 ? max : min
return fn(spaceIndex, tabIndex)
}
}

@@ -490,105 +461,8 @@

function isBidiControlCharacter(char) {
switch (char) {
case alm:
case ltr:
case rtl:
case lre:
case rle:
case pdf:
case lro:
case rlo:
case lri:
case rli:
case fsi:
case pdi:
return true
default:
return false
}
// Read the `hidden` property of `node`.
// Yields `undefined` when the node has no `properties` object or no `hidden`
// field on it.
function hidden(node) {
  var properties = node.properties || {}
  return properties.hidden
}
function cell(node) {
return is(node, ['th', 'td'])
// Check whether `node` has the tag name `dialog` and no truthy `open`
// property.
function closedDialog(node) {
  var properties

  if (node.tagName !== 'dialog') {
    return false
  }

  properties = node.properties || {}
  return !properties.open
}
// Check `node` against the tag name `tr` by delegating to `is`.
function row(node) {
  var rowNames = ['tr']
  return is(node, rowNames)
}
// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
// Check whether `node` is an element that is laid out as a block or as a
// table caption, by testing its tag name with `is`.
// Returns whatever `is` returns for the list below.
function blockOrCaption(node) {
  return is(node, [
    'caption', // `table-caption`
    // Page
    'html',
    'body',
    // Flow content
    'address',
    'blockquote',
    'center', // Legacy
    'dialog',
    'div',
    'figure',
    'figcaption',
    'footer',
    // Note: this used to read `'form,'` (comma inside the string), which can
    // never equal a tag name, so `<form>` was not treated as a block.
    'form',
    'header',
    'hr',
    'legend',
    'listing', // Legacy
    'main',
    'p',
    'plaintext', // Legacy
    'pre',
    'xmp', // Legacy
    // Sections and headings
    'article',
    'aside',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hgroup',
    'nav',
    'section',
    // Lists
    'dir', // Legacy
    'dd',
    'dl',
    'dt',
    'menu',
    'ol',
    'ul'
  ])
}
// Note that we don’t need to include void elements here as they don’t have text.
//
// See: <https://github.com/wooorm/html-void-elements>
// Decide whether `node` is not rendered.
// Three checks run in order and the first truthy value is returned as is:
// a tag name from the hidden-elements list, a truthy `hidden` property, or a
// `dialog` without a truthy `open` property.
function notRendered(node) {
  var properties = node.properties || {}
  var listed
  var hiddenValue

  // List from: <https://html.spec.whatwg.org/#hidden-elements>
  listed = is(node, [
    'datalist',
    'head',
    'noembed',
    'noframes',
    'rp',
    'script',
    'style',
    'template',
    'title',
    // Act as if we support scripting.
    'noscript'
  ])

  if (listed) {
    return listed
  }

  // Hidden attribute.
  hiddenValue = properties.hidden

  if (hiddenValue) {
    return hiddenValue
  }

  // From: <https://html.spec.whatwg.org/#flow-content-3>
  return is(node, 'dialog') && !properties.open
}
{
"name": "hast-util-to-text",
"version": "2.0.0",
"version": "2.0.1",
"description": "hast utility to get the plain-text value of a node according to the `innerText` algorithm",

@@ -37,15 +37,15 @@ "license": "MIT",

"devDependencies": {
"browserify": "^16.0.0",
"hastscript": "^5.0.0",
"browserify": "^17.0.0",
"hastscript": "^6.0.0",
"nyc": "^15.0.0",
"prettier": "^1.0.0",
"remark-cli": "^7.0.0",
"remark-preset-wooorm": "^6.0.0",
"tape": "^4.0.0",
"tinyify": "^2.0.0",
"prettier": "^2.0.0",
"remark-cli": "^9.0.0",
"remark-preset-wooorm": "^8.0.0",
"tape": "^5.0.0",
"tinyify": "^3.0.0",
"unist-builder": "^2.0.0",
"xo": "^0.27.0"
"xo": "^0.34.0"
},
"scripts": {
"format": "remark . -qfo && prettier --write \"**/*.js\" && xo --fix",
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
"build-bundle": "browserify . -s hastUtilToText -o hast-util-to-text.js",

@@ -77,3 +77,7 @@ "build-mangle": "browserify . -s hastUtilToText -p tinyify -o hast-util-to-text.min.js",

"hast-util-to-text.js"
]
],
"rules": {
"unicorn/escape-case": "off",
"no-constant-condition": "off"
}
},

@@ -80,0 +84,0 @@ "remarkConfig": {

@@ -96,7 +96,7 @@ # hast-util-to-text

* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-to-string)
* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string)
— Get the plain-text value (`textContent`)
* [`hast-util-from-text`](https://github.com/syntax-tree/hast-util-from-text)
— Set the plain-text value (`innerText`)
* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-from-string)
* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-from-string)
— Set the plain-text value (`textContent`)

@@ -142,5 +142,5 @@

[chat-badge]: https://img.shields.io/badge/chat-spectrum-7b16ff.svg
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://spectrum.chat/unified/rehype
[chat]: https://github.com/syntax-tree/unist/discussions

@@ -153,7 +153,7 @@ [npm]: https://docs.npmjs.com/cli/install

[contributing]: https://github.com/syntax-tree/.github/blob/master/contributing.md
[contributing]: https://github.com/syntax-tree/.github/blob/HEAD/contributing.md
[support]: https://github.com/syntax-tree/.github/blob/master/support.md
[support]: https://github.com/syntax-tree/.github/blob/HEAD/support.md
[coc]: https://github.com/syntax-tree/.github/blob/master/code-of-conduct.md
[coc]: https://github.com/syntax-tree/.github/blob/HEAD/code-of-conduct.md

@@ -164,3 +164,3 @@ [html]: https://html.spec.whatwg.org/#the-innertext-idl-attribute

[to-string]: https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-to-string
[to-string]: https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string

@@ -167,0 +167,0 @@ [descendant]: https://github.com/syntax-tree/unist#descendant

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc