hast-util-to-text - npm Package Compare versions

Comparing version 2.0.0 to 2.0.1

448

index.js

		'use strict'

		var repeat = require('repeat-string')
		var is = require('hast-util-is-element')
		var convert = require('hast-util-is-element/convert')
		var findAfter = require('unist-util-find-after')
		@@ -9,29 +9,77 @@

		// Methods.
		// `min`/`max` are chosen between in `next` to pick one of two `indexOf`
		// results.
		var min = Math.min
		var max = Math.max
		// Shared global (`g`) regexes: callers assign `lastIndex` before each
		// `exec` so the search starts at a known offset.
		var searchLineFeeds = /\n/g
		var searchTabOrSpaces = /[\t ]+/g

		// White space codes.
		// Compared against `charCodeAt` results.
		var tab = 0x9
		var space = 0x20
		var zeroWidthSpace = 0x200b
		// Element tests built with `convert` for `br`, `p`, table cells (`th`,
		// `td`), and `tr`.
		var br = convert('br')
		var p = convert('p')
		var cell = convert(['th', 'td'])
		var row = convert('tr')

		// Bidi control characters codes.
		// Each one is a `case` in `isBidiControlCharacter`.
		var alm = 0x61c
		var ltr = 0x200e
		var rtl = 0x200f
		var lre = 0x202a
		var rle = 0x202b
		var pdf = 0x202c
		var lro = 0x202d
		var rlo = 0x202e
		var lri = 0x2066
		var rli = 0x2067
		var fsi = 0x2068
		var pdi = 0x2069
		// Test matching elements that are not rendered: the names on the HTML
		// hidden-elements list (plus `noscript`), anything for which `hidden`
		// returns a truthy value, and anything matched by `closedDialog`.
		//
		// Void elements are left out on purpose: they have no text anyway.
		// See: <https://github.com/wooorm/html-void-elements>
		var notRendered = convert(
		  [
		    // List from: <https://html.spec.whatwg.org/#hidden-elements>
		    'datalist',
		    'head',
		    'noembed',
		    'noframes',
		    'rp',
		    'script',
		    'style',
		    'template',
		    'title',
		    // Act as if we support scripting.
		    'noscript'
		  ].concat([
		    // Hidden attribute.
		    hidden,
		    // From: <https://html.spec.whatwg.org/#flow-content-3>
		    closedDialog
		  ])
		)

		// Characters.
		// Single-character strings for tab, line feed, and space; they are pushed
		// into result lists, repeated, or searched for with `indexOf`.
		var tabChar = '\t'
		var lineFeedChar = '\n'
		var spaceChar = ' '
		// Test matching elements whose default `display` is `block` or
		// `table-caption` according to the HTML user agent style sheet.
		//
		// Note: the entry for forms must be exactly `'form'`; a stray comma inside
		// the string (`'form,'`) never matches a tag name, which silently drops
		// `form` from the block-level set.
		//
		// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
		var blockOrCaption = convert([
		  'caption', // `table-caption`
		  // Page
		  'html',
		  'body',
		  // Flow content
		  'address',
		  'blockquote',
		  'center', // Legacy
		  'dialog',
		  'div',
		  'figure',
		  'figcaption',
		  'footer',
		  'form',
		  'header',
		  'hr',
		  'legend',
		  'listing', // Legacy
		  'main',
		  'p',
		  'plaintext', // Legacy
		  'pre',
		  'xmp', // Legacy
		  // Sections and headings
		  'article',
		  'aside',
		  'h1',
		  'h2',
		  'h3',
		  'h4',
		  'h5',
		  'h6',
		  'hgroup',
		  'nav',
		  'section',
		  // Lists
		  'dir', // Legacy
		  'dd',
		  'dl',
		  'dt',
		  'menu',
		  'ol',
		  'ul'
		])

		@@ -44,3 +92,2 @@ // Implementation of the `innerText` getter:
		var children = node.children \|\| []
		var length = children.length
		var block = blockOrCaption(node)
		@@ -50,3 +97,2 @@ var whiteSpace = inferWhiteSpace(node, {})
		var results
		var current
		var result
		@@ -86,3 +132,3 @@ var value
		// 3. For each child node node of this element:
		while (++index < length) {
		while (++index < children.length) {
		// 3.1. Let current be the list resulting in running the inner text
		@@ -92,10 +138,11 @@ // collection steps with node.
		// positive integer (a required line break count).
		current = innerTextCollection(children[index], index, node, {
		whiteSpace: whiteSpace,
		breakBefore: index === 0 ? block : false,
		breakAfter: index === length - 1 ? block : is(children[index + 1], 'br')
		})

		// 3.2. For each item item in current, append item to results.
		results = results.concat(current)
		results = results.concat(
		innerTextCollection(children[index], index, node, {
		whiteSpace: whiteSpace,
		breakBefore: index ? null : block,
		breakAfter:
		index < children.length - 1 ? br(children[index + 1]) : block
		})
		)
		}
		@@ -111,17 +158,11 @@
		index = -1
		length = results.length
		result = []

		while (++index < length) {
		while (++index < results.length) {
		value = results[index]

		if (typeof value === 'number') {
		if (count !== undefined && value > count) {
		count = value
		}
		} else if (value !== '') {
		if (count) {
		result.push(repeat(lineFeedChar, count))
		}

		if (count !== undefined && value > count) count = value
		} else if (value) {
		if (count) result.push(repeat('\n', count))
		count = 0
		@@ -154,10 +195,8 @@ result.push(value)
		// Collect an element.
		function collectElement(node, index, parent, options) {
		function collectElement(node, _, parent, options) {
		// First we infer the `white-space` property.
		var whiteSpace = inferWhiteSpace(node, options)
		var children = node.children \|\| []
		var length = children.length
		var offset = -1
		var index = -1
		var items = []
		var current
		var prefix
		@@ -188,4 +227,4 @@ var suffix
		// U+000A LINE FEED (LF) character to items.
		if (is(node, 'br')) {
		suffix = lineFeedChar
		if (br(node)) {
		suffix = '\n'
		}
		@@ -202,3 +241,3 @@
		else if (row(node) && findAfter(parent, node, row)) {
		suffix = lineFeedChar
		suffix = '\n'
		}
		@@ -208,3 +247,3 @@
		// at the beginning and end of items.
		else if (is(node, 'p')) {
		else if (p(node)) {
		prefix = 2
		@@ -225,11 +264,11 @@ suffix = 2
		// results to a single list.
		while (++offset < length) {
		current = innerTextCollection(children[offset], offset, node, {
		whiteSpace: whiteSpace,
		breakBefore: offset === 0 ? prefix : false,
		breakAfter:
		offset === length - 1 ? suffix : is(children[offset + 1], 'br')
		})

		items = items.concat(current)
		while (++index < children.length) {
		items = items.concat(
		innerTextCollection(children[index], index, node, {
		whiteSpace: whiteSpace,
		breakBefore: index ? null : prefix,
		breakAfter:
		index < children.length - 1 ? br(children[index + 1]) : suffix
		})
		)
		}
		@@ -244,14 +283,9 @@
		if (cell(node) && findAfter(parent, node, cell)) {
		items.push(tabChar)
		items.push('\t')
		}

		// Add the pre- and suffix.
		if (prefix) {
		items.unshift(prefix)
		}
		if (prefix) items.unshift(prefix)
		if (suffix) items.push(suffix)

		if (suffix) {
		items.push(suffix)
		}

		return items
		@@ -276,47 +310,34 @@ }
		function collectText(node, options) {
		var breakBefore = options.breakBefore
		var breakAfter = options.breakAfter
		var value = String(node.value)
		var index = -1
		var length = value.length
		var lines = []
		var result = []
		var lineStart
		var lineEnd
		var line
		var nextLine
		var queue
		var start = 0
		var index = -1
		var match
		var end
		var join

		lineStart = 0
		lineEnd = value.indexOf(lineFeedChar)
		lineEnd = lineEnd === -1 ? value.length : lineEnd
		while (start < value.length) {
		searchLineFeeds.lastIndex = start
		match = searchLineFeeds.exec(value)
		end = match ? match.index : value.length

		while (lineEnd !== -1) {
		line = value.slice(lineStart, lineEnd)
		lines.push(
		// Any sequence of collapsible spaces and tabs immediately preceding or
		// following a segment break is removed.
		trimAndcollapseSpacesAndTabs(
		// [...] ignoring bidi formatting characters (characters with the
		// Bidi_Control property [UAX9]: ALM, LTR, RTL, LRE-RLO, LRI-PDI) as if
		// they were not there.
		value
		.slice(start, end)
		.replace(/[\u061c\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ''),
		options.breakBefore,
		options.breakAfter
		)
		)

		// [...] ignoring bidi formatting characters (characters with the
		// Bidi_Control property [UAX9]) as if they were not there.
		line = removeBidiControlCharacters(line)

		// Any sequence of collapsible spaces and tabs immediately preceding or
		// following a segment break is removed.
		line = trimAndcollapseSpacesAndTabs(line, breakBefore, breakAfter)

		// Add the line.
		lines.push(line)

		// Stop.
		if (lineEnd === value.length) {
		break
		}

		lineStart = lineEnd + 1
		lineEnd = value.indexOf(lineFeedChar, lineStart)
		lineEnd = lineEnd === -1 ? value.length : lineEnd
		start = end + 1
		}

		index = -1
		length = lines.length
		queue = ''

		// Collapsible segment breaks are transformed for rendering according to the
		@@ -327,6 +348,3 @@ // segment break transformation rules.
		// segment break is removed
		while (++index < length) {
		line = lines[index]
		nextLine = lines[index + 1] \|\| ''

		while (++index < lines.length) {
		// * If the character immediately before or immediately after the segment
		@@ -336,8 +354,8 @@ // break is the zero-width space character (U+200B), then the break is
		if (
		line.charCodeAt(line.length - 1) === zeroWidthSpace \|\|
		nextLine.charCodeAt(0) === zeroWidthSpace
		lines[index].charCodeAt(lines[index].length - 1) === 0x200b /* ZWSP */ \|\|
		(index < lines.length - 1 &&
		lines[index + 1].charCodeAt(0) === 0x200b) /* ZWSP */
		) {
		result.push(line)
		queue = ''
		continue
		result.push(lines[index])
		join = ''
		}
		@@ -351,3 +369,2 @@
		// Note: ignored.

		// * Otherwise, if the writing system of the segment break is Chinese,
		@@ -363,9 +380,6 @@ // Japanese, or Yi, and the character before or after the segment break
		// * Otherwise, the segment break is converted to a space (U+0020).
		if (line) {
		if (queue) {
		result.push(queue)
		}

		result.push(line)
		queue = spaceChar
		else if (lines[index]) {
		if (join) result.push(join)
		result.push(lines[index])
		join = ' '
		}
		@@ -381,18 +395,2 @@ }

		// Return `value` without its bidi control characters.
		//
		// The string is walked one UTF-16 code unit at a time; a unit is kept
		// whenever `isBidiControlCharacter` returns a falsy value for its code.
		function removeBidiControlCharacters(value) {
		  return value
		    .split('')
		    .filter(function (unit) {
		      return !isBidiControlCharacter(unit.charCodeAt(0))
		    })
		    .join('')
		}

		// 3. Every collapsible tab is converted to a collapsible space (U+0020).
		@@ -405,60 +403,33 @@ // 4. Any collapsible space immediately following another collapsible
		function trimAndcollapseSpacesAndTabs(value, breakBefore, breakAfter) {
		var result = []
		var start = 0
		var match
		var end
		var length = value.length
		var result = []
		var char

		// Move forward past initial white space.
		while (start <= length) {
		char = value.charCodeAt(start)
		while (start < value.length) {
		searchTabOrSpaces.lastIndex = start
		match = searchTabOrSpaces.exec(value)
		end = match ? match.index : value.length

		if (char !== space && char !== tab) {
		break
		// If we’re not directly after a segment break, but there was white space,
		// add an empty value that will be turned into a space.
		if (!start && !end && match && !breakBefore) {
		result.push('')
		}

		start++
		if (start !== end) {
		result.push(value.slice(start, end))
		}

		start = match ? end + match[0].length : end
		}

		// If we’re not directly after a segment break, but there was white space,
		// add an empty value that will be turned into a space.
		if (start !== 0 && !breakBefore) {
		// If we reached the end, there was trailing white space, and there’s no
		// segment break after this node, add an empty value that will be turned
		// into a space.
		if (start !== end && !breakAfter) {
		result.push('')
		}

		end = next(start - 1)

		while (start < length) {
		end = end === -1 ? length : end
		result.push(value.slice(start, end))
		start = end

		while (start <= length) {
		char = value.charCodeAt(start)

		if (char !== space && char !== tab) {
		break
		}

		start++
		}

		// If we reached the end, there was trailing white space, and there’s no
		// segment break after this node, add an empty value that will be turned
		// into a space.
		if (start === length && start !== end && !breakAfter) {
		result.push('')
		}

		end = next(start)
		}

		return result.join(' ')

		function next(index) {
		var spaceIndex = value.indexOf(spaceChar, index + 1)
		var tabIndex = value.indexOf(tabChar, index + 1)
		var fn = spaceIndex === -1 \|\| tabIndex === -1 ? max : min
		return fn(spaceIndex, tabIndex)
		}
		}
		@@ -490,105 +461,8 @@

		function isBidiControlCharacter(char) {
		switch (char) {
		case alm:
		case ltr:
		case rtl:
		case lre:
		case rle:
		case pdf:
		case lro:
		case rlo:
		case lri:
		case rli:
		case fsi:
		case pdi:
		return true
		default:
		return false
		}
		// Check whether `node` has the `hidden` attribute set.
		//
		// Returns the raw `hidden` property (so any truthy value counts), or
		// `undefined` when the node has no `properties` or no `hidden` field.
		function hidden(node) {
		  return (node.properties || {}).hidden
		}

		function cell(node) {
		return is(node, ['th', 'td'])
		function closedDialog(node) {
		return node.tagName === 'dialog' && !(node.properties \|\| {}).open
		}

		// Check whether `node` is a table row, by asking `is` to match it against
		// the single tag name `tr`.
		function row(node) {
		  var rowNames = ['tr']
		  return is(node, rowNames)
		}

		// Check whether `node` is an element whose default `display` is `block` or
		// `table-caption` according to the HTML user agent style sheet.
		//
		// Returns whatever `is` returns for the list of tag names below.
		//
		// Note: the entry for forms must be exactly `'form'`; a stray comma inside
		// the string (`'form,'`) never matches a tag name, which silently drops
		// `form` from the block-level set.
		//
		// See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
		function blockOrCaption(node) {
		  return is(node, [
		    'caption', // `table-caption`
		    // Page
		    'html',
		    'body',
		    // Flow content
		    'address',
		    'blockquote',
		    'center', // Legacy
		    'dialog',
		    'div',
		    'figure',
		    'figcaption',
		    'footer',
		    'form',
		    'header',
		    'hr',
		    'legend',
		    'listing', // Legacy
		    'main',
		    'p',
		    'plaintext', // Legacy
		    'pre',
		    'xmp', // Legacy
		    // Sections and headings
		    'article',
		    'aside',
		    'h1',
		    'h2',
		    'h3',
		    'h4',
		    'h5',
		    'h6',
		    'hgroup',
		    'nav',
		    'section',
		    // Lists
		    'dir', // Legacy
		    'dd',
		    'dl',
		    'dt',
		    'menu',
		    'ol',
		    'ul'
		  ])
		}

		// Check whether `node` is not rendered.
		//
		// A node counts as not rendered when it is on the HTML hidden-elements list
		// (plus `noscript`), when its `hidden` property is truthy, or when it is a
		// `dialog` without a truthy `open` property.
		// Because the checks are chained with `||`, the result is the first truthy
		// operand (for example the raw `hidden` value) rather than always a boolean.
		//
		// Note that we don’t need to include void elements here as they don’t have text.
		//
		// See: <https://github.com/wooorm/html-void-elements>
		function notRendered(node) {
		  // Nodes without `properties` are treated as having none set.
		  var properties = node.properties || {}

		  return (
		    // List from: <https://html.spec.whatwg.org/#hidden-elements>
		    is(node, [
		      'datalist',
		      'head',
		      'noembed',
		      'noframes',
		      'rp',
		      'script',
		      'style',
		      'template',
		      'title',
		      // Act as if we support scripting.
		      'noscript'
		    ]) ||
		    // Hidden attribute.
		    properties.hidden ||
		    // From: <https://html.spec.whatwg.org/#flow-content-3>
		    (is(node, 'dialog') && !properties.open)
		  )
		}

package.json

		{
		"name": "hast-util-to-text",
		"version": "2.0.0",
		"version": "2.0.1",
		"description": "hast utility to get the plain-text value of a node according to the `innerText` algorithm",
		@@ -37,15 +37,15 @@ "license": "MIT",
		"devDependencies": {
		"browserify": "^16.0.0",
		"hastscript": "^5.0.0",
		"browserify": "^17.0.0",
		"hastscript": "^6.0.0",
		"nyc": "^15.0.0",
		"prettier": "^1.0.0",
		"remark-cli": "^7.0.0",
		"remark-preset-wooorm": "^6.0.0",
		"tape": "^4.0.0",
		"tinyify": "^2.0.0",
		"prettier": "^2.0.0",
		"remark-cli": "^9.0.0",
		"remark-preset-wooorm": "^8.0.0",
		"tape": "^5.0.0",
		"tinyify": "^3.0.0",
		"unist-builder": "^2.0.0",
		"xo": "^0.27.0"
		"xo": "^0.34.0"
		},
		"scripts": {
		"format": "remark . -qfo && prettier --write \"*/.js\" && xo --fix",
		"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
		"build-bundle": "browserify . -s hastUtilToText -o hast-util-to-text.js",
		@@ -77,3 +77,7 @@ "build-mangle": "browserify . -s hastUtilToText -p tinyify -o hast-util-to-text.min.js",
		"hast-util-to-text.js"
		]
		],
		"rules": {
		"unicorn/escape-case": "off",
		"no-constant-condition": "off"
		}
		},
		@@ -80,0 +84,0 @@ "remarkConfig": {

readme.md

		@@ -96,7 +96,7 @@ # hast-util-to-text

		* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-to-string)
		* [`hast-util-to-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string)
		— Get the plain-text value (`textContent`)
		* [`hast-util-from-text`](https://github.com/syntax-tree/hast-util-from-text)
		— Set the plain-text value (`innerText`)
		* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-from-string)
		* [`hast-util-from-string`](https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-from-string)
		— Set the plain-text value (`textContent`)
		@@ -142,5 +142,5 @@

		[chat-badge]: https://img.shields.io/badge/chat-spectrum-7b16ff.svg
		[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg

		[chat]: https://spectrum.chat/unified/rehype
		[chat]: https://github.com/syntax-tree/unist/discussions

		@@ -153,7 +153,7 @@ [npm]: https://docs.npmjs.com/cli/install

		[contributing]: https://github.com/syntax-tree/.github/blob/master/contributing.md
		[contributing]: https://github.com/syntax-tree/.github/blob/HEAD/contributing.md

		[support]: https://github.com/syntax-tree/.github/blob/master/support.md
		[support]: https://github.com/syntax-tree/.github/blob/HEAD/support.md

		[coc]: https://github.com/syntax-tree/.github/blob/master/code-of-conduct.md
		[coc]: https://github.com/syntax-tree/.github/blob/HEAD/code-of-conduct.md

		@@ -164,3 +164,3 @@ [html]: https://html.spec.whatwg.org/#the-innertext-idl-attribute

		[to-string]: https://github.com/rehypejs/rehype-minify/tree/master/packages/hast-util-to-string
		[to-string]: https://github.com/rehypejs/rehype-minify/tree/HEAD/packages/hast-util-to-string

		@@ -167,0 +167,0 @@ [descendant]: https://github.com/syntax-tree/unist#descendant

hast-util-to-text - npm Package Compare versions

New alerts

Fixed alerts

Worsened metrics