parse-latin - npm Package Compare versions

Comparing version 4.2.0 to 4.2.1

lib/plugin/merge-final-word-symbol.js

		@@ -26,3 +26,4 @@ 'use strict'
		(!next || next.type !== 'WordNode') &&
		(prev && prev.type === 'WordNode')
		prev &&
		prev.type === 'WordNode'
		) {
		@@ -29,0 +30,0 @@ // Remove `child` from parent.

package.json

		{
		"name": "parse-latin",
		"version": "4.2.0",
		"version": "4.2.1",
		"description": "Latin-script (natural language) parser",
		@@ -16,2 +16,6 @@ "license": "MIT",
		"bugs": "https://github.com/wooorm/parse-latin/issues",
		"funding": {
		"type": "github",
		"url": "https://github.com/sponsors/wooorm"
		},
		"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
		@@ -32,16 +36,16 @@ "contributors": [
		"browserify": "^16.0.0",
		"is-hidden": "^1.0.1",
		"is-hidden": "^1.0.0",
		"negate": "^1.0.0",
		"nlcst-test": "^1.0.0",
		"nyc": "^14.0.0",
		"prettier": "^1.12.1",
		"regenerate": "^1.3.1",
		"remark-cli": "^6.0.0",
		"remark-preset-wooorm": "^4.0.0",
		"nyc": "^15.0.0",
		"prettier": "^1.0.0",
		"regenerate": "^1.0.0",
		"remark-cli": "^7.0.0",
		"remark-preset-wooorm": "^6.0.0",
		"tape": "^4.0.0",
		"tinyify": "^2.5.0",
		"tinyify": "^2.0.0",
		"unicode-12.1.0": "^0.8.0",
		"unist-util-remove-position": "^1.1.0",
		"unist-util-remove-position": "^2.0.0",
		"vfile": "^4.0.0",
		"xo": "^0.24.0"
		"xo": "^0.25.0"
		},
		@@ -76,7 +80,8 @@ "scripts": {
		"rules": {
		"unicorn/prefer-reflect-apply": "off",
		"unicorn/no-hex-escape": "off",
		"no-misleading-character-class": "off",
		"no-useless-escape": "off",
		"guard-for-in": "off",
		"max-depth": "off",
		"unicorn/no-hex-escape": "off"
		"max-depth": "off"
		},
		@@ -83,0 +88,0 @@ "ignores": [

readme.md

		@@ -9,24 +9,24 @@ # parse-latin

		A Latin script language parser for [retext][retext] producing
		[NLCST][nlcst] nodes.
		A Latin-script language parser for [retext][retext] producing [nlcst][]
		nodes.

		Whether Old-English (“þā gewearþ þǣm hlāforde and þǣm hȳrigmannum wiþ
		ānum penninge”), Icelandic (“Hvað er að frétta”), French (“Où sont
		les toilettes?”), `parse-latin` does a good job at tokenising it.
		Whether Old-English (“þā gewearþ þǣm hlāforde and þǣm hȳrigmannum wiþ ānum
		penninge”), Icelandic (“Hvað er að frétta”), French (“Où sont les toilettes?”),
		`parse-latin` does a good job at tokenizing it.

		Note also that `parse-latin` does a decent job at tokenising
		Latin-like scripts, Cyrillic (“Добро пожаловать!”), Georgian (“როგორა
		ხარ?”), Armenian (“Շատ հաճելի է”), and such.
		Note also that `parse-latin` does a decent job at tokenizing Latin-like scripts,
		Cyrillic (“Добро пожаловать!”), Georgian (“როგორა ხარ?”), Armenian (“Շատ հաճելի
		է”), and such.

		## Installation
		## Install

		[npm][]:

		```bash
		```sh
		npm install parse-latin
		```

		## Usage
		## Use

		```javascript
		```js
		var inspect = require('unist-util-inspect')
		@@ -61,4 +61,4 @@ var Latin = require('parse-latin')

		Exposes the functionality needed to tokenise natural Latin-script
		languages into a syntax tree.
		Exposes the functionality needed to tokenize natural Latin-script languages into
		a syntax tree.
		If `value` is passed here, it’s not needed to give it to `#parse()`.
		@@ -68,28 +68,28 @@

		Tokenise `value` (`string`) into letters and numbers (words), white space, and
		everything else (punctuation). The returned nodes are a flat list without
		paragraphs or sentences.
		Tokenize `value` (`string`) into letters and numbers (words), white space, and
		everything else (punctuation).
		The returned nodes are a flat list without paragraphs or sentences.

		###### Returns

		[`Array.<NLCSTNode>`][nlcst] — Nodes.
		[`Array.<Node>`][nlcst] — Nodes.

		#### `ParseLatin#parse(value)`

		Tokenise `value` (`string`) into an [NLCST][nlcst] tree. The returned node is
		a `RootNode` with in it paragraphs and sentences.
		Tokenize `value` (`string`) into an [NLCST][] tree.
		The returned node is a `RootNode` with in it paragraphs and sentences.

		###### Returns

		[`NLCSTNode`][nlcst] — Root node.
		[`Node`][nlcst] — Root node.

		## Algorithm

		> Note: The easiest way to see how parse-latin tokenizes and parses,
		> is by using the [online parser demo](https://wooorm.github.io/parse-latin),
		> which shows the syntax tree corresponding to the typed text.
		> Note: The easiest way to see how parse-latin tokenizes and parses, is by
		> using the [online parser demo][demo], which
		> shows the syntax tree corresponding to the typed text.

		`parse-latin` splits text into white space, word, and punctuation
		tokens. `parse-latin` starts out with a pretty easy definition,
		one that most other tokenisers use:
		`parse-latin` splits text into white space, word, and punctuation tokens.
		`parse-latin` starts out with a pretty easy definition, one that most other
		tokenizers use:

		@@ -100,13 +100,11 @@ * A “word” is one or more letter or number characters

		Then, it manipulates and merges those tokens into an [NLCST][]
		syntax tree, adding sentences and paragraphs where needed.
		Then, it manipulates and merges those tokens into a ([nlcst][]) syntax tree,
		adding sentences and paragraphs where needed.

		* Some punctuation marks are part of the word they occur in, e.g.,
		`non-profit`, `she’s`, `G.I.`, `11:00`, `N/A`, `&c`,
		`nineteenth- and...`
		* Some full-stops do not mark a sentence end, e.g., `1.`, `e.g.`,
		`id.`
		* Although full-stops, question marks, and exclamation marks
		(sometimes) end a sentence, that end might not occur directly
		after the mark, e.g., `.)`, `."`
		* Some punctuation marks are part of the word they occur in, such as
		`non-profit`, `she’s`, `G.I.`, `11:00`, `N/A`, `&c`, `nineteenth- and…`
		* Some full-stops do not mark a sentence end, such as `1.`, `e.g.`, `id.`
		* Although full-stops, question marks, and exclamation marks (sometimes) end a
		sentence, that end might not occur directly after the mark, such as `.)`,
		`."`
		* And many more exceptions
		@@ -142,2 +140,4 @@

		[demo]: https://wooorm.com/parse-latin/

		[license]: license
		@@ -144,0 +144,0 @@

parse-latin - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics