@@ -13,3 +13,8 @@ var pos = require('pos');
		var paragraphs = text.split( /[\r\n\|\n\|\r]+/g );
		return paragraphs.map(self.paragraphToSentences);
		return {
		raw: text,
		type: 'text',
		length: paragraphs.length,
		children: paragraphs.map(self.paragraphToSentences)
		};
		};
		@@ -24,2 +29,3 @@
		type: 'paragraph',
		length: sentences.length,
		children: sentences.map(self.sentenceToWords)
		@@ -36,2 +42,3 @@ };
		type: 'sentence',
		length: words.length,
		children: words.map(self.wordToChars)
		@@ -66,2 +73,3 @@ };
		type: 'word',
		length: chars.length,
		children: chars
		@@ -68,0 +76,0 @@ };

package.json

		{
		"name": "text-parse",
		"version": "1.1.0",
		"version": "2.0.0",
		"description": "Text parser",
		"keywords": ["text", "parser", "plaintext"],
		"keywords": [
		"text",
		"parser",
		"plaintext"
		],
		"author": "Stephen Morrison <srmorrisonjit@gmail.com>",
		"licenses": {
		"type": "MIT",
		"url":"http://srmor.mit-license.org/"
		"url": "http://srmor.mit-license.org/"
		},
		@@ -19,2 +23,2 @@ "dependencies": {
		"main": "index"
		}
		}

216

README.md

		@@ -13,2 +13,4 @@ # Text Parse

		[![Build Status](https://travis-ci.org/srmor/text-parse.png?branch=master)](https://travis-ci.org/srmor/text-parse)

		```
		@@ -27,3 +29,3 @@ make test

		Optionally text-parse can return the part of speech of each word (powered by [pos](https://github.com/fortnightlabs/pos-js)). To enable this option just include it in the option object parameter of the parser. So instead of using `var parseTree = parse(text)` you can do `var parseTree = parse(text, {pos: true})` and it will return a `partOfSpeech` attribute on all the word objects. The part of speech tags are the same tags that (pos)[https://github.com/fortnightlabs/pos-js] returns.
		Optionally text-parse can return the part of speech of each word (powered by [pos](https://github.com/fortnightlabs/pos-js)). To enable this option just include it in the option object parameter of the parser. So instead of using `var parseTree = parse(text)` you can do `var parseTree = parse(text, {pos: true})` and it will return a `partOfSpeech` attribute on all the word objects. The part of speech tags are the same tags that [pos](https://github.com/fortnightlabs/pos-js) returns.

		@@ -42,99 +44,115 @@ ## Example
		```
		[ { raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.',
		type: 'paragraph',
		children:
		[ { raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.',
		type: 'sentence',
		children:
		[ { raw: 'Sed',
		noPunctuation: 'Sed',
		type: 'word',
		children:
		[ { raw: 'S', type: 'letter' },
		{ raw: 'e', type: 'letter' },
		{ raw: 'd', type: 'letter' },
		] },
		{ raw: 'ut',
		noPunctuation: 'ut',
		type: 'word',
		children:
		[ { raw: 'u', type: 'letter' },
		{ raw: 't', type: 'letter' },
		] },
		{ raw: 'unde,',
		noPunctuation: 'unde',
		type: 'word',
		children:
		[ { raw: 'u', type: 'letter' },
		{ raw: 'n', type: 'letter' },
		{ raw: 'd', type: 'letter' },
		{ raw: 'e', type: 'letter' },
		{ raw: ',', type: 'punctuation' },
		] },
		{ raw: 'omnis',
		noPunctuation: 'omnis',
		type: 'word',
		children:
		[ { raw: 'o', type: 'letter' },
		{ raw: 'm', type: 'letter' },
		{ raw: 'n', type: 'letter' },
		{ raw: 'i', type: 'letter' },
		{ raw: 's', type: 'letter' },
		] },
		{ raw: 'error',
		noPunctuation: 'error',
		type: 'word',
		children:
		[ { raw: 'e', type: 'letter' },
		{ raw: 'r', type: 'letter' },
		{ raw: 'r', type: 'letter' },
		{ raw: 'o', type: 'letter' },
		{ raw: 'r', type: 'letter' },
		] },
		{ raw: 'sit',
		noPunctuation: 'sit',
		type: 'word',
		children:
		[ { raw: 's', type: 'letter' },
		{ raw: 'i', type: 'letter' },
		{ raw: 't', type: 'letter' },
		] },
		{ raw: 'voluptatem;',
		noPunctuation: 'voluptatem',
		type: 'word',
		children:
		[ { raw: 'v', type: 'letter' },
		{ raw: 'o', type: 'letter' },
		{ raw: 'l', type: 'letter' },
		{ raw: 'u', type: 'letter' },
		{ raw: 'p', type: 'letter' },
		{ raw: 't', type: 'letter' },
		{ raw: 'a', type: 'letter' },
		{ raw: 't', type: 'letter' },
		{ raw: 'e', type: 'letter' },
		{ raw: 'm', type: 'letter' },
		{ raw: ';', type: 'punctuation' },
		] },
		{ raw: 'accusantium\'s.',
		noPunctuation: 'accusantiums',
		type: 'word',
		children:
		[ { raw: 'a', type: 'letter' },
		{ raw: 'c', type: 'letter' },
		{ raw: 'c', type: 'letter' },
		{ raw: 'u', type: 'letter' },
		{ raw: 's', type: 'letter' },
		{ raw: 'a', type: 'letter' },
		{ raw: 'n', type: 'letter' },
		{ raw: 't', type: 'letter' },
		{ raw: 'i', type: 'letter' },
		{ raw: 'u', type: 'letter' },
		{ raw: 'm', type: 'letter' },
		{ raw: '\'', type: 'punctuation' },
		{ raw: 's', type: 'letter' },
		{ raw: '.', type: 'punctuation' },
		] },
		] },
		] },
		]
		{
		raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.',
		type: 'text',
		length: 1,
		children:
		[{ raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.',
		type: 'paragraph',
		length: 1,
		children:
		[ { raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.',
		type: 'sentence',
		length: 8,
		children:
		[ { raw: 'Sed',
		noPunctuation: 'Sed',
		type: 'word',
		length: 3,
		children:
		[ { raw: 'S', type: 'letter' },
		{ raw: 'e', type: 'letter' },
		{ raw: 'd', type: 'letter' },
		] },
		{ raw: 'ut',
		noPunctuation: 'ut',
		type: 'word',
		length: 2,
		children:
		[ { raw: 'u', type: 'letter' },
		{ raw: 't', type: 'letter' },
		] },
		{ raw: 'unde,',
		noPunctuation: 'unde',
		type: 'word',
		length: 5,
		children:
		[ { raw: 'u', type: 'letter' },
		{ raw: 'n', type: 'letter' },
		{ raw: 'd', type: 'letter' },
		{ raw: 'e', type: 'letter' },
		{ raw: ',', type: 'punctuation' },
		] },
		{ raw: 'omnis',
		noPunctuation: 'omnis',
		type: 'word',
		length: 5,
		children:
		[ { raw: 'o', type: 'letter' },
		{ raw: 'm', type: 'letter' },
		{ raw: 'n', type: 'letter' },
		{ raw: 'i', type: 'letter' },
		{ raw: 's', type: 'letter' },
		] },
		{ raw: 'error',
		noPunctuation: 'error',
		type: 'word',
		length: 5,
		children:
		[ { raw: 'e', type: 'letter' },
		{ raw: 'r', type: 'letter' },
		{ raw: 'r', type: 'letter' },
		{ raw: 'o', type: 'letter' },
		{ raw: 'r', type: 'letter' },
		] },
		{ raw: 'sit',
		noPunctuation: 'sit',
		type: 'word',
		length: 3,
		children:
		[ { raw: 's', type: 'letter' },
		{ raw: 'i', type: 'letter' },
		{ raw: 't', type: 'letter' },
		] },
		{ raw: 'voluptatem;',
		noPunctuation: 'voluptatem',
		type: 'word',
		length: 11,
		children:
		[ { raw: 'v', type: 'letter' },
		{ raw: 'o', type: 'letter' },
		{ raw: 'l', type: 'letter' },
		{ raw: 'u', type: 'letter' },
		{ raw: 'p', type: 'letter' },
		{ raw: 't', type: 'letter' },
		{ raw: 'a', type: 'letter' },
		{ raw: 't', type: 'letter' },
		{ raw: 'e', type: 'letter' },
		{ raw: 'm', type: 'letter' },
		{ raw: ';', type: 'punctuation' },
		] },
		{ raw: 'accusantium\'s.',
		noPunctuation: 'accusantiums',
		type: 'word',
		length: 14,
		children:
		[ { raw: 'a', type: 'letter' },
		{ raw: 'c', type: 'letter' },
		{ raw: 'c', type: 'letter' },
		{ raw: 'u', type: 'letter' },
		{ raw: 's', type: 'letter' },
		{ raw: 'a', type: 'letter' },
		{ raw: 'n', type: 'letter' },
		{ raw: 't', type: 'letter' },
		{ raw: 'i', type: 'letter' },
		{ raw: 'u', type: 'letter' },
		{ raw: 'm', type: 'letter' },
		{ raw: '\'', type: 'punctuation' },
		{ raw: 's', type: 'letter' },
		{ raw: '.', type: 'punctuation' },
		] },
		] },
		] },
		]
		}
		```
		@@ -163,2 +181,2 @@
		IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
		CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

text-parse - npm Package Compare versions

Improved metrics