Comparing version 1.1.0 to 2.0.0
@@ -13,3 +13,8 @@ var pos = require('pos'); | ||
var paragraphs = text.split( /[\r\n|\n|\r]+/g ); | ||
return paragraphs.map(self.paragraphToSentences); | ||
return { | ||
raw: text, | ||
type: 'text', | ||
length: paragraphs.length, | ||
children: paragraphs.map(self.paragraphToSentences) | ||
}; | ||
}; | ||
@@ -24,2 +29,3 @@ | ||
type: 'paragraph', | ||
length: sentences.length, | ||
children: sentences.map(self.sentenceToWords) | ||
@@ -36,2 +42,3 @@ }; | ||
type: 'sentence', | ||
length: words.length, | ||
children: words.map(self.wordToChars) | ||
@@ -66,2 +73,3 @@ }; | ||
type: 'word', | ||
length: chars.length, | ||
children: chars | ||
@@ -68,0 +76,0 @@ }; |
{ | ||
"name": "text-parse", | ||
"version": "1.1.0", | ||
"version": "2.0.0", | ||
"description": "Text parser", | ||
"keywords": ["text", "parser", "plaintext"], | ||
"keywords": [ | ||
"text", | ||
"parser", | ||
"plaintext" | ||
], | ||
"author": "Stephen Morrison <srmorrisonjit@gmail.com>", | ||
"licenses": { | ||
"type": "MIT", | ||
"url":"http://srmor.mit-license.org/" | ||
"url": "http://srmor.mit-license.org/" | ||
}, | ||
@@ -19,2 +23,2 @@ "dependencies": { | ||
"main": "index" | ||
} | ||
} |
216
README.md
@@ -13,2 +13,4 @@ # Text Parse | ||
[![Build Status](https://travis-ci.org/srmor/text-parse.png?branch=master)](https://travis-ci.org/srmor/text-parse) | ||
``` | ||
@@ -27,3 +29,3 @@ make test | ||
Optionally text-parse can return the part of speech of each word (powered by [pos](https://github.com/fortnightlabs/pos-js)). To enable this option just include it in the option object parameter of the parser. So instead of using `var parseTree = parse(text)` you can do `var parseTree = parse(text, {pos: true})` and it will return a `partOfSpeech` attribute on all the word objects. The part of speech tags are the same tags that (pos)[https://github.com/fortnightlabs/pos-js] returns. | ||
Optionally text-parse can return the part of speech of each word (powered by [pos](https://github.com/fortnightlabs/pos-js)). To enable this option just include it in the option object parameter of the parser. So instead of using `var parseTree = parse(text)` you can do `var parseTree = parse(text, {pos: true})` and it will return a `partOfSpeech` attribute on all the word objects. The part of speech tags are the same tags that [pos](https://github.com/fortnightlabs/pos-js) returns. | ||
@@ -42,99 +44,115 @@ ## Example | ||
``` | ||
[ { raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.', | ||
type: 'paragraph', | ||
children: | ||
[ { raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.', | ||
type: 'sentence', | ||
children: | ||
[ { raw: 'Sed', | ||
noPunctuation: 'Sed', | ||
type: 'word', | ||
children: | ||
[ { raw: 'S', type: 'letter' }, | ||
{ raw: 'e', type: 'letter' }, | ||
{ raw: 'd', type: 'letter' }, | ||
] }, | ||
{ raw: 'ut', | ||
noPunctuation: 'ut', | ||
type: 'word', | ||
children: | ||
[ { raw: 'u', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
] }, | ||
{ raw: 'unde,', | ||
noPunctuation: 'unde', | ||
type: 'word', | ||
children: | ||
[ { raw: 'u', type: 'letter' }, | ||
{ raw: 'n', type: 'letter' }, | ||
{ raw: 'd', type: 'letter' }, | ||
{ raw: 'e', type: 'letter' }, | ||
{ raw: ',', type: 'punctuation' }, | ||
] }, | ||
{ raw: 'omnis', | ||
noPunctuation: 'omnis', | ||
type: 'word', | ||
children: | ||
[ { raw: 'o', type: 'letter' }, | ||
{ raw: 'm', type: 'letter' }, | ||
{ raw: 'n', type: 'letter' }, | ||
{ raw: 'i', type: 'letter' }, | ||
{ raw: 's', type: 'letter' }, | ||
] }, | ||
{ raw: 'error', | ||
noPunctuation: 'error', | ||
type: 'word', | ||
children: | ||
[ { raw: 'e', type: 'letter' }, | ||
{ raw: 'r', type: 'letter' }, | ||
{ raw: 'r', type: 'letter' }, | ||
{ raw: 'o', type: 'letter' }, | ||
{ raw: 'r', type: 'letter' }, | ||
] }, | ||
{ raw: 'sit', | ||
noPunctuation: 'sit', | ||
type: 'word', | ||
children: | ||
[ { raw: 's', type: 'letter' }, | ||
{ raw: 'i', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
] }, | ||
{ raw: 'voluptatem;', | ||
noPunctuation: 'voluptatem', | ||
type: 'word', | ||
children: | ||
[ { raw: 'v', type: 'letter' }, | ||
{ raw: 'o', type: 'letter' }, | ||
{ raw: 'l', type: 'letter' }, | ||
{ raw: 'u', type: 'letter' }, | ||
{ raw: 'p', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
{ raw: 'a', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
{ raw: 'e', type: 'letter' }, | ||
{ raw: 'm', type: 'letter' }, | ||
{ raw: ';', type: 'punctuation' }, | ||
] }, | ||
{ raw: 'accusantium\'s.', | ||
noPunctuation: 'accusantiums', | ||
type: 'word', | ||
children: | ||
[ { raw: 'a', type: 'letter' }, | ||
{ raw: 'c', type: 'letter' }, | ||
{ raw: 'c', type: 'letter' }, | ||
{ raw: 'u', type: 'letter' }, | ||
{ raw: 's', type: 'letter' }, | ||
{ raw: 'a', type: 'letter' }, | ||
{ raw: 'n', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
{ raw: 'i', type: 'letter' }, | ||
{ raw: 'u', type: 'letter' }, | ||
{ raw: 'm', type: 'letter' }, | ||
{ raw: '\'', type: 'punctuation' }, | ||
{ raw: 's', type: 'letter' }, | ||
{ raw: '.', type: 'punctuation' }, | ||
] }, | ||
] }, | ||
] }, | ||
] | ||
{ | ||
raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.', | ||
type: 'text', | ||
length: 1, | ||
children: | ||
[{ raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.', | ||
type: 'paragraph', | ||
length: 1, | ||
children: | ||
[ { raw: 'Sed ut unde, omnis error sit voluptatem; accusantium\'s.', | ||
type: 'sentence', | ||
length: 8, | ||
children: | ||
[ { raw: 'Sed', | ||
noPunctuation: 'Sed', | ||
type: 'word', | ||
length: 3, | ||
children: | ||
[ { raw: 'S', type: 'letter' }, | ||
{ raw: 'e', type: 'letter' }, | ||
{ raw: 'd', type: 'letter' }, | ||
] }, | ||
{ raw: 'ut', | ||
noPunctuation: 'ut', | ||
type: 'word', | ||
length: 2, | ||
children: | ||
[ { raw: 'u', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
] }, | ||
{ raw: 'unde,', | ||
noPunctuation: 'unde', | ||
type: 'word', | ||
length: 5, | ||
children: | ||
[ { raw: 'u', type: 'letter' }, | ||
{ raw: 'n', type: 'letter' }, | ||
{ raw: 'd', type: 'letter' }, | ||
{ raw: 'e', type: 'letter' }, | ||
{ raw: ',', type: 'punctuation' }, | ||
] }, | ||
{ raw: 'omnis', | ||
noPunctuation: 'omnis', | ||
type: 'word', | ||
length: 5, | ||
children: | ||
[ { raw: 'o', type: 'letter' }, | ||
{ raw: 'm', type: 'letter' }, | ||
{ raw: 'n', type: 'letter' }, | ||
{ raw: 'i', type: 'letter' }, | ||
{ raw: 's', type: 'letter' }, | ||
] }, | ||
{ raw: 'error', | ||
noPunctuation: 'error', | ||
type: 'word', | ||
length: 5, | ||
children: | ||
[ { raw: 'e', type: 'letter' }, | ||
{ raw: 'r', type: 'letter' }, | ||
{ raw: 'r', type: 'letter' }, | ||
{ raw: 'o', type: 'letter' }, | ||
{ raw: 'r', type: 'letter' }, | ||
] }, | ||
{ raw: 'sit', | ||
noPunctuation: 'sit', | ||
type: 'word', | ||
length: 3, | ||
children: | ||
[ { raw: 's', type: 'letter' }, | ||
{ raw: 'i', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
] }, | ||
{ raw: 'voluptatem;', | ||
noPunctuation: 'voluptatem', | ||
type: 'word', | ||
length: 11, | ||
children: | ||
[ { raw: 'v', type: 'letter' }, | ||
{ raw: 'o', type: 'letter' }, | ||
{ raw: 'l', type: 'letter' }, | ||
{ raw: 'u', type: 'letter' }, | ||
{ raw: 'p', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
{ raw: 'a', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
{ raw: 'e', type: 'letter' }, | ||
{ raw: 'm', type: 'letter' }, | ||
{ raw: ';', type: 'punctuation' }, | ||
] }, | ||
{ raw: 'accusantium\'s.', | ||
noPunctuation: 'accusantiums', | ||
type: 'word', | ||
length: 14, | ||
children: | ||
[ { raw: 'a', type: 'letter' }, | ||
{ raw: 'c', type: 'letter' }, | ||
{ raw: 'c', type: 'letter' }, | ||
{ raw: 'u', type: 'letter' }, | ||
{ raw: 's', type: 'letter' }, | ||
{ raw: 'a', type: 'letter' }, | ||
{ raw: 'n', type: 'letter' }, | ||
{ raw: 't', type: 'letter' }, | ||
{ raw: 'i', type: 'letter' }, | ||
{ raw: 'u', type: 'letter' }, | ||
{ raw: 'm', type: 'letter' }, | ||
{ raw: '\'', type: 'punctuation' }, | ||
{ raw: 's', type: 'letter' }, | ||
{ raw: '.', type: 'punctuation' }, | ||
] }, | ||
] }, | ||
] }, | ||
] | ||
} | ||
``` | ||
@@ -163,2 +181,2 @@ | ||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | ||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
10407
78
179