retext-japanese
Advanced tools
Comparing version 0.1.0 to 0.2.0
72
index.js
@@ -13,3 +13,2 @@ /** | ||
*/ | ||
var Parser = require('parse-japanese-basic') | ||
@@ -40,3 +39,22 @@ var visit = require('unist-util-visit') | ||
/** | ||
* Create TextNode for SymbolNode, PunctuationNode, WhiteSpaceNode, SourceNode, and TextNode with POS. | ||
* Create WordNode with POS. | ||
* @param item | ||
* @returns {{type: string, value: *}} | ||
*/ | ||
function createWordNode (item) { | ||
// Start from an empty parent node of type 'Word'.
var wordNode = parser.createParentNode('Word') | ||
// Attach the whole item as node data only when the `pos` option is truthy.
if (options && options.pos) { | ||
wordNode.data = item | ||
} | ||
// The word's text comes from the item's surface_form, wrapped in a 'Text' node.
var textNode = parser.createTextNode('Text', item.surface_form) | ||
// Make the text node a child of the word node, then hand the word node back.
parser.add(textNode, wordNode) | ||
return wordNode | ||
} | ||
/** | ||
* Create TextNode for SymbolNode, PunctuationNode, WhiteSpaceNode and SourceNode with POS. | ||
* @param type | ||
@@ -77,3 +95,4 @@ * @param item | ||
/** | ||
* 以降の解析はParagraphNodeの子ノードはTextNodeとWhiteSpaceNodeとなることを前提としている。 | ||
* 以降の解析はparse-japanese-basicによるNLCSTを前提としている。 | ||
* ParagraphNodeの子ノードはTextNodeとWhiteSpaceNodeとなる | ||
* ParagraphNode[2] | ||
@@ -99,3 +118,2 @@ * ├─ TextNode: 'あいうえお' | ||
var sentenceNode = parser.createParentNode('Sentence') | ||
var wordNode = parser.createParentNode('Word') | ||
@@ -115,48 +133,30 @@ // kuromoji.jsにより形態素解析を行う | ||
// 文字が空白の場合 | ||
if (item.pos_detail_1 === WS) { | ||
// インラインの場合 | ||
if (linedepth) { | ||
parser.add(createTextNode('WhiteSpace', item), wordNode) | ||
} else { | ||
// アウトラインの場合 | ||
// WordNodeに子ノードが存在する場合、WordNodeを終了する | ||
if (wordNode.children.length) { | ||
parser.add(wordNode, sentenceNode) | ||
wordNode = parser.createParentNode('Word') | ||
} | ||
parser.add(createTextNode('WhiteSpace', item), sentenceNode) | ||
} | ||
// 文字が空白の場合 | ||
parser.add(createTextNode('WhiteSpace', item), sentenceNode) | ||
} else if (item.pos_detail_1 === M_OP) { | ||
// 文字が開括弧の場合 | ||
linedepth++ | ||
parser.add(createTextNode('Punctuation', item), wordNode) | ||
parser.add(createTextNode('Punctuation', item), sentenceNode) | ||
} else if (item.pos_detail_1 === M_CP) { | ||
// 文字が閉括弧の場合 | ||
linedepth-- | ||
parser.add(createTextNode('Punctuation', item), wordNode) | ||
parser.add(createTextNode('Punctuation', item), sentenceNode) | ||
} else if (item.pos_detail_1 === M_P) { | ||
// 文字が句点の場合 | ||
parser.add(createTextNode('Punctuation', item), wordNode) | ||
// アウトラインの場合、WordNodeを終了し、次のWordNodeを作る | ||
if (!linedepth) { | ||
parser.add(wordNode, sentenceNode) | ||
wordNode = parser.createParentNode('Word') | ||
parser.add(createTextNode('Punctuation', item), sentenceNode) | ||
// インラインではない場合 | ||
if (!linedepth && index !== data.length - 1) { | ||
// 行末でなければ次のSentenceNodeを作る | ||
if (index !== data.length - 1) { | ||
sentenceNode = parser.createParentNode('Sentence') | ||
parser.add(sentenceNode, paragraphNode) | ||
} | ||
sentenceNode = parser.createParentNode('Sentence') | ||
parser.add(sentenceNode, paragraphNode) | ||
} | ||
} else { | ||
// 改行以外のその他の文字の場合 | ||
parser.add(createTextNode('Text', item), wordNode) | ||
} | ||
// 行末の場合 | ||
if (index === data.length - 1) { | ||
// WordNodeに子ノードが存在する場合、WordNodeを終了する(句点で終わらない文章の場合) | ||
if (wordNode.children.length) { | ||
parser.add(wordNode, sentenceNode) | ||
// 記号であれば PunctuationNode を作る | ||
if (item.pos === '記号' && item.pos_detail_1 === '一般') { | ||
parser.add(createTextNode('Punctuation', item), sentenceNode) | ||
} else { | ||
parser.add(createWordNode(item), sentenceNode) | ||
} | ||
@@ -163,0 +163,0 @@ } |
{ | ||
"name": "retext-japanese", | ||
"description": "Japanese language support for retext", | ||
"version": "0.1.0", | ||
"version": "0.2.0", | ||
"author": "Kenichiro Murata <kenichiro.murata@gmail.com>", | ||
@@ -11,3 +11,3 @@ "bugs": { | ||
"kuromoji": "0.0.4", | ||
"parse-japanese-basic": "^0.2.0", | ||
"parse-japanese-basic": "^0.3.0", | ||
"unist-util-visit": "^1.0.0" | ||
@@ -20,3 +20,3 @@ }, | ||
"mocha": "^2.3.3", | ||
"power-assert": "^1.0.1", | ||
"power-assert": "^1.1.0", | ||
"retext": "^1.0.0", | ||
@@ -29,4 +29,3 @@ "retext-emoji": "^1.0.0", | ||
"files": [ | ||
"index.js", | ||
"lib" | ||
"index.js" | ||
], | ||
@@ -53,3 +52,3 @@ "homepage": "https://github.com/muraken720/retext-japanese#readme", | ||
"standard": "standard", | ||
"test": "mocha --compilers js:espower-babel/guess test/**/*.js" | ||
"test": "npm run standard && mocha --compilers js:espower-babel/guess test/**/*.js" | ||
}, | ||
@@ -56,0 +55,0 @@ "standard": { |
109
README.md
@@ -19,3 +19,3 @@ # retext-japanese [![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat)](https://github.com/feross/standard) | ||
var retext = require('retext') | ||
var japanese = require('../') | ||
var japanese = require('retext-japanese') | ||
var inspect = require('unist-util-inspect') | ||
@@ -52,35 +52,50 @@ | ||
* └─ ParagraphNode[4] (3:1-3:39, 4-44) | ||
* ├─ SentenceNode[3] (3:1-3:11, 4-14) | ||
* ├─ SentenceNode[7] (3:1-3:11, 4-14) | ||
* │ ├─ WordNode[1] (3:1-3:2, 4-5) | ||
* │ │ └─ TextNode: "1" (3:1-3:2, 4-5) | ||
* │ ├─ WhiteSpaceNode: " " (3:2-3:3, 5-6) | ||
* │ └─ WordNode[5] (3:3-3:11, 6-14) | ||
* │ ├─ TextNode: "これ" (3:3-3:5, 6-8) | ||
* │ ├─ TextNode: "は" (3:5-3:6, 8-9) | ||
* │ ├─ TextNode: "前段" (3:6-3:8, 9-11) | ||
* │ ├─ TextNode: "です" (3:8-3:10, 11-13) | ||
* │ └─ PunctuationNode: "。" (3:10-3:11, 13-14) | ||
* ├─ SentenceNode[1] (3:11-3:30, 14-33) | ||
* │ └─ WordNode[14] (3:11-3:30, 14-33) | ||
* │ ├─ TextNode: "これ" (3:11-3:13, 14-16) | ||
* │ ├─ TextNode: "は" (3:13-3:14, 16-17) | ||
* │ ├─ TextNode: "中段" (3:14-3:16, 17-19) | ||
* │ ├─ PunctuationNode: "(" (3:16-3:17, 19-20) | ||
* │ ├─ TextNode: "2" (3:17-3:18, 20-21) | ||
* │ ├─ TextNode: "文" (3:18-3:19, 21-22) | ||
* │ ├─ TextNode: "の" (3:19-3:20, 22-23) | ||
* │ ├─ TextNode: "場合" (3:20-3:22, 23-25) | ||
* │ ├─ TextNode: "は" (3:22-3:23, 25-26) | ||
* │ ├─ TextNode: "後段" (3:23-3:25, 26-28) | ||
* │ ├─ PunctuationNode: "。" (3:25-3:26, 28-29) | ||
* │ ├─ PunctuationNode: ")" (3:26-3:27, 29-30) | ||
* │ ├─ TextNode: "です" (3:27-3:29, 30-32) | ||
* │ └─ PunctuationNode: "。" (3:29-3:30, 32-33) | ||
* ├─ SentenceNode[1] (3:30-3:38, 33-41) | ||
* │ └─ WordNode[5] (3:30-3:38, 33-41) | ||
* │ ├─ TextNode: "これ" (3:30-3:32, 33-35) | ||
* │ ├─ TextNode: "は" (3:32-3:33, 35-36) | ||
* │ ├─ TextNode: "後段" (3:33-3:35, 36-38) | ||
* │ ├─ TextNode: "です" (3:35-3:37, 38-40) | ||
* │ └─ PunctuationNode: "。" (3:37-3:38, 40-41) | ||
* │ ├─ WordNode[1] (3:3-3:5, 6-8) | ||
* │ │ └─ TextNode: "これ" (3:3-3:5, 6-8) | ||
* │ ├─ WordNode[1] (3:5-3:6, 8-9) | ||
* │ │ └─ TextNode: "は" (3:5-3:6, 8-9) | ||
* │ ├─ WordNode[1] (3:6-3:8, 9-11) | ||
* │ │ └─ TextNode: "前段" (3:6-3:8, 9-11) | ||
* │ ├─ WordNode[1] (3:8-3:10, 11-13) | ||
* │ │ └─ TextNode: "です" (3:8-3:10, 11-13) | ||
* │ └─ PunctuationNode: "。" (3:10-3:11, 13-14) | ||
* ├─ SentenceNode[14] (3:11-3:30, 14-33) | ||
* │ ├─ WordNode[1] (3:11-3:13, 14-16) | ||
* │ │ └─ TextNode: "これ" (3:11-3:13, 14-16) | ||
* │ ├─ WordNode[1] (3:13-3:14, 16-17) | ||
* │ │ └─ TextNode: "は" (3:13-3:14, 16-17) | ||
* │ ├─ WordNode[1] (3:14-3:16, 17-19) | ||
* │ │ └─ TextNode: "中段" (3:14-3:16, 17-19) | ||
* │ ├─ PunctuationNode: "(" (3:16-3:17, 19-20) | ||
* │ ├─ WordNode[1] (3:17-3:18, 20-21) | ||
* │ │ └─ TextNode: "2" (3:17-3:18, 20-21) | ||
* │ ├─ WordNode[1] (3:18-3:19, 21-22) | ||
* │ │ └─ TextNode: "文" (3:18-3:19, 21-22) | ||
* │ ├─ WordNode[1] (3:19-3:20, 22-23) | ||
* │ │ └─ TextNode: "の" (3:19-3:20, 22-23) | ||
* │ ├─ WordNode[1] (3:20-3:22, 23-25) | ||
* │ │ └─ TextNode: "場合" (3:20-3:22, 23-25) | ||
* │ ├─ WordNode[1] (3:22-3:23, 25-26) | ||
* │ │ └─ TextNode: "は" (3:22-3:23, 25-26) | ||
* │ ├─ WordNode[1] (3:23-3:25, 26-28) | ||
* │ │ └─ TextNode: "後段" (3:23-3:25, 26-28) | ||
* │ ├─ PunctuationNode: "。" (3:25-3:26, 28-29) | ||
* │ ├─ PunctuationNode: ")" (3:26-3:27, 29-30) | ||
* │ ├─ WordNode[1] (3:27-3:29, 30-32) | ||
* │ │ └─ TextNode: "です" (3:27-3:29, 30-32) | ||
* │ └─ PunctuationNode: "。" (3:29-3:30, 32-33) | ||
* ├─ SentenceNode[5] (3:30-3:38, 33-41) | ||
* │ ├─ WordNode[1] (3:30-3:32, 33-35) | ||
* │ │ └─ TextNode: "これ" (3:30-3:32, 33-35) | ||
* │ ├─ WordNode[1] (3:32-3:33, 35-36) | ||
* │ │ └─ TextNode: "は" (3:32-3:33, 35-36) | ||
* │ ├─ WordNode[1] (3:33-3:35, 36-38) | ||
* │ │ └─ TextNode: "後段" (3:33-3:35, 36-38) | ||
* │ ├─ WordNode[1] (3:35-3:37, 38-40) | ||
* │ │ └─ TextNode: "です" (3:35-3:37, 38-40) | ||
* │ └─ PunctuationNode: "。" (3:37-3:38, 40-41) | ||
* └─ WhiteSpaceNode: "\n" (3:38-3:39, 43-44) | ||
@@ -114,14 +129,20 @@ * | ||
/** | ||
* RootNode[1] (1:1-1:14, 0-13) | ||
* └─ ParagraphNode[2] (1:1-1:14, 0-13) | ||
* ├─ SentenceNode[1] (1:1-1:13, 0-12) | ||
* │ └─ WordNode[7] (1:1-1:13, 0-12) | ||
* │ ├─ TextNode: "すもも" (1:1-1:4, 0-3) [data={"word_id":404420,"word_type":"KNOWN","word_position":1,"surface_form":"すもも","pos":"名詞","pos_detail_1":"一般","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"すもも","reading":"スモモ","pronunciation":"スモモ"}] | ||
* │ ├─ TextNode: "も" (1:4-1:5, 3-4) [data={"word_id":2595480,"word_type":"KNOWN","word_position":4,"surface_form":"も","pos":"助詞","pos_detail_1":"係助詞","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"も","reading":"モ","pronunciation":"モ"}] | ||
* │ ├─ TextNode: "もも" (1:5-1:7, 4-6) [data={"word_id":604730,"word_type":"KNOWN","word_position":5,"surface_form":"もも","pos":"名詞","pos_detail_1":"一般","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"もも","reading":"モモ","pronunciation":"モモ"}] | ||
* │ ├─ TextNode: "も" (1:7-1:8, 6-7) [data={"word_id":2595480,"word_type":"KNOWN","word_position":7,"surface_form":"も","pos":"助詞","pos_detail_1":"係助詞","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"も","reading":"モ","pronunciation":"モ"}] | ||
* │ ├─ TextNode: "もも" (1:8-1:10, 7-9) [data={"word_id":604730,"word_type":"KNOWN","word_position":8,"surface_form":"もも","pos":"名詞","pos_detail_1":"一般","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"もも","reading":"モモ","pronunciation":"モモ"}] | ||
* │ ├─ TextNode: "の" (1:10-1:11, 9-10) [data={"word_id":2595360,"word_type":"KNOWN","word_position":10,"surface_form":"の","pos":"助詞","pos_detail_1":"連体化","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"の","reading":"ノ","pronunciation":"ノ"}] | ||
* │ └─ TextNode: "うち" (1:11-1:13, 10-12) [data={"word_id":1467000,"word_type":"KNOWN","word_position":11,"surface_form":"うち","pos":"名詞","pos_detail_1":"非自立","pos_detail_2":"副詞可能","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"うち","reading":"ウチ","pronunciation":"ウチ"}] | ||
* └─ WhiteSpaceNode: "\n" (1:13-1:14, 12-13) | ||
* RootNode[1] | ||
* └─ ParagraphNode[2] | ||
* ├─ SentenceNode[7] | ||
* │ ├─ WordNode[1] [data={"word_id":404420,"word_type":"KNOWN","word_position":1,"surface_form":"すもも","pos":"名詞","pos_detail_1":"一般","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"すもも","reading":"スモモ","pronunciation":"スモモ"}] | ||
* │ │ └─ TextNode: "すもも" | ||
* │ ├─ WordNode[1] [data={"word_id":2595480,"word_type":"KNOWN","word_position":4,"surface_form":"も","pos":"助詞","pos_detail_1":"係助詞","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"も","reading":"モ","pronunciation":"モ"}] | ||
* │ │ └─ TextNode: "も" | ||
* │ ├─ WordNode[1] [data={"word_id":604730,"word_type":"KNOWN","word_position":5,"surface_form":"もも","pos":"名詞","pos_detail_1":"一般","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"もも","reading":"モモ","pronunciation":"モモ"}] | ||
* │ │ └─ TextNode: "もも" | ||
* │ ├─ WordNode[1] [data={"word_id":2595480,"word_type":"KNOWN","word_position":7,"surface_form":"も","pos":"助詞","pos_detail_1":"係助詞","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"も","reading":"モ","pronunciation":"モ"}] | ||
* │ │ └─ TextNode: "も" | ||
* │ ├─ WordNode[1] [data={"word_id":604730,"word_type":"KNOWN","word_position":8,"surface_form":"もも","pos":"名詞","pos_detail_1":"一般","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"もも","reading":"モモ","pronunciation":"モモ"}] | ||
* │ │ └─ TextNode: "もも" | ||
* │ ├─ WordNode[1] [data={"word_id":2595360,"word_type":"KNOWN","word_position":10,"surface_form":"の","pos":"助詞","pos_detail_1":"連体化","pos_detail_2":"*","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"の","reading":"ノ","pronunciation":"ノ"}] | ||
* │ │ └─ TextNode: "の" | ||
* │ └─ WordNode[1] [data={"word_id":1467000,"word_type":"KNOWN","word_position":11,"surface_form":"うち","pos":"名詞","pos_detail_1":"非自立","pos_detail_2":"副詞可能","pos_detail_3":"*","conjugated_type":"*","conjugated_form":"*","basic_form":"うち","reading":"ウチ","pronunciation":"ウチ"}] | ||
* │ └─ TextNode: "うち" | ||
* └─ WhiteSpaceNode: "\n" | ||
* | ||
@@ -146,3 +167,3 @@ * === doc === | ||
* `pos` (`boolean`, default: `false`) - Whether to add part-of-speech information (obtained with [kuromoji.js](https://github.com/takuyaa/kuromoji.js)) to nodes. | ||
* `dicDir` (`string`, default: `node_modules/parse-japanese/node_modules/kuromoji/dist/dict/`) - Whether to set Dictionaries directory for kuromoji.js. | ||
* `dicDir` (`string`, default: `node_modules/retext-japanese/node_modules/kuromoji/dist/dict/`) - Path to the dictionary directory used by kuromoji.js. | ||
@@ -149,0 +170,0 @@ ## Related |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
15841
175
139
+ Added parse-japanese-basic@0.3.0 (transitive)
- Removed parse-japanese-basic@0.2.0 (transitive)
Updated parse-japanese-basic@^0.3.0