Comparing version 1.0.5 to 1.0.6
@@ -1,1 +0,1 @@ | ||
module.exports = require("./lib/PATtree.js"); | ||
module.exports = require("./lib/PATtree.js"); |
@@ -27,3 +27,3 @@ var Node = require("./Node.js"); | ||
console.trace(); | ||
throw "no parent"; | ||
throw "no parent"; | ||
} else { | ||
@@ -35,3 +35,3 @@ var parent = node.parent; | ||
return false; | ||
} | ||
} | ||
} | ||
@@ -43,3 +43,3 @@ }, | ||
console.trace(); | ||
throw "no parent"; | ||
throw "no parent"; | ||
} else { | ||
@@ -51,3 +51,3 @@ var parent = node.parent; | ||
return false; | ||
} | ||
} | ||
} | ||
@@ -253,3 +253,3 @@ }, | ||
if(node.id == id) { | ||
return node; | ||
return node; | ||
} else if(!node.left && !node.right) { | ||
@@ -278,3 +278,3 @@ return; | ||
if(preCallback) { | ||
preCallback(node); | ||
preCallback(node); | ||
} | ||
@@ -304,3 +304,3 @@ if(node.left) { | ||
} | ||
return; | ||
return; | ||
}, | ||
@@ -315,3 +315,3 @@ | ||
this._inOrderTraverse(node.right, callback); | ||
} | ||
} | ||
return; | ||
@@ -326,4 +326,4 @@ }, | ||
this._postOrderTraverse(node.right, callback); | ||
} | ||
callback(node); | ||
} | ||
callback(node); | ||
return; | ||
@@ -358,3 +358,1 @@ }, | ||
} | ||
@@ -1,2 +0,2 @@ | ||
var uuid = require("../node_modules/node-uuid").v4; | ||
var uuid = require("node-uuid").v4; | ||
@@ -18,3 +18,3 @@ module.exports = Node; | ||
Node.prototype = { | ||
setLeftChild: function(node) { | ||
@@ -21,0 +21,0 @@ this.left = node; |
{ | ||
"name": "pat-tree", | ||
"version": "1.0.5", | ||
"version": "1.0.6", | ||
"description": "PAT tree construction for Chinese documents, keyword extraction and text segmentation", | ||
@@ -23,3 +23,3 @@ "main": "index.js", | ||
"Chinese", | ||
"ckip", | ||
"ckip", | ||
"keyword extraction", | ||
@@ -26,0 +26,0 @@ "text segmentation" |
@@ -12,3 +12,3 @@ pat-tree | ||
PAT tree construction for Chinese documents. | ||
Provide functionality to add documents and construct PAT tree in memory, | ||
Provide functionality to add documents and construct PAT tree in memory, | ||
convert to JSON for storing to database, | ||
@@ -35,4 +35,4 @@ extract keywords, and text segmentation. | ||
# Usage | ||
### Instantiate | ||
@@ -56,3 +56,3 @@ | ||
```javascript | ||
var SLPs = tree.extractSLP(TFThreshold, SEThreshold, verbose); | ||
var SLPs = tree.extractSLP(TFThreshold, SEThreshold, verbose); | ||
// SLPs: array of JSON objects, which are significant lexical patterns and their related information.
@@ -76,4 +76,4 @@ ``` | ||
tree.extractSLP(10, 0.5); | ||
var result = tree.segmentDoc(doc, asArray); | ||
tree.extractSLP(10, 0.5); | ||
var result = tree.segmentDoc(doc, asArray); | ||
``` | ||
@@ -87,3 +87,3 @@ | ||
`result` is the result of document segmentation as a string of terms separated by whitespace,
`result` is the result of document segmentation as a string of terms separated by whitespace,
or an array of terms if `asArray` is set to true. | ||
@@ -95,3 +95,3 @@ | ||
```javascript | ||
var json = tree.toJSON(); | ||
var json = tree.toJSON(); | ||
``` | ||
@@ -119,3 +119,3 @@ The result json has following three content: | ||
if(err) throw err; | ||
}); | ||
}); | ||
@@ -125,3 +125,3 @@ // All nodes of the tree would be stored to database | ||
if(err) throw err; | ||
}); | ||
}); | ||
``` | ||
@@ -135,3 +135,3 @@ | ||
``` | ||
If you use `tree.toJSON()` to generate the JSON object and store the three objects to different collections, | ||
If you use `tree.toJSON()` to generate the JSON object and store the three objects to different collections, | ||
you can construct them to the original JSON object and use `tree.reborn(json)` to reborn the tree. | ||
@@ -153,6 +153,6 @@ | ||
}) | ||
}) | ||
}) | ||
``` | ||
The `patTree` object would now be the same as the previously stored status, | ||
The `patTree` object would now be the same as the previously stored status, | ||
and you can do all operations like `patTree.addDocuments(doc)` to it. | ||
@@ -162,4 +162,4 @@ | ||
> **CATUION** | ||
> If you reborn the tree by above method, and do some operations like `patTree.addDocument(doc)`, | ||
> and you want to store the tree back to database as illustrated in *Convert to JSON*, | ||
> If you reborn the tree by above method, and do some operations like `patTree.addDocument(doc)`, | ||
> and you want to store the tree back to database as illustrated in *Convert to JSON*, | ||
> you **MUST** drop the collections(header, documents, tree) in the database first, | ||
@@ -213,3 +213,3 @@ > avoiding any record that is previously stored. | ||
parent: parentNode, // the parent of this node, data type: Node | ||
left: leftChildNode, // data type: Node | ||
left: leftChildNode, // data type: Node | ||
right: rightChildNode, // data type: Node | ||
@@ -221,3 +221,3 @@ } | ||
Internal nodes have the following structure:
### Internal nodes | ||
@@ -227,17 +227,17 @@ | ||
internalNode = { | ||
// ... | ||
// ... | ||
type: "internal", | ||
type: "internal", | ||
// indicates this is an internal node | ||
position: 13, | ||
// the branch position of external nodes, data type: integer | ||
prefix: "00101", | ||
prefix: "00101", | ||
// the sharing prefix of external nodes, data type: string of 0s and 1s | ||
externalNodeNum: 87, | ||
// number of external nodes contained in subtree of this node, | ||
externalNodeNum: 87, | ||
// number of external nodes contained in subtree of this node, | ||
// data type: integer | ||
totalFrequency: 89, | ||
totalFrequency: 89, | ||
// number of the total frequency of the external nodes in the collection, | ||
// data type: integer | ||
sistringRepres: node | ||
sistringRepres: node | ||
// one of the external nodes in the subtree of this internal node,
@@ -256,7 +256,7 @@ // data type: Node | ||
type: "external", | ||
type: "external", | ||
// indicates this is an external node, | ||
sistring: "00101100110101", | ||
sistring: "00101100110101", | ||
// binary representation of the character, data type: string | ||
indexes: ["0.1,3", "1.2.5"] | ||
indexes: ["0.1,3", "1.2.5"] | ||
// the positions where the sistring appears in the collection, | ||
@@ -288,3 +288,3 @@ // data type: array | ||
All operations are fast, but require more memory and disk space to operate successfully. | ||
Running on Macbook Pro Retina, connected to local MongoDB, given 8GB memory size | ||
Running on Macbook Pro Retina, connected to local MongoDB, given 8GB memory size | ||
by specifying V8 option `--max_old_space_size=8000`, has following performance. | ||
@@ -297,3 +297,3 @@ | ||
* After above operation, find all collections in database and reborn the tree by `tree.reborn()` takes about 1 minute.
* After above operation, do text segmentation on 32,769 posts by `tree.segmentDoc()`, given SLPs extracted above, | ||
* After above operation, do text segmentation on 32,769 posts by `tree.segmentDoc()`, given SLPs extracted above, | ||
takes about 5 minutes. | ||
@@ -303,2 +303,3 @@ | ||
* 1.0.6 Correct require path | ||
* 1.0.5 Restructure folders | ||
@@ -305,0 +306,0 @@ * 1.0.4 `segmentDoc` no need to pass in SLPs, and enable to return array of terms. |
Sorry, the diff of this file is not supported yet
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
2325
310
0
50236