Comparing version 0.2.4 to 0.2.5
91
index.js
@@ -147,3 +147,2 @@ var Tree = require("./lib/BTree"); | ||
}) | ||
//console.log("lexical patterns count: " + lexicalPatters.length); | ||
lexicalPatters.sort(function(item1, item2) { | ||
@@ -155,3 +154,2 @@ return item2.sistring.length - item1.sistring.length; | ||
var sistring = lexicalPatters[i].sistring; | ||
//console.log("sistring: " + sistring); | ||
var fstOverlapIndex = -1; | ||
@@ -198,3 +196,2 @@ var sndOverlapIndex = -1; | ||
} | ||
//console.log("end of processing " + i + "th item"); | ||
} | ||
@@ -221,3 +218,2 @@ return result; | ||
this._addSistring(sistring, index); | ||
//this.printTreeContent(); | ||
} | ||
@@ -230,8 +226,6 @@ }, | ||
this.maxSistringLength = sistring.length; | ||
this._appendZeroes(this.maxSistringLength); | ||
} else { | ||
for(var i = sistring.length; i < this.maxSistringLength; i++) { | ||
sistring += "0"; | ||
} | ||
sistring += Array(this.maxSistringLength - sistring.length + 1).join("0"); | ||
} | ||
this._appendZeroes(this.maxSistringLength); | ||
this._insert(tree, tree.root, sistring, index); | ||
@@ -241,3 +235,2 @@ }, | ||
_insert: function(tree, node, sistring, index) { | ||
//var node = tree.getNode(nodeId); | ||
var indexes = []; | ||
@@ -249,9 +242,2 @@ indexes.push(index); | ||
node.indexes = indexes; | ||
/* | ||
tree.setNodeData(node, { | ||
type: this.EXTERNAL, | ||
sistring: sistring, | ||
indexes: indexes | ||
}); | ||
*/ | ||
} else if(node.type == this.INTERNAL) { | ||
@@ -274,9 +260,2 @@ var prefix = node.prefix; | ||
leftChild.indexes = indexes; | ||
/* | ||
leftChild. = { | ||
type: this.EXTERNAL, | ||
sistring: sistring, | ||
indexes: indexes | ||
}; | ||
*/ | ||
tree.appendLeftChild(node, leftChild); | ||
@@ -292,9 +271,2 @@ } else { | ||
rightChild.indexes = indexes; | ||
/* | ||
rightChild. = { | ||
type: this.EXTERNAL, | ||
sistring: sistring, | ||
indexes: indexes | ||
} | ||
*/ | ||
tree.appendRightChild(node, rightChild); | ||
@@ -322,7 +294,2 @@ } else { | ||
_rebuildInternalSubtree: function(tree, node, sistring, index) { | ||
/* | ||
if(!this._checkConnections()) { | ||
throw "tree broken"; | ||
} | ||
*/ | ||
@@ -347,9 +314,2 @@ var nodeString; | ||
externalNode.indexes = indexes; | ||
/* | ||
externalNode. = { | ||
type: this.EXTERNAL, | ||
sistring: sistring, | ||
indexes: indexes | ||
}; | ||
*/ | ||
@@ -363,15 +323,3 @@ var subtreeRoot = subtree.getRoot(); | ||
subtreeRoot.sistringRepres = externalNode; | ||
/* | ||
subtreeRoot. = { | ||
type: this.INTERNAL, | ||
position: branchBit, | ||
prefix: sistring.slice(0, branchBit), | ||
externalNodeNum: 0, | ||
totalFrequency: 0, | ||
sistringRepres: externalNode | ||
}; | ||
*/ | ||
var type = tree.getNodeType(node); | ||
@@ -385,3 +333,3 @@ | ||
var nodeBranchBit = nodeString[branchBit].valueOf(); | ||
var nodeBranchBit = nodeString[branchBit].valueOf(); | ||
var sistringBranchBit = sistring[branchBit].valueOf(); | ||
@@ -398,4 +346,2 @@ if(nodeBranchBit == 0 && sistringBranchBit == 1) { | ||
if(type == "root") { | ||
@@ -410,8 +356,2 @@ tree.root = subtree.root; | ||
this._updateParents(subtreeRoot); | ||
/* | ||
if(!this._checkConnections()) { | ||
throw "tree broken while inserting sistring " + sistring; | ||
} | ||
*/ | ||
}, | ||
@@ -421,3 +361,2 @@ | ||
var owner = this; | ||
//var sistringRepres = []; | ||
var left = node.left; | ||
@@ -431,7 +370,5 @@ var right = node.right; | ||
totalFrequency += left.totalFrequency; | ||
//sistringRepres = sistringRepres.concat(left.sistringRepres); | ||
} else if(left.type == owner.EXTERNAL) { | ||
externalNodeNum += 1; | ||
totalFrequency += left.indexes.length; | ||
//sistringRepres.push(left); | ||
} else { | ||
@@ -444,7 +381,5 @@ console.trace(); | ||
totalFrequency += right.totalFrequency; | ||
//sistringRepres = sistringRepres.concat(right.sistringRepres); | ||
} else if(right.type == owner.EXTERNAL) { | ||
externalNodeNum += 1; | ||
totalFrequency += right.indexes.length; | ||
//sistringRepres.push(right); | ||
} else { | ||
@@ -458,3 +393,2 @@ console.trace(); | ||
} | ||
//node.sistringRepres = sistringRepres; | ||
node.externalNodeNum = externalNodeNum; | ||
@@ -472,8 +406,5 @@ node.totalFrequency = totalFrequency; | ||
if(node && node.type == external) { | ||
var sistring = node.sistring; | ||
var sistringLen = sistring.length; | ||
var sistringLen = node.sistring.length;; | ||
if(sistringLen < length) { | ||
for(var i = sistringLen; i < length; i++) { | ||
node.sistring += "0"; | ||
} | ||
node.sistring += Array(length - sistringLen + 1).join("0"); | ||
} | ||
@@ -492,3 +423,2 @@ } | ||
} | ||
//console.log("result: " + result); | ||
return result; | ||
@@ -505,3 +435,2 @@ }, | ||
} | ||
//console.log(i); | ||
return i; | ||
@@ -515,3 +444,2 @@ }, | ||
} | ||
//console.log(output); | ||
return output; | ||
@@ -527,10 +455,4 @@ }, | ||
var wordIndex = indexes[2]; | ||
//console.log(" doc index: " + docIndex); | ||
//console.log(" sentense index: " + sentenseIndex); | ||
//console.log(" word index: " + wordIndex); | ||
var output = ""; | ||
var sentense = this.documents[docIndex][sentenseIndex]; | ||
//console.log(" sentense: " + sentense); | ||
var comparison = ""; | ||
@@ -541,3 +463,2 @@ for(var i = wordIndex; i < sentense.length && comparison.length != sistring.length; i++) { | ||
arr.push(sentense[i]); | ||
//console.log(" word: " + word); | ||
comparison += this._toBinary(arr); | ||
@@ -561,3 +482,2 @@ output += word; | ||
var sentense = this.documents[docIndex][sentenseIndex]; | ||
//console.log(" sentense: " + sentense); | ||
var comparison = ""; | ||
@@ -568,3 +488,2 @@ for(var i = wordIndex; i < sentense.length && comparison.length < prefix.length; i++) { | ||
arr.push(sentense[i]); | ||
//console.log(" word: " + word); | ||
@@ -571,0 +490,0 @@ var binaryWord = this._toBinary(arr); |
@@ -25,4 +25,2 @@ var Node = require("./Node.js"); | ||
isLeftChild: function(node) { | ||
//console.log(this.root.id); | ||
//var node = this.getNode(childId); | ||
if(!node.parent) { | ||
@@ -42,3 +40,2 @@ console.trace(); | ||
isRightChild: function(node) { | ||
//var node = this.getNode(childId) | ||
if(!node.parent) { | ||
@@ -58,3 +55,2 @@ console.trace(); | ||
getNodeType: function(node) { | ||
//var node = this.getNode(id); | ||
var type; | ||
@@ -89,3 +85,2 @@ if(this.isRoot(node)) { | ||
getParent: function(node) { | ||
//var node = this.getNode(id); | ||
if(!node.parent) { | ||
@@ -101,3 +96,2 @@ console.trace(); | ||
getLeftChild: function(node) { | ||
//var node = this.getNode(id); | ||
if(!node.left) { | ||
@@ -113,3 +107,2 @@ console.trace(); | ||
getRightChild: function(node) { | ||
//var node = this.getNode(id); | ||
if(!node.right) { | ||
@@ -128,11 +121,11 @@ console.trace(); | ||
preOrderTraverse: function(callback) { | ||
this._traverse(this.root, callback); | ||
this._preOrderTraverse(this.root, callback); | ||
}, | ||
inOrderTraverse: function(callback) { | ||
this._traverse(this.root, null, callback); | ||
this._inOrderTraverse(this.root, callback); | ||
}, | ||
postOrderTraverse: function(callback) { | ||
this._traverse(this.root, null, null, callback); | ||
this._postOrderTraverse(this.root, callback); | ||
}, | ||
@@ -153,3 +146,2 @@ | ||
appendLeftChild: function(node, subtree) { | ||
//var node = this.getNode(id); | ||
if(node.left) { | ||
@@ -176,3 +168,2 @@ console.trace(); | ||
appendRightChild: function(node, subtree) { | ||
//var node = this.getNode(id); | ||
if(node.right) { | ||
@@ -198,3 +189,2 @@ console.trace(); | ||
detachLeftChild: function(node) { | ||
//var node = this.getNode(id); | ||
if(!node.left) { | ||
@@ -211,3 +201,2 @@ return; | ||
detachRightChild: function(node) { | ||
//var node = this.getNode(id); | ||
if(!node.right) { | ||
@@ -224,3 +213,2 @@ return; | ||
removeSubtree: function(node) { | ||
//var node = this.getNode(id); | ||
if(node) { | ||
@@ -236,4 +224,2 @@ this._remove(node); | ||
reparentToLeft: function(node, newParent) { | ||
//var node = this.getNode(id); | ||
//var newParent = this.getNode(newParentId); | ||
if(newParent.left) { | ||
@@ -257,4 +243,2 @@ console.trace(); | ||
reparentToRight: function(node, newParent) { | ||
//var node = this.getNode(id); | ||
//var newParent = this.getNode(newParentId); | ||
if(newParent.right) { | ||
@@ -278,3 +262,2 @@ console.trace(); | ||
_find: function(node, id) { | ||
//console.log(node.id); | ||
if(node.id == id) { | ||
@@ -322,2 +305,35 @@ return node; | ||
_preOrderTraverse: function(node, callback) { | ||
callback(node); | ||
if(node.left) { | ||
this._preOrderTraverse(node.left, callback); | ||
} | ||
if(node.right) { | ||
this._preOrderTraverse(node.right, callback); | ||
} | ||
return; | ||
}, | ||
_inOrderTraverse: function(node, callback) { | ||
if(node.left) { | ||
this._inOrderTraverse(node.left, callback); | ||
} | ||
callback(node); | ||
if(node.right) { | ||
this._inOrderTraverse(node.right, callback); | ||
} | ||
return; | ||
}, | ||
_postOrderTraverse: function(node, callback) { | ||
if(node.left) { | ||
this._postOrderTraverse(node.left, callback); | ||
} | ||
if(node.right) { | ||
this._postOrderTraverse(node.right, callback); | ||
} | ||
callback(node); | ||
return; | ||
}, | ||
_remove: function(node) { | ||
@@ -324,0 +340,0 @@ var result = true; |
@@ -11,3 +11,2 @@ var uuid = require("../node_modules/node-uuid").v4; | ||
} | ||
//this.data = null; | ||
this.parent = null; | ||
@@ -14,0 +13,0 @@ this.left = null; |
{ | ||
"name": "pat-tree", | ||
"version": "0.2.4", | ||
"version": "0.2.5", | ||
"description": "PAT tree construction for Chinese documents, keyword extraction and text segmentation", | ||
@@ -5,0 +5,0 @@ "main": "index.js", |
@@ -41,3 +41,3 @@ pat-tree | ||
```javascript | ||
tree.addDocument(input); | ||
tree.addDocument(doc); | ||
``` | ||
@@ -74,6 +74,6 @@ | ||
``` | ||
The result json has following three keys: | ||
`header`: JSON object, | ||
`documents`: array, | ||
`tree`: array | ||
The result json has following three content: | ||
* `json.header`: JSON object, | ||
* `json.documents`: array, | ||
* `json.tree`: array | ||
@@ -88,15 +88,16 @@ You could store them to database and use `tree.reborn()` to generate the tree again. | ||
// One header object would be stored to database | ||
db.collection("header").insert(json.header, function(err, result) { | ||
if(err) throw err; | ||
}); | ||
for(var i = 0; i < json.documents.length; i++) { | ||
db.collection("documents").insert(json.documents[i], function(err, result) { | ||
if(err) throw err; | ||
}); | ||
} | ||
for(var i = 0; i < json.tree.length; i++) { | ||
db.collection("tree").insert(json.tree[i], function(err, result) { | ||
if(err) throw err; | ||
}); | ||
} | ||
// All documents would be stored to database | ||
db.collection("documents").insert(json.documents, function(err, result) { | ||
if(err) throw err; | ||
}); | ||
// All nodes of the tree would be stored to database | ||
db.collection("tree").insert(json.tree, function(err, result) { | ||
if(err) throw err; | ||
}); | ||
``` | ||
@@ -110,3 +111,3 @@ | ||
``` | ||
If you use `tree.toJSON()` to generate JSON object and store the three items to different collections, | ||
If you use `tree.toJSON()` to generate the JSON object and store the three objects to different collections, | ||
you can construct them to the original JSON object and use `tree.reborn(json)` to reborn the tree. | ||
@@ -120,3 +121,3 @@ | ||
var json = {}; | ||
json.header = headers[0]; | ||
json.header = headers[0]; // there should be only one header. | ||
json.documents = documents; | ||
@@ -131,7 +132,14 @@ json.tree = tree; | ||
``` | ||
Then you can use all functions of this module to `patTree`, for example, | ||
you can add one more document with `patTree.addDocument(doc)` and extract SLPs, | ||
and then store the tree back to database. Notice that, if you need the SLPs, | ||
you should store them to database manually. | ||
The `patTree` object would now be the same as the previously stored status, | ||
and you can do all operations like `patTree.addDocuments(doc)` to it. | ||
> **CATUION** | ||
> If you reborn the tree by above method, and do some operations like `patTree.addDocument(doc)`, | ||
> and you want to store the tree back to database as illustrated in *Convert to JSON*, | ||
> you **MUST** drop the collections(header, documents, tree) in the database first, | ||
> avoiding any record that is previously stored. | ||
# Additional functions | ||
@@ -251,4 +259,5 @@ | ||
* 0.2.4 Fix bug in reborn() | ||
* 0.2.3 Add functions toJSON() and reborn() | ||
* 0.2.5 Greatly improve performance of `addDocument()` | ||
* 0.2.4 Fix bug in `reborn()` | ||
* 0.2.3 Add functions `toJSON()` and `reborn()` | ||
* 0.2.2 Change function name of splitDoc to segmentDoc | ||
@@ -255,0 +264,0 @@ * 0.2.1 Mofify README file |
267
32096
852