New Case Study:See how Anthropic automated 95% of dependency reviews with Socket.Learn More
Socket
Sign inDemoInstall
Socket

pat-tree

Package Overview
Dependencies
Maintainers
1
Versions
25
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

pat-tree - npm Package Compare versions

Comparing version 0.1.2 to 0.1.3

164

index.js

@@ -17,2 +17,19 @@ var Tree = require("./lib/BTree");

traverse: function(preCallback, inCallback, postCallback) {
this.tree.traverse(preCallback, inCallback, postCallback);
},
preOrderTraverse: function(callback) {
this.tree.preOrderTraverse(callback);
},
inOrderTraverse: function(callback) {
this.tree.inOrderTraverse(callback);
},
postOrderTraverse: function(callback) {
this.tree.postOrderTraverse(callback);
},
addDocument: function(doc) {

@@ -53,2 +70,3 @@ var sentenses = this._splitDocument(doc);

this._insert(tree, tree.root.id, sistring, index);
this._updateParents();
},

@@ -58,8 +76,9 @@

var node = tree.getNode(nodeId);
var indexes = [];
indexes.push(index);
if(tree.isRoot(nodeId) && tree.root.data == null) {
tree.setNodeData(nodeId, {
type: this.EXTERNAL,
index: index,
sistring: sistring,
count: 1
sistring: sistring,
indexes: indexes
});

@@ -83,4 +102,3 @@ } else if(node.data.type == this.INTERNAL) {

sistring: sistring,
index: index,
count: 1
indexes: indexes
};

@@ -97,4 +115,3 @@ tree.appendLeftChild(nodeId, leftChild);

sistring: sistring,
index: index,
count: 1
indexes: indexes
}

@@ -113,3 +130,3 @@ tree.appendRightChild(nodeId, rightChild);

if(node.data.sistring == sistring) {
node.data.count++;
node.data.indexes.push(index);
} else {

@@ -129,11 +146,12 @@ this._rebuildInternalSubtree(tree, node, sistring, index);

*/
var nodeString;
var sistrings = [];
var indexes = [];
indexes.push(index);
if(node.data.type == this.INTERNAL) {
nodeString = node.data.prefix;
sistrings = sistrings.concat(node.data.sistrings);
} else if(node.data.type == this.EXTERNAL) {
nodeString = node.data.sistring;
sistrings.push(node);
}

@@ -148,7 +166,5 @@ var branchBit = this._findBranchPosition(nodeString, sistring);

sistring: sistring,
index: index,
count: 1
indexes: indexes
};
sistrings.push(externalNode);

@@ -160,3 +176,5 @@ var subtreeRoot = subtree.getRoot();

prefix: sistring.slice(0, branchBit),
sistrings: sistrings
externalNodeNum: 0,
totalFrequency: 0,
sistrings: []
};

@@ -195,2 +213,3 @@

}
/*

@@ -203,2 +222,47 @@ if(!this._checkConnections()) {

_updateParents: function() {
var owner = this;
this.postOrderTraverse(function(node) {
if(node.data.type == owner.INTERNAL) {
var sistrings = [];
var left = node.left;
var right = node.right;
var externalNodeNum = 0;
var totalFrequency = 0;
if(left && right) {
if(left.data.type == owner.INTERNAL) {
externalNodeNum += left.data.externalNodeNum;
totalFrequency += left.data.totalFrequency;
sistrings = sistrings.concat(left.data.sistrings);
} else if(left.data.type == owner.EXTERNAL) {
externalNodeNum += 1;
totalFrequency += left.data.indexes.length;
sistrings.push(left);
} else {
console.trace();
throw "unknown node type (neither internal nor external)"
}
if(right.data.type == owner.INTERNAL) {
externalNodeNum += right.data.externalNodeNum;
totalFrequency += right.data.totalFrequency;
sistrings = sistrings.concat(right.data.sistrings);
} else if(right.data.type == owner.EXTERNAL) {
externalNodeNum += 1;
totalFrequency += right.data.indexes.length;
sistrings.push(right);
} else {
console.trace();
throw "unknown node type (neither internal nor external)"
}
} else {
console.trace();
throw "internal node lost left or right child"
}
node.data.sistrings = sistrings;
node.data.externalNodeNum = externalNodeNum;
node.data.totalFrequency = totalFrequency;
}
});
},
_appendZeroes: function(length) {

@@ -253,3 +317,5 @@ var tree = this.tree;

_restoreSistring: function(sistring, index) {
_restoreSistring: function(externalNode) {
var sistring = externalNode.data.sistring;
var index = externalNode.data.indexes[0];
var indexes = index.split(".");

@@ -280,2 +346,32 @@ var docIndex = indexes[0];

_restorePrefix: function(internalNode) {
var prefix = internalNode.data.prefix;
var externalNode = internalNode.data.sistrings[0];
var index = externalNode.data.indexes[0];
var indexes = index.split(".");
var docIndex = indexes[0];
var sentenseIndex = indexes[1];
var wordIndex = indexes[2];
var output = "";
var sentense = this.documents[docIndex][sentenseIndex];
//console.log(" sentense: " + sentense);
var comparison = "";
for(var i = wordIndex; i < sentense.length && comparison.length < prefix.length; i++) {
var arr = [];
var word = sentense[i];
arr.push(sentense[i]);
//console.log(" word: " + word);
var binaryWord = this._toBinary(arr);
if(comparison.length + binaryWord.length >= prefix.length) {
break;
} else {
comparison += binaryWord;
output += word;
}
}
return output;
},
_toString: function(b) {

@@ -299,6 +395,6 @@ var output = "";

_printTreeContent: function() {
printTreeContent: function(printExternalNodes, printDocuments) {
var tree = this.tree;
var owner = this;
tree.preOrderTraverse(function(node) {
this.preOrderTraverse(function(node) {
console.log("id: " + node.id);

@@ -313,16 +409,26 @@ var type = tree.getNodeType(node.id);

console.log("position: " + node.data.position);
console.log("prefix: " + node.data.prefix);
console.log("sistrings: ");
for(var i = 0; i < node.data.sistrings.length; i++) {
var externalNodeData = node.data.sistrings[i].data;
console.log(" sistring: " + owner._restoreSistring(externalNodeData.sistring, externalNodeData.index));
console.log(" count: " + externalNodeData.count);
}
console.log("prefix: " + owner._restorePrefix(node));
console.log("externalNodeNum: " + node.data.externalNodeNum);
console.log("totalFrequency: " + node.data.totalFrequency);
if(printExternalNodes)
{
console.log("sistrings: ");
for(var i = 0; i < node.data.sistrings.length; i++) {
var externalNode = node.data.sistrings[i];
console.log(" sistring: " + owner._restoreSistring(externalNode));
console.log(" indexes: " + externalNode.data.indexes);
}
}
} else if(node.data.type == owner.EXTERNAL) {
console.log("sistring: " + owner._restoreSistring(node.data.sistring, node.data.index));
console.log("index: " + node.data.index);
console.log("count: " + node.data.count);
console.log("sistring: " + owner._restoreSistring(node));
console.log("indexes: " + node.data.indexes);
}
console.log();
});
});
if(printDocuments)
{
console.log("documents:");
console.log(this.documents);
}
},

@@ -329,0 +435,0 @@

2

package.json
{
"name": "pat-tree",
"version": "0.1.2",
"version": "0.1.3",
"description": "PAT tree construction for Chinese documents",

@@ -5,0 +5,0 @@ "main": "index.js",

@@ -9,13 +9,103 @@ pat-tree

## Installation
# Installation
npm install pat-tree --save
## Test
# Test
npm test
## Release History
# Usage
* 0.1.2 Initial release
## Init
var PATtree = require("pat-tree");
var tree = new PATtree();
## Add document
tree.addDocument(input);
## Print tree content
tree.printTreeContent(printExternalNodes, printDocuments);
If **printExternalNodes** is set to true, print out all external nodes for each internal node.
If **printDocuments** is set to true, print out the whole collection of the tree.
## Traversal
tree.traverse(preCallback, inCallback, postCallback);
For convenient, there are functions for each order of traversal
tree.preOrderTraverse(callback);
tree.inOrderTraverse(callback);
tree.postOrderTraverse(callback);
For example
tree.preOrderTraverse(function(node) {
console.log("node id: " + node.id);
})
# Data type
## Node
Every nodes has some common informaitons, an node has the following structure:
node = {
id: 3, // the id of this node, data type: JSON, auto generated.
parent: 1, // the parent id of this node, data type: integer
left: leftChildNode, // data type: Node
right: rightChildNode, // data type: Node
data: {} // payload for this node, data type : JSON
}
Data is different for internal nodes and external nodes,
Internal nodes has following structure:
## Internal nodes
internalNode.data = {
type: "internal", // indicates this is an internal node
position: 13, // the branch position of external nodes, data type: integer
prefix: "00101", // the sharing prefix of external nodes, data type: string of 0s and 1s
externalNodeNum: 87, // number of external nodes contained in subtree of this node, data type: integer
totalFrequency: 89, // number of the total frequency of the external nodes in the collection, data type: integer
sistrings: ArrayOfExternalNodes // array of external nodes, data type: array
}
## External nodes
External nodes has following structure:
externalNode.data = {
type: "external", // indicates this is an external node,
sistring: "00101100110101", // binary representation of the character, data type: string
indexes: ["0.1,3", "1.2.5"] // the positions where the sistring appears in the collection, data type: array
}
# Collection
The whole collection consists of documents, which consists of sentenses, which consists of words.
An example could be this:
[ [ '嗨你好',
'這是測試文件1' ],
[ '你好',
'這是測試文件2' ] ]
An index is in following structure:
DocumentPosition.SentensePosition.wordPosition
For example, **"0.1.2"** is the index of the character "測".
# Release History
* 0.1.1 First release
* 0.1.2 Construction complete
* 0.1.3 Able to restore Chinese characters
* 0.1.4 Add external node number and term frequency to internal nodes

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc