@dodona/dolos-lib
Advanced tools
Comparing version 1.4.0 to 1.5.0
import { Report } from "./lib/analyze/report"; | ||
import { CustomOptions, Options } from "./lib/util/options"; | ||
import { File } from "./lib/file/file"; | ||
export { Report, ScoredPairs } from "./lib/analyze/report"; | ||
export { Fragment } from "./lib/analyze/fragment"; | ||
export { Region } from "./lib/util/region"; | ||
export { Pair } from "./lib/analyze/pair"; | ||
export { Options } from "./lib/util/options"; | ||
export declare class Dolos { | ||
@@ -10,0 +5,0 @@ readonly options: Options; |
@@ -25,6 +25,5 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.Dolos = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0; | ||
exports.Dolos = void 0; | ||
const analyze_1 = require("./lib/analyze"); | ||
const options_1 = require("./lib/util/options"); | ||
const codeTokenizer_1 = require("./lib/tokenizer/codeTokenizer"); | ||
const file_1 = require("./lib/file/file"); | ||
@@ -37,12 +36,2 @@ const result_1 = require("./lib/util/result"); | ||
const fs = fs_1.default.promises; | ||
var report_1 = require("./lib/analyze/report"); | ||
Object.defineProperty(exports, "Report", { enumerable: true, get: function () { return report_1.Report; } }); | ||
var fragment_1 = require("./lib/analyze/fragment"); | ||
Object.defineProperty(exports, "Fragment", { enumerable: true, get: function () { return fragment_1.Fragment; } }); | ||
var region_1 = require("./lib/util/region"); | ||
Object.defineProperty(exports, "Region", { enumerable: true, get: function () { return region_1.Region; } }); | ||
var pair_1 = require("./lib/analyze/pair"); | ||
Object.defineProperty(exports, "Pair", { enumerable: true, get: function () { return pair_1.Pair; } }); | ||
var options_2 = require("./lib/util/options"); | ||
Object.defineProperty(exports, "Options", { enumerable: true, get: function () { return options_2.Options; } }); | ||
function newTokenizer(language) { | ||
@@ -52,4 +41,8 @@ if (language == "chars") { | ||
} | ||
else if (codeTokenizer_1.CodeTokenizer.supportedLanguages.includes(language)) { | ||
return new codeTokenizer_1.CodeTokenizer(language); | ||
else { | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
const CodeTokenizer = require("./lib/tokenizer/codeTokenizer").CodeTokenizer; | ||
if (CodeTokenizer.supportedLanguages.includes(language)) { | ||
return new CodeTokenizer(language); | ||
} | ||
} | ||
@@ -56,0 +49,0 @@ throw new Error(`No tokenizer found for ${language}`); |
@@ -6,2 +6,3 @@ import { HashFilter } from "../hashing/hashFilter"; | ||
import { Report } from "./report"; | ||
import { TokenizedFile } from "../file/tokenizedFile"; | ||
export declare class Index { | ||
@@ -42,2 +43,15 @@ private readonly options; | ||
/** | ||
* Same as {@link Index#compareFiles}, but for files that are already tokenized. | ||
* Compares a list of files with each other and with the files already stored in | ||
* the index. The compared files are also added to the index. | ||
* | ||
* @param tokenizedFiles: the tokenized file objects which need to be compared to the index | ||
* and to each other. The file hashes will be added to the index. | ||
* @param hashFilter: an optional HashFilter. By default the HashFilter of the | ||
* Index object will be used. | ||
* @return a Report object, which is a list of Pairs | ||
* (containing all the pairedOccurrences between two files). | ||
*/ | ||
compareTokenizedFiles(tokenizedFiles: TokenizedFile[], hashFilter?: HashFilter): Promise<Report>; | ||
/** | ||
* Compare a file to the index. A map will be returned containing the filename | ||
@@ -50,3 +64,3 @@ * of the matching file, along with a list of matching position between the | ||
* Index object will be used. | ||
* @return report wuth the results of the comparison | ||
* @return report with the results of the comparison | ||
* contains the common hashes (occurrences) between two files. | ||
@@ -53,0 +67,0 @@ */ |
@@ -52,2 +52,17 @@ "use strict"; | ||
const tokenizedFiles = files.map(f => this.tokenizer.tokenizeFile(f)); | ||
return this.compareTokenizedFiles(tokenizedFiles, hashFilter); | ||
} | ||
/** | ||
* Same as {@link Index#compareFiles} but files are already tokenized | ||
* Compare a list of files with each other and the files already stored in the | ||
* index. The compared files are also added to the index. | ||
* | ||
* @param tokenizedFiles: the tokenized file objects which need to be compared to the index | ||
* and each other. The file hashes will be added to the index. | ||
* @param hashFilter: an optional HashFilter. By default the HashFilter of the | ||
* Index object will be used. | ||
* @return a Report object, which is a list of Pairs | ||
* (containing all the pairedOccurrences between two files). | ||
*/ | ||
async compareTokenizedFiles(tokenizedFiles, hashFilter = this.hashFilter) { | ||
const report = new report_1.Report(this.options, tokenizedFiles); | ||
@@ -60,5 +75,12 @@ for (const file of tokenizedFiles) { | ||
// sanity check | ||
(0, assert_1.default)(region_1.Region.isInOrder(file.mapping[start], file.mapping[stop]), `Invallid ordering: | ||
expected ${file.mapping[start]} | ||
to start be before the end of ${file.mapping[stop]}`); | ||
(0, assert_1.default)(region_1.Region.isInOrder(file.mapping[start], file.mapping[stop]) | ||
// If we end our kgram on a ')', the location of the opening token is used. | ||
// However, the location of this token in the file might be before | ||
// the location of the starting token of the kmer | ||
// For example: the last token of every ast is ')', closing the program. | ||
// The location of this token is always (0, 0), since the program root is the first token. | ||
// In this way, the 'end' token is before any other token in the AST. | ||
|| file.ast[stop] === ")", `Invalid ordering: | ||
expected ${file.mapping[start]} | ||
to start be before the end of ${file.mapping[stop]}`); | ||
const location = region_1.Region.merge(file.mapping[start], file.mapping[stop]); | ||
@@ -94,3 +116,3 @@ const part = { | ||
* Index object will be used. | ||
* @return report wuth the results of the comparison | ||
* @return report with the results of the comparison | ||
* contains the common hashes (occurrences) between two files. | ||
@@ -97,0 +119,0 @@ */ |
@@ -46,3 +46,4 @@ import { Token, Tokenizer } from "./tokenizer"; | ||
private tokenizeNode; | ||
private getChildrenRegions; | ||
} | ||
//# sourceMappingURL=codeTokenizer.d.ts.map |
@@ -78,3 +78,4 @@ "use strict"; | ||
*tokenizeNode(node) { | ||
const location = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column); | ||
const fullSpan = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column); | ||
const location = region_1.Region.diff(fullSpan, ...this.getChildrenRegions(node))[0]; | ||
yield this.newToken("(", location); | ||
@@ -88,2 +89,7 @@ // "(node.type child1 child2 ...)" | ||
} | ||
getChildrenRegions(node) { | ||
const nodeToRegion = (node) => new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column); | ||
const getChildrenRegion = (node) => node.children.reduce((list, child) => [...list, ...getChildrenRegion(child), nodeToRegion(node)], []); | ||
return node.children.map(getChildrenRegion).flat(); | ||
} | ||
} | ||
@@ -90,0 +96,0 @@ exports.CodeTokenizer = CodeTokenizer; |
@@ -16,3 +16,14 @@ /** | ||
toString(): string; | ||
/** | ||
* This function takes the 'difference' of one region with a list of other regions. | ||
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region. | ||
* | ||
* In this case, this is useful for determining the region a node covers without taking its children into account. | ||
* | ||
* Every region that belongs to the diff (that is covered by source, and not by any other) is called a 'good' region. | ||
* @param source the region of which the difference is taken | ||
* @param other the regions that are subtracted from the source region | ||
* @return the list of 'good' regions | ||
*/ | ||
static diff(source: Region, ...other: Region[]): Region[]; | ||
} | ||
//# sourceMappingURL=region.d.ts.map |
@@ -89,4 +89,63 @@ "use strict"; | ||
} | ||
/** | ||
* This function takes the 'difference' of one region with a list of other regions. | ||
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region. | ||
* | ||
* In this case, this is useful for determining the region a node covers without taking its children into account. | ||
* | ||
* Every region that belongs to the diff (that is covered by source, and not by any other) is called a 'good' region. | ||
* @param source | ||
* @param other | ||
*/ | ||
static diff(source, ...other) { | ||
const regionToPoints = (r) => [[r.startRow, r.startCol], [r.endRow, r.endCol]]; | ||
const [startPoint, endPoint] = regionToPoints(source); | ||
const pointArray = other.map(regionToPoints); | ||
// This map contains all the startpoints mapped to their respective endpoints. | ||
// This is how we will identify the closing point of this token | ||
const pointMap = new Map(pointArray); | ||
const points = [startPoint, endPoint, ...pointMap.keys(), ...pointMap.values()]; | ||
const sortfunc = (a, b) => a[0] == b[0] ? (a[1] - b[1]) : (a[0] - b[0]); | ||
points.sort(sortfunc); | ||
// The "points" array now contains all the points (both opening and closing) sorted by their position. | ||
// We will traverse this array from left to right (beginning of region to end of region) to evaluate whether | ||
// a spot is covered by the source region and/or other regions. | ||
const result = []; | ||
// This stack contains all regions that are 'active' or cover this interval | ||
// (at the current point in the traversal process) | ||
const stack = new Set(); | ||
let hasStarted = false; | ||
let currentIndex = 0; | ||
let firstPoint = null; | ||
// Traversing the points list | ||
while (points[currentIndex] !== endPoint) { | ||
const p = points[currentIndex]; | ||
// Extra boolean to check whether we are currently covering the source interval | ||
if (p === startPoint) | ||
hasStarted = true; | ||
// If this point is a starting point of a child region | ||
if (pointMap.has(p)) { | ||
// If we used to be covered by the source region (hasStarted) and by no child regions (stack size == 0) | ||
// then this region is 'good'. | ||
if (stack.size == 0 && hasStarted) | ||
result.push(new Region(...firstPoint, ...p)); | ||
// Register that the current region is covered by a child | ||
stack.add(pointMap.get(p)); | ||
} | ||
else { | ||
// If this point is the end point of a region, then we remove the end point of this region from the stack. | ||
// We also register the current point as the starting point of a 'good' region | ||
stack.delete(p); | ||
if (stack.size == 0 && hasStarted) | ||
firstPoint = p; | ||
} | ||
currentIndex += 1; | ||
} | ||
if (stack.size == 0) { | ||
result.push(new Region(...firstPoint, ...endPoint)); | ||
} | ||
return result; | ||
} | ||
} | ||
exports.Region = Region; | ||
//# sourceMappingURL=region.js.map |
{ | ||
"name": "@dodona/dolos-lib", | ||
"version": "1.4.0", | ||
"main": "dist/dolos.js", | ||
"version": "1.5.0", | ||
"main": "dist/index.js", | ||
"description": "Code similarity detection based on the Winnowing algorithm", | ||
"types": "dist/dolos.d.ts", | ||
"types": "dist/index.d.ts", | ||
"scripts": { | ||
@@ -37,5 +37,7 @@ "hash": "tsc && node dist/bin/hash.js", | ||
"ts-node": "^10.2.1", | ||
"typescript": "^4.3.2" | ||
"typescript": "^4.5.5" | ||
}, | ||
"dependencies": { | ||
}, | ||
"optionalDependencies": { | ||
"tree-sitter": "^0.20.0", | ||
@@ -42,0 +44,0 @@ "tree-sitter-bash": "^0.19.0", |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
No README
Quality: Package does not have a README. This may indicate a failed publish or a low quality package.
Found 1 instance in 1 package
95977
57
2409
1
50
- Removed tree-sitter@^0.20.0
- Removed tree-sitter-bash@^0.19.0
- Removed tree-sitter-c@^0.20.1
- Removed tree-sitter-c-sharp@^0.19.0
- Removed tree-sitter-java@^0.19.1
- Removed tree-sitter-javascript@^0.19.0
- Removed tree-sitter-python@^0.19.0