import { Report } from "./lib/analyze/report";
		import { CustomOptions, Options } from "./lib/util/options";
		import { File } from "./lib/file/file";
		export { Report, ScoredPairs } from "./lib/analyze/report";
		export { Fragment } from "./lib/analyze/fragment";
		export { Region } from "./lib/util/region";
		export { Pair } from "./lib/analyze/pair";
		export { Options } from "./lib/util/options";
		export declare class Dolos {
		@@ -10,0 +5,0 @@ readonly options: Options;

dist/dolos.js

		@@ -25,6 +25,5 @@ "use strict";
		Object.defineProperty(exports, "__esModule", { value: true });
		exports.Dolos = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0;
		exports.Dolos = void 0;
		const analyze_1 = require("./lib/analyze");
		const options_1 = require("./lib/util/options");
		const codeTokenizer_1 = require("./lib/tokenizer/codeTokenizer");
		const file_1 = require("./lib/file/file");
		@@ -37,12 +36,2 @@ const result_1 = require("./lib/util/result");
		const fs = fs_1.default.promises;
		var report_1 = require("./lib/analyze/report");
		Object.defineProperty(exports, "Report", { enumerable: true, get: function () { return report_1.Report; } });
		var fragment_1 = require("./lib/analyze/fragment");
		Object.defineProperty(exports, "Fragment", { enumerable: true, get: function () { return fragment_1.Fragment; } });
		var region_1 = require("./lib/util/region");
		Object.defineProperty(exports, "Region", { enumerable: true, get: function () { return region_1.Region; } });
		var pair_1 = require("./lib/analyze/pair");
		Object.defineProperty(exports, "Pair", { enumerable: true, get: function () { return pair_1.Pair; } });
		var options_2 = require("./lib/util/options");
		Object.defineProperty(exports, "Options", { enumerable: true, get: function () { return options_2.Options; } });
		function newTokenizer(language) {
		@@ -52,4 +41,8 @@ if (language == "chars") {
		}
		else if (codeTokenizer_1.CodeTokenizer.supportedLanguages.includes(language)) {
		return new codeTokenizer_1.CodeTokenizer(language);
		else {
		// eslint-disable-next-line @typescript-eslint/no-var-requires
		const CodeTokenizer = require("./lib/tokenizer/codeTokenizer").CodeTokenizer;
		if (CodeTokenizer.supportedLanguages.includes(language)) {
		return new CodeTokenizer(language);
		}
		}
		@@ -56,0 +49,0 @@ throw new Error(`No tokenizer found for ${language}`);

dist/lib/analyze/index.d.ts

		@@ -6,2 +6,3 @@ import { HashFilter } from "../hashing/hashFilter";
		import { Report } from "./report";
		import { TokenizedFile } from "../file/tokenizedFile";
		export declare class Index {
		@@ -42,2 +43,15 @@ private readonly options;
		/**
		* Same as {@link Index#compareFiles}, but for files that are already tokenized.
		*
		* Compare a list of files with each other and the files already stored in the
		* index. The compared files are also added to the index.
		*
		* @param tokenizedFiles: the tokenized file objects which need to be compared to the index
		* and each other. The file hashes will be added to the index.
		* @param hashFilter: an optional HashFilter. By default the HashFilter of the
		* Index object will be used.
		* @return a promise resolving to a Report object, which is a list of Pairs
		* (containing all the pairedOccurrences between two files).
		*/
		compareTokenizedFiles(tokenizedFiles: TokenizedFile[], hashFilter?: HashFilter): Promise<Report>;
		/**
		* Compare a file to the index. A map will be returned containing the filename
		@@ -50,3 +64,3 @@ * of the matching file, along with a list of matching position between the
		* Index object will be used.
		* @return report wuth the results of the comparison
		* @return report with the results of the comparison
		* contains the common hashes (occurrences) between two files.
		@@ -53,0 +67,0 @@ */

dist/lib/analyze/index.js

		@@ -52,2 +52,17 @@ "use strict";
		const tokenizedFiles = files.map(f => this.tokenizer.tokenizeFile(f));
		return this.compareTokenizedFiles(tokenizedFiles, hashFilter);
		}
		/**
		* Same as {@link Index#compareFiles}, but for files that are already tokenized.
		* Compare a list of files with each other and the files already stored in the
		* index. The compared files are also added to the index.
		*
		* @param tokenizedFiles: the tokenized file objects which need to be compared to the index
		* and each other. The file hashes will be added to the index.
		* @param hashFilter: an optional HashFilter. By default the HashFilter of the
		* Index object will be used.
		* @return a Report object, which is a list of Pairs
		* (containing all the pairedOccurrences between two files).
		*/
		async compareTokenizedFiles(tokenizedFiles, hashFilter = this.hashFilter) {
		const report = new report_1.Report(this.options, tokenizedFiles);
		@@ -60,5 +75,12 @@ for (const file of tokenizedFiles) {
		// sanity check
		(0, assert_1.default)(region_1.Region.isInOrder(file.mapping[start], file.mapping[stop]), `Invallid ordering:
		expected ${file.mapping[start]}
		to start be before the end of ${file.mapping[stop]}`);
		(0, assert_1.default)(region_1.Region.isInOrder(file.mapping[start], file.mapping[stop])
		// If we end our kgram on a ')', the location of the opening token is used.
		// However, the location of this token in the file might be before
		// the location of the starting token of the kmer
		// For example: the last token of every ast is ')', closing the program.
		// The location of this token is always (0, 0), since the program root is the first token.
		// In this way, the 'end' token is before any other token in the AST.
		\|\| file.ast[stop] === ")", `Invalid ordering:
		expected ${file.mapping[start]}
		to start be before the end of ${file.mapping[stop]}`);
		const location = region_1.Region.merge(file.mapping[start], file.mapping[stop]);
		@@ -94,3 +116,3 @@ const part = {
		* Index object will be used.
		* @return report wuth the results of the comparison
		* @return report with the results of the comparison
		* contains the common hashes (occurrences) between two files.
		@@ -97,0 +119,0 @@ */

dist/lib/tokenizer/codeTokenizer.d.ts

		@@ -46,3 +46,4 @@ import { Token, Tokenizer } from "./tokenizer";
		private tokenizeNode;
		private getChildrenRegions;
		}
		//# sourceMappingURL=codeTokenizer.d.ts.map

dist/lib/tokenizer/codeTokenizer.js

		@@ -78,3 +78,4 @@ "use strict";
		*tokenizeNode(node) {
		const location = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
		const fullSpan = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
		const location = region_1.Region.diff(fullSpan, ...this.getChildrenRegions(node))[0];
		yield this.newToken("(", location);
		@@ -88,2 +89,7 @@ // "(node.type child1 child2 ...)"
		}
		getChildrenRegions(node) {
		const nodeToRegion = (node) => new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
		const getChildrenRegion = (node) => node.children.reduce((list, child) => [...list, ...getChildrenRegion(child), nodeToRegion(node)], []);
		return node.children.map(getChildrenRegion).flat();
		}
		}
		@@ -90,0 +96,0 @@ exports.CodeTokenizer = CodeTokenizer;

dist/lib/util/region.d.ts

		@@ -16,3 +16,14 @@ /**
		toString(): string;
		/**
		* This function takes the 'difference' of one region with a list of other regions.
		* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region.
		*
		* This is useful for determining the region a node covers without taking its children into account.
		*
		* Every region that belongs to the diff (that is covered by source, and not by any other) is called a 'good' region.
		* @param source the region to take the difference of
		* @param other the regions whose covered intervals are excluded from the source region
		* @returns the 'good' regions
		*/
		static diff(source: Region, ...other: Region[]): Region[];
		}
		//# sourceMappingURL=region.d.ts.map

dist/lib/util/region.js

		@@ -89,4 +89,63 @@ "use strict";
		}
		/**
		* This function takes the 'difference' of one region with a list of other regions.
		* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region.
		*
		* This is useful for determining the region a node covers without taking its children into account.
		*
		* Every region that belongs to the diff (that is covered by source, and not by any other) is called a 'good' region.
		*
		* The implementation sweeps over all start and end points in (row, column) order and keeps
		* track of which child regions are currently open.
		*
		* @param source the region to take the difference of
		* @param other the regions whose covered intervals are excluded from the source region
		* @returns the 'good' regions, in the order in which they are encountered during the sweep
		*/
		static diff(source, ...other) {
		// Every region is turned into two fresh [row, col] arrays. The rest of this function
		// identifies points by object identity (===, Map keys, Set members), not by coordinates,
		// so two points with equal coordinates are still treated as different points.
		const regionToPoints = (r) => [[r.startRow, r.startCol], [r.endRow, r.endCol]];
		const [startPoint, endPoint] = regionToPoints(source);
		const pointArray = other.map(regionToPoints);
		// This map contains all the start points mapped to their respective end points.
		// This is how we will identify the closing point of a child region.
		const pointMap = new Map(pointArray);
		const points = [startPoint, endPoint, ...pointMap.keys(), ...pointMap.values()];
		// Order by row first, then by column.
		// NOTE(review): points with equal coordinates keep their insertion order only when the
		// engine's sort is stable (source start, source end, child starts, child ends).
		const sortfunc = (a, b) => a[0] == b[0] ? (a[1] - b[1]) : (a[0] - b[0]);
		points.sort(sortfunc);
		// The "points" array now contains all the points (both opening and closing) sorted by their position.
		// We will traverse this array from left to right (beginning of region to end of region) to evaluate whether
		// a spot is covered by the source region and/or other regions.
		const result = [];
		// Despite its name this is a Set: it holds the end points of all child regions that are
		// 'active', i.e. cover the interval at the current point in the traversal.
		const stack = new Set();
		// True once the start point of the source region has been passed.
		let hasStarted = false;
		let currentIndex = 0;
		// Start of the current stretch that is covered by the source and by no child region.
		let firstPoint = null;
		// Traversing the points list; the sweep stops at the end point of the source region.
		while (points[currentIndex] !== endPoint) {
		const p = points[currentIndex];
		// Extra boolean to check whether we are currently covering the source interval
		if (p === startPoint)
		hasStarted = true;
		// If this point is a starting point of a child region
		if (pointMap.has(p)) {
		// If we used to be covered by the source region (hasStarted) and by no child regions (stack size == 0)
		// then the stretch from firstPoint up to this point is 'good'.
		if (stack.size == 0 && hasStarted)
		result.push(new Region(...firstPoint, ...p));
		// Register that the current region is covered by a child (remember where that child ends)
		stack.add(pointMap.get(p));
		}
		else {
		// Otherwise this point is the end point of a child region or the start point of the source.
		// An end point is removed from the set of active children (deleting the source start is a no-op).
		// If nothing covers this spot anymore, the current point starts a new 'good' region.
		stack.delete(p);
		if (stack.size == 0 && hasStarted)
		firstPoint = p;
		}
		currentIndex += 1;
		}
		// Close the last 'good' region at the end of the source, unless a child is still open there.
		// NOTE(review): if the source end sorts before the source start, the loop never runs and
		// firstPoint is still null here, so the spread below throws - confirm callers never pass such a region.
		if (stack.size == 0) {
		result.push(new Region(...firstPoint, ...endPoint));
		}
		return result;
		}
		}
		exports.Region = Region;
		//# sourceMappingURL=region.js.map

package.json

		{
		"name": "@dodona/dolos-lib",
		"version": "1.4.0",
		"main": "dist/dolos.js",
		"version": "1.5.0",
		"main": "dist/index.js",
		"description": "Code similarity detection based on the Winnowing algorithm",
		"types": "dist/dolos.d.ts",
		"types": "dist/index.d.ts",
		"scripts": {
		@@ -37,5 +37,7 @@ "hash": "tsc && node dist/bin/hash.js",
		"ts-node": "^10.2.1",
		"typescript": "^4.3.2"
		"typescript": "^4.5.5"
		},
		"dependencies": {
		},
		"optionalDependencies": {
		"tree-sitter": "^0.20.0",
		@@ -42,0 +44,0 @@ "tree-sitter-bash": "^0.19.0",

@dodona/dolos-lib - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Dependency changes