Huge News!Announcing our $40M Series B led by Abstract Ventures.Learn More
Socket
Sign inDemoInstall
Socket

@dodona/dolos-lib

Package Overview
Dependencies
Maintainers
5
Versions
22
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@dodona/dolos-lib - npm Package Compare versions

Comparing version 1.4.0 to 1.5.0

dist/index.d.ts

5

dist/dolos.d.ts
import { Report } from "./lib/analyze/report";
import { CustomOptions, Options } from "./lib/util/options";
import { File } from "./lib/file/file";
export { Report, ScoredPairs } from "./lib/analyze/report";
export { Fragment } from "./lib/analyze/fragment";
export { Region } from "./lib/util/region";
export { Pair } from "./lib/analyze/pair";
export { Options } from "./lib/util/options";
export declare class Dolos {

@@ -10,0 +5,0 @@ readonly options: Options;

21

dist/dolos.js

@@ -25,6 +25,5 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.Dolos = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0;
exports.Dolos = void 0;
const analyze_1 = require("./lib/analyze");
const options_1 = require("./lib/util/options");
const codeTokenizer_1 = require("./lib/tokenizer/codeTokenizer");
const file_1 = require("./lib/file/file");

@@ -37,12 +36,2 @@ const result_1 = require("./lib/util/result");

const fs = fs_1.default.promises;
var report_1 = require("./lib/analyze/report");
Object.defineProperty(exports, "Report", { enumerable: true, get: function () { return report_1.Report; } });
var fragment_1 = require("./lib/analyze/fragment");
Object.defineProperty(exports, "Fragment", { enumerable: true, get: function () { return fragment_1.Fragment; } });
var region_1 = require("./lib/util/region");
Object.defineProperty(exports, "Region", { enumerable: true, get: function () { return region_1.Region; } });
var pair_1 = require("./lib/analyze/pair");
Object.defineProperty(exports, "Pair", { enumerable: true, get: function () { return pair_1.Pair; } });
var options_2 = require("./lib/util/options");
Object.defineProperty(exports, "Options", { enumerable: true, get: function () { return options_2.Options; } });
function newTokenizer(language) {

@@ -52,4 +41,8 @@ if (language == "chars") {

}
else if (codeTokenizer_1.CodeTokenizer.supportedLanguages.includes(language)) {
return new codeTokenizer_1.CodeTokenizer(language);
else {
// eslint-disable-next-line @typescript-eslint/no-var-requires
const CodeTokenizer = require("./lib/tokenizer/codeTokenizer").CodeTokenizer;
if (CodeTokenizer.supportedLanguages.includes(language)) {
return new CodeTokenizer(language);
}
}

@@ -56,0 +49,0 @@ throw new Error(`No tokenizer found for ${language}`);

@@ -6,2 +6,3 @@ import { HashFilter } from "../hashing/hashFilter";

import { Report } from "./report";
import { TokenizedFile } from "../file/tokenizedFile";
export declare class Index {

@@ -42,2 +43,15 @@ private readonly options;

/**
* Same as {@link Index#compareFiles} but files are already tokenized
* Compare a list of files with each other and the files already stored in the
* index. The compared files are also added to the index.
*
* @param tokenizedFiles: the tokenized file objects which need to be compared to the index
* and each other. The file hashes will be added to the index.
* @param hashFilter: an optional HashFilter. By default the HashFilter of the
* Index object will be used.
* @return a Report object, which is a list of Pairs
* (containing all the pairedOccurrences between two files).
*/
compareTokenizedFiles(tokenizedFiles: TokenizedFile[], hashFilter?: HashFilter): Promise<Report>;
/**
* Compare a file to the index. A map will be returned containing the filename

@@ -50,3 +64,3 @@ * of the matching file, along with a list of matching position between the

* Index object will be used.
* @return report wuth the results of the comparison
* @return report with the results of the comparison
* contains the common hashes (occurrences) between two files.

@@ -53,0 +67,0 @@ */

@@ -52,2 +52,17 @@ "use strict";

const tokenizedFiles = files.map(f => this.tokenizer.tokenizeFile(f));
return this.compareTokenizedFiles(tokenizedFiles, hashFilter);
}
/**
* Same as {@link Index#compareFiles} but files are already tokenized
* Compare a list of files with each other and the files already stored in the
* index. The compared files are also added to the index.
*
* @param tokenizedFiles: the tokenized file objects which need to be compared to the index
* and each other. The file hashes will be added to the index.
* @param hashFilter: an optional HashFilter. By default the HashFilter of the
* Index object will be used.
* @return a Report object, which is a list of Pairs
* (containing all the pairedOccurrences between two files).
*/
async compareTokenizedFiles(tokenizedFiles, hashFilter = this.hashFilter) {
const report = new report_1.Report(this.options, tokenizedFiles);

@@ -60,5 +75,12 @@ for (const file of tokenizedFiles) {

// sanity check
(0, assert_1.default)(region_1.Region.isInOrder(file.mapping[start], file.mapping[stop]), `Invallid ordering:
expected ${file.mapping[start]}
to start be before the end of ${file.mapping[stop]}`);
(0, assert_1.default)(region_1.Region.isInOrder(file.mapping[start], file.mapping[stop])
// If we end our kgram on a ')', the location of the opening token is used.
// However, the location of this token in the file might be before
// the location of the starting token of the kmer
// For example: the last token of every ast is ')', closing the program.
// The location of this token is always (0, 0), since the program root is the first token.
// In this way, the 'end' token is before any other token in the AST.
|| file.ast[stop] === ")", `Invalid ordering:
expected ${file.mapping[start]}
to start be before the end of ${file.mapping[stop]}`);
const location = region_1.Region.merge(file.mapping[start], file.mapping[stop]);

@@ -94,3 +116,3 @@ const part = {

* Index object will be used.
* @return report wuth the results of the comparison
* @return report with the results of the comparison
* contains the common hashes (occurrences) between two files.

@@ -97,0 +119,0 @@ */

@@ -46,3 +46,4 @@ import { Token, Tokenizer } from "./tokenizer";

private tokenizeNode;
private getChildrenRegions;
}
//# sourceMappingURL=codeTokenizer.d.ts.map

@@ -78,3 +78,4 @@ "use strict";

*tokenizeNode(node) {
const location = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
const fullSpan = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
const location = region_1.Region.diff(fullSpan, ...this.getChildrenRegions(node))[0];
yield this.newToken("(", location);

@@ -88,2 +89,7 @@ // "(node.type child1 child2 ...)"

}
getChildrenRegions(node) {
const nodeToRegion = (node) => new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
const getChildrenRegion = (node) => node.children.reduce((list, child) => [...list, ...getChildrenRegion(child), nodeToRegion(node)], []);
return node.children.map(getChildrenRegion).flat();
}
}

@@ -90,0 +96,0 @@ exports.CodeTokenizer = CodeTokenizer;

@@ -16,3 +16,14 @@ /**

toString(): string;
/**
* This function takes the 'difference' of one region with a list of other regions.
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region.
*
* In this case, this is useful for determining the region a node covers without taking its children into account.
*
* Every region that belongs to the diff (that is covered by source, and not by any other) is called a 'good' region.
* @param source
* @param other
*/
static diff(source: Region, ...other: Region[]): Region[];
}
//# sourceMappingURL=region.d.ts.map

@@ -89,4 +89,63 @@ "use strict";

}
/**
* This function takes the 'difference' of one region with a list of other regions.
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region.
*
* In this case, this is useful for determining the region a node covers without taking its children into account.
*
* Every region that belongs to the diff (that is covered by source, and not by any other) is called a 'good' region.
* @param source
* @param other
*/
static diff(source, ...other) {
const regionToPoints = (r) => [[r.startRow, r.startCol], [r.endRow, r.endCol]];
const [startPoint, endPoint] = regionToPoints(source);
const pointArray = other.map(regionToPoints);
// This map contains all the startpoints mapped to their respective endpoints.
// This is how we will identify the closing point of this token
const pointMap = new Map(pointArray);
const points = [startPoint, endPoint, ...pointMap.keys(), ...pointMap.values()];
const sortfunc = (a, b) => a[0] == b[0] ? (a[1] - b[1]) : (a[0] - b[0]);
points.sort(sortfunc);
// The "points" array now contains all the points (both opening and closing) sorted by their position.
// We will traverse this array from left to right (beginning of region to end of region) to evaluate whether
// a spot is covered by the source region and/or other regions.
const result = [];
// This stack contains all regions that are 'active' or cover this interval
// (at the current point in the traversal process)
const stack = new Set();
let hasStarted = false;
let currentIndex = 0;
let firstPoint = null;
// Traversing the points list
while (points[currentIndex] !== endPoint) {
const p = points[currentIndex];
// Extra boolean to check whether we are currently covering the source interval
if (p === startPoint)
hasStarted = true;
// If this point is a starting point of a child region
if (pointMap.has(p)) {
// If we used to be covered by the source region (hasStarted) and by no child regions (stack size == 0)
// then this region is 'good'.
if (stack.size == 0 && hasStarted)
result.push(new Region(...firstPoint, ...p));
// Register that the current region is covered by a child
stack.add(pointMap.get(p));
}
else {
// If this point is the end point of a region, then we remove the end point of this region from the stack.
// We also register the current point as the starting point of a 'good' region
stack.delete(p);
if (stack.size == 0 && hasStarted)
firstPoint = p;
}
currentIndex += 1;
}
if (stack.size == 0) {
result.push(new Region(...firstPoint, ...endPoint));
}
return result;
}
}
exports.Region = Region;
//# sourceMappingURL=region.js.map
{
"name": "@dodona/dolos-lib",
"version": "1.4.0",
"main": "dist/dolos.js",
"version": "1.5.0",
"main": "dist/index.js",
"description": "Code similarity detection based on the Winnowing algorithm",
"types": "dist/dolos.d.ts",
"types": "dist/index.d.ts",
"scripts": {

@@ -37,5 +37,7 @@ "hash": "tsc && node dist/bin/hash.js",

"ts-node": "^10.2.1",
"typescript": "^4.3.2"
"typescript": "^4.5.5"
},
"dependencies": {
},
"optionalDependencies": {
"tree-sitter": "^0.20.0",

@@ -42,0 +44,0 @@ "tree-sitter-bash": "^0.19.0",

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc