New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

@dodona/dolos-lib

Package Overview
Dependencies
Maintainers
5
Versions
23
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@dodona/dolos-lib - npm Package Compare versions

Comparing version 2.0.0 to 2.0.1

dist/lib/util/language.d.ts

7

dist/dolos.d.ts

@@ -6,4 +6,7 @@ import { Report } from "./lib/analyze/report";

readonly options: Options;
private readonly tokenizer;
private readonly index;
private languageDetected;
private language;
private tokenizer;
private index;
private readonly languagePicker;
constructor(customOptions?: CustomOptions);

@@ -10,0 +13,0 @@ private fromZIP;

@@ -33,22 +33,15 @@ "use strict";

const path = __importStar(require("path"));
const charTokenizer_1 = require("./lib/tokenizer/charTokenizer");
const fs_1 = __importStar(require("fs"));
const child_process_1 = require("child_process");
const os_1 = require("os");
const language_1 = require("./lib/util/language");
const fs = fs_1.default.promises;
function newTokenizer(language) {
if (language == "chars") {
return new charTokenizer_1.CharTokenizer();
}
else {
// eslint-disable-next-line @typescript-eslint/no-var-requires
const CodeTokenizer = require("./lib/tokenizer/codeTokenizer").CodeTokenizer;
return new CodeTokenizer(language);
}
}
class Dolos {
constructor(customOptions) {
this.languageDetected = false;
this.language = null;
this.tokenizer = null;
this.index = null;
this.languagePicker = new language_1.LanguagePicker();
this.options = new options_1.Options(customOptions);
this.tokenizer = newTokenizer(this.options.language);
this.index = new analyze_1.Index(this.tokenizer, this.options);
}

@@ -121,2 +114,3 @@ async fromZIP(zipPath) {

async analyze(files) {
var _a;
if (files.length < 2) {

@@ -131,2 +125,18 @@ throw new Error("You need to supply at least two files");

}
else if (this.index == null) {
if (this.options.language) {
this.language = this.languagePicker.findLanguage(this.options.language);
}
else {
this.language = this.languagePicker.detectLanguage(files);
this.languageDetected = true;
}
this.tokenizer = this.language.createTokenizer();
this.index = new analyze_1.Index(this.tokenizer, this.options);
}
if (this.languageDetected) {
for (const file of files) {
(_a = this.language) === null || _a === void 0 ? void 0 : _a.checkLanguage(file);
}
}
return this.index.compareFiles(files);

@@ -133,0 +143,0 @@ }

@@ -15,3 +15,4 @@ export { Report, ScoredPairs, Occurrence, EncodedSemanticResult, DecodedSemanticResult } from "./lib/analyze/report";

export { DefaultMap } from "./lib/util/defaultMap";
export { Language, LanguagePicker, LanguageError } from "./lib/util/language";
export * from "./lib/util/utils";
//# sourceMappingURL=index.d.ts.map

@@ -17,3 +17,3 @@ "use strict";

Object.defineProperty(exports, "__esModule", { value: true });
exports.DefaultMap = exports.PairedOccurrence = exports.SharedFingerprint = exports.SemanticAnalyzer = exports.EmptyTokenizer = exports.Dolos = exports.Index = exports.TokenizedFile = exports.File = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0;
exports.LanguageError = exports.LanguagePicker = exports.Language = exports.DefaultMap = exports.PairedOccurrence = exports.SharedFingerprint = exports.SemanticAnalyzer = exports.EmptyTokenizer = exports.Dolos = exports.Index = exports.TokenizedFile = exports.File = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0;
var report_1 = require("./lib/analyze/report");

@@ -47,3 +47,7 @@ Object.defineProperty(exports, "Report", { enumerable: true, get: function () { return report_1.Report; } });

Object.defineProperty(exports, "DefaultMap", { enumerable: true, get: function () { return defaultMap_1.DefaultMap; } });
var language_1 = require("./lib/util/language");
Object.defineProperty(exports, "Language", { enumerable: true, get: function () { return language_1.Language; } });
Object.defineProperty(exports, "LanguagePicker", { enumerable: true, get: function () { return language_1.LanguagePicker; } });
Object.defineProperty(exports, "LanguageError", { enumerable: true, get: function () { return language_1.LanguageError; } });
__exportStar(require("./lib/util/utils"), exports);
//# sourceMappingURL=index.js.map

@@ -67,3 +67,3 @@ "use strict";

async compareTokenizedFiles(tokenizedFiles, hashFilter = this.hashFilter) {
const report = new report_1.Report(this.options, tokenizedFiles);
const report = new report_1.Report(this.options, this.tokenizer.language, tokenizedFiles);
const map = await this.createMatches(tokenizedFiles, hashFilter);

@@ -70,0 +70,0 @@ for (const [hash, occurrences] of map.entries()) {

import { Pair } from "./pair";
import { TokenizedFile } from "../file/tokenizedFile";
import { ASTRegion } from "./pairedOccurrence";
import { Options } from "../util/options";
import { DolosOptions, Options } from "../util/options";
import { SharedFingerprint } from "./sharedFingerprint";
import { NodeStats } from "./SemanticAnalyzer";
import { Language } from "../util/language";
declare type Hash = number;

@@ -27,2 +28,5 @@ export interface ScoredPairs {

}
export interface Metadata extends DolosOptions {
languageDetected: boolean;
}
export interface EncodedSemanticResult extends SemanticResult {

@@ -36,2 +40,3 @@ occurrences: Array<number>;

readonly options: Options;
readonly language: Language;
private scored?;

@@ -43,3 +48,3 @@ private fingerprints;

semanticResults: Array<EncodedSemanticResult>;
constructor(options: Options, files: TokenizedFile[]);
constructor(options: Options, language: Language, files: TokenizedFile[]);
addOccurrences(hash: Hash, ...parts: Array<Occurrence>): void;

@@ -52,2 +57,3 @@ /**

sharedFingerprints(): Array<SharedFingerprint>;
metadata(): Metadata;
/**

@@ -54,0 +60,0 @@ * Combining all shared fingerprints and build pairs

@@ -16,4 +16,5 @@ "use strict";

class Report {
constructor(options, files) {
constructor(options, language, files) {
this.options = options;
this.language = language;
// collection of all shared fingerprints

@@ -77,2 +78,9 @@ this.fingerprints = new Map();

}
metadata() {
return {
...this.options.asObject(),
language: this.language.name,
languageDetected: this.options.language == undefined,
};
}
/**

@@ -79,0 +87,0 @@ * Combining all shared fingerprints and build pairs

@@ -41,3 +41,4 @@ import { Result } from "../util/result";

get content(): string;
get extension(): string;
}
//# sourceMappingURL=file.d.ts.map

@@ -10,2 +10,3 @@ "use strict";

const identifiable_1 = __importDefault(require("../util/identifiable"));
const path_1 = __importDefault(require("path"));
const fs = fs_1.default.promises;

@@ -56,4 +57,7 @@ /**

}
get extension() {
return path_1.default.extname(this.path);
}
}
exports.File = File;
//# sourceMappingURL=file.js.map
import { Token, Tokenizer } from "./tokenizer";
import { ProgrammingLanguage } from "../util/language";
export declare class CodeTokenizer extends Tokenizer {
static supportedLanguages: string[];
/**
* Returns true if the grammar of the given language is supported.
*
* @param language The name of the language to check
*/
static isSupportedLanguage(language: string): boolean;
/**
* Registers an additional language to Dolos. For this to work, the supporting
* module of the name `tree-sitter-someLanguage` must first be installed
* manually through yarn or npm.
*
* The function will throw an error when the supported module is not found.
*
* @param language The name of the language to register
*/
static registerLanguage(language: string): void;
readonly language: string;
private readonly parser;

@@ -29,3 +12,3 @@ /**

*/
constructor(language: string);
constructor(language: ProgrammingLanguage);
/**

@@ -32,0 +15,0 @@ * Runs the parser on a given string. Returns a stringified version of the

@@ -10,2 +10,3 @@ "use strict";

const tokenizer_1 = require("./tokenizer");
const assert_1 = __importDefault(require("assert"));
class CodeTokenizer extends tokenizer_1.Tokenizer {

@@ -20,51 +21,7 @@ /**

constructor(language) {
super();
if (!CodeTokenizer.isSupportedLanguage(language)) {
CodeTokenizer.registerLanguage(language);
}
this.language = language;
super(language);
this.parser = new tree_sitter_1.default();
let languageModule;
if (language === "elm") {
languageModule = require("@elm-tooling/tree-sitter-elm");
}
else {
// eslint-disable-next-line @typescript-eslint/no-var-requires
languageModule = require("tree-sitter-" + language);
}
this.parser.setLanguage(languageModule);
this.parser.setLanguage(language.getLanguageModule());
}
/**
* Returns true if the grammar of the given language is supported.
*
* @param language The name of the language to check
*/
static isSupportedLanguage(language) {
return this.supportedLanguages.includes(language);
}
/**
* Registers an additional language to Dolos. For this to work, the supporting
* module of the name `tree-sitter-someLanguage` must first be installed
* manually through yarn or npm.
*
* The function will throw an error when the supported module is not found.
*
* @param language The name of the language to register
*/
static registerLanguage(language) {
try {
if (language === "elm") {
require("@elm-tooling/tree-sitter-elm");
}
else {
require("tree-sitter-" + language);
}
}
catch (error) {
throw new Error(`The module 'tree-sitter-${language}' could not be found. ` +
"Try to install it using npm or yarn, but it may not be supported (yet).");
}
this.supportedLanguages.push(language);
}
/**
* Runs the parser on a given string. Returns a stringified version of the

@@ -92,3 +49,4 @@ * abstract syntax tree.

const fullSpan = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column);
const location = region_1.Region.diff(fullSpan, ...this.getChildrenRegions(node))[0];
const location = region_1.Region.firstDiff(fullSpan, this.getChildrenRegions(node));
(0, assert_1.default)(location !== null, "There should be at least one diff'ed region");
yield this.newToken("(", location);

@@ -109,3 +67,2 @@ // "(node.type child1 child2 ...)"

exports.CodeTokenizer = CodeTokenizer;
CodeTokenizer.supportedLanguages = ["c", "c-sharp", "bash", "java", "javascript", "python", "elm"];
//# sourceMappingURL=codeTokenizer.js.map
import { File } from "../file/file";
import { Region } from "../util/region";
import { TokenizedFile } from "../file/tokenizedFile";
import { Language } from "../util/language";
export interface Token {

@@ -9,2 +10,4 @@ token: string;

export declare abstract class Tokenizer {
readonly language: Language;
constructor(language: Language);
/**

@@ -11,0 +14,0 @@ * Runs the tokenizer on a given Buffer. Returns an async iterator returning

@@ -6,2 +6,5 @@ "use strict";

class Tokenizer {
constructor(language) {
this.language = language;
}
/**

@@ -8,0 +11,0 @@ * Returns a tokenized version of the given file.

export interface DolosOptions {
kgramLength: number;
kgramsInWindow: number;
language: string;
language: string | null;
limitResults: number | null;

@@ -20,3 +20,2 @@ maxFingerprintCount: number | null;

static defaultKgramsInWindow: number;
static defaultLanguage: string;
static defaultMinFragmentLength: number;

@@ -32,3 +31,3 @@ static defaultMinSimilarity: number;

get limitResults(): number | null;
get language(): string;
get language(): string | null;
get kgramLength(): number;

@@ -35,0 +34,0 @@ get kgramsInWindow(): number;

@@ -49,3 +49,3 @@ "use strict";

get language() {
return definedOrDefault(this.custom.language, Options.defaultLanguage);
return definedOrNull(this.custom.language);
}

@@ -109,3 +109,2 @@ get kgramLength() {

Options.defaultKgramsInWindow = 17;
Options.defaultLanguage = "javascript";
Options.defaultMinFragmentLength = 0;

@@ -112,0 +111,0 @@ Options.defaultMinSimilarity = 0;

@@ -17,3 +17,3 @@ /**

/**
* This function takes the 'difference' of one region with a list of other regions.
* This function takes the first 'difference' of one region with a list of other regions.
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region.

@@ -25,6 +25,7 @@ *

* @param source
* @param other
* @param others
* @returns the first 'difference' region, or null if there is none.
*/
static diff(source: Region, ...other: Region[]): Region[];
static firstDiff(source: Region, others: Region[]): Region | null;
}
//# sourceMappingURL=region.d.ts.map

@@ -90,3 +90,3 @@ "use strict";

/**
* This function takes the 'difference' of one region with a list of other regions.
* This function takes the first 'difference' of one region with a list of other regions.
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region.

@@ -98,8 +98,9 @@ *

* @param source
* @param other
* @param others
* @returns the first 'difference' region, or null if there is none.
*/
static diff(source, ...other) {
static firstDiff(source, others) {
const regionToPoints = (r) => [[r.startRow, r.startCol], [r.endRow, r.endCol]];
const [startPoint, endPoint] = regionToPoints(source);
const pointArray = other.map(regionToPoints);
const pointArray = others.map(regionToPoints);
// This map contains all the startpoints mapped to their respective endpoints.

@@ -114,3 +115,2 @@ // This is how we will identify the closing point of this token

// a spot is covered by the source region and/or other regions.
const result = [];
// This stack contains all regions that are 'active' or cover this interval

@@ -126,4 +126,5 @@ // (at the current point in the traversal process)

// Extra boolean to check whether we are currently covering the source interval
if (p === startPoint)
if (p === startPoint) {
hasStarted = true;
}
// If this point is a starting point of a child region

@@ -133,5 +134,8 @@ if (pointMap.has(p)) {

// then this region is 'good'.
if (stack.size == 0 && hasStarted)
result.push(new Region(...firstPoint, ...p));
if (stack.size == 0 && hasStarted) {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return new Region(...firstPoint, ...p);
}
// Register that the current region is covered by a child
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
stack.add(pointMap.get(p));

@@ -143,4 +147,5 @@ }

stack.delete(p);
if (stack.size == 0 && hasStarted)
if (stack.size == 0 && hasStarted) {
firstPoint = p;
}
}

@@ -150,5 +155,8 @@ currentIndex += 1;

if (stack.size == 0) {
result.push(new Region(...firstPoint, ...endPoint));
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
return new Region(...firstPoint, ...endPoint);
}
return result;
else {
return null;
}
}

@@ -155,0 +163,0 @@ }

{
"name": "@dodona/dolos-lib",
"version": "2.0.0",
"version": "2.0.1",
"main": "dist/index.js",

@@ -8,8 +8,5 @@ "description": "Code similarity detection based on the Winnowing algorithm",

"engines": {
"node": ">12 <17"
"node": ">12"
},
"scripts": {
"hash": "tsc && node dist/bin/hash.js",
"test-hash-performance": "tsc && time head -c 1000000 /dev/random | node dist/bin/hash.js > /dev/null",
"test-winnow-performance": "tsc && time head -c 1000000 /dev/random | node dist/bin/hash.js > /dev/null",
"test": "nyc ava",

@@ -16,0 +13,0 @@ "test:watch": "ava --watch",

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc