@dodona/dolos-lib
Advanced tools
Comparing version 2.0.0 to 2.0.1
@@ -6,4 +6,7 @@ import { Report } from "./lib/analyze/report"; | ||
readonly options: Options; | ||
private readonly tokenizer; | ||
private readonly index; | ||
private languageDetected; | ||
private language; | ||
private tokenizer; | ||
private index; | ||
private readonly languagePicker; | ||
constructor(customOptions?: CustomOptions); | ||
@@ -10,0 +13,0 @@ private fromZIP; |
@@ -33,22 +33,15 @@ "use strict"; | ||
const path = __importStar(require("path")); | ||
const charTokenizer_1 = require("./lib/tokenizer/charTokenizer"); | ||
const fs_1 = __importStar(require("fs")); | ||
const child_process_1 = require("child_process"); | ||
const os_1 = require("os"); | ||
const language_1 = require("./lib/util/language"); | ||
const fs = fs_1.default.promises; | ||
function newTokenizer(language) { | ||
if (language == "chars") { | ||
return new charTokenizer_1.CharTokenizer(); | ||
} | ||
else { | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
const CodeTokenizer = require("./lib/tokenizer/codeTokenizer").CodeTokenizer; | ||
return new CodeTokenizer(language); | ||
} | ||
} | ||
class Dolos { | ||
constructor(customOptions) { | ||
this.languageDetected = false; | ||
this.language = null; | ||
this.tokenizer = null; | ||
this.index = null; | ||
this.languagePicker = new language_1.LanguagePicker(); | ||
this.options = new options_1.Options(customOptions); | ||
this.tokenizer = newTokenizer(this.options.language); | ||
this.index = new analyze_1.Index(this.tokenizer, this.options); | ||
} | ||
@@ -121,2 +114,3 @@ async fromZIP(zipPath) { | ||
async analyze(files) { | ||
var _a; | ||
if (files.length < 2) { | ||
@@ -131,2 +125,18 @@ throw new Error("You need to supply at least two files"); | ||
} | ||
else if (this.index == null) { | ||
if (this.options.language) { | ||
this.language = this.languagePicker.findLanguage(this.options.language); | ||
} | ||
else { | ||
this.language = this.languagePicker.detectLanguage(files); | ||
this.languageDetected = true; | ||
} | ||
this.tokenizer = this.language.createTokenizer(); | ||
this.index = new analyze_1.Index(this.tokenizer, this.options); | ||
} | ||
if (this.languageDetected) { | ||
for (const file of files) { | ||
(_a = this.language) === null || _a === void 0 ? void 0 : _a.checkLanguage(file); | ||
} | ||
} | ||
return this.index.compareFiles(files); | ||
@@ -133,0 +143,0 @@ } |
@@ -15,3 +15,4 @@ export { Report, ScoredPairs, Occurrence, EncodedSemanticResult, DecodedSemanticResult } from "./lib/analyze/report"; | ||
export { DefaultMap } from "./lib/util/defaultMap"; | ||
export { Language, LanguagePicker, LanguageError } from "./lib/util/language"; | ||
export * from "./lib/util/utils"; | ||
//# sourceMappingURL=index.d.ts.map |
@@ -17,3 +17,3 @@ "use strict"; | ||
Object.defineProperty(exports, "__esModule", { value: true }); | ||
exports.DefaultMap = exports.PairedOccurrence = exports.SharedFingerprint = exports.SemanticAnalyzer = exports.EmptyTokenizer = exports.Dolos = exports.Index = exports.TokenizedFile = exports.File = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0; | ||
exports.LanguageError = exports.LanguagePicker = exports.Language = exports.DefaultMap = exports.PairedOccurrence = exports.SharedFingerprint = exports.SemanticAnalyzer = exports.EmptyTokenizer = exports.Dolos = exports.Index = exports.TokenizedFile = exports.File = exports.Options = exports.Pair = exports.Region = exports.Fragment = exports.Report = void 0; | ||
var report_1 = require("./lib/analyze/report"); | ||
@@ -47,3 +47,7 @@ Object.defineProperty(exports, "Report", { enumerable: true, get: function () { return report_1.Report; } }); | ||
Object.defineProperty(exports, "DefaultMap", { enumerable: true, get: function () { return defaultMap_1.DefaultMap; } }); | ||
var language_1 = require("./lib/util/language"); | ||
Object.defineProperty(exports, "Language", { enumerable: true, get: function () { return language_1.Language; } }); | ||
Object.defineProperty(exports, "LanguagePicker", { enumerable: true, get: function () { return language_1.LanguagePicker; } }); | ||
Object.defineProperty(exports, "LanguageError", { enumerable: true, get: function () { return language_1.LanguageError; } }); | ||
__exportStar(require("./lib/util/utils"), exports); | ||
//# sourceMappingURL=index.js.map |
@@ -67,3 +67,3 @@ "use strict"; | ||
async compareTokenizedFiles(tokenizedFiles, hashFilter = this.hashFilter) { | ||
const report = new report_1.Report(this.options, tokenizedFiles); | ||
const report = new report_1.Report(this.options, this.tokenizer.language, tokenizedFiles); | ||
const map = await this.createMatches(tokenizedFiles, hashFilter); | ||
@@ -70,0 +70,0 @@ for (const [hash, occurrences] of map.entries()) { |
import { Pair } from "./pair"; | ||
import { TokenizedFile } from "../file/tokenizedFile"; | ||
import { ASTRegion } from "./pairedOccurrence"; | ||
import { Options } from "../util/options"; | ||
import { DolosOptions, Options } from "../util/options"; | ||
import { SharedFingerprint } from "./sharedFingerprint"; | ||
import { NodeStats } from "./SemanticAnalyzer"; | ||
import { Language } from "../util/language"; | ||
declare type Hash = number; | ||
@@ -27,2 +28,5 @@ export interface ScoredPairs { | ||
} | ||
export interface Metadata extends DolosOptions { | ||
languageDetected: boolean; | ||
} | ||
export interface EncodedSemanticResult extends SemanticResult { | ||
@@ -36,2 +40,3 @@ occurrences: Array<number>; | ||
readonly options: Options; | ||
readonly language: Language; | ||
private scored?; | ||
@@ -43,3 +48,3 @@ private fingerprints; | ||
semanticResults: Array<EncodedSemanticResult>; | ||
constructor(options: Options, files: TokenizedFile[]); | ||
constructor(options: Options, language: Language, files: TokenizedFile[]); | ||
addOccurrences(hash: Hash, ...parts: Array<Occurrence>): void; | ||
@@ -52,2 +57,3 @@ /** | ||
sharedFingerprints(): Array<SharedFingerprint>; | ||
metadata(): Metadata; | ||
/** | ||
@@ -54,0 +60,0 @@ * Combining all shared fingerprints and build pairs |
@@ -16,4 +16,5 @@ "use strict"; | ||
class Report { | ||
constructor(options, files) { | ||
constructor(options, language, files) { | ||
this.options = options; | ||
this.language = language; | ||
// collection of all shared fingerprints | ||
@@ -77,2 +78,9 @@ this.fingerprints = new Map(); | ||
} | ||
metadata() { | ||
return { | ||
...this.options.asObject(), | ||
language: this.language.name, | ||
languageDetected: this.options.language == undefined, | ||
}; | ||
} | ||
/** | ||
@@ -79,0 +87,0 @@ * Combining all shared fingerprints and build pairs |
@@ -41,3 +41,4 @@ import { Result } from "../util/result"; | ||
get content(): string; | ||
get extension(): string; | ||
} | ||
//# sourceMappingURL=file.d.ts.map |
@@ -10,2 +10,3 @@ "use strict"; | ||
const identifiable_1 = __importDefault(require("../util/identifiable")); | ||
const path_1 = __importDefault(require("path")); | ||
const fs = fs_1.default.promises; | ||
@@ -56,4 +57,7 @@ /** | ||
} | ||
get extension() { | ||
return path_1.default.extname(this.path); | ||
} | ||
} | ||
exports.File = File; | ||
//# sourceMappingURL=file.js.map |
import { Token, Tokenizer } from "./tokenizer"; | ||
import { ProgrammingLanguage } from "../util/language"; | ||
export declare class CodeTokenizer extends Tokenizer { | ||
static supportedLanguages: string[]; | ||
/** | ||
* Returns true if the grammar of the given language is supported. | ||
* | ||
* @param language The name of the language to check | ||
*/ | ||
static isSupportedLanguage(language: string): boolean; | ||
/** | ||
* Registers an additional language to Dolos. For this to work, the supporting | ||
* module of the name `tree-sitter-someLanguage` must first be installed | ||
* manually through yarn or npm. | ||
* | ||
* The function will throw an error when the supported module is not found. | ||
* | ||
* @param language The name of the language to register | ||
*/ | ||
static registerLanguage(language: string): void; | ||
readonly language: string; | ||
private readonly parser; | ||
@@ -29,3 +12,3 @@ /** | ||
*/ | ||
constructor(language: string); | ||
constructor(language: ProgrammingLanguage); | ||
/** | ||
@@ -32,0 +15,0 @@ * Runs the parser on a given string. Returns a stringified version of the |
@@ -10,2 +10,3 @@ "use strict"; | ||
const tokenizer_1 = require("./tokenizer"); | ||
const assert_1 = __importDefault(require("assert")); | ||
class CodeTokenizer extends tokenizer_1.Tokenizer { | ||
@@ -20,51 +21,7 @@ /** | ||
constructor(language) { | ||
super(); | ||
if (!CodeTokenizer.isSupportedLanguage(language)) { | ||
CodeTokenizer.registerLanguage(language); | ||
} | ||
this.language = language; | ||
super(language); | ||
this.parser = new tree_sitter_1.default(); | ||
let languageModule; | ||
if (language === "elm") { | ||
languageModule = require("@elm-tooling/tree-sitter-elm"); | ||
} | ||
else { | ||
// eslint-disable-next-line @typescript-eslint/no-var-requires | ||
languageModule = require("tree-sitter-" + language); | ||
} | ||
this.parser.setLanguage(languageModule); | ||
this.parser.setLanguage(language.getLanguageModule()); | ||
} | ||
/** | ||
* Returns true if the grammar of the given language is supported. | ||
* | ||
* @param language The name of the language to check | ||
*/ | ||
static isSupportedLanguage(language) { | ||
return this.supportedLanguages.includes(language); | ||
} | ||
/** | ||
* Registers an additional language to Dolos. For this to work, the supporting | ||
* module of the name `tree-sitter-someLanguage` must first be installed | ||
* manually through yarn or npm. | ||
* | ||
* The function will throw an error when the supported module is not found. | ||
* | ||
* @param language The name of the language to register | ||
*/ | ||
static registerLanguage(language) { | ||
try { | ||
if (language === "elm") { | ||
require("@elm-tooling/tree-sitter-elm"); | ||
} | ||
else { | ||
require("tree-sitter-" + language); | ||
} | ||
} | ||
catch (error) { | ||
throw new Error(`The module 'tree-sitter-${language}' could not be found. ` + | ||
"Try to install it using npm or yarn, but it may not be supported (yet)."); | ||
} | ||
this.supportedLanguages.push(language); | ||
} | ||
/** | ||
* Runs the parser on a given string. Returns a stringified version of the | ||
@@ -92,3 +49,4 @@ * abstract syntax tree. | ||
const fullSpan = new region_1.Region(node.startPosition.row, node.startPosition.column, node.endPosition.row, node.endPosition.column); | ||
const location = region_1.Region.diff(fullSpan, ...this.getChildrenRegions(node))[0]; | ||
const location = region_1.Region.firstDiff(fullSpan, this.getChildrenRegions(node)); | ||
(0, assert_1.default)(location !== null, "There should be at least one diff'ed region"); | ||
yield this.newToken("(", location); | ||
@@ -109,3 +67,2 @@ // "(node.type child1 child2 ...)" | ||
exports.CodeTokenizer = CodeTokenizer; | ||
CodeTokenizer.supportedLanguages = ["c", "c-sharp", "bash", "java", "javascript", "python", "elm"]; | ||
//# sourceMappingURL=codeTokenizer.js.map |
import { File } from "../file/file"; | ||
import { Region } from "../util/region"; | ||
import { TokenizedFile } from "../file/tokenizedFile"; | ||
import { Language } from "../util/language"; | ||
export interface Token { | ||
@@ -9,2 +10,4 @@ token: string; | ||
export declare abstract class Tokenizer { | ||
readonly language: Language; | ||
constructor(language: Language); | ||
/** | ||
@@ -11,0 +14,0 @@ * Runs the tokenizer on a given Buffer. Returns an async iterator returning |
@@ -6,2 +6,5 @@ "use strict"; | ||
class Tokenizer { | ||
constructor(language) { | ||
this.language = language; | ||
} | ||
/** | ||
@@ -8,0 +11,0 @@ * Returns a tokenized version of the given file. |
export interface DolosOptions { | ||
kgramLength: number; | ||
kgramsInWindow: number; | ||
language: string; | ||
language: string | null; | ||
limitResults: number | null; | ||
@@ -20,3 +20,2 @@ maxFingerprintCount: number | null; | ||
static defaultKgramsInWindow: number; | ||
static defaultLanguage: string; | ||
static defaultMinFragmentLength: number; | ||
@@ -32,3 +31,3 @@ static defaultMinSimilarity: number; | ||
get limitResults(): number | null; | ||
get language(): string; | ||
get language(): string | null; | ||
get kgramLength(): number; | ||
@@ -35,0 +34,0 @@ get kgramsInWindow(): number; |
@@ -49,3 +49,3 @@ "use strict"; | ||
get language() { | ||
return definedOrDefault(this.custom.language, Options.defaultLanguage); | ||
return definedOrNull(this.custom.language); | ||
} | ||
@@ -109,3 +109,2 @@ get kgramLength() { | ||
Options.defaultKgramsInWindow = 17; | ||
Options.defaultLanguage = "javascript"; | ||
Options.defaultMinFragmentLength = 0; | ||
@@ -112,0 +111,0 @@ Options.defaultMinSimilarity = 0; |
@@ -17,3 +17,3 @@ /** | ||
/** | ||
* This function takes the 'difference' of one region with a list of other regions. | ||
* This function takes the first 'difference' of one region with a list of other regions. | ||
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region. | ||
@@ -25,6 +25,7 @@ * | ||
* @param source | ||
* @param other | ||
* @param others | ||
* @returns the first 'difference' region, or null if there is none. | ||
*/ | ||
static diff(source: Region, ...other: Region[]): Region[]; | ||
static firstDiff(source: Region, others: Region[]): Region | null; | ||
} | ||
//# sourceMappingURL=region.d.ts.map |
@@ -90,3 +90,3 @@ "use strict"; | ||
/** | ||
* This function takes the 'difference' of one region with a list of other regions. | ||
* This function takes the first 'difference' of one region with a list of other regions. | ||
* The 'difference' of a region is every interval [(x1,y1), (x2, y2)] that is only covered by the source region. | ||
@@ -98,8 +98,9 @@ * | ||
* @param source | ||
* @param other | ||
* @param others | ||
* @returns the first 'difference' region, or null if there is none. | ||
*/ | ||
static diff(source, ...other) { | ||
static firstDiff(source, others) { | ||
const regionToPoints = (r) => [[r.startRow, r.startCol], [r.endRow, r.endCol]]; | ||
const [startPoint, endPoint] = regionToPoints(source); | ||
const pointArray = other.map(regionToPoints); | ||
const pointArray = others.map(regionToPoints); | ||
// This map contains all the startpoints mapped to their respective endpoints. | ||
@@ -114,3 +115,2 @@ // This is how we will identify the closing point of this token | ||
// a spot is covered by the source region and/or other regions. | ||
const result = []; | ||
// This stack contains all regions that are 'active' or cover this interval | ||
@@ -126,4 +126,5 @@ // (at the current point in the traversal process) | ||
// Extra boolean to check whether we are currently covering the source interval | ||
if (p === startPoint) | ||
if (p === startPoint) { | ||
hasStarted = true; | ||
} | ||
// If this point is a starting point of a child region | ||
@@ -133,5 +134,8 @@ if (pointMap.has(p)) { | ||
// then this region is 'good'. | ||
if (stack.size == 0 && hasStarted) | ||
result.push(new Region(...firstPoint, ...p)); | ||
if (stack.size == 0 && hasStarted) { | ||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion | ||
return new Region(...firstPoint, ...p); | ||
} | ||
// Register that the current region is covered by a child | ||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion | ||
stack.add(pointMap.get(p)); | ||
@@ -143,4 +147,5 @@ } | ||
stack.delete(p); | ||
if (stack.size == 0 && hasStarted) | ||
if (stack.size == 0 && hasStarted) { | ||
firstPoint = p; | ||
} | ||
} | ||
@@ -150,5 +155,8 @@ currentIndex += 1; | ||
if (stack.size == 0) { | ||
result.push(new Region(...firstPoint, ...endPoint)); | ||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion | ||
return new Region(...firstPoint, ...endPoint); | ||
} | ||
return result; | ||
else { | ||
return null; | ||
} | ||
} | ||
@@ -155,0 +163,0 @@ } |
{ | ||
"name": "@dodona/dolos-lib", | ||
"version": "2.0.0", | ||
"version": "2.0.1", | ||
"main": "dist/index.js", | ||
@@ -8,8 +8,5 @@ "description": "Code similarity detection based on the Winnowing algorithm", | ||
"engines": { | ||
"node": ">12 <17" | ||
"node": ">12" | ||
}, | ||
"scripts": { | ||
"hash": "tsc && node dist/bin/hash.js", | ||
"test-hash-performance": "tsc && time head -c 1000000 /dev/random | node dist/bin/hash.js > /dev/null", | ||
"test-winnow-performance": "tsc && time head -c 1000000 /dev/random | node dist/bin/hash.js > /dev/null", | ||
"test": "nyc ava", | ||
@@ -16,0 +13,0 @@ "test:watch": "ava --watch", |
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
Dynamic require
Supply chain risk: Dynamic require can indicate the package is performing dangerous or unsafe dynamic code execution.
Found 1 instance in 1 package
129581
61
3146