@dodona/dolos-lib
Advanced tools
Comparing version 2.2.4 to 2.3.0
@@ -12,2 +12,3 @@ import { Report } from "./lib/analyze/report"; | ||
constructor(customOptions?: CustomOptions); | ||
private fromDirectory; | ||
private fromZIP; | ||
@@ -14,0 +15,0 @@ private fromCSV; |
@@ -28,2 +28,3 @@ "use strict"; | ||
const analyze_1 = require("./lib/analyze"); | ||
const report_1 = require("./lib/analyze/report"); | ||
const options_1 = require("./lib/util/options"); | ||
@@ -48,2 +49,19 @@ const file_1 = require("./lib/file/file"); | ||
} | ||
/**
 * Collects every regular file below `dirPath`, descending into
 * subdirectories.
 *
 * The tree is walked iteratively and breadth-first: `dirs` doubles as the
 * work queue and `i` is the index of the directory currently being read.
 *
 * @param dirPath directory to start the walk from
 * @returns the awaited outcome of `Result.all` over one
 *          `File.fromPath(...)` value per file found.
 *          NOTE(review): presumably the combined list of files, or the
 *          first failure; confirm against `Result.all`.
 */
async fromDirectory(dirPath) { | ||
const dirs = [dirPath]; | ||
const files = []; | ||
let i = 0; | ||
// `dirs` grows while it is being scanned, so the loop only ends once every
// directory discovered along the way has been read as well.
while (i < dirs.length) { | ||
for (const entry of await fs.readdir(dirs[i], { withFileTypes: true })) { | ||
// Directories are queued for a later pass, regular files are collected;
// any other entry type is ignored.
if (entry.isDirectory()) { | ||
dirs.push(path.join(dirs[i], entry.name)); | ||
} | ||
else if (entry.isFile()) { | ||
files.push(file_1.File.fromPath(path.join(dirs[i], entry.name))); | ||
} | ||
} | ||
i += 1; | ||
} | ||
return await result_1.Result.all(files); | ||
} | ||
async fromZIP(zipPath) { | ||
@@ -60,9 +78,8 @@ const tmpDir = await fs.mkdtemp(path.join((0, os_1.tmpdir)(), "dolos-unzip-")); | ||
const infoPath = path.join(tmpDir, "info.csv"); | ||
try { | ||
await fs.access(infoPath, fs_1.constants.R_OK); | ||
if (await fs.access(infoPath, fs_1.constants.R_OK).then(() => true).catch(() => false)) { | ||
return await this.fromCSV(infoPath); | ||
} | ||
catch { | ||
throw new Error("Zip does not contain a required 'info.csv' file"); | ||
else { | ||
return await this.fromDirectory(tmpDir); | ||
} | ||
return await this.fromCSV(infoPath); | ||
} | ||
@@ -87,3 +104,3 @@ finally { | ||
createdAt: new Date(row.created_at), | ||
labels: row.labels | ||
labels: row.label || row.labels | ||
})) | ||
@@ -126,12 +143,3 @@ .map((row) => file_1.File.fromPath(path.join(dirname, row.filename), row)); | ||
var _a; | ||
if (files.length < 2) { | ||
throw new Error("You need to supply at least two files"); | ||
} | ||
else if (files.length == 2 && this.options.maxFingerprintPercentage !== null) { | ||
throw new Error("You have given a maximum hash percentage but your are " + | ||
"comparing two files. Each matching hash will thus " + | ||
"be present in 100% of the files. This option does only" + | ||
"make sense when comparing more than two files."); | ||
} | ||
else if (this.index == null) { | ||
if (this.index == null) { | ||
if (this.options.language) { | ||
@@ -147,8 +155,29 @@ this.language = this.languagePicker.findLanguage(this.options.language); | ||
} | ||
const warnings = []; | ||
let filteredFiles; | ||
if (this.languageDetected) { | ||
for (const file of files) { | ||
(_a = this.language) === null || _a === void 0 ? void 0 : _a.checkLanguage(file); | ||
filteredFiles = files.filter(file => { var _a; return (_a = this.language) === null || _a === void 0 ? void 0 : _a.extensionMatches(file.path); }); | ||
const diff = files.length - filteredFiles.length; | ||
if (diff > 0) { | ||
warnings.push(`The language of the files was detected as ${(_a = this.language) === null || _a === void 0 ? void 0 : _a.name} ` + | ||
`but ${diff} files were ignored because they did not have a matching extension.` + | ||
"You can override this behavior by setting the language explicitly."); | ||
} | ||
} | ||
return this.index.compareFiles(files, nameCandidate); | ||
else { | ||
filteredFiles = files; | ||
} | ||
if (files.length < 2) { | ||
throw new Error("You need to supply at least two files"); | ||
} | ||
else if (files.length == 2 && this.options.maxFingerprintPercentage !== null) { | ||
throw new Error("You have given a maximum hash percentage but your are " + | ||
"comparing two files. Each matching hash will thus " + | ||
"be present in 100% of the files. This option does only" + | ||
"make sense when comparing more than two files."); | ||
} | ||
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion | ||
const tokenizedFiles = filteredFiles.map(f => this.tokenizer.tokenizeFile(f)); | ||
const fingerprints = await this.index.createMatches(tokenizedFiles); | ||
return new report_1.Report(this.options, this.language, tokenizedFiles, fingerprints, nameCandidate, warnings); | ||
} | ||
@@ -155,0 +184,0 @@ } |
@@ -10,2 +10,3 @@ import { Pair } from "./pair"; | ||
createdAt: string; | ||
warnings: string[]; | ||
} | ||
@@ -16,2 +17,3 @@ export declare class Report { | ||
readonly files: TokenizedFile[]; | ||
readonly warnings: string[]; | ||
private readonly kgramMaxFileOccurrences; | ||
@@ -22,3 +24,3 @@ private fingerprints; | ||
readonly createdAt: string; | ||
constructor(options: Options, language: Language | null, files: TokenizedFile[], fingerprints: Map<Hash, SharedFingerprint>, name?: string); | ||
constructor(options: Options, language: Language | null, files: TokenizedFile[], fingerprints: Map<Hash, SharedFingerprint>, name?: string, warnings?: string[]); | ||
getPair(file1: TokenizedFile, file2: TokenizedFile): Pair; | ||
@@ -25,0 +27,0 @@ allPairs(): Array<Pair>; |
@@ -7,3 +7,3 @@ "use strict"; | ||
class Report { | ||
constructor(options, language, files, fingerprints, name) { | ||
constructor(options, language, files, fingerprints, name, warnings = []) { | ||
var _a; | ||
@@ -13,2 +13,3 @@ this.options = options; | ||
this.files = files; | ||
this.warnings = warnings; | ||
this.pairs = []; | ||
@@ -66,2 +67,3 @@ this.createdAt = new Date().toISOString(); | ||
languageDetected: this.options.language == undefined, | ||
warnings: this.warnings, | ||
}; | ||
@@ -68,0 +70,0 @@ } |
@@ -43,5 +43,5 @@ import { Tokenizer } from "../tokenizer/tokenizer"; | ||
/** | ||
* Find the language to use for tokenization based on the extension of the | ||
* first file. If the extension does not match any known language, then | ||
* a LanguageError is thrown. | ||
* Find the language to use for tokenization based on the most common | ||
* extension of the files. If the extension does not match any known language, | ||
* then a LanguageError is thrown. | ||
* | ||
@@ -48,0 +48,0 @@ * @param files the files to tokenize |
@@ -94,5 +94,5 @@ "use strict"; | ||
/** | ||
* Find the language to use for tokenization based on the extension of the | ||
* first file. If the extension does not match any known language, then | ||
* a LanguageError is thrown. | ||
* Find the language to use for tokenization based on the most common | ||
* extension of the files. If the extension does not match any known language, | ||
* then a LanguageError is thrown. | ||
* | ||
@@ -102,10 +102,17 @@ * @param files the files to tokenize | ||
detectLanguage(files) { | ||
const firstFile = files[0]; | ||
const language = this.byExtension.get(firstFile.extension); | ||
if (language == null) { | ||
throw new LanguageError(`Could not detect language based on extension (${firstFile.extension}).`); | ||
} | ||
var _a; | ||
const counts = new Map(); | ||
let maxCount = 0; | ||
let language = undefined; | ||
for (const file of files) { | ||
language.checkLanguage(file); | ||
const count = ((_a = counts.get(file.extension)) !== null && _a !== void 0 ? _a : 0) + 1; | ||
if (count > maxCount) { | ||
maxCount = count; | ||
language = this.byExtension.get(file.extension); | ||
} | ||
counts.set(file.extension, count); | ||
} | ||
if (language == undefined) { | ||
throw new LanguageError("Could not detect language based on extension."); | ||
} | ||
return language; | ||
@@ -112,0 +119,0 @@ } |
{ | ||
"name": "@dodona/dolos-lib", | ||
"version": "2.2.4", | ||
"version": "2.3.0", | ||
"main": "dist/index.js", | ||
@@ -30,3 +30,3 @@ "description": "Code similarity detection based on the Winnowing algorithm", | ||
"@types/d3-dsv": "2.0.3", | ||
"@types/node": "18.16.18", | ||
"@types/node": "18.16.19", | ||
"@typescript-eslint/eslint-plugin": "5.60.1", | ||
@@ -36,3 +36,3 @@ "@typescript-eslint/parser": "5.60.1", | ||
"benchmark": "2.1.4", | ||
"eslint": "8.42.0", | ||
"eslint": "8.44.0", | ||
"np": "7.7.0", | ||
@@ -39,0 +39,0 @@ "nyc": "15.1.0", |
112816
2759