@dodona/dolos-core
Advanced tools
Comparing version 1.0.2 to 1.1.0
@@ -11,2 +11,4 @@ import { Range } from "../util/range.js"; | ||
shared: Set<SharedFingerprint>; | ||
ignored: Set<SharedFingerprint>; | ||
isIgnored: boolean; | ||
} | ||
@@ -20,5 +22,8 @@ export interface Occurrence { | ||
private readonly kgramsInWindow; | ||
private maxFingerprintFileCount; | ||
private readonly hashFilter; | ||
private readonly files; | ||
private readonly ignoredFiles; | ||
private readonly index; | ||
private readonly ignoredHashes; | ||
/** | ||
@@ -29,6 +34,14 @@ * Creates a Fingerprint Index which is able to compare files with each other | ||
*/ | ||
constructor(kgramLength: number, kgramsInWindow: number, kgramData?: boolean); | ||
constructor(kgramLength: number, kgramsInWindow: number, kgramData?: boolean, maxFingerprintFileCount?: number); | ||
addIgnoredFile(file: TokenizedFile): void; | ||
getMaxFingerprintFileCount(): number; | ||
updateMaxFingerprintFileCount(maxFingerprintFileCount: number | undefined): void; | ||
addFiles(tokenizedFiles: TokenizedFile[]): Map<Hash, SharedFingerprint>; | ||
private addEntry; | ||
addIgnoredHashes(hashes: Array<Hash>): void; | ||
private ignoreSharedFingerprint; | ||
private unIgnoreSharedFingerprint; | ||
sharedFingerprints(): Array<SharedFingerprint>; | ||
entries(): Array<FileEntry>; | ||
ignoredEntries(): Array<FileEntry>; | ||
getPair(file1: TokenizedFile, file2: TokenizedFile): Pair; | ||
@@ -35,0 +48,0 @@ allPairs(sortBy?: string): Array<Pair>; |
@@ -13,9 +13,43 @@ import { Range } from "../util/range.js"; | ||
*/ | ||
constructor(kgramLength, kgramsInWindow, kgramData) { | ||
constructor(kgramLength, kgramsInWindow, kgramData, maxFingerprintFileCount = Number.MAX_SAFE_INTEGER) { | ||
this.kgramLength = kgramLength; | ||
this.kgramsInWindow = kgramsInWindow; | ||
this.maxFingerprintFileCount = maxFingerprintFileCount; | ||
this.hashFilter = new WinnowFilter(this.kgramLength, this.kgramsInWindow, kgramData); | ||
this.files = new Map(); | ||
this.ignoredFiles = new Map(); | ||
this.index = new Map(); | ||
this.ignoredHashes = new Set(); | ||
} | ||
addIgnoredFile(file) { | ||
assert(!this.ignoredFiles.has(file.id), `This file has already been ignored: ${file.file.path}`); | ||
const entry = { | ||
file, | ||
kgrams: [], | ||
isIgnored: true, | ||
shared: new Set(), | ||
ignored: new Set() | ||
}; | ||
this.ignoredFiles.set(file.id, entry); | ||
this.addEntry(entry); | ||
} | ||
getMaxFingerprintFileCount() { | ||
return this.maxFingerprintFileCount; | ||
} | ||
updateMaxFingerprintFileCount(maxFingerprintFileCount) { | ||
if (maxFingerprintFileCount == this.maxFingerprintFileCount) { | ||
return; | ||
} | ||
this.maxFingerprintFileCount = maxFingerprintFileCount || Number.MAX_SAFE_INTEGER; | ||
for (const shared of this.index.values()) { | ||
if (!this.ignoredHashes.has(shared.hash)) { | ||
if (shared.fileCount() > this.maxFingerprintFileCount && !shared.ignored) { | ||
this.ignoreSharedFingerprint(shared); | ||
} | ||
else if (shared.fileCount() <= this.maxFingerprintFileCount && shared.ignored) { | ||
this.unIgnoreSharedFingerprint(shared); | ||
} | ||
} | ||
} | ||
} | ||
addFiles(tokenizedFiles) { | ||
@@ -26,42 +60,80 @@ for (const f of tokenizedFiles) { | ||
for (const file of tokenizedFiles) { | ||
let kgram = 0; | ||
const entry = { | ||
file, | ||
kgrams: [], | ||
shared: new Set() | ||
isIgnored: false, | ||
shared: new Set(), | ||
ignored: new Set() | ||
}; | ||
this.files.set(file.id, entry); | ||
for (const { data, hash, start, stop } of this.hashFilter.fingerprints(file.tokens)) { | ||
// add kgram to file | ||
entry.kgrams.push(new Range(start, stop)); | ||
// sanity check | ||
assert(Region.isInOrder(file.mapping[start], file.mapping[stop]) | ||
// If we end our kgram on a ')', the location of the opening token is used. | ||
// However, the location of this token in the file might be before | ||
// the location of the starting token of the kmer | ||
// For example: the last token of every ast is ')', closing the program. | ||
// The location of this token is always (0, 0), since the program root is the first token. | ||
// In this way, the 'end' token is before any other token in the AST. | ||
|| file.tokens[stop] === ")", `Invalid ordering: | ||
this.addEntry(entry); | ||
} | ||
return this.index; | ||
} | ||
addEntry(entry) { | ||
const file = entry.file; | ||
let kgram = 0; | ||
for (const { data, hash, start, stop } of this.hashFilter.fingerprints(file.tokens)) { | ||
// add kgram to file | ||
entry.kgrams.push(new Range(start, stop)); | ||
// sanity check | ||
assert(Region.isInOrder(file.mapping[start], file.mapping[stop]) | ||
// If we end our kgram on a ')', the location of the opening token is used. | ||
// However, the location of this token in the file might be before | ||
// the location of the starting token of the kmer | ||
// For example: the last token of every ast is ')', closing the program. | ||
// The location of this token is always (0, 0), since the program root is the first token. | ||
// In this way, the 'end' token is before any other token in the AST. | ||
|| file.tokens[stop] === ")", `Invalid ordering: | ||
expected ${file.mapping[start]} | ||
to start be before the end of ${file.mapping[stop]}`); | ||
const location = Region.merge(file.mapping[start], file.mapping[stop]); | ||
const part = { | ||
file, | ||
side: { index: kgram, start, stop, data, location } | ||
}; | ||
// look if the index already contains the given hashing | ||
let shared = this.index.get(hash); | ||
if (!shared) { | ||
// if the hashing does not yet exist in the index, add it | ||
shared = new SharedFingerprint(hash, data); | ||
this.index.set(hash, shared); | ||
} | ||
shared.add(part); | ||
const location = Region.merge(file.mapping[start], file.mapping[stop]); | ||
const part = { | ||
file, | ||
side: { index: kgram, start, stop, data, location } | ||
}; | ||
// look if the index already contains the given hashing | ||
let shared = this.index.get(hash); | ||
if (!shared) { | ||
// if the hashing does not yet exist in the index, add it | ||
shared = new SharedFingerprint(hash, data); | ||
this.index.set(hash, shared); | ||
} | ||
shared.add(part); | ||
if (entry.isIgnored || shared.fileCount() > this.maxFingerprintFileCount || this.ignoredHashes.has(hash)) { | ||
this.ignoreSharedFingerprint(shared); | ||
} | ||
else { | ||
entry.shared.add(shared); | ||
kgram += 1; | ||
} | ||
kgram += 1; | ||
} | ||
return this.index; | ||
} | ||
addIgnoredHashes(hashes) { | ||
for (const hash of hashes) { | ||
this.ignoredHashes.add(hash); | ||
const shared = this.index.get(hash); | ||
if (shared) { | ||
this.ignoreSharedFingerprint(shared); | ||
} | ||
} | ||
} | ||
ignoreSharedFingerprint(shared) { | ||
shared.ignored = true; | ||
for (const other of shared.files()) { | ||
if (!this.ignoredFiles.has(other.id)) { | ||
const otherEntry = this.files.get(other.id); | ||
otherEntry.shared.delete(shared); | ||
otherEntry.ignored.add(shared); | ||
} | ||
} | ||
} | ||
unIgnoreSharedFingerprint(shared) { | ||
shared.ignored = false; | ||
for (const other of shared.files()) { | ||
const otherEntry = this.files.get(other.id); | ||
otherEntry.ignored.delete(shared); | ||
otherEntry.shared.add(shared); | ||
} | ||
} | ||
sharedFingerprints() { | ||
@@ -73,2 +145,5 @@ return Array.from(this.index.values()); | ||
} | ||
ignoredEntries() { | ||
return Array.from(this.ignoredFiles.values()); | ||
} | ||
getPair(file1, file2) { | ||
@@ -75,0 +150,0 @@ const entry1 = this.files.get(file1.id); |
@@ -21,2 +21,4 @@ import { Fragment } from "./fragment.js"; | ||
readonly similarity: number; | ||
readonly leftIgnored: number; | ||
readonly rightIgnored: number; | ||
constructor(leftEntry: FileEntry, rightEntry: FileEntry); | ||
@@ -23,0 +25,0 @@ private longestCommonSubstring; |
@@ -46,6 +46,9 @@ import { Range } from "../util/range.js"; | ||
this.rightCovered = right.length; | ||
this.leftIgnored = leftEntry.ignored.size; | ||
this.rightIgnored = rightEntry.ignored.size; | ||
this.leftTotal = leftEntry.kgrams.length; | ||
this.rightTotal = rightEntry.kgrams.length; | ||
if (this.leftTotal + this.rightTotal > 0) { | ||
this.similarity = (this.leftCovered + this.rightCovered) / (this.leftTotal + this.rightTotal); | ||
const denominator = this.leftTotal + this.rightTotal - this.leftIgnored - this.rightIgnored; | ||
if (denominator > 0) { | ||
this.similarity = (this.leftCovered + this.rightCovered) / denominator; | ||
} | ||
@@ -52,0 +55,0 @@ else { |
@@ -7,2 +7,3 @@ import { Occurrence } from "./fingerprintIndex.js"; | ||
readonly kgram: Array<string> | null; | ||
ignored: boolean; | ||
private partMap; | ||
@@ -16,3 +17,4 @@ constructor(hash: number, kgram: Array<string> | null); | ||
fileCount(): number; | ||
includesFile(file: TokenizedFile): boolean; | ||
} | ||
//# sourceMappingURL=sharedFingerprint.d.ts.map |
@@ -7,2 +7,4 @@ import { Identifiable } from "../util/identifiable.js"; | ||
this.kgram = kgram; | ||
// Whether this SharedFingerprint occurs in the boilerplate/template code | ||
this.ignored = false; | ||
this.partMap = new Map(); | ||
@@ -34,3 +36,6 @@ } | ||
} | ||
includesFile(file) { | ||
return this.partMap.has(file); | ||
} | ||
} | ||
//# sourceMappingURL=sharedFingerprint.js.map |
@@ -1,11 +0,17 @@ | ||
export { FingerprintIndex, FileEntry } from "./algorithm/fingerprintIndex.js"; | ||
export { SharedFingerprint } from "./algorithm/sharedFingerprint.js"; | ||
export { Pair } from "./algorithm/pair.js"; | ||
export { Fragment } from "./algorithm/fragment.js"; | ||
export { File, ExtraInfo } from "./file/file.js"; | ||
export { TokenizedFile } from "./file/tokenizedFile.js"; | ||
export { Result } from "./util/result.js"; | ||
export { Range } from "./util/range.js"; | ||
export { Region } from "./util/region.js"; | ||
export * from "./algorithm/fingerprintIndex.js"; | ||
export * from "./algorithm/fragment.js"; | ||
export * from "./algorithm/pair.js"; | ||
export * from "./algorithm/pairedOccurrence.js"; | ||
export * from "./algorithm/sharedFingerprint.js"; | ||
export * from "./file/file.js"; | ||
export * from "./file/tokenizedFile.js"; | ||
export * from "./hashing/hashFilter.js"; | ||
export * from "./hashing/rollingHash.js"; | ||
export * from "./hashing/tokenHash.js"; | ||
export * from "./hashing/winnowFilter.js"; | ||
export * from "./util/identifiable.js"; | ||
export * from "./util/range.js"; | ||
export * from "./util/result.js"; | ||
export * from "./util/region.js"; | ||
export * from "./util/utils.js"; | ||
//# sourceMappingURL=index.d.ts.map |
@@ -1,11 +0,17 @@ | ||
export { FingerprintIndex } from "./algorithm/fingerprintIndex.js"; | ||
export { SharedFingerprint } from "./algorithm/sharedFingerprint.js"; | ||
export { Pair } from "./algorithm/pair.js"; | ||
export { Fragment } from "./algorithm/fragment.js"; | ||
export { File } from "./file/file.js"; | ||
export { TokenizedFile } from "./file/tokenizedFile.js"; | ||
export { Result } from "./util/result.js"; | ||
export { Range } from "./util/range.js"; | ||
export { Region } from "./util/region.js"; | ||
export * from "./algorithm/fingerprintIndex.js"; | ||
export * from "./algorithm/fragment.js"; | ||
export * from "./algorithm/pair.js"; | ||
export * from "./algorithm/pairedOccurrence.js"; | ||
export * from "./algorithm/sharedFingerprint.js"; | ||
export * from "./file/file.js"; | ||
export * from "./file/tokenizedFile.js"; | ||
export * from "./hashing/hashFilter.js"; | ||
export * from "./hashing/rollingHash.js"; | ||
export * from "./hashing/tokenHash.js"; | ||
export * from "./hashing/winnowFilter.js"; | ||
export * from "./util/identifiable.js"; | ||
export * from "./util/range.js"; | ||
export * from "./util/result.js"; | ||
export * from "./util/region.js"; | ||
export * from "./util/utils.js"; | ||
//# sourceMappingURL=index.js.map |
{ | ||
"name": "@dodona/dolos-core", | ||
"version": "1.0.2", | ||
"version": "1.1.0", | ||
"exports": "./dist/index.js", | ||
@@ -30,3 +30,3 @@ "type": "module", | ||
"eslint": "8.57.0", | ||
"typescript": "5.2.2" | ||
"typescript": "5.4.5" | ||
}, | ||
@@ -33,0 +33,0 @@ "bugs": { |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
116913
1638