Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in · Demo · Install
Socket

@dodona/dolos-core

Package Overview
Dependencies
Maintainers
6
Versions
5
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@dodona/dolos-core - npm Package Compare versions

Comparing version 1.0.2 to 1.1.0

15

dist/algorithm/fingerprintIndex.d.ts

@@ -11,2 +11,4 @@ import { Range } from "../util/range.js";

shared: Set<SharedFingerprint>;
ignored: Set<SharedFingerprint>;
isIgnored: boolean;
}

@@ -20,5 +22,8 @@ export interface Occurrence {

private readonly kgramsInWindow;
private maxFingerprintFileCount;
private readonly hashFilter;
private readonly files;
private readonly ignoredFiles;
private readonly index;
private readonly ignoredHashes;
/**

@@ -29,6 +34,14 @@ * Creates a Fingerprint Index which is able to compare files with each other

*/
constructor(kgramLength: number, kgramsInWindow: number, kgramData?: boolean);
constructor(kgramLength: number, kgramsInWindow: number, kgramData?: boolean, maxFingerprintFileCount?: number);
addIgnoredFile(file: TokenizedFile): void;
getMaxFingerprintFileCount(): number;
updateMaxFingerprintFileCount(maxFingerprintFileCount: number | undefined): void;
addFiles(tokenizedFiles: TokenizedFile[]): Map<Hash, SharedFingerprint>;
private addEntry;
addIgnoredHashes(hashes: Array<Hash>): void;
private ignoreSharedFingerprint;
private unIgnoreSharedFingerprint;
sharedFingerprints(): Array<SharedFingerprint>;
entries(): Array<FileEntry>;
ignoredEntries(): Array<FileEntry>;
getPair(file1: TokenizedFile, file2: TokenizedFile): Pair;

@@ -35,0 +48,0 @@ allPairs(sortBy?: string): Array<Pair>;

135

dist/algorithm/fingerprintIndex.js

@@ -13,9 +13,43 @@ import { Range } from "../util/range.js";

*/
constructor(kgramLength, kgramsInWindow, kgramData) {
constructor(kgramLength, kgramsInWindow, kgramData, maxFingerprintFileCount = Number.MAX_SAFE_INTEGER) {
this.kgramLength = kgramLength;
this.kgramsInWindow = kgramsInWindow;
this.maxFingerprintFileCount = maxFingerprintFileCount;
this.hashFilter = new WinnowFilter(this.kgramLength, this.kgramsInWindow, kgramData);
this.files = new Map();
this.ignoredFiles = new Map();
this.index = new Map();
this.ignoredHashes = new Set();
}
addIgnoredFile(file) {
assert(!this.ignoredFiles.has(file.id), `This file has already been ignored: ${file.file.path}`);
const entry = {
file,
kgrams: [],
isIgnored: true,
shared: new Set(),
ignored: new Set()
};
this.ignoredFiles.set(file.id, entry);
this.addEntry(entry);
}
getMaxFingerprintFileCount() {
return this.maxFingerprintFileCount;
}
updateMaxFingerprintFileCount(maxFingerprintFileCount) {
if (maxFingerprintFileCount == this.maxFingerprintFileCount) {
return;
}
this.maxFingerprintFileCount = maxFingerprintFileCount || Number.MAX_SAFE_INTEGER;
for (const shared of this.index.values()) {
if (!this.ignoredHashes.has(shared.hash)) {
if (shared.fileCount() > this.maxFingerprintFileCount && !shared.ignored) {
this.ignoreSharedFingerprint(shared);
}
else if (shared.fileCount() <= this.maxFingerprintFileCount && shared.ignored) {
this.unIgnoreSharedFingerprint(shared);
}
}
}
}
addFiles(tokenizedFiles) {

@@ -26,42 +60,80 @@ for (const f of tokenizedFiles) {

for (const file of tokenizedFiles) {
let kgram = 0;
const entry = {
file,
kgrams: [],
shared: new Set()
isIgnored: false,
shared: new Set(),
ignored: new Set()
};
this.files.set(file.id, entry);
for (const { data, hash, start, stop } of this.hashFilter.fingerprints(file.tokens)) {
// add kgram to file
entry.kgrams.push(new Range(start, stop));
// sanity check
assert(Region.isInOrder(file.mapping[start], file.mapping[stop])
// If we end our kgram on a ')', the location of the opening token is used.
// However, the location of this token in the file might be before
// the location of the starting token of the kmer
// For example: the last token of every ast is ')', closing the program.
// The location of this token is always (0, 0), since the program root is the first token.
// In this way, the 'end' token is before any other token in the AST.
|| file.tokens[stop] === ")", `Invalid ordering:
this.addEntry(entry);
}
return this.index;
}
addEntry(entry) {
const file = entry.file;
let kgram = 0;
for (const { data, hash, start, stop } of this.hashFilter.fingerprints(file.tokens)) {
// add kgram to file
entry.kgrams.push(new Range(start, stop));
// sanity check
assert(Region.isInOrder(file.mapping[start], file.mapping[stop])
// If we end our kgram on a ')', the location of the opening token is used.
// However, the location of this token in the file might be before
// the location of the starting token of the kmer
// For example: the last token of every ast is ')', closing the program.
// The location of this token is always (0, 0), since the program root is the first token.
// In this way, the 'end' token is before any other token in the AST.
|| file.tokens[stop] === ")", `Invalid ordering:
expected ${file.mapping[start]}
to start be before the end of ${file.mapping[stop]}`);
const location = Region.merge(file.mapping[start], file.mapping[stop]);
const part = {
file,
side: { index: kgram, start, stop, data, location }
};
// look if the index already contains the given hashing
let shared = this.index.get(hash);
if (!shared) {
// if the hashing does not yet exist in the index, add it
shared = new SharedFingerprint(hash, data);
this.index.set(hash, shared);
}
shared.add(part);
const location = Region.merge(file.mapping[start], file.mapping[stop]);
const part = {
file,
side: { index: kgram, start, stop, data, location }
};
// look if the index already contains the given hashing
let shared = this.index.get(hash);
if (!shared) {
// if the hashing does not yet exist in the index, add it
shared = new SharedFingerprint(hash, data);
this.index.set(hash, shared);
}
shared.add(part);
if (entry.isIgnored || shared.fileCount() > this.maxFingerprintFileCount || this.ignoredHashes.has(hash)) {
this.ignoreSharedFingerprint(shared);
}
else {
entry.shared.add(shared);
kgram += 1;
}
kgram += 1;
}
return this.index;
}
addIgnoredHashes(hashes) {
for (const hash of hashes) {
this.ignoredHashes.add(hash);
const shared = this.index.get(hash);
if (shared) {
this.ignoreSharedFingerprint(shared);
}
}
}
ignoreSharedFingerprint(shared) {
shared.ignored = true;
for (const other of shared.files()) {
if (!this.ignoredFiles.has(other.id)) {
const otherEntry = this.files.get(other.id);
otherEntry.shared.delete(shared);
otherEntry.ignored.add(shared);
}
}
}
unIgnoreSharedFingerprint(shared) {
shared.ignored = false;
for (const other of shared.files()) {
const otherEntry = this.files.get(other.id);
otherEntry.ignored.delete(shared);
otherEntry.shared.add(shared);
}
}
sharedFingerprints() {

@@ -73,2 +145,5 @@ return Array.from(this.index.values());

}
ignoredEntries() {
return Array.from(this.ignoredFiles.values());
}
getPair(file1, file2) {

@@ -75,0 +150,0 @@ const entry1 = this.files.get(file1.id);

@@ -21,2 +21,4 @@ import { Fragment } from "./fragment.js";

readonly similarity: number;
readonly leftIgnored: number;
readonly rightIgnored: number;
constructor(leftEntry: FileEntry, rightEntry: FileEntry);

@@ -23,0 +25,0 @@ private longestCommonSubstring;

@@ -46,6 +46,9 @@ import { Range } from "../util/range.js";

this.rightCovered = right.length;
// Each side reports the size of the `ignored` set of its own entry.
// `rightIgnored` used to read `leftEntry`, which skewed the denominator.
this.leftIgnored = leftEntry.ignored.size;
this.rightIgnored = rightEntry.ignored.size;
this.leftTotal = leftEntry.kgrams.length;
this.rightTotal = rightEntry.kgrams.length;
if (this.leftTotal + this.rightTotal > 0) {
this.similarity = (this.leftCovered + this.rightCovered) / (this.leftTotal + this.rightTotal);
const denominator = this.leftTotal + this.rightTotal - this.leftIgnored - this.rightIgnored;
if (denominator > 0) {
this.similarity = (this.leftCovered + this.rightCovered) / denominator;
}

@@ -52,0 +55,0 @@ else {

@@ -7,2 +7,3 @@ import { Occurrence } from "./fingerprintIndex.js";

readonly kgram: Array<string> | null;
ignored: boolean;
private partMap;

@@ -16,3 +17,4 @@ constructor(hash: number, kgram: Array<string> | null);

fileCount(): number;
includesFile(file: TokenizedFile): boolean;
}
//# sourceMappingURL=sharedFingerprint.d.ts.map

@@ -7,2 +7,4 @@ import { Identifiable } from "../util/identifiable.js";

this.kgram = kgram;
// Whether this SharedFingerprint occurs in the boilerplate/template code
this.ignored = false;
this.partMap = new Map();

@@ -34,3 +36,6 @@ }

}
includesFile(file) {
return this.partMap.has(file);
}
}
//# sourceMappingURL=sharedFingerprint.js.map

@@ -1,11 +0,17 @@

export { FingerprintIndex, FileEntry } from "./algorithm/fingerprintIndex.js";
export { SharedFingerprint } from "./algorithm/sharedFingerprint.js";
export { Pair } from "./algorithm/pair.js";
export { Fragment } from "./algorithm/fragment.js";
export { File, ExtraInfo } from "./file/file.js";
export { TokenizedFile } from "./file/tokenizedFile.js";
export { Result } from "./util/result.js";
export { Range } from "./util/range.js";
export { Region } from "./util/region.js";
export * from "./algorithm/fingerprintIndex.js";
export * from "./algorithm/fragment.js";
export * from "./algorithm/pair.js";
export * from "./algorithm/pairedOccurrence.js";
export * from "./algorithm/sharedFingerprint.js";
export * from "./file/file.js";
export * from "./file/tokenizedFile.js";
export * from "./hashing/hashFilter.js";
export * from "./hashing/rollingHash.js";
export * from "./hashing/tokenHash.js";
export * from "./hashing/winnowFilter.js";
export * from "./util/identifiable.js";
export * from "./util/range.js";
export * from "./util/result.js";
export * from "./util/region.js";
export * from "./util/utils.js";
//# sourceMappingURL=index.d.ts.map

@@ -1,11 +0,17 @@

export { FingerprintIndex } from "./algorithm/fingerprintIndex.js";
export { SharedFingerprint } from "./algorithm/sharedFingerprint.js";
export { Pair } from "./algorithm/pair.js";
export { Fragment } from "./algorithm/fragment.js";
export { File } from "./file/file.js";
export { TokenizedFile } from "./file/tokenizedFile.js";
export { Result } from "./util/result.js";
export { Range } from "./util/range.js";
export { Region } from "./util/region.js";
export * from "./algorithm/fingerprintIndex.js";
export * from "./algorithm/fragment.js";
export * from "./algorithm/pair.js";
export * from "./algorithm/pairedOccurrence.js";
export * from "./algorithm/sharedFingerprint.js";
export * from "./file/file.js";
export * from "./file/tokenizedFile.js";
export * from "./hashing/hashFilter.js";
export * from "./hashing/rollingHash.js";
export * from "./hashing/tokenHash.js";
export * from "./hashing/winnowFilter.js";
export * from "./util/identifiable.js";
export * from "./util/range.js";
export * from "./util/result.js";
export * from "./util/region.js";
export * from "./util/utils.js";
//# sourceMappingURL=index.js.map
{
"name": "@dodona/dolos-core",
"version": "1.0.2",
"version": "1.1.0",
"exports": "./dist/index.js",

@@ -30,3 +30,3 @@ "type": "module",

"eslint": "8.57.0",
"typescript": "5.2.2"
"typescript": "5.4.5"
},

@@ -33,0 +33,0 @@ "bugs": {

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc