@hdelva/termennetwerk_client
Advanced tools
Comparing version 1.0.7 to 2.0.0
@@ -7,4 +7,22 @@ "use strict"; | ||
exports.default = void 0; | ||
var AutoComplete_1 = require("./AutoComplete"); | ||
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(AutoComplete_1).default; } }); | ||
const AutoComplete_1 = __importDefault(require("./AutoComplete")); | ||
var AutoComplete_2 = require("./AutoComplete"); | ||
Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(AutoComplete_2).default; } }); | ||
const client = new AutoComplete_1.default([ | ||
"https://termen.opoi.org/nta", | ||
"https://termen.opoi.org/vtmk", | ||
"https://termen.opoi.org/cht", | ||
"https://termen.opoi.org/rkdartists" | ||
], 10); | ||
client.on("data", (data) => { | ||
//const results = []; | ||
console.log(data.object.value, client.resolveSubject(data.subject.value).length); | ||
for (const quad of client.resolveSubject(data.subject.value)) { | ||
//console.log(quad.object.value); | ||
} | ||
}); | ||
client.on("reset", (data) => { | ||
console.log(""); | ||
}); | ||
client.query("anne frank"); | ||
//# sourceMappingURL=index.js.map |
{ | ||
"name": "@hdelva/termennetwerk_client", | ||
"version": "1.0.7", | ||
"description": "Autocomplete over TREEs", | ||
"version": "2.0.0", | ||
"description": "Proof of concept of client-side autocompletion", | ||
"main": "lib/index.js", | ||
@@ -9,3 +9,3 @@ "author": "Harm Delva", | ||
"dependencies": { | ||
"@types/node": "^12.12.14", | ||
"@types/node": "^12.19.3", | ||
"@types/typescript": "^2.0.0", | ||
@@ -20,3 +20,3 @@ "collections": "^5.1.12", | ||
"devDependencies": { | ||
"ts-loader": "^8.0.5" | ||
"ts-loader": "^8.0.7" | ||
}, | ||
@@ -23,0 +23,0 @@ "directories": { |
@@ -1,1 +0,39 @@ | ||
export { default } from "./AutoComplete"; | ||
import FuzzyAutoComplete from "./examples/FuzzyAutoComplete"; | ||
import StrictAutoComplete from "./examples/StrictAutoComplete"; | ||
import QueryAgent from "./QueryAgent"; | ||
import QueryAggregator from "./QueryAggregator"; | ||
import ResultEmitter from "./ResultEmitter"; | ||
import ResultRanking from "./ResultRanking"; | ||
import ResultStore from "./ResultStore"; | ||
import ResultUniqueFilter from "./ResultUniqueFilter"; | ||
import asymmetricDiceCoefficient from "./similarity/asymmetricDiceCoefficient"; | ||
import commonPrefixSimilarity from "./similarity/commonPrefix"; | ||
import fuzzyIndexSimilarity from "./similarity/fuzzyIndex"; | ||
import fuzzyPrefixSimilarity from "./similarity/fuzzyPrefix"; | ||
import SimilarityConfiguration from "./similarity/SimilarityConfiguration"; | ||
import strictPrefixSimilarity from "./similarity/strictPrefix"; | ||
import tokenwiseCompare from "./similarity/tokenwise"; | ||
export default { | ||
examples: { | ||
StrictAutoComplete, | ||
FuzzyAutoComplete, | ||
}, | ||
components: { | ||
QueryAgent, | ||
QueryAggregator, | ||
ResultEmitter, | ||
ResultRanking, | ||
ResultStore, | ||
ResultUniqueFilter, | ||
SimilarityConfiguration, | ||
}, | ||
similarityFunctions: { | ||
asymmetricDiceCoefficient, | ||
commonPrefixSimilarity, | ||
fuzzyIndexSimilarity, | ||
fuzzyPrefixSimilarity, | ||
strictPrefixSimilarity, | ||
tokenwiseCompare, | ||
} | ||
}; |
@@ -7,3 +7,7 @@ import INormalizer from "./INormalizer"; | ||
constructor() { | ||
this.regex = /[^\p{L}\p{N}\s]/gu; | ||
// see https://javascript.info/regexp-unicode | ||
// \p{L} is all unicode letters | ||
// \p{N} is all numbers | ||
// \p{Z} is all separators (e.g., whitespace) | ||
this.regex = /[^\p{L}\p{N}\p{Z}]/gu; | ||
} | ||
@@ -10,0 +14,0 @@ |
@@ -5,4 +5,4 @@ import LDFetch from "ldfetch"; | ||
import IQueryEmitter from "./IQueryEmitter"; | ||
import { SimilarityConfiguration } from "./similarity/SimilarityConfiguration"; | ||
import ResultEmitter from "./ResultEmitter"; | ||
import SimilarityConfiguration from "./similarity/SimilarityConfiguration"; | ||
@@ -35,8 +35,11 @@ class RankedRelation { | ||
export default class QueryAgent extends IQueryEmitter { | ||
protected source: string; | ||
protected fetcher: LDFetch; | ||
protected activeQueries: Set<string>; | ||
protected similarityConfigurations: SimilarityConfiguration[]; | ||
protected knownRelations: Map<string, string>; // URI -> value | ||
/* | ||
* Traverses a single data source for the requested query string | ||
*/ | ||
export default class QueryAgent extends ResultEmitter { | ||
protected source: string; // access URI of the data source | ||
protected fetcher: LDFetch; // object that fetches and parses the RDF for us | ||
protected activeQueries: Set<string>; // link traversal is async, we may want to terminate query early | ||
protected similarityConfigurations: SimilarityConfiguration[]; // functions used to prioritize discovered relations | ||
protected knownRelations: Map<string, string>; // URI -> tree value | ||
@@ -49,8 +52,11 @@ constructor(source: string, similarityConfigurations: SimilarityConfiguration[]) { | ||
this.knownRelations = new Map(); | ||
this.similarityConfigurations = similarityConfigurations; // || strictPrefixSimilarity; | ||
this.similarityConfigurations = similarityConfigurations; | ||
// todo, maybe make this optional | ||
this.prefetch(); | ||
} | ||
private async prefetch() { | ||
// fetch the root node, and memorize all discovered relations | ||
// useful if the root node is particularly large | ||
public async prefetch() { | ||
const data = await this.fetcher.get(this.source); | ||
@@ -95,3 +101,7 @@ | ||
// kickstart the link traversal | ||
// start from the already discovered nodes | ||
for (const [uri, value] of this.knownRelations.entries()) { | ||
// iteratively build the similarity vector | ||
// stop as soon as this relation is certainly useless | ||
const similarityScores: number[] = []; | ||
@@ -101,2 +111,3 @@ for (const conf of this.similarityConfigurations) { | ||
if (!isNaN(similarity)) { | ||
// NaN similarities are not worth following | ||
similarityScores.push(similarity); | ||
@@ -109,2 +120,3 @@ } else { | ||
if (similarityScores.length === this.similarityConfigurations.length) { | ||
// the entire vector was evaluated, so it's worth following | ||
queue.push(new RankedRelation(uri, similarityScores)); | ||
@@ -118,2 +130,3 @@ } | ||
if (!this.activeQueries.has(input)) { | ||
// we're no longer waiting for this query's results | ||
break; | ||
@@ -129,2 +142,3 @@ } | ||
if (visited.has(page)) { | ||
// avoid hitting the same page multiple times | ||
continue; | ||
@@ -136,4 +150,4 @@ } | ||
const nodes = {}; | ||
const nodeValues = {}; | ||
const nodes = {}; // URI -> URI | ||
const nodeValues = {}; // URI -> list of tree values | ||
@@ -146,3 +160,3 @@ for (const untyped_quad of data.triples) { | ||
} else if (quad.predicate.value == "https://w3id.org/tree#value") { | ||
// be prepared for multiple values | ||
// a relation may contain several tree values | ||
if (!nodeValues[quad.subject.value]) { | ||
@@ -157,7 +171,10 @@ nodeValues[quad.subject.value] = []; | ||
// this page has been processed; schedule the next useful pages | ||
for (const [key, node] of Object.entries(nodes)) { | ||
if (!visited.has(node)) { | ||
const value = nodeValues[key]; | ||
this.knownRelations.set(node as string, value); | ||
this.knownRelations.set(node as string, value); // memorize this relation | ||
if (value) { | ||
// iteratively build the similarity vector | ||
// stop as soon as this relation is certainly useless | ||
const similarityScores: number[] = []; | ||
@@ -167,2 +184,3 @@ for (const conf of this.similarityConfigurations) { | ||
if (!isNaN(similarity)) { | ||
// NaN similarities are not worth following | ||
similarityScores.push(similarity); | ||
@@ -175,2 +193,3 @@ } else { | ||
if (similarityScores.length === this.similarityConfigurations.length) { | ||
// the entire vector was evaluated, so it's worth following | ||
queue.push(new RankedRelation(node, similarityScores)); | ||
@@ -183,4 +202,4 @@ } | ||
// signal that we're done following links | ||
this.activeQueries.delete(input); | ||
this.emit("end", input); | ||
@@ -187,0 +206,0 @@ } |
@@ -1,18 +0,20 @@ | ||
import IQueryEmitter from "./IQueryEmitter"; | ||
import ResultEmitter from "./ResultEmitter"; | ||
import { Quad } from "rdf-js"; | ||
export default class QueryAggregator extends IQueryEmitter { | ||
protected singleSourceAgents: IQueryEmitter[]; | ||
/* | ||
* Merges results from multiple other emitters | ||
*/ | ||
export default class QueryAggregator extends ResultEmitter { | ||
protected subEmitters: ResultEmitter[]; | ||
protected finished: Map<string, number>; | ||
constructor(sourceAgents: IQueryEmitter[]) { | ||
constructor(subEmitters: ResultEmitter[]) { | ||
super(); | ||
this.singleSourceAgents = sourceAgents; | ||
this.subEmitters = subEmitters; | ||
this.finished = new Map(); | ||
const self = this; | ||
for (const source of this.singleSourceAgents) { | ||
for (const source of this.subEmitters) { | ||
source.on("data", (q) => self.emit("data", q)); | ||
source.on("end", (q) => self.processEnd(q)); | ||
//source.on("reset", () => self.emit("reset")); | ||
} | ||
@@ -23,3 +25,3 @@ } | ||
this.emit("reset"); | ||
for (const source of this.singleSourceAgents) { | ||
for (const source of this.subEmitters) { | ||
source.query(input); | ||
@@ -31,3 +33,3 @@ } | ||
let result: Quad[] = []; | ||
for (const source of this.singleSourceAgents) { | ||
for (const source of this.subEmitters) { | ||
result = result.concat(source.resolveSubject(uri)); | ||
@@ -42,3 +44,3 @@ } | ||
if (count === this.singleSourceAgents.length) { | ||
if (count === this.subEmitters.length) { | ||
this.finished.delete(query); | ||
@@ -45,0 +47,0 @@ this.emit("end", query); |
@@ -5,8 +5,13 @@ const SortedArray = require("collections/sorted-array"); | ||
import IQueryEmitter from "./IQueryEmitter"; | ||
import ResultEmitter from "./ResultEmitter"; | ||
import INormalizer from "./normalizers/INormalizer"; | ||
import { SimilarityConfiguration } from "./similarity/SimilarityConfiguration"; | ||
import SimilarityConfiguration from "./similarity/SimilarityConfiguration"; | ||
export default class ResultRanking extends IQueryEmitter { | ||
protected subEmitter: IQueryEmitter; | ||
/* | ||
* Emits only the most relevant results from the subEmitter | ||
* Emits "reset" events each time the top N results change, | ||
* followed by "data" events for each result in the top N | ||
*/ | ||
export default class ResultRanking extends ResultEmitter { | ||
protected subEmitter: ResultEmitter; | ||
protected activeQuery: string; | ||
@@ -21,3 +26,3 @@ protected size: number; | ||
size: number, | ||
subEmitter: IQueryEmitter, | ||
subEmitter: ResultEmitter, | ||
normalizer: INormalizer, | ||
@@ -24,0 +29,0 @@ similarityConfigurations: SimilarityConfiguration[], |
import { Quad } from "rdf-js"; | ||
import IQueryEmitter from "./IQueryEmitter"; | ||
import ResultEmitter from "./ResultEmitter"; | ||
import N3 = require('n3'); | ||
export default class ResultStore extends IQueryEmitter { | ||
protected subEmitter: IQueryEmitter; | ||
/* | ||
* Intercepts all data from the subEmitter, and only reemits the literal quads | ||
* Clients can then request everything we know about a given subject using the resolveSubject method | ||
*/ | ||
export default class ResultStore extends ResultEmitter { | ||
protected subEmitter: ResultEmitter; | ||
protected store: N3.Store; | ||
constructor(sourceAgents: IQueryEmitter) { | ||
constructor(sourceAgents: ResultEmitter) { | ||
super(); | ||
@@ -12,0 +16,0 @@ this.subEmitter = sourceAgents; |
import { Quad } from "rdf-js"; | ||
import IQueryEmitter from "./IQueryEmitter"; | ||
import ResultEmitter from "./ResultEmitter"; | ||
export default class ResultUniqueFilter extends IQueryEmitter { | ||
protected subEmitter: IQueryEmitter; | ||
/* | ||
* The same value can be in multiple pages across multiple datasets | ||
* Avoid duplicate results by just slapping a unique filter in there | ||
*/ | ||
export default class ResultUniqueFilter extends ResultEmitter { | ||
protected subEmitter: ResultEmitter; | ||
protected known: Set<string>; | ||
constructor(sourceAgents: IQueryEmitter) { | ||
constructor(sourceAgents: ResultEmitter) { | ||
super(); | ||
@@ -11,0 +15,0 @@ this.subEmitter = sourceAgents; |
@@ -0,1 +1,3 @@ | ||
// like the dice coefficient, but only looks for bigrams out of `expected` in `found` | ||
// this doesn't penalize longer results if we know the `expected` string is just incomplete | ||
export default function asymmetricDiceCoefficient(expected: string, found: string) { | ||
@@ -2,0 +4,0 @@ let firstBigrams = new Map(); |
/* | ||
* Length of the common prefix | ||
*/ | ||
export function commonPrefixSimilarity(expected: string, found: string) { | ||
export default function commonPrefixSimilarity(expected: string, found: string) { | ||
const minLength = Math.min(expected.length, found.length); | ||
@@ -6,0 +6,0 @@ |
@@ -1,2 +0,2 @@ | ||
import { commonPrefixSimilarity } from "./commonPrefix"; | ||
import commonPrefixSimilarity from "./commonPrefix"; | ||
import fuzzyIndexSimilarity from "./fuzzyIndex"; | ||
@@ -7,4 +7,4 @@ | ||
*/ | ||
export function fuzzyPrefixSimilarity(expected: string, found: string) { | ||
export default function fuzzyPrefixSimilarity(expected: string, found: string) { | ||
return commonPrefixSimilarity(expected, found) + fuzzyIndexSimilarity(expected, found); | ||
} |
import { FilterFunction, SimilarityFunction } from "./SimilarityFunction"; | ||
export class SimilarityConfiguration { | ||
export default class SimilarityConfiguration { | ||
private similarityFunction: SimilarityFunction; | ||
@@ -12,3 +12,5 @@ private filterFunction?: FilterFunction; | ||
public evaluate(expected: string, found: string) { | ||
public evaluate(expected: string, found: string): number { | ||
// higher is better | ||
// NaN indicates an ineligible result | ||
const similarity = this.similarityFunction(expected, found); | ||
@@ -15,0 +17,0 @@ |
@@ -1,3 +0,5 @@ | ||
// semantics of return is bigger = better | ||
// bigger is better, NaN is ineligible | ||
export type SimilarityFunction = (expected: string, found: string) => number; | ||
// return true if this similarity is good enough, false if not | ||
export type FilterFunction = (expected: string, found: string, similarity: number) => boolean; |
Sorry, the diff of this file is not supported yet
8069956
7999
81
Updated @types/node@^12.19.3