lunr
Advanced tools
Comparing version 2.0.3 to 2.1.0-alpha.1
@@ -34,4 +34,4 @@ /*! | ||
this.invertedIndex = Object.create(null) | ||
this.documentTermFrequencies = {} | ||
this.documentLengths = {} | ||
this.fieldTermFrequencies = {} | ||
this.fieldLengths = {} | ||
this.tokenizer = lunr.tokenizer | ||
@@ -119,8 +119,5 @@ this.pipeline = new lunr.Pipeline | ||
lunr.Builder.prototype.add = function (doc) { | ||
var docRef = doc[this._ref], | ||
documentTerms = {} | ||
var docRef = doc[this._ref] | ||
this.documentCount += 1 | ||
this.documentTermFrequencies[docRef] = documentTerms | ||
this.documentLengths[docRef] = 0 | ||
@@ -131,6 +128,11 @@ for (var i = 0; i < this._fields.length; i++) { | ||
tokens = this.tokenizer(field), | ||
terms = this.pipeline.run(tokens) | ||
terms = this.pipeline.run(tokens), | ||
fieldRef = new lunr.FieldRef (docRef, fieldName), | ||
fieldTerms = {} | ||
this.fieldTermFrequencies[fieldRef] = fieldTerms | ||
this.fieldLengths[fieldRef] = 0 | ||
// store the length of this field for this document | ||
this.documentLengths[docRef] += terms.length | ||
this.fieldLengths[fieldRef] += terms.length | ||
@@ -141,7 +143,7 @@ // calculate term frequencies for this field | ||
if (documentTerms[term] == undefined) { | ||
documentTerms[term] = 0 | ||
if (fieldTerms[term] == undefined) { | ||
fieldTerms[term] = 0 | ||
} | ||
documentTerms[term] += 1 | ||
fieldTerms[term] += 1 | ||
@@ -189,14 +191,26 @@ // add to inverted index | ||
*/ | ||
lunr.Builder.prototype.calculateAverageDocumentLengths = function () { | ||
lunr.Builder.prototype.calculateAverageFieldLengths = function () { | ||
var documentRefs = Object.keys(this.documentLengths), | ||
numberOfDocuments = documentRefs.length, | ||
allDocumentsLength = 0 | ||
var fieldRefs = Object.keys(this.fieldLengths), | ||
numberOfFields = fieldRefs.length, | ||
accumulator = {}, | ||
documentsWithField = {} | ||
for (var i = 0; i < numberOfDocuments; i++) { | ||
var documentRef = documentRefs[i] | ||
allDocumentsLength += this.documentLengths[documentRef] | ||
for (var i = 0; i < numberOfFields; i++) { | ||
var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), | ||
field = fieldRef.fieldName | ||
documentsWithField[field] || (documentsWithField[field] = 0) | ||
documentsWithField[field] += 1 | ||
accumulator[field] || (accumulator[field] = 0) | ||
accumulator[field] += this.fieldLengths[fieldRef] | ||
} | ||
this.averageDocumentLength = allDocumentsLength / numberOfDocuments | ||
for (var i = 0; i < this._fields.length; i++) { | ||
var field = this._fields[i] | ||
accumulator[field] = accumulator[field] / documentsWithField[field] | ||
} | ||
this.averageFieldLength = accumulator | ||
} | ||
@@ -209,12 +223,13 @@ | ||
*/ | ||
lunr.Builder.prototype.createDocumentVectors = function () { | ||
var documentVectors = {}, | ||
docRefs = Object.keys(this.documentTermFrequencies), | ||
docRefsLength = docRefs.length | ||
lunr.Builder.prototype.createFieldVectors = function () { | ||
var fieldVectors = {}, | ||
fieldRefs = Object.keys(this.fieldTermFrequencies), | ||
fieldRefsLength = fieldRefs.length | ||
for (var i = 0; i < docRefsLength; i++) { | ||
var docRef = docRefs[i], | ||
documentLength = this.documentLengths[docRef], | ||
documentVector = new lunr.Vector, | ||
termFrequencies = this.documentTermFrequencies[docRef], | ||
for (var i = 0; i < fieldRefsLength; i++) { | ||
var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]), | ||
field = fieldRef.fieldName, | ||
fieldLength = this.fieldLengths[fieldRef], | ||
fieldVector = new lunr.Vector, | ||
termFrequencies = this.fieldTermFrequencies[fieldRef], | ||
terms = Object.keys(termFrequencies), | ||
@@ -228,3 +243,3 @@ termsLength = terms.length | ||
idf = lunr.idf(this.invertedIndex[term], this.documentCount), | ||
score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (documentLength / this.averageDocumentLength)) + tf), | ||
score = idf * ((this._k1 + 1) * tf) / (this._k1 * (1 - this._b + this._b * (fieldLength / this.averageFieldLength[field])) + tf), | ||
scoreWithPrecision = Math.round(score * 1000) / 1000 | ||
@@ -238,9 +253,9 @@ // Converts 1.23456789 to 1.234. | ||
documentVector.insert(termIndex, scoreWithPrecision) | ||
fieldVector.insert(termIndex, scoreWithPrecision) | ||
} | ||
documentVectors[docRef] = documentVector | ||
fieldVectors[fieldRef] = fieldVector | ||
} | ||
this.documentVectors = documentVectors | ||
this.fieldVectors = fieldVectors | ||
} | ||
@@ -269,4 +284,4 @@ | ||
lunr.Builder.prototype.build = function () { | ||
this.calculateAverageDocumentLengths() | ||
this.createDocumentVectors() | ||
this.calculateAverageFieldLengths() | ||
this.createFieldVectors() | ||
this.createTokenSet() | ||
@@ -276,10 +291,6 @@ | ||
invertedIndex: this.invertedIndex, | ||
documentVectors: this.documentVectors, | ||
fieldVectors: this.fieldVectors, | ||
tokenSet: this.tokenSet, | ||
averageDocumentLength: this.averageDocumentLength, | ||
documentCount: this.documentCount, | ||
fields: this._fields, | ||
pipeline: this.searchPipeline, | ||
b: this._b, | ||
k1: this._k1 | ||
pipeline: this.searchPipeline | ||
}) | ||
@@ -286,0 +297,0 @@ } |
@@ -17,4 +17,6 @@ /** | ||
return (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5) | ||
var x = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5) | ||
return Math.log(1 + Math.abs(x)) | ||
} | ||
@@ -19,6 +19,2 @@ /*! | ||
* @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens. | ||
* @param {number} attrs.documentCount - The total number of documents held in the index. | ||
* @param {number} attrs.averageDocumentLength - The average length of all documents in the index. | ||
* @param {number} attrs.b - A parameter for the document scoring algorithm. | ||
* @param {number} attrs.k1 - A parameter for the document scoring algorithm. | ||
* @param {string[]} attrs.fields - The names of indexed document fields. | ||
@@ -29,8 +25,4 @@ * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms. | ||
this.invertedIndex = attrs.invertedIndex | ||
this.documentVectors = attrs.documentVectors | ||
this.fieldVectors = attrs.fieldVectors | ||
this.tokenSet = attrs.tokenSet | ||
this.documentCount = attrs.documentCount | ||
this.averageDocumentLength = attrs.averageDocumentLength | ||
this.b = attrs.b | ||
this.k1 = attrs.k1 | ||
this.fields = attrs.fields | ||
@@ -139,3 +131,3 @@ this.pipeline = attrs.pipeline | ||
var query = new lunr.Query(this.fields), | ||
matchingDocuments = Object.create(null), | ||
matchingFields = Object.create(null), | ||
queryVector = new lunr.Vector | ||
@@ -194,6 +186,3 @@ | ||
posting = this.invertedIndex[expandedTerm], | ||
termIndex = posting._index, | ||
idf = lunr.idf(posting, this.documentCount), | ||
tf = 1, | ||
score = idf * ((this.k1 + 1) * tf) / (this.k1 * (1 - this.b + this.b * (query.clauses.length / this.averageDocumentLength)) + tf) | ||
termIndex = posting._index | ||
@@ -211,3 +200,3 @@ /* | ||
*/ | ||
queryVector.upsert(termIndex, score * clause.boost, function (a, b) { return a + b }) | ||
queryVector.upsert(termIndex, 1 * clause.boost, function (a, b) { return a + b }) | ||
@@ -235,2 +224,3 @@ for (var k = 0; k < clause.fields.length; k++) { | ||
var matchingDocumentRef = matchingDocumentRefs[l], | ||
matchingFieldRef = new lunr.FieldRef (matchingDocumentRef, field), | ||
documentMetadata, matchData | ||
@@ -241,6 +231,6 @@ | ||
if (matchingDocumentRef in matchingDocuments) { | ||
matchingDocuments[matchingDocumentRef].combine(matchData) | ||
if (matchingFieldRef in matchingFields) { | ||
matchingFields[matchingFieldRef].combine(matchData) | ||
} else { | ||
matchingDocuments[matchingDocumentRef] = matchData | ||
matchingFields[matchingFieldRef] = matchData | ||
} | ||
@@ -254,6 +244,6 @@ | ||
var matchingDocumentRefs = Object.keys(matchingDocuments), | ||
results = [] | ||
var matchingFieldRefs = Object.keys(matchingFields), | ||
results = {} | ||
for (var i = 0; i < matchingDocumentRefs.length; i++) { | ||
for (var i = 0; i < matchingFieldRefs.length; i++) { | ||
/* | ||
@@ -270,16 +260,26 @@ * With all the matching documents found they now need | ||
*/ | ||
var ref = matchingDocumentRefs[i], | ||
documentVector = this.documentVectors[ref], | ||
score = queryVector.similarity(documentVector) | ||
var fieldRef = lunr.FieldRef.fromString(matchingFieldRefs[i]), | ||
docRef = fieldRef.docRef, | ||
fieldVector = this.fieldVectors[fieldRef], | ||
score = queryVector.similarity(fieldVector) | ||
results.push({ | ||
ref: ref, | ||
score: score, | ||
matchData: matchingDocuments[ref] | ||
}) | ||
if (docRef in results) { | ||
results[docRef].score += score | ||
results[docRef].matchData.combine(matchingFields[fieldRef]) | ||
} else { | ||
results[docRef] = { | ||
ref: docRef, | ||
score: score, | ||
matchData: matchingFields[fieldRef] | ||
} | ||
} | ||
} | ||
return results.sort(function (a, b) { | ||
return b.score - a.score | ||
}) | ||
return Object.keys(results) | ||
.map(function (key) { | ||
return results[key] | ||
}) | ||
.sort(function (a, b) { | ||
return b.score - a.score | ||
}) | ||
} | ||
@@ -302,5 +302,5 @@ | ||
var documentVectors = Object.keys(this.documentVectors) | ||
var fieldVectors = Object.keys(this.fieldVectors) | ||
.map(function (ref) { | ||
return [ref, this.documentVectors[ref].toJSON()] | ||
return [ref, this.fieldVectors[ref].toJSON()] | ||
}, this) | ||
@@ -310,7 +310,4 @@ | ||
version: lunr.version, | ||
averageDocumentLength: this.averageDocumentLength, | ||
b: this.b, | ||
k1: this.k1, | ||
fields: this.fields, | ||
documentVectors: documentVectors, | ||
fieldVectors: fieldVectors, | ||
invertedIndex: invertedIndex, | ||
@@ -329,5 +326,4 @@ pipeline: this.pipeline.toJSON() | ||
var attrs = {}, | ||
documentVectors = {}, | ||
serializedVectors = serializedIndex.documentVectors, | ||
documentCount = 0, | ||
fieldVectors = {}, | ||
serializedVectors = serializedIndex.fieldVectors, | ||
invertedIndex = {}, | ||
@@ -342,3 +338,3 @@ serializedInvertedIndex = serializedIndex.invertedIndex, | ||
for (var i = 0; i < serializedVectors.length; i++, documentCount++) { | ||
for (var i = 0; i < serializedVectors.length; i++) { | ||
var tuple = serializedVectors[i], | ||
@@ -348,3 +344,3 @@ ref = tuple[0], | ||
documentVectors[ref] = new lunr.Vector(elements) | ||
fieldVectors[ref] = new lunr.Vector(elements) | ||
} | ||
@@ -363,9 +359,5 @@ | ||
attrs.b = serializedIndex.b | ||
attrs.k1 = serializedIndex.k1 | ||
attrs.fields = serializedIndex.fields | ||
attrs.averageDocumentLength = serializedIndex.averageDocumentLength | ||
attrs.documentCount = documentCount | ||
attrs.documentVectors = documentVectors | ||
attrs.fieldVectors = fieldVectors | ||
attrs.invertedIndex = invertedIndex | ||
@@ -372,0 +364,0 @@ attrs.tokenSet = tokenSetBuilder.root |
@@ -17,2 +17,7 @@ /** | ||
lunr.Query.wildcard = new String ("*") | ||
lunr.Query.wildcard.NONE = 0 | ||
lunr.Query.wildcard.LEADING = 1 | ||
lunr.Query.wildcard.TRAILING = 2 | ||
/** | ||
@@ -51,2 +56,14 @@ * A single clause in a {@link lunr.Query} contains a term and details on how to | ||
if (!('wildcard' in clause)) { | ||
clause.wildcard = lunr.Query.wildcard.NONE | ||
} | ||
if ((clause.wildcard & lunr.Query.wildcard.LEADING) && (clause.term.charAt(0) != lunr.Query.wildcard)) { | ||
clause.term = "*" + clause.term | ||
} | ||
if ((clause.wildcard & lunr.Query.wildcard.TRAILING) && (clause.term.slice(-1) != lunr.Query.wildcard)) { | ||
clause.term = "" + clause.term + "*" | ||
} | ||
this.clauses.push(clause) | ||
@@ -53,0 +70,0 @@ |
{ | ||
"name": "lunr", | ||
"description": "Simple full-text search in your browser.", | ||
"version": "2.0.3", | ||
"version": "2.1.0-alpha.1", | ||
"author": "Oliver Nightingale", | ||
@@ -6,0 +6,0 @@ "keywords": ["search"], |
@@ -167,5 +167,5 @@ suite('lunr.Builder', function () { | ||
test('builds a vector space of the document', function () { | ||
assert.property(this.builder.documentVectors, 'id') | ||
assert.instanceOf(this.builder.documentVectors.id, lunr.Vector) | ||
test('builds a vector space of the document fields', function () { | ||
assert.property(this.builder.fieldVectors, 'title/id') | ||
assert.instanceOf(this.builder.fieldVectors['title/id'], lunr.Vector) | ||
}) | ||
@@ -186,6 +186,6 @@ | ||
test('calculates average document length', function () { | ||
assert.equal(1, this.builder.averageDocumentLength) | ||
test('calculates average field length', function () { | ||
assert.equal(1, this.builder.averageFieldLength['title']) | ||
}) | ||
}) | ||
}) |
@@ -52,3 +52,80 @@ suite('lunr.Query', function () { | ||
}) | ||
suite('wildcards', function () { | ||
suite('none', function () { | ||
setup(function () { | ||
this.query.clause({ | ||
term: 'foo', | ||
wildcard: lunr.Query.wildcard.NONE | ||
}) | ||
this.clause = this.query.clauses[0] | ||
}) | ||
test('no wildcard', function () { | ||
assert.equal(this.clause.term, 'foo') | ||
}) | ||
}) | ||
suite('leading', function () { | ||
setup(function () { | ||
this.query.clause({ | ||
term: 'foo', | ||
wildcard: lunr.Query.wildcard.LEADING | ||
}) | ||
this.clause = this.query.clauses[0] | ||
}) | ||
test('adds wildcard', function () { | ||
assert.equal(this.clause.term, '*foo') | ||
}) | ||
}) | ||
suite('trailing', function () { | ||
setup(function () { | ||
this.query.clause({ | ||
term: 'foo', | ||
wildcard: lunr.Query.wildcard.TRAILING | ||
}) | ||
this.clause = this.query.clauses[0] | ||
}) | ||
test('adds wildcard', function () { | ||
assert.equal(this.clause.term, 'foo*') | ||
}) | ||
}) | ||
suite('leading and trailing', function () { | ||
setup(function () { | ||
this.query.clause({ | ||
term: 'foo', | ||
wildcard: lunr.Query.wildcard.TRAILING | lunr.Query.wildcard.LEADING | ||
}) | ||
this.clause = this.query.clauses[0] | ||
}) | ||
test('adds wildcards', function () { | ||
assert.equal(this.clause.term, '*foo*') | ||
}) | ||
}) | ||
suite('existing', function () { | ||
setup(function () { | ||
this.query.clause({ | ||
term: '*foo*', | ||
wildcard: lunr.Query.wildcard.TRAILING | lunr.Query.wildcard.LEADING | ||
}) | ||
this.clause = this.query.clauses[0] | ||
}) | ||
test('no additional wildcards', function () { | ||
assert.equal(this.clause.term, '*foo*') | ||
}) | ||
}) | ||
}) | ||
}) | ||
}) |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
No v1
Quality: Package is not semver >=1. This means it is not stable and does not support ^ ranges.
Found 1 instance in 1 package
583928
76
18392
1