Socket
Socket
Sign inDemoInstall

lunr

Package Overview
Dependencies
Maintainers
1
Versions
64
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

lunr - npm Package Compare versions

Comparing version 0.7.2 to 2.0.0-alpha.1

.eslintrc.json

2

bower.json
{
"name": "lunr.js",
"version": "0.7.1",
"version": "0.6.0",
"main": "lunr.js",

@@ -5,0 +5,0 @@ "ignore": [

{
"name": "lunr",
"repo": "olivernn/lunr.js",
"version": "0.7.1",
"version": "0.6.0",
"description": "Simple full-text search in your browser.",

@@ -6,0 +6,0 @@ "license": "MIT",

require([
'/example/jquery.js',
'/example/mustache.js',
'../lunr.js',
'text!templates/question_view.mustache',
'text!templates/question_list.mustache',
'text!example_data.json',
'text!example_index.json'
], function (_, Mustache, lunr, questionView, questionList, data, indexDump) {
'text!example_data.json'
], function (lunr, data) {
var renderQuestionList = function (qs) {
$("#question-list-container")
.empty()
.append(Mustache.to_html(questionList, {questions: qs}))
}
var renderQuestionView = function (question) {
$("#question-view-container")
.empty()
.append(Mustache.to_html(questionView, question))
}
window.profile = function (term) {
console.profile('search')
idx.search(term)
console.profileEnd('search')
}
window.search = function (term) {
console.time('search')
idx.search(term)
console.timeEnd('search')
}
var indexDump = JSON.parse(indexDump)
console.time('load')
window.idx = lunr.Index.load(indexDump)
console.timeEnd('load')
var questions = JSON.parse(data).questions.map(function (raw) {

@@ -49,42 +15,40 @@ return {

renderQuestionList(questions)
renderQuestionView(questions[0])
console.time('load')
window.idx = lunr(function () {
this.ref('id')
this.field('title')
this.field('body')
this.field('tags')
$('a.all').bind('click', function () {
renderQuestionList(questions)
$('input').val('')
questions.forEach(function (q) {
this.add(q)
}, this)
})
console.timeEnd('load')
var debounce = function (fn) {
var timeout
return function () {
var args = Array.prototype.slice.call(arguments),
ctx = this
window.profile = function (term) {
console.profile('search')
window.idx.search(term)
console.profileEnd('search')
}
clearTimeout(timeout)
timeout = setTimeout(function () {
fn.apply(ctx, args)
}, 100)
}
window.search = function (term) {
console.time('search')
window.idx.search(term)
console.timeEnd('search')
}
$('input').bind('keyup', debounce(function () {
if ($(this).val() < 2) return
var query = $(this).val()
var results = idx.search(query).map(function (result) {
return questions.filter(function (q) { return q.id === parseInt(result.ref, 10) })[0]
})
window.serialize = function () {
console.time('dump')
var json = JSON.stringify(window.idx)
console.timeEnd('dump')
renderQuestionList(results)
}))
var serialized = JSON.parse(json)
console.profile("load")
var newIdx = lunr.Index.load(serialized)
console.profileEnd("load")
$("#question-list-container").delegate('li', 'click', function () {
var li = $(this)
var id = li.data('question-id')
return newIdx
}
renderQuestionView(questions.filter(function (question) {
return (question.id == id)
})[0])
})
})

@@ -7,464 +7,352 @@ /*!

/**
* lunr.Index is an object that manages a search index. It contains the indexes
* and stores all the tokens and document lookups. It also provides the main
* user facing API for the library.
* An index contains the built index of all documents and provides a query interface
* to the index.
*
* Usually instances of lunr.Index will not be created using this constructor, instead
* lunr.Builder should be used to construct new indexes, or lunr.Index.load should be
* used to load previously built and serialized indexes.
*
* @constructor
* @param {Object} attrs - The attributes of the built search index.
* @param {Object} attrs.invertedIndex - An index of term/field to document reference.
* @param {Object<string, lunr.Vector>} attrs.documentVectors - Document vectors keyed by document reference.
* @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
* @param {number} attrs.documentCount - The total number of documents held in the index.
* @param {number} attrs.averageDocumentLength - The average length of all documents in the index.
* @param {number} attrs.b - A parameter for the document scoring algorithm.
* @param {number} attrs.k1 - A parameter for the document scoring algorithm.
* @param {string[]} attrs.fields - The names of indexed document fields.
* @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
*/
lunr.Index = function () {
this._fields = []
this._ref = 'id'
this.pipeline = new lunr.Pipeline
this.documentStore = new lunr.Store
this.tokenStore = new lunr.TokenStore
this.corpusTokens = new lunr.SortedSet
this.eventEmitter = new lunr.EventEmitter
this.tokenizerFn = lunr.tokenizer
this._idfCache = {}
this.on('add', 'remove', 'update', (function () {
this._idfCache = {}
}).bind(this))
/**
 * Constructs an index from a set of pre-built attributes.
 *
 * Instances are normally produced by lunr.Builder or lunr.Index.load
 * rather than by calling this constructor directly.
 *
 * @constructor
 * @param {Object} attrs - The attributes of the built search index.
 */
lunr.Index = function (attrs) {
  // Copy each pre-built attribute from the builder/loader onto this index.
  var attributeNames = [
    'invertedIndex',
    'documentVectors',
    'tokenSet',
    'documentCount',
    'averageDocumentLength',
    'b',
    'k1',
    'fields',
    'pipeline'
  ]

  for (var i = 0; i < attributeNames.length; i++) {
    var name = attributeNames[i]
    this[name] = attrs[name]
  }
}
/**
* Bind a handler to events being emitted by the index.
*
* The handler can be bound to many events at the same time.
*
* @param {String} [eventName] The name(s) of events to bind the function to.
* @param {Function} fn The serialised set to load.
* @memberOf Index
* A result contains details of a document matching a search query.
* @typedef {Object} lunr.Index~Result
* @property {string} ref - The reference of the document this result represents.
* @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
* @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
*/
/**
 * Binds a handler to events emitted by the index.
 *
 * All arguments are forwarded unchanged to the underlying event
 * emitter, so a handler may be bound to several events at once.
 *
 * @returns {*} Whatever the event emitter's addListener returns.
 */
lunr.Index.prototype.on = function () {
  var emitter = this.eventEmitter
  return emitter.addListener.apply(emitter, Array.prototype.slice.call(arguments))
}
/**
* Removes a handler from an event being emitted by the index.
* Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
* query language which itself is parsed into an instance of lunr.Query.
*
* @param {String} eventName The name of events to remove the function from.
* @param {Function} fn The serialised set to load.
* @memberOf Index
*/
/**
 * Removes a previously bound handler from an event emitted by the index.
 *
 * @param {String} name - The name of the event to remove the function from.
 * @param {Function} fn - The handler to remove.
 * @returns {*} Whatever the event emitter's removeListener returns.
 */
lunr.Index.prototype.off = function (name, fn) {
  var emitter = this.eventEmitter
  return emitter.removeListener(name, fn)
}
/**
* Loads a previously serialised index.
* For programmatically building queries it is advised to directly use lunr.Query, the query language
* is best used for human entered text rather than program generated text.
*
* Issues a warning if the index being imported was serialised
* by a different version of lunr.
* At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
* and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'
* or 'world', though those that contain both will rank higher in the results.
*
* @param {Object} serialisedData The serialised set to load.
* @returns {lunr.Index}
* @memberOf Index
*/
/**
 * Loads a previously serialised index.
 *
 * Issues a warning (but still proceeds) when the serialised data was
 * produced by a different version of lunr.
 *
 * @param {Object} serialisedData - The serialised index to load.
 * @returns {lunr.Index}
 */
lunr.Index.load = function (serialisedData) {
  if (serialisedData.version !== lunr.version) {
    lunr.utils.warn('version mismatch: current ' + lunr.version + ' importing ' + serialisedData.version)
  }

  var idx = new this

  idx._fields = serialisedData.fields
  idx._ref = serialisedData.ref
  idx.tokenizer(lunr.tokenizer.load(serialisedData.tokenizer))

  // Each serialised component is rebuilt by its own type's load function.
  var components = [
    ['documentStore', lunr.Store],
    ['tokenStore', lunr.TokenStore],
    ['corpusTokens', lunr.SortedSet],
    ['pipeline', lunr.Pipeline]
  ]

  components.forEach(function (component) {
    var key = component[0], type = component[1]
    idx[key] = type.load(serialisedData[key])
  })

  return idx
}
/**
* Adds a field to the list of fields that will be searchable within documents
* in the index.
* Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
* be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
* wildcards will increase the number of documents that will be found but can also have a negative
* impact on query performance, especially with wildcards at the beginning of a term.
*
* An optional boost param can be passed to affect how much tokens in this field
* rank in search results, by default the boost value is 1.
* Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
* hello in the title field will match this query. Using a field not present in the index will lead
* to an error being thrown.
*
* Fields should be added before any documents are added to the index, fields
* that are added after documents are added to the index will only apply to new
* documents added to the index.
* Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
* boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
* to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
* Avoid large values for edit distance to improve query performance.
*
* @param {String} fieldName The name of the field within the document that
* should be indexed
* @param {Number} boost An optional boost that can be applied to terms in this
* field.
* @returns {lunr.Index}
* @memberOf Index
* @typedef {string} lunr.Index~QueryString
* @example <caption>Simple single term query</caption>
* hello
* @example <caption>Multiple term query</caption>
* hello world
* @example <caption>term scoped to a field</caption>
* title:hello
* @example <caption>term with a boost of 10</caption>
* hello^10
* @example <caption>term with an edit distance of 2</caption>
* hello~2
*/
/**
 * Adds a field to the list of fields that will be searchable within
 * documents in the index.
 *
 * @param {String} fieldName - The name of the document field to index.
 * @param {Object} [opts] - Optional settings for this field.
 * @param {Number} [opts.boost=1] - Boost applied to tokens in this field.
 * @returns {lunr.Index} This index, for chaining.
 */
lunr.Index.prototype.field = function (fieldName, opts) {
  var options = opts || {}

  this._fields.push({
    name: fieldName,
    boost: options.boost || 1
  })

  return this
}
/**
* Sets the property used to uniquely identify documents added to the index,
* by default this property is 'id'.
* Performs a search against the index using lunr query syntax.
*
* This should only be changed before adding documents to the index, changing
* the ref property without resetting the index can lead to unexpected results.
* Results will be returned sorted by their score, the most relevant results
* will be returned first.
*
* The value of ref can be of any type but it _must_ be stably comparable and
* orderable.
* For more programmatic querying use lunr.Index#query.
*
* @param {String} refName The property to use to uniquely identify the
* documents in the index.
* @param {Boolean} emitEvent Whether to emit add events, defaults to true
* @returns {lunr.Index}
* @memberOf Index
* @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
* @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
* @returns {lunr.Index~Result[]}
*/
lunr.Index.prototype.ref = function (refName) {
this._ref = refName
return this
/**
 * Performs a search against the index using lunr query syntax.
 *
 * The query string is parsed into a lunr.Query which is then executed
 * via lunr.Index#query; results come back sorted most relevant first.
 *
 * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
 * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
 * @returns {lunr.Index~Result[]}
 */
lunr.Index.prototype.search = function (queryString) {
  var buildQuery = function (query) {
    var parser = new lunr.QueryParser(queryString, query)
    parser.parse()
  }

  return this.query(buildQuery)
}
/**
* Sets the tokenizer used for this index.
* A query builder callback provides a query object to be used to express
* the query to perform on the index.
*
* By default the index will use the default tokenizer, lunr.tokenizer. The tokenizer
* should only be changed before adding documents to the index. Changing the tokenizer
* without re-building the index can lead to unexpected results.
*
* @param {Function} fn The function to use as a tokenizer.
* @returns {lunr.Index}
* @memberOf Index
* @callback lunr.Index~queryBuilder
* @param {lunr.Query} query - The query object to build up.
* @this lunr.Query
*/
/**
 * Sets the tokenizer used by this index.
 *
 * Warns (but still installs the function) when the tokenizer is not
 * registered, since unregistered tokenizers cannot be round-tripped
 * through serialisation.
 *
 * @param {Function} fn - The function to use as a tokenizer.
 * @returns {lunr.Index} This index, for chaining.
 */
lunr.Index.prototype.tokenizer = function (fn) {
  var label = fn.label
  var registered = label && (label in lunr.tokenizer.registeredFunctions)

  if (!registered) {
    lunr.utils.warn('Function is not a registered tokenizer. This may cause problems when serialising the index')
  }

  this.tokenizerFn = fn
  return this
}
/**
* Add a document to the index.
* Performs a query against the index using the yielded lunr.Query object.
*
* This is the way new documents enter the index, this function will run the
* fields from the document through the index's pipeline and then add it to
* the index, it will then show up in search results.
* If performing programmatic queries against the index, this method is preferred
* over lunr.Index#search so as to avoid the additional query parsing overhead.
*
* An 'add' event is emitted with the document that has been added and the index
* the document has been added to. This event can be silenced by passing false
* as the second argument to add.
* A query object is yielded to the supplied function which should be used to
* express the query to be run against the index.
*
* @param {Object} doc The document to add to the index.
* @param {Boolean} emitEvent Whether or not to emit events, default true.
* @memberOf Index
* Note that although this function takes a callback parameter it is _not_ an
* asynchronous operation, the callback is just yielded a query object to be
* customized.
*
* @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
* @returns {lunr.Index~Result[]}
*/
lunr.Index.prototype.add = function (doc, emitEvent) {
var docTokens = {},
allDocumentTokens = new lunr.SortedSet,
docRef = doc[this._ref],
emitEvent = emitEvent === undefined ? true : emitEvent
lunr.Index.prototype.query = function (fn) {
// for each query clause
// * process terms
// * expand terms from token set
// * find matching documents and metadata
// * get document vectors
// * score documents
this._fields.forEach(function (field) {
var fieldTokens = this.pipeline.run(this.tokenizerFn(doc[field.name]))
var query = new lunr.Query(this.fields),
matchingDocuments = {},
queryVector = new lunr.Vector
docTokens[field.name] = fieldTokens
fn.call(query, query)
for (var i = 0; i < fieldTokens.length; i++) {
var token = fieldTokens[i]
allDocumentTokens.add(token)
this.corpusTokens.add(token)
}
}, this)
for (var i = 0; i < query.clauses.length; i++) {
/*
* Unless the pipeline has been disabled for this term, which is
* the case for terms with wildcards, we need to pass the clause
* term through the search pipeline. A pipeline returns an array
* of processed terms. Pipeline functions may expand the passed
* term, which means we may end up performing multiple index lookups
* for a single query term.
*/
var clause = query.clauses[i],
terms = null
this.documentStore.set(docRef, allDocumentTokens)
for (var i = 0; i < allDocumentTokens.length; i++) {
var token = allDocumentTokens.elements[i]
var tf = 0;
for (var j = 0; j < this._fields.length; j++){
var field = this._fields[j]
var fieldTokens = docTokens[field.name]
var fieldLength = fieldTokens.length
if (!fieldLength) continue
var tokenCount = 0
for (var k = 0; k < fieldLength; k++){
if (fieldTokens[k] === token){
tokenCount++
}
}
tf += (tokenCount / fieldLength * field.boost)
if (clause.usePipeline) {
terms = this.pipeline.runString(clause.term)
} else {
terms = [clause.term]
}
this.tokenStore.add(token, { ref: docRef, tf: tf })
};
for (var m = 0; m < terms.length; m++) {
var term = terms[m]
if (emitEvent) this.eventEmitter.emit('add', doc, this)
}
/*
* Each term returned from the pipeline needs to use the same query
* clause object, e.g. the same boost and or edit distance. The
* simplest way to do this is to re-use the clause object but mutate
* its term property.
*/
clause.term = term
/**
* Removes a document from the index.
*
* To make sure documents no longer show up in search results they can be
* removed from the index using this method.
*
* The document passed only needs to have the same ref property value as the
* document that was added to the index, they could be completely different
* objects.
*
* A 'remove' event is emitted with the document that has been removed and the index
* the document has been removed from. This event can be silenced by passing false
* as the second argument to remove.
*
* @param {Object} doc The document to remove from the index.
* @param {Boolean} emitEvent Whether to emit remove events, defaults to true
* @memberOf Index
*/
lunr.Index.prototype.remove = function (doc, emitEvent) {
var docRef = doc[this._ref],
emitEvent = emitEvent === undefined ? true : emitEvent
/*
* From the term in the clause we create a token set which will then
* be used to intersect the indexes token set to get a list of terms
* to lookup in the inverted index
*/
var termTokenSet = lunr.TokenSet.fromClause(clause),
expandedTerms = this.tokenSet.intersect(termTokenSet).toArray()
if (!this.documentStore.has(docRef)) return
for (var j = 0; j < expandedTerms.length; j++) {
/*
* For each term calculate the score as the term relates to the
* query using the same calculation used to score documents during
* indexing. This score will be used to build a vector space
* representation of the query.
*
* Also need to discover the terms index to insert into the query
* vector at the right position
*/
var expandedTerm = expandedTerms[j],
posting = this.invertedIndex[expandedTerm],
termIndex = posting._index,
idf = lunr.idf(posting, this.documentCount),
tf = 1,
score = idf * ((this.k1 + 1) * tf) / (this.k1 * (1 - this.b + this.b * (query.clauses.length / this.averageDocumentLength)) + tf)
var docTokens = this.documentStore.get(docRef)
/*
* Inserting the found query term, along with its term index
* into the vector representing the query. It is here that
* any boosts are applied to the score. They could have been
* applied when calculating the score above, but that expression
* is already quite busy.
*/
queryVector.insert(termIndex, score * clause.boost)
this.documentStore.remove(docRef)
for (var k = 0; k < clause.fields.length; k++) {
/*
* For each field that this query term is scoped by (by default
* all fields are in scope) we need to get all the document refs
* that have this term in that field.
*
* The posting is the entry in the invertedIndex for the matching
* term from above.
*/
var field = clause.fields[k],
fieldPosting = posting[field],
matchingDocumentRefs = Object.keys(fieldPosting)
docTokens.forEach(function (token) {
this.tokenStore.remove(token, docRef)
}, this)
for (var l = 0; l < matchingDocumentRefs.length; l++) {
/*
* All metadata for this term/field/document triple
* are then extracted and collected into an instance
* of lunr.MatchData ready to be returned in the query
* results
*/
var matchingDocumentRef = matchingDocumentRefs[l],
documentMetadata, matchData
if (emitEvent) this.eventEmitter.emit('remove', doc, this)
}
documentMetadata = fieldPosting[matchingDocumentRef]
matchData = new lunr.MatchData (expandedTerm, field, documentMetadata)
/**
* Updates a document in the index.
*
* When a document contained within the index gets updated, fields changed,
* added or removed, to make sure it correctly matched against search queries,
* it should be updated in the index.
*
* This method is just a wrapper around `remove` and `add`
*
* An 'update' event is emitted with the document that has been updated and the index.
* This event can be silenced by passing false as the second argument to update. Only
* an update event will be fired, the 'add' and 'remove' events of the underlying calls
* are silenced.
*
* @param {Object} doc The document to update in the index.
* @param {Boolean} emitEvent Whether to emit update events, defaults to true
* @see Index.prototype.remove
* @see Index.prototype.add
* @memberOf Index
*/
lunr.Index.prototype.update = function (doc, emitEvent) {
var emitEvent = emitEvent === undefined ? true : emitEvent
if (matchingDocumentRef in matchingDocuments) {
matchingDocuments[matchingDocumentRef].combine(matchData)
} else {
matchingDocuments[matchingDocumentRef] = matchData
}
this.remove(doc, false)
this.add(doc, false)
}
}
}
}
}
if (emitEvent) this.eventEmitter.emit('update', doc, this)
}
var matchingDocumentRefs = Object.keys(matchingDocuments),
results = []
/**
* Calculates the inverse document frequency for a token within the index.
*
* @param {String} token The token to calculate the idf of.
* @see Index.prototype.idf
* @private
* @memberOf Index
*/
lunr.Index.prototype.idf = function (term) {
var cacheKey = "@" + term
if (Object.prototype.hasOwnProperty.call(this._idfCache, cacheKey)) return this._idfCache[cacheKey]
for (var i = 0; i < matchingDocumentRefs.length; i++) {
/*
* With all the matching documents found they now need
* to be sorted by their relevance to the query. This
* is done by retrieving the documents vector representation
* and then finding its similarity with the query vector
* that was constructed earlier.
*
* This score, along with the document ref and any metadata
* we collected into a lunr.MatchData instance are stored
* in the results array ready for returning to the caller
*/
var ref = matchingDocumentRefs[i],
documentVector = this.documentVectors[ref],
score = queryVector.similarity(documentVector)
var documentFrequency = this.tokenStore.count(term),
idf = 1
if (documentFrequency > 0) {
idf = 1 + Math.log(this.documentStore.length / documentFrequency)
results.push({
ref: ref,
score: score,
matchData: matchingDocuments[ref]
})
}
return this._idfCache[cacheKey] = idf
return results.sort(function (a, b) {
return b.score - a.score
})
}
/**
* Searches the index using the passed query.
* Prepares the index for JSON serialization.
*
* Queries should be a string, multiple words are allowed and will lead to an
* AND based query, e.g. `idx.search('foo bar')` will run a search for
* documents containing both 'foo' and 'bar'.
* The schema for this JSON blob will be described in a
* separate JSON schema file.
*
* All query tokens are passed through the same pipeline that document tokens
* are passed through, so any language processing involved will be run on every
* query term.
*
* Each query term is expanded, so that the term 'he' might be expanded to
* 'hello' and 'help' if those terms were already included in the index.
*
* Matching documents are returned as an array of objects, each object contains
* the matching document ref, as set for this index, and the similarity score
* for this document against the query.
*
* @param {String} query The query to search the index with.
* @returns {Object}
* @see Index.prototype.idf
* @see Index.prototype.documentVector
* @memberOf Index
*/
lunr.Index.prototype.search = function (query) {
var queryTokens = this.pipeline.run(this.tokenizerFn(query)),
queryVector = new lunr.Vector,
documentSets = [],
fieldBoosts = this._fields.reduce(function (memo, f) { return memo + f.boost }, 0)
var hasSomeToken = queryTokens.some(function (token) {
return this.tokenStore.has(token)
}, this)
if (!hasSomeToken) return []
queryTokens
.forEach(function (token, i, tokens) {
var tf = 1 / tokens.length * this._fields.length * fieldBoosts,
self = this
var set = this.tokenStore.expand(token).reduce(function (memo, key) {
var pos = self.corpusTokens.indexOf(key),
idf = self.idf(key),
similarityBoost = 1,
set = new lunr.SortedSet
// if the expanded key is not an exact match to the token then
// penalise the score for this key by how different the key is
// to the token.
if (key !== token) {
var diff = Math.max(3, key.length - token.length)
similarityBoost = 1 / Math.log(diff)
}
// calculate the query tf-idf score for this token
// applying an similarityBoost to ensure exact matches
// these rank higher than expanded terms
if (pos > -1) queryVector.insert(pos, tf * idf * similarityBoost)
// add all the documents that have this key into a set
// ensuring that the type of key is preserved
var matchingDocuments = self.tokenStore.get(key),
refs = Object.keys(matchingDocuments),
refsLen = refs.length
for (var i = 0; i < refsLen; i++) {
set.add(matchingDocuments[refs[i]].ref)
}
return memo.union(set)
}, new lunr.SortedSet)
documentSets.push(set)
lunr.Index.prototype.toJSON = function () {
var invertedIndex = Object.keys(this.invertedIndex)
.sort()
.map(function (term) {
return [term, this.invertedIndex[term]]
}, this)
var documentSet = documentSets.reduce(function (memo, set) {
return memo.intersect(set)
})
return documentSet
var documentVectors = Object.keys(this.documentVectors)
.map(function (ref) {
return { ref: ref, score: queryVector.similarity(this.documentVector(ref)) }
return [ref, this.documentVectors[ref].toJSON()]
}, this)
.sort(function (a, b) {
return b.score - a.score
})
return {
version: lunr.version,
averageDocumentLength: this.averageDocumentLength,
b: this.b,
k1: this.k1,
fields: this.fields,
documentVectors: documentVectors,
invertedIndex: invertedIndex,
pipeline: this.pipeline.toJSON()
}
}
/**
* Generates a vector containing all the tokens in the document matching the
* passed documentRef.
* Loads a previously serialized lunr.Index
*
* The vector contains the tf-idf score for each token contained in the
* document with the passed documentRef. The vector will contain an element
* for every token in the indexes corpus, if the document does not contain that
* token the element will be 0.
*
* @param {Object} documentRef The ref to find the document with.
* @returns {lunr.Vector}
* @private
* @memberOf Index
* @param {Object} serializedIndex - A previously serialized lunr.Index
* @returns {lunr.Index}
*/
lunr.Index.prototype.documentVector = function (documentRef) {
var documentTokens = this.documentStore.get(documentRef),
documentTokensLength = documentTokens.length,
documentVector = new lunr.Vector
lunr.Index.load = function (serializedIndex) {
var attrs = {},
documentVectors = {},
serializedVectors = serializedIndex.documentVectors,
documentCount = 0,
invertedIndex = {},
serializedInvertedIndex = serializedIndex.invertedIndex,
tokenSetBuilder = new lunr.TokenSet.Builder,
pipeline = lunr.Pipeline.load(serializedIndex.pipeline)
for (var i = 0; i < documentTokensLength; i++) {
var token = documentTokens.elements[i],
tf = this.tokenStore.get(token)[documentRef].tf,
idf = this.idf(token)
if (serializedIndex.version != lunr.version) {
lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'")
}
documentVector.insert(this.corpusTokens.indexOf(token), tf * idf)
};
for (var i = 0; i < serializedVectors.length; i++, documentCount++) {
var tuple = serializedVectors[i],
ref = tuple[0],
elements = tuple[1]
return documentVector
}
documentVectors[ref] = new lunr.Vector(elements)
}
/**
* Returns a representation of the index ready for serialisation.
*
* @returns {Object}
* @memberOf Index
*/
lunr.Index.prototype.toJSON = function () {
return {
version: lunr.version,
fields: this._fields,
ref: this._ref,
tokenizer: this.tokenizerFn.label,
documentStore: this.documentStore.toJSON(),
tokenStore: this.tokenStore.toJSON(),
corpusTokens: this.corpusTokens.toJSON(),
pipeline: this.pipeline.toJSON()
for (var i = 0; i < serializedInvertedIndex.length; i++) {
var tuple = serializedInvertedIndex[i],
term = tuple[0],
posting = tuple[1]
tokenSetBuilder.insert(term)
invertedIndex[term] = posting
}
}
/**
* Applies a plugin to the current index.
*
* A plugin is a function that is called with the index as its context.
* Plugins can be used to customise or extend the behaviour of the index
* in some way. A plugin is just a function that encapsulates the custom
* behaviour that should be applied to the index.
*
* The plugin function will be called with the index as its argument, additional
* arguments can also be passed when calling use. The function will be called
* with the index as its context.
*
* Example:
*
* var myPlugin = function (idx, arg1, arg2) {
* // `this` is the index to be extended
* // apply any extensions etc here.
* }
*
* var idx = lunr(function () {
* this.use(myPlugin, 'arg1', 'arg2')
* })
*
* @param {Function} plugin The plugin to apply.
* @memberOf Index
*/
lunr.Index.prototype.use = function (plugin) {
var args = Array.prototype.slice.call(arguments, 1)
args.unshift(this)
plugin.apply(this, args)
tokenSetBuilder.finish()
attrs.b = serializedIndex.b
attrs.k1 = serializedIndex.k1
attrs.fields = serializedIndex.fields
attrs.averageDocumentLength = serializedIndex.averageDocumentLength
attrs.documentCount = documentCount
attrs.documentVectors = documentVectors
attrs.invertedIndex = invertedIndex
attrs.tokenSet = tokenSetBuilder.root
attrs.pipeline = pipeline
return new lunr.Index(attrs)
}
/**
* Convenience function for instantiating a new lunr index and configuring it
* with the default pipeline functions and the passed config function.
* A convenience function for configuring and constructing
* a new lunr Index.
*
* When using this convenience function a new index will be created with the
* following functions already in the pipeline:
* A lunr.Builder instance is created and the pipeline setup
* with a trimmer, stop word filter and stemmer.
*
* lunr.StopWordFilter - filters out any stop words before they enter the
* index
* This builder object is yielded to the configuration function
* that is passed as a parameter, allowing the list of fields
* and other builder parameters to be customised.
*
* lunr.stemmer - stems the tokens before entering the index.
* All documents _must_ be added within the passed config function.
*
* Example:
* @example
* var idx = lunr(function () {
* this.field('title')
* this.field('body')
* this.ref('id')
*
* var idx = lunr(function () {
* this.field('title', 10)
* this.field('tags', 100)
* this.field('body')
*
* this.ref('cid')
*
* this.pipeline.add(function () {
* // some custom pipeline function
* })
*
* })
* documents.forEach(function (doc) {
* this.add(doc)
* }, this)
* })
*
* @param {Function} config A function that will be called with the new instance
* of the lunr.Index as both its context and first parameter. It can be used to
* customize the instance of new lunr.Index.
* @namespace
* @module
* @returns {lunr.Index}
*
* @see {@link lunr.Builder}
* @see {@link lunr.Pipeline}
* @see {@link lunr.trimmer}
* @see {@link lunr.stopWordFilter}
* @see {@link lunr.stemmer}
* @namespace {function} lunr
*/
var lunr = function (config) {
var idx = new lunr.Index
var builder = new lunr.Builder
idx.pipeline.add(
builder.pipeline.add(
lunr.trimmer,

@@ -45,7 +41,8 @@ lunr.stopWordFilter,

if (config) config.call(idx, idx)
builder.searchPipeline.add(
lunr.stemmer
)
return idx
config.call(builder)
return builder.build()
}
lunr.version = "@VERSION"

@@ -42,2 +42,23 @@ /*!

/**
* A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
* string as well as all known metadata. A pipeline function can mutate the token string
* or mutate (or add) metadata for a given token.
*
* A pipeline function can indicate that the passed token should be discarded by returning
* null. This token will not be passed to any downstream pipeline functions and will not be
* added to the index.
*
* Multiple tokens can be returned by returning an array of tokens. Each token will be passed
* to any downstream pipeline functions and all returned tokens will be added to the index.
*
* Any number of pipeline functions may be chained together using a lunr.Pipeline.
*
* @interface lunr.PipelineFunction
* @param {lunr.Token} token - A token from the document being processed.
* @param {number} i - The index of this token in the complete list of tokens for this document/field.
* @param {lunr.Token[]} tokens - All tokens for this document/field.
* @returns {(?lunr.Token|lunr.Token[])}
*/
/**
* Register a function with the pipeline.

@@ -51,5 +72,4 @@ *

*
* @param {Function} fn The function to check for.
* @param {String} label The label to register this function with
* @memberOf Pipeline
* @param {lunr.PipelineFunction} fn - The function to check for.
* @param {String} label - The label to register this function with
*/

@@ -68,5 +88,4 @@ lunr.Pipeline.registerFunction = function (fn, label) {

*
* @param {Function} fn The function to check for.
* @param {lunr.PipelineFunction} fn - The function to check for.
* @private
* @memberOf Pipeline
*/

@@ -88,5 +107,4 @@ lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {

*
* @param {Object} serialised The serialised pipeline to load.
* @param {Object} serialised - The serialised pipeline to load.
* @returns {lunr.Pipeline}
* @memberOf Pipeline
*/

@@ -102,3 +120,3 @@ lunr.Pipeline.load = function (serialised) {

} else {
throw new Error('Cannot load un-registered function: ' + fnName)
throw new Error('Cannot load unregistered function: ' + fnName)
}

@@ -115,4 +133,3 @@ })

*
* @param {Function} functions Any number of functions to add to the pipeline.
* @memberOf Pipeline
* @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
*/

@@ -134,5 +151,4 @@ lunr.Pipeline.prototype.add = function () {

*
* @param {Function} existingFn A function that already exists in the pipeline.
* @param {Function} newFn The new function to add to the pipeline.
* @memberOf Pipeline
* @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
* @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
*/

@@ -157,5 +173,4 @@ lunr.Pipeline.prototype.after = function (existingFn, newFn) {

*
* @param {Function} existingFn A function that already exists in the pipeline.
* @param {Function} newFn The new function to add to the pipeline.
* @memberOf Pipeline
* @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
* @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
*/

@@ -176,4 +191,3 @@ lunr.Pipeline.prototype.before = function (existingFn, newFn) {

*
* @param {Function} fn The function to remove from the pipeline.
* @memberOf Pipeline
* @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
*/

@@ -195,27 +209,40 @@ lunr.Pipeline.prototype.remove = function (fn) {

* @returns {Array}
* @memberOf Pipeline
*/
lunr.Pipeline.prototype.run = function (tokens) {
var out = [],
tokenLength = tokens.length,
stackLength = this._stack.length
var stackLength = this._stack.length
for (var i = 0; i < tokenLength; i++) {
var token = tokens[i]
for (var i = 0; i < stackLength; i++) {
var fn = this._stack[i]
for (var j = 0; j < stackLength; j++) {
token = this._stack[j](token, i, tokens)
if (token === void 0 || token === '') break
};
tokens = tokens.reduce(function (memo, token, j) {
var result = fn(token, j, tokens)
if (token !== void 0 && token !== '') out.push(token)
};
if (result === void 0 || result === '') return memo
return out
return memo.concat(result)
}, [])
}
return tokens
}
/**
* Convenience method for passing a string through a pipeline and getting
* strings out. This method takes care of wrapping the passed string in a
* token and mapping the resulting tokens back to strings.
*
* @param {string} str - The string to pass through the pipeline.
* @returns {string[]}
*/
lunr.Pipeline.prototype.runString = function (str) {
var token = new lunr.Token (str)
return this.run([token]).map(function (t) {
return t.toString()
})
}
/**
* Resets the pipeline by removing any existing processors.
*
* @memberOf Pipeline
*/

@@ -232,3 +259,2 @@ lunr.Pipeline.prototype.reset = function () {

* @returns {Array}
* @memberOf Pipeline
*/

@@ -235,0 +261,0 @@ lunr.Pipeline.prototype.toJSON = function () {

@@ -0,1 +1,2 @@

/* eslint-disable */
/*!

@@ -11,6 +12,7 @@ * lunr.stemmer

*
* @module
* @param {String} str The string to stem
* @returns {String}
* @see lunr.Pipeline
* @static
* @implements {lunr.PipelineFunction}
* @param {lunr.Token} token - The string to stem
* @returns {lunr.Token}
* @see {@link lunr.Pipeline}
*/

@@ -89,3 +91,3 @@ lunr.stemmer = (function(){

var porterStemmer = function porterStemmer(w) {
var stem,
var stem,
suffix,

@@ -131,3 +133,3 @@ firstch,

re4 = re4_1b_2;
if (re2.test(w)) { w = w + "e"; }
if (re2.test(w)) { w = w + "e"; }
else if (re3.test(w)) { re = re_1b_2; w = w.replace(re,""); }

@@ -218,5 +220,7 @@ else if (re4.test(w)) { w = w + "e"; }

return porterStemmer;
return function (token) {
return token.update(porterStemmer);
}
})();
lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')

@@ -13,5 +13,4 @@ /*!

*
* @module
* @param {Array} token The token to pass through the filter
* @returns {Function}
* @returns {lunr.PipelineFunction}
* @see lunr.Pipeline

@@ -27,3 +26,3 @@ * @see lunr.stopWordFilter

return function (token) {
if (token && words[token] !== token) return token
if (token && words[token.toString()] !== token.toString()) return token
}

@@ -39,6 +38,6 @@ }

*
* @module
* @param {String} token The token to pass through the filter
* @returns {String}
* @see lunr.Pipeline
* @implements {lunr.PipelineFunction}
* @params {lunr.Token} token - A token to check for being a stop word.
* @returns {lunr.Token}
* @see {@link lunr.Pipeline}
*/

@@ -45,0 +44,0 @@ lunr.stopWordFilter = lunr.generateStopWordFilter([

@@ -11,87 +11,55 @@ /*!

*
* @module
* @param {String} obj The string to convert into tokens
* @see lunr.tokenizer.separator
* @returns {Array}
* This tokenizer will convert its parameter to a string by calling `toString` and
* then will split this string on the character in `lunr.tokenizer.separator`.
* Arrays will have their elements converted to strings and wrapped in a lunr.Token.
*
* @static
* @param {?(string|object|object[])} obj - The object to convert into tokens
* @returns {lunr.Token[]}
*/
lunr.tokenizer = function (obj) {
if (!arguments.length || obj == null || obj == undefined) return []
if (Array.isArray(obj)) return obj.map(function (t) { return lunr.utils.asString(t).toLowerCase() })
if (obj == null || obj == undefined) {
return []
}
// TODO: This exists so that the deprecated property lunr.tokenizer.seperator can still be used. By
// default it is set to false and so the correctly spelt lunr.tokenizer.separator is used unless
// the user is using the old property to customise the tokenizer.
//
// This should be removed when version 1.0.0 is released.
var separator = lunr.tokenizer.seperator || lunr.tokenizer.separator
if (Array.isArray(obj)) {
return obj.map(function (t) {
return new lunr.Token(lunr.utils.asString(t).toLowerCase())
})
}
return obj.toString().trim().toLowerCase().split(separator)
}
var str = obj.toString().trim().toLowerCase(),
len = str.length,
tokens = []
/**
* This property is legacy alias for lunr.tokenizer.separator to maintain backwards compatability.
* When introduced the token was spelt incorrectly. It will remain until 1.0.0 when it will be removed,
* all code should use the correctly spelt lunr.tokenizer.separator property instead.
*
* @static
* @see lunr.tokenizer.separator
* @deprecated since 0.7.2 will be removed in 1.0.0
* @private
* @see lunr.tokenizer
*/
lunr.tokenizer.seperator = false
for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {
var char = str.charAt(sliceEnd),
sliceLength = sliceEnd - sliceStart
/**
* The sperator used to split a string into tokens. Override this property to change the behaviour of
* `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens.
*
* @static
* @see lunr.tokenizer
*/
lunr.tokenizer.separator = /[\s\-]+/
if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) {
/**
* Loads a previously serialised tokenizer.
*
* A tokenizer function to be loaded must already be registered with lunr.tokenizer.
* If the serialised tokenizer has not been registered then an error will be thrown.
*
* @param {String} label The label of the serialised tokenizer.
* @returns {Function}
* @memberOf tokenizer
*/
lunr.tokenizer.load = function (label) {
var fn = this.registeredFunctions[label]
if (sliceLength > 0) {
tokens.push(
new lunr.Token (str.slice(sliceStart, sliceEnd), {
position: [sliceStart, sliceLength],
index: tokens.length
})
)
}
if (!fn) {
throw new Error('Cannot load un-registered function: ' + label)
sliceStart = sliceEnd + 1
}
}
return fn
return tokens
}
lunr.tokenizer.label = 'default'
lunr.tokenizer.registeredFunctions = {
'default': lunr.tokenizer
}
/**
* Register a tokenizer function.
* The separator used to split a string into tokens. Override this property to change the behaviour of
* `lunr.tokenizer` behaviour when tokenizing strings. By default this splits on whitespace and hyphens.
*
* Functions that are used as tokenizers should be registered if they are to be used with a serialised index.
*
* Registering a function does not add it to an index, functions must still be associated with a specific index for them to be used when indexing and searching documents.
*
* @param {Function} fn The function to register.
* @param {String} label The label to register this function with
* @memberOf tokenizer
* @static
* @see lunr.tokenizer
*/
lunr.tokenizer.registerFunction = function (fn, label) {
if (label in this.registeredFunctions) {
lunr.utils.warn('Overwriting existing tokenizer: ' + label)
}
fn.label = label
this.registeredFunctions[label] = fn
}
lunr.tokenizer.separator = /[\s\-]+/

@@ -8,3 +8,3 @@ /*!

* lunr.trimmer is a pipeline function for trimming non word
* characters from the begining and end of tokens before they
* characters from the beginning and end of tokens before they
* enter the index.

@@ -16,11 +16,14 @@ *

*
* @module
* @param {String} token The token to pass through the filter
* @returns {String}
* @static
* @implements {lunr.PipelineFunction}
* @param {lunr.Token} token The token to pass through the filter
* @returns {lunr.Token}
* @see lunr.Pipeline
*/
lunr.trimmer = function (token) {
return token.replace(/^\W+/, '').replace(/\W+$/, '')
return token.update(function (s) {
return s.replace(/^\W+/, '').replace(/\W+$/, '')
})
}
lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')

@@ -18,2 +18,3 @@ /*!

lunr.utils.warn = (function (global) {
/* eslint-disable no-console */
return function (message) {

@@ -24,2 +25,3 @@ if (global.console && console.warn) {

}
/* eslint-enable no-console */
})(this)

@@ -26,0 +28,0 @@

@@ -7,65 +7,67 @@ /*!

/**
* lunr.Vectors implement vector related operations for
* a series of elements.
* A vector is used to construct the vector space of documents and queries. These
* vectors support operations to determine the similarity between two documents or
* a document and a query.
*
* @constructor
*/
lunr.Vector = function () {
this._magnitude = null
this.list = undefined
this.length = 0
}
/**
* lunr.Vector.Node is a simple struct for each node
* in a lunr.Vector.
* Normally no parameters are required for initializing a vector, but in the case of
* loading a previously dumped vector the raw elements can be provided to the constructor.
*
* @private
* @param {Number} The index of the node in the vector.
* @param {Object} The data at this node in the vector.
* @param {lunr.Vector.Node} The node directly after this node in the vector.
* For performance reasons vectors are implemented with a flat array, where an elements
* index is immediately followed by its value. E.g. [index, value, index, value]. This
* allows the underlying array to be as sparse as possible and still offer decent
* performance when being used for vector calculations.
*
* @constructor
* @memberOf Vector
* @param {Number[]} [elements] - The flat list of element index and element value pairs.
*/
lunr.Vector.Node = function (idx, val, next) {
this.idx = idx
this.val = val
this.next = next
lunr.Vector = function (elements) {
this._magnitude = 0
this.elements = elements || []
}
/**
* Inserts a new value at a position in a vector.
* Inserts an element at an index within the vector.
*
* @param {Number} The index at which to insert a value.
* @param {Object} The object to insert in the vector.
* @memberOf Vector.
* @param {Number} insertIdx - The index at which the element should be inserted.
* @param {Number} val - The value to be inserted into the vector.
*/
lunr.Vector.prototype.insert = function (idx, val) {
this._magnitude = undefined;
var list = this.list
lunr.Vector.prototype.insert = function (insertIdx, val) {
this._magnitude = 0
if (!list) {
this.list = new lunr.Vector.Node (idx, val, list)
return this.length++
if (this.elements.length == 0) {
this.elements.push(insertIdx, val)
return
}
if (idx < list.idx) {
this.list = new lunr.Vector.Node (idx, val, list)
return this.length++
}
var start = 0,
end = this.elements.length,
sliceLength = end - start,
pivot = Math.floor((sliceLength / 2) / 2) * 2,
pivotIdx = this.elements[pivot]
var prev = list,
next = list.next
while (sliceLength > 2) {
if (pivotIdx == insertIdx) {
throw "duplicate index"
}
while (next != undefined) {
if (idx < next.idx) {
prev.next = new lunr.Vector.Node (idx, val, next)
return this.length++
if (insertIdx > pivotIdx) {
start = pivot
}
prev = next, next = next.next
if (insertIdx < pivotIdx) {
end = pivot
}
sliceLength = end - start
pivot = start + Math.floor((sliceLength / 2) / 2) * 2
pivotIdx = this.elements[pivot]
}
prev.next = new lunr.Vector.Node (idx, val, next)
return this.length++
if (pivotIdx > insertIdx) {
this.elements.splice(pivot, 0, insertIdx, val)
}
if (pivotIdx < insertIdx) {
this.elements.splice(pivot + 2, 0, insertIdx, val)
}
}

@@ -77,14 +79,12 @@

* @returns {Number}
* @memberOf Vector
*/
lunr.Vector.prototype.magnitude = function () {
if (this._magnitude) return this._magnitude
var node = this.list,
sumOfSquares = 0,
val
while (node) {
val = node.val
var sumOfSquares = 0,
elementsLength = this.elements.length
for (var i = 1; i < elementsLength; i += 2) {
var val = this.elements[i]
sumOfSquares += val * val
node = node.next
}

@@ -98,20 +98,22 @@

*
* @param {lunr.Vector} otherVector The vector to compute the dot product with.
* @param {lunr.Vector} otherVector - The vector to compute the dot product with.
* @returns {Number}
* @memberOf Vector
*/
lunr.Vector.prototype.dot = function (otherVector) {
var node = this.list,
otherNode = otherVector.list,
dotProduct = 0
var dotProduct = 0,
a = this.elements, b = otherVector.elements,
aLen = a.length, bLen = b.length,
aVal = 0, bVal = 0,
i = 0, j = 0
while (node && otherNode) {
if (node.idx < otherNode.idx) {
node = node.next
} else if (node.idx > otherNode.idx) {
otherNode = otherNode.next
} else {
dotProduct += node.val * otherNode.val
node = node.next
otherNode = otherNode.next
while (i < aLen && j < bLen) {
aVal = a[i], bVal = b[j]
if (aVal < bVal) {
i += 2
} else if (aVal > bVal) {
j += 2
} else if (aVal == bVal) {
dotProduct += a[i + 1] * b[j + 1]
i += 2
j += 2
}

@@ -127,6 +129,5 @@ }

*
* @param {lunr.Vector} otherVector The other vector to calculate the
* @param {lunr.Vector} otherVector - The other vector to calculate the
* similarity with.
* @returns {Number}
* @memberOf Vector
*/

@@ -136,1 +137,25 @@ lunr.Vector.prototype.similarity = function (otherVector) {

}
/**
* Converts the vector to an array of the elements within the vector.
*
* @returns {Number[]}
*/
lunr.Vector.prototype.toArray = function () {
var output = new Array (this.elements.length / 2)
for (var i = 1, j = 0; i < this.elements.length; i += 2, j++) {
output[j] = this.elements[i]
}
return output
}
/**
* A JSON serializable representation of the vector.
*
* @returns {Number[]}
*/
lunr.Vector.prototype.toJSON = function () {
return this.elements
}
{
"name": "lunr",
"description": "Simple full-text search in your browser.",
"version": "0.7.2",
"version": "2.0.0-alpha.1",
"author": "Oliver Nightingale",

@@ -16,6 +16,12 @@ "keywords": ["search"],

"devDependencies": {
"dox": "0.4.4",
"dox-template": "0.1.1",
"phantomjs": "1.9.*",
"uglify-js": "2.4.13"
"benchmark": "2.1.x",
"chai": "3.5.x",
"eslint-plugin-spellcheck": "0.0.8",
"eslint": "3.4.x",
"jsdoc": "3.4.x",
"mocha": "3.0.x",
"mustache": "2.2.x",
"node-static": "0.7.x",
"uglify-js": "2.4.x",
"word-list": "1.0.x"
},

@@ -22,0 +28,0 @@ "scripts": {

@@ -1,1 +0,1 @@

var stemmingFixture = {"consign":"consign","consigned":"consign","consigning":"consign","consignment":"consign","consist":"consist","consisted":"consist","consistency":"consist","consistent":"consist","consistently":"consist","consisting":"consist","consists":"consist","consolation":"consol","consolations":"consol","consolatory":"consolatori","console":"consol","consoled":"consol","consoles":"consol","consolidate":"consolid","consolidated":"consolid","consolidating":"consolid","consoling":"consol","consols":"consol","consonant":"conson","consort":"consort","consorted":"consort","consorting":"consort","conspicuous":"conspicu","conspicuously":"conspicu","conspiracy":"conspiraci","conspirator":"conspir","conspirators":"conspir","conspire":"conspir","conspired":"conspir","conspiring":"conspir","constable":"constabl","constables":"constabl","constance":"constanc","constancy":"constanc","constant":"constant","knack":"knack","knackeries":"knackeri","knacks":"knack","knag":"knag","knave":"knave","knaves":"knave","knavish":"knavish","kneaded":"knead","kneading":"knead","knee":"knee","kneel":"kneel","kneeled":"kneel","kneeling":"kneel","kneels":"kneel","knees":"knee","knell":"knell","knelt":"knelt","knew":"knew","knick":"knick","knif":"knif","knife":"knife","knight":"knight","knights":"knight","knit":"knit","knits":"knit","knitted":"knit","knitting":"knit","knives":"knive","knob":"knob","knobs":"knob","knock":"knock","knocked":"knock","knocker":"knocker","knockers":"knocker","knocking":"knock","knocks":"knock","knopp":"knopp","knot":"knot","knots":"knot","lay":"lay","try":"tri"}
{"consign":"consign","consigned":"consign","consigning":"consign","consignment":"consign","consist":"consist","consisted":"consist","consistency":"consist","consistent":"consist","consistently":"consist","consisting":"consist","consists":"consist","consolation":"consol","consolations":"consol","consolatory":"consolatori","console":"consol","consoled":"consol","consoles":"consol","consolidate":"consolid","consolidated":"consolid","consolidating":"consolid","consoling":"consol","consols":"consol","consonant":"conson","consort":"consort","consorted":"consort","consorting":"consort","conspicuous":"conspicu","conspicuously":"conspicu","conspiracy":"conspiraci","conspirator":"conspir","conspirators":"conspir","conspire":"conspir","conspired":"conspir","conspiring":"conspir","constable":"constabl","constables":"constabl","constance":"constanc","constancy":"constanc","constant":"constant","knack":"knack","knackeries":"knackeri","knacks":"knack","knag":"knag","knave":"knave","knaves":"knave","knavish":"knavish","kneaded":"knead","kneading":"knead","knee":"knee","kneel":"kneel","kneeled":"kneel","kneeling":"kneel","kneels":"kneel","knees":"knee","knell":"knell","knelt":"knelt","knew":"knew","knick":"knick","knif":"knif","knife":"knife","knight":"knight","knights":"knight","knit":"knit","knits":"knit","knitted":"knit","knitting":"knit","knives":"knive","knob":"knob","knobs":"knob","knock":"knock","knocked":"knock","knocker":"knocker","knockers":"knocker","knocking":"knock","knocks":"knock","knopp":"knopp","knot":"knot","knots":"knot","lay":"lay","try":"tri"}

@@ -1,247 +0,271 @@

module('lunr.Pipeline', {
setup: function () {
suite('lunr.Pipeline', function () {
var noop = function () {}
setup(function () {
this.existingRegisteredFunctions = lunr.Pipeline.registeredFunctions
this.existingWarnIfFunctionNotRegistered = lunr.Pipeline.warnIfFunctionNotRegistered
lunr.Pipeline.registeredFunctions = {}
lunr.Pipeline.warnIfFunctionNotRegistered = noop
this.existingWarnIfFunctionNotRegistered = lunr.Pipeline.warnIfFunctionNotRegistered
lunr.Pipeline.warnIfFunctionNotRegistered = $.noop
},
teardown: function () {
this.pipeline = new lunr.Pipeline
})
teardown(function () {
lunr.Pipeline.registeredFunctions = this.existingRegisteredFunctions
lunr.Pipeline.warnIfFunctionNotRegistered = this.existingWarnIfFunctionNotRegistered
}
})
})
test("adding a new item to the pipeline", function () {
var pipeline = new lunr.Pipeline
equal(pipeline._stack.length, 0)
suite('#add', function () {
test('add function to pipeline', function () {
this.pipeline.add(noop)
assert.equal(1, this.pipeline._stack.length)
})
pipeline.add($.noop)
equal(pipeline._stack.length, 1)
})
test('add multiple functions to the pipeline', function () {
this.pipeline.add(noop, noop)
assert.equal(2, this.pipeline._stack.length)
})
})
test("adding multiple items to the pipeline in one go", function () {
var pipeline = new lunr.Pipeline
suite('#remove', function () {
test('function exists in pipeline', function () {
this.pipeline.add(noop)
assert.equal(1, this.pipeline._stack.length)
this.pipeline.remove(noop)
assert.equal(0, this.pipeline._stack.length)
})
pipeline.add($.noop, $.noop)
equal(pipeline._stack.length, 2)
})
test('function does not exist in pipeline', function () {
var fn = function () {}
this.pipeline.add(noop)
assert.equal(1, this.pipeline._stack.length)
this.pipeline.remove(fn)
assert.equal(1, this.pipeline._stack.length)
})
})
test("removing an item from the pipeline", function () {
var pipeline = new lunr.Pipeline,
fn = $.noop
suite('#before', function () {
var fn = function () {}
pipeline.add(fn)
equal(pipeline._stack.length, 1)
test('other function exists', function () {
this.pipeline.add(noop)
this.pipeline.before(noop, fn)
pipeline.remove(fn)
equal(pipeline._stack.length, 0)
})
assert.deepEqual([fn, noop], this.pipeline._stack)
})
test("removing a nonexistent item from the pipeline", function () {
var pipeline = new lunr.Pipeline,
fn1 = $.noop,
fn2 = function () {}
test('other function does not exist', function () {
var action = function () {
this.pipeline.before(noop, fn)
}
pipeline.add(fn1)
equal(pipeline._stack.length, 1)
assert.throws(action.bind(this))
assert.equal(0, this.pipeline._stack.length)
})
})
pipeline.remove(fn2)
equal(pipeline._stack.length, 1)
})
suite('#after', function () {
var fn = function () {}
test("adding an item to the pipeline before another item", function () {
var pipeline = new lunr.Pipeline,
fn1 = $.noop,
fn2 = function () {}
test('other function exists', function () {
this.pipeline.add(noop)
this.pipeline.after(noop, fn)
pipeline.add(fn1)
pipeline.before(fn1, fn2)
assert.deepEqual([noop, fn], this.pipeline._stack)
})
deepEqual(pipeline._stack, [fn2, fn1])
})
test('other function does not exist', function () {
var action = function () {
this.pipeline.after(noop, fn)
}
test("adding an item to the pipeline before nonexistent item", function () {
var pipeline = new lunr.Pipeline,
fn1 = $.noop,
fn2 = function () {},
fn3 = function () {}
assert.throws(action.bind(this))
assert.equal(0, this.pipeline._stack.length)
})
})
pipeline.add(fn1, fn2)
suite('#run', function () {
test('calling each function for each token', function () {
var count1 = 0, count2 = 0,
fn1 = function (t) { count1++; return t },
fn2 = function (t) { count2++; return t }
throws(function () {
pipeline.before(fn3, fn1)
})
this.pipeline.add(fn1, fn2)
this.pipeline.run([1,2,3])
deepEqual(pipeline._stack, [fn1, fn2])
})
assert.equal(3, count1)
assert.equal(3, count2)
})
test("adding an item to the pipeline after another item", function () {
var pipeline = new lunr.Pipeline,
fn1 = $.noop,
fn2 = function () {},
fn3 = function () {}
test('passes token to pipeline function', function () {
this.pipeline.add(function (token) {
assert.equal('foo', token)
})
pipeline.add(fn1, fn2)
pipeline.after(fn1, fn3)
this.pipeline.run(['foo'])
})
deepEqual(pipeline._stack, [fn1, fn3, fn2])
})
test('passes index to pipeline function', function () {
this.pipeline.add(function (_, index) {
assert.equal(0, index)
})
test("adding an item to the pipeline after nonexistent item", function () {
var pipeline = new lunr.Pipeline,
fn1 = $.noop,
fn2 = function () {},
fn3 = function () {}
this.pipeline.run(['foo'])
})
pipeline.add(fn1, fn2)
test('passes entire token array to pipeline function', function () {
this.pipeline.add(function (_, _, tokens) {
assert.deepEqual(['foo'], tokens)
})
throws(function () {
pipeline.after(fn3, fn1)
})
this.pipeline.run(['foo'])
})
deepEqual(pipeline._stack, [fn1, fn2])
})
test('passes output of one function as input to the next', function () {
this.pipeline.add(function (t) {
return t.toUpperCase()
})
test("run calls each member of the pipeline for each input", function () {
var pipeline = new lunr.Pipeline,
count1 = 0, count2 = 0,
fn1 = function (token) { count1++ ; return token },
fn2 = function (token) { count2++ ; return token }
this.pipeline.add(function (t) {
assert.equal('FOO', t)
})
pipeline.add(fn1, fn2)
this.pipeline.run(['foo'])
})
pipeline.run([1,2,3])
test('returns the results of the last function', function () {
this.pipeline.add(function (t) {
return t.toUpperCase()
})
equal(count1, 3)
equal(count2, 3)
})
assert.deepEqual(['FOO'], this.pipeline.run(['foo']))
})
test("run should pass three inputs to the pipeline fn", function () {
var pipeline = new lunr.Pipeline,
input, index, arr,
fn1 = function () { input = arguments[0], index = arguments[1], arr = arguments[2] }
test('filters out undefined values', function () {
var tokens = [],
output
pipeline.add(fn1)
// only pass on tokens for even token indexes
this.pipeline.add(function (t, i) {
if (i % 2) {
return t
}
})
pipeline.run(['a'])
this.pipeline.add(function (t) {
tokens.push(t)
return t
})
equal(input, 'a')
equal(index, 0)
deepEqual(arr, ['a'])
})
output = this.pipeline.run(['a', 'b', 'c', 'd'])
test("run should pass the output of one into the input of the next", function () {
var pipeline = new lunr.Pipeline,
output,
fn1 = function (t1) { return t1.toUpperCase() },
fn2 = function (t2) { output = t2 }
assert.sameMembers(['b', 'd'], tokens)
assert.sameMembers(['b', 'd'], output)
})
pipeline.add(fn1)
pipeline.add(fn2)
suite('expanding tokens', function () {
test('passed to output', function () {
this.pipeline.add(function (t) {
return [t, t.toUpperCase()]
})
pipeline.run(['a'])
assert.sameMembers(["foo", "FOO"], this.pipeline.run(['foo']))
})
equal(output, 'A')
})
test('not passed to same function', function () {
var received = []
test("run should return the result of running the entire pipeline on each element", function () {
var pipeline = new lunr.Pipeline,
fn1 = function (t1) { return t1.toUpperCase() }
pipeline.add(fn1)
deepEqual(pipeline.run(['a']), ['A'])
})
this.pipeline.add(function (t) {
received.push(t)
return [t, t.toUpperCase()]
})
test("run should filter out any undefined values at each stage in the pipeline", function () {
var pipeline = new lunr.Pipeline,
fn2Count = 0,
fn1 = function (t) { if (t < 5) return t },
fn2 = function (t) { fn2Count++ ; return t }
this.pipeline.run(['foo'])
pipeline.add(fn1, fn2)
assert.sameMembers(['foo'], received)
})
var output = pipeline.run([0,1,2,3,4,5,6,7,8,9])
equal(fn2Count, 5)
equal(output.length, 5)
})
test('passed to the next pipeline function', function () {
var received = []
test("run should filter out any empty string values at each stage in the pipeline", function () {
var pipeline = new lunr.Pipeline,
fn2Count = 0,
fn1 = function (t) {
if (t === "foo") {
return ""
} else {
return t
}
},
fn2 = function (t) { fn2Count++ ; return t }
this.pipeline.add(function (t) {
return [t, t.toUpperCase()]
})
pipeline.add(fn1, fn2)
this.pipeline.add(function (t) {
received.push(t)
})
var output = pipeline.run(["foo", "bar", "baz", ""])
equal(fn2Count, 2)
equal(output.length, 2)
deepEqual(output, ["bar", "baz"])
})
this.pipeline.run(['foo'])
test('toJSON', function () {
var pipeline = new lunr.Pipeline,
fn1 = function () {},
fn2 = function () {}
assert.sameMembers(['foo', 'FOO'], received)
})
})
})
lunr.Pipeline.registerFunction(fn1, 'fn1')
lunr.Pipeline.registerFunction(fn2, 'fn2')
suite('#toJSON', function () {
test('returns an array of registered function labels', function () {
var fn = function () {}
pipeline.add(fn1, fn2)
lunr.Pipeline.registerFunction(fn, 'fn')
deepEqual(pipeline.toJSON(), ['fn1', 'fn2'])
})
this.pipeline.add(fn)
test('registering a pipeline function', function () {
var fn1 = function () {}
assert.sameMembers(['fn'], this.pipeline.toJSON())
})
})
equal(Object.keys(lunr.Pipeline.registeredFunctions).length, 0)
suite('.registerFunction', function () {
setup(function () {
this.fn = function () {}
})
lunr.Pipeline.registerFunction(fn1, 'fn1')
test('adds a label property to the function', function () {
lunr.Pipeline.registerFunction(this.fn, 'fn')
equal(fn1.label, 'fn1')
equal(Object.keys(lunr.Pipeline.registeredFunctions).length, 1)
deepEqual(lunr.Pipeline.registeredFunctions['fn1'], fn1)
})
assert.equal('fn', this.fn.label)
})
test('load', function () {
var fn1 = function () {},
fn2 = function () {}
test('adds function to the list of registered functions', function () {
lunr.Pipeline.registerFunction(this.fn, 'fn')
lunr.Pipeline.registerFunction(fn1, 'fn1')
lunr.Pipeline.registerFunction(fn2, 'fn2')
assert.equal(this.fn, lunr.Pipeline.registeredFunctions['fn'])
})
})
var serialised = ['fn1', 'fn2']
suite('.load', function () {
test('with registered functions', function () {
var fn = function () {},
serializedPipeline = ['fn'],
pipeline
var pipeline = lunr.Pipeline.load(serialised)
lunr.Pipeline.registerFunction(fn, 'fn')
equal(pipeline._stack.length, 2)
deepEqual(pipeline._stack[0], fn1)
deepEqual(pipeline._stack[1], fn2)
})
pipeline = lunr.Pipeline.load(serializedPipeline)
test('loading an un-registered pipeline function', function () {
var serialised = ['fn1']
assert.equal(1, pipeline._stack.length)
assert.equal(fn, pipeline._stack[0])
})
throws(function () {
lunr.Pipeline.load(serialised)
test('with unregisterd functions', function () {
var serializedPipeline = ['fn']
assert.throws(function () {
lunr.Pipeline.load(serializedPipeline)
})
})
})
})
test('resetting the pipeline', function () {
var fn1 = function () {},
fn2 = function () {},
pipeline = new lunr.Pipeline
suite('#reset', function () {
test('empties the stack', function () {
this.pipeline.add(function () {})
pipeline.add(fn1, fn2)
deepEqual(pipeline._stack, [fn1, fn2])
assert.equal(1, this.pipeline._stack.length)
pipeline.reset()
deepEqual(pipeline._stack, [])
this.pipeline.reset()
assert.equal(0, this.pipeline._stack.length)
})
})
})

@@ -1,8 +0,4 @@

module('search', {
setup: function () {
var idx = new lunr.Index
idx.field('body')
idx.field('title', { boost: 10 })
;([{
suite('search', function () {
setup(function () {
var documents = [{
id: 'a',

@@ -22,80 +18,469 @@ title: 'Mr. Green kills Colonel Mustard',

wordCount: 16
},{
id: 'd',
title: 'title',
body: 'handsome',
},{
id: 'e',
title: 'title',
body: 'hand',
}]).forEach(function (doc) { idx.add(doc) })
}]
this.idx = idx
}
})
this.idx = lunr(function () {
this.ref('id')
this.field('title')
this.field('body')
test('returning the correct results', function () {
var results = this.idx.search('green plant')
documents.forEach(function (document) {
this.add(document)
}, this)
})
})
equal(results.length, 2)
equal(results[0].ref, 'b')
})
suite('single term search', function () {
suite('one match', function () {
setup(function () {
this.results = this.idx.search('scarlett')
})
test('search term not in the index', function () {
var results = this.idx.search('foo')
test('one result returned', function () {
assert.lengthOf(this.results, 1)
})
equal(results.length, 0)
})
test('document c matches', function () {
assert.equal('c', this.results[0].ref)
})
test('one search term not in the index', function () {
var results = this.idx.search('foo green')
test('matching term', function () {
assert.sameMembers(['scarlett'], Object.keys(this.results[0].matchData.metadata))
})
})
equal(results.length, 0)
})
suite('no match', function () {
setup(function () {
this.results = this.idx.search('foo')
})
test('search contains one term not in the index', function () {
var results = this.idx.search('green foo')
test('no matches', function () {
assert.lengthOf(this.results, 0)
})
})
equal(results.length, 0)
})
suite('multiple matches', function () {
setup(function () {
this.results = this.idx.search('plant')
})
test('search takes into account boosts', function () {
var results = this.idx.search('professor')
test('has two matches', function () {
assert.lengthOf(this.results, 2)
})
equal(results.length, 2)
equal(results[0].ref, 'c')
test('sorted by relevance', function () {
assert.equal('b', this.results[0].ref)
assert.equal('c', this.results[1].ref)
})
})
ok(results[0].score > 10 * results[1].score)
})
suite('pipeline processing', function () {
// study would be stemmed to studi, tokens
// are stemmed by default on index and must
// also be stemmed on search to match
suite('enabled (default)', function () {
setup(function () {
this.results = this.idx.query(function (q) {
q.clause({term: 'study', usePipeline: true})
})
})
test('search boosts exact matches', function () {
var results = this.idx.search('hand')
test('has two matches', function () {
assert.lengthOf(this.results, 2)
})
equal(results.length, 2)
equal(results[0].ref, 'e')
test('sorted by relevance', function () {
assert.equal('b', this.results[0].ref)
assert.equal('a', this.results[1].ref)
})
})
ok(results[0].score > results[1].score)
})
suite('disabled', function () {
setup(function () {
this.results = this.idx.query(function (q) {
q.clause({term: 'study', usePipeline: false})
})
})
test('ref type is not changed to a string', function () {
var idx = new lunr.Index
idx.field('type')
test('no matches', function () {
assert.lengthOf(this.results, 0)
})
})
})
})
var objKey = {},
arrKey = [],
dateKey = new Date,
numKey = 1,
strKey = "foo"
suite('multiple terms', function () {
suite('all terms match', function () {
setup(function () {
this.results = this.idx.search('fellow candlestick')
})
idx.add({id: objKey, type: "object"})
idx.add({id: arrKey, type: "array"})
idx.add({id: dateKey, type: "date"})
idx.add({id: numKey, type: "number"})
idx.add({id: strKey, type: "string"})
test('has one match', function () {
assert.lengthOf(this.results, 1)
})
deepEqual(idx.search("object")[0].ref, objKey)
deepEqual(idx.search("array")[0].ref, arrKey)
deepEqual(idx.search("date")[0].ref, dateKey)
deepEqual(idx.search("number")[0].ref, numKey)
deepEqual(idx.search("string")[0].ref, strKey)
test('correct document returned', function () {
assert.equal('a', this.results[0].ref)
})
test('matched terms returned', function () {
assert.sameMembers(['fellow', 'candlestick'], Object.keys(this.results[0].matchData.metadata))
})
})
suite('one term matches', function () {
setup(function () {
this.results = this.idx.search('week foo')
})
test('has one match', function () {
assert.lengthOf(this.results, 1)
})
test('correct document returned', function () {
assert.equal('c', this.results[0].ref)
})
test('only matching terms returned', function () {
assert.sameMembers(['week'], Object.keys(this.results[0].matchData.metadata))
})
})
suite('documents with all terms score higher', function () {
setup(function () {
this.results = this.idx.search('candlestick green')
})
test('has three matches', function () {
assert.lengthOf(this.results, 3)
})
test('correct documents returned', function () {
var matchingDocuments = this.results.map(function (r) {
return r.ref
})
assert.sameMembers(['a', 'b', 'c'], matchingDocuments)
})
test('documents with all terms score highest', function () {
assert.equal('a', this.results[0].ref)
})
test('matching terms are returned', function () {
assert.sameMembers(['candlestick', 'green'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['green'], Object.keys(this.results[1].matchData.metadata))
assert.sameMembers(['green'], Object.keys(this.results[2].matchData.metadata))
})
})
suite('no terms match', function () {
setup(function () {
this.results = this.idx.search('foo bar')
})
test('no matches', function () {
assert.lengthOf(this.results, 0)
})
})
suite('corpus terms are stemmed', function () {
setup(function () {
this.results = this.idx.search('water')
})
test('matches two documents', function () {
assert.lengthOf(this.results, 2)
})
test('matches correct documents', function () {
var matchingDocuments = this.results.map(function (r) {
return r.ref
})
assert.sameMembers(['b', 'c'], matchingDocuments)
})
})
suite('field scoped terms', function () {
suite('only matches on scoped field', function () {
setup(function () {
this.results = this.idx.search('title:plant')
})
test('one result returned', function () {
assert.lengthOf(this.results, 1)
})
test('returns the correct document', function () {
assert.equal('b', this.results[0].ref)
})
test('match data', function () {
assert.sameMembers(['plant'], Object.keys(this.results[0].matchData.metadata))
})
})
suite('no matching terms', function () {
setup(function () {
this.results = this.idx.search('title:candlestick')
})
test('no results returned', function () {
assert.lengthOf(this.results, 0)
})
})
})
suite('wildcard matching', function () {
suite('trailing wildcard', function () {
suite('no matches', function () {
setup(function () {
this.results = this.idx.search('fo*')
})
test('no results returned', function () {
assert.lengthOf(this.results, 0)
})
})
suite('one match', function () {
setup(function () {
this.results = this.idx.search('candle*')
})
test('one result returned', function () {
assert.lengthOf(this.results, 1)
})
test('correct document matched', function () {
assert.equal('a', this.results[0].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['candlestick'], Object.keys(this.results[0].matchData.metadata))
})
})
suite('multiple terms match', function () {
setup(function () {
this.results = this.idx.search('pl*')
})
test('two results returned', function () {
assert.lengthOf(this.results, 2)
})
test('correct documents matched', function () {
var matchingDocuments = this.results.map(function (r) {
return r.ref
})
assert.sameMembers(['b', 'c'], matchingDocuments)
})
test('matching terms returned', function () {
assert.sameMembers(['plumb', 'plant'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['plumb', 'plant'], Object.keys(this.results[1].matchData.metadata))
})
})
})
})
})
suite('wildcard matching', function () {
suite('trailing wildcard', function () {
suite('no matches found', function () {
setup(function () {
this.results = this.idx.search('fo*')
})
test('no results returned', function () {
assert.lengthOf(this.results, 0)
})
})
suite('results found', function () {
setup(function () {
this.results = this.idx.search('pl*')
})
test('two results returned', function () {
assert.lengthOf(this.results, 2)
})
test('matching documents returned', function () {
assert.equal('b', this.results[0].ref)
assert.equal('c', this.results[1].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['plant', 'plumb'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['plant', 'plumb'], Object.keys(this.results[1].matchData.metadata))
})
})
})
suite('leading wildcard', function () {
suite('no results found', function () {
setup(function () {
this.results = this.idx.search('*oo')
})
test('no results found', function () {
assert.lengthOf(this.results, 0)
})
})
suite('results found', function () {
setup(function () {
this.results = this.idx.search('*ant')
})
test('two results found', function () {
assert.lengthOf(this.results, 2)
})
test('matching documents returned', function () {
assert.equal('b', this.results[0].ref)
assert.equal('c', this.results[1].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['plant'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['plant'], Object.keys(this.results[1].matchData.metadata))
})
})
})
suite('contained wildcard', function () {
suite('no results found', function () {
setup(function () {
this.results = this.idx.search('f*o')
})
test('no results found', function () {
assert.lengthOf(this.results, 0)
})
})
suite('results found', function () {
setup(function () {
this.results = this.idx.search('pl*nt')
})
test('two results found', function () {
assert.lengthOf(this.results, 2)
})
test('matching documents returned', function () {
assert.equal('b', this.results[0].ref)
assert.equal('c', this.results[1].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['plant'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['plant'], Object.keys(this.results[1].matchData.metadata))
})
})
})
})
suite('edit distance', function () {
suite('no results found', function () {
setup(function () {
this.results = this.idx.search('foo~1')
})
test('no results returned', function () {
assert.lengthOf(this.results, 0)
})
})
suite('results found', function () {
setup(function () {
this.results = this.idx.search('plont~1')
})
test('two results found', function () {
assert.lengthOf(this.results, 2)
})
test('matching documents returned', function () {
assert.equal('b', this.results[0].ref)
assert.equal('c', this.results[1].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['plant'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['plant'], Object.keys(this.results[1].matchData.metadata))
})
})
})
suite('searching by field', function () {
suite('unknown field', function () {
test('throws lunr.QueryParseError', function () {
assert.throws(function () {
this.idx.search('unknown-field:plant')
}.bind(this), lunr.QueryParseError)
})
})
suite('no results found', function () {
setup(function () {
this.results = this.idx.search('title:candlestick')
})
test('no results found', function () {
assert.lengthOf(this.results, 0)
})
})
suite('results found', function () {
setup(function () {
this.results = this.idx.search('title:plant')
})
test('one results found', function () {
assert.lengthOf(this.results, 1)
})
test('matching documents returned', function () {
assert.equal('b', this.results[0].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['plant'], Object.keys(this.results[0].matchData.metadata))
})
})
})
suite('term boosts', function () {
suite('no results found', function () {
setup(function () {
this.results = this.idx.search('foo^10')
})
test('no results found', function () {
assert.lengthOf(this.results, 0)
})
})
suite('results found', function () {
setup(function () {
this.results = this.idx.search('scarlett candlestick^5')
})
test('two results found', function () {
assert.lengthOf(this.results, 2)
})
test('matching documents returned', function () {
assert.equal('a', this.results[0].ref)
assert.equal('c', this.results[1].ref)
})
test('matching terms returned', function () {
assert.sameMembers(['candlestick'], Object.keys(this.results[0].matchData.metadata))
assert.sameMembers(['scarlett'], Object.keys(this.results[1].matchData.metadata))
})
})
})
})

@@ -1,14 +0,26 @@

module('lunr.stemmer')
suite('lunr.stemmer', function () {
test('reduces words to their stem', function (done) {
withFixture('stemming_vocab.json', function (err, fixture) {
if (err != null) {
throw err
}
test('should stem words correctly', function () {
Object.keys(stemmingFixture).forEach(function (testWord) {
var expected = stemmingFixture[testWord]
var testData = JSON.parse(fixture)
equal(lunr.stemmer(testWord), expected)
Object.keys(testData).forEach(function (word) {
var expected = testData[word],
token = new lunr.Token(word),
result = lunr.stemmer(token).toString()
assert.equal(expected, result)
})
done()
})
})
})
test('should be registered with lunr.Pipeline', function () {
equal(lunr.stemmer.label, 'stemmer')
deepEqual(lunr.Pipeline.registeredFunctions['stemmer'], lunr.stemmer)
test('is a registered pipeline function', function () {
assert.equal('stemmer', lunr.stemmer.label)
assert.equal(lunr.stemmer, lunr.Pipeline.registeredFunctions['stemmer'])
})
})

@@ -1,30 +0,30 @@

module('lunr.stopWordFilter')
suite('lunr.stopWordFilter', function () {
test('filters stop words', function () {
var stopWords = ['the', 'and', 'but', 'than', 'when']
test('stops stop words', function () {
var stopWords = ['the', 'and', 'but', 'than', 'when']
stopWords.forEach(function (word) {
equal(lunr.stopWordFilter(word), undefined)
stopWords.forEach(function (word) {
assert.isUndefined(lunr.stopWordFilter(word))
})
})
})
test('non stop words pass through', function () {
var nonStopWords = ['interesting', 'words', 'pass', 'through']
test('ignores non stop words', function () {
var nonStopWords = ['interesting', 'words', 'pass', 'through']
nonStopWords.forEach(function (word) {
equal(lunr.stopWordFilter(word), word)
nonStopWords.forEach(function (word) {
assert.equal(word, lunr.stopWordFilter(word))
})
})
})
test('should not filter Object.prototype terms', function () {
var nonStopWords = ['constructor', 'hasOwnProperty', 'toString', 'valueOf']
test('ignores properties of Object.prototype', function () {
var nonStopWords = ['constructor', 'hasOwnProperty', 'toString', 'valueOf']
nonStopWords.forEach(function (word) {
equal(lunr.stopWordFilter(word), word)
nonStopWords.forEach(function (word) {
assert.equal(word, lunr.stopWordFilter(word))
})
})
})
test('should be registered with lunr.Pipeline', function () {
equal(lunr.stopWordFilter.label, 'stopWordFilter')
deepEqual(lunr.Pipeline.registeredFunctions['stopWordFilter'], lunr.stopWordFilter)
test('is a registered pipeline function', function () {
assert.equal('stopWordFilter', lunr.stopWordFilter.label)
assert.equal(lunr.stopWordFilter, lunr.Pipeline.registeredFunctions['stopWordFilter'])
})
})

@@ -1,23 +0,13 @@

var helpers = require('./../lib/helpers')
var lunr = require('../lunr.js'),
assert = require('chai').assert,
fs = require('fs'),
path = require('path')
var extensions = function () {
this.equalNumber = function (lambdaNum, num, desc) {
return this.equal.call(this, helpers.toNumber(lambdaNum), num, desc)
},
this.isTrue = function (lambdaBool, desc) {
return this.ok.call(this, helpers.toBoolean(lambdaBool), desc)
},
this.isFalse = function (lambdaBool, desc) {
return this.ok.call(this, !helpers.toBoolean(lambdaBool), desc)
}
var withFixture = function (name, fn) {
var fixturePath = path.join('test', 'fixtures', name)
fs.readFile(fixturePath, fn)
}
module.exports = function (testName, testFn) {
module.exports[testName] = function (test) {
extensions.call(test)
testFn.call(test, test)
test.done()
}
}
global.lunr = lunr
global.assert = assert
global.withFixture = withFixture

@@ -1,125 +0,96 @@

module('lunr.tokenizer')
suite('lunr.tokenizer', function () {
var toString = function (o) { return o.toString() }
test("splitting simple strings into tokens", function () {
var simpleString = "this is a simple string",
tokens = lunr.tokenizer(simpleString)
test('splitting into tokens', function () {
var tokens = lunr.tokenizer('foo bar baz')
.map(toString)
deepEqual(tokens, ['this', 'is', 'a', 'simple', 'string'])
})
assert.sameMembers(['foo', 'bar', 'baz'], tokens)
})
test('downcasing tokens', function () {
var simpleString = 'FOO BAR',
tags = ['Foo', 'BAR']
test('downcases tokens', function () {
var tokens = lunr.tokenizer('Foo BAR BAZ')
.map(toString)
deepEqual(lunr.tokenizer(simpleString), ['foo', 'bar'])
deepEqual(lunr.tokenizer(tags), ['foo', 'bar'])
})
assert.sameMembers(['foo', 'bar', 'baz'], tokens)
})
test('handling arrays of strings', function () {
var tags = ['foo', 'bar'],
tokens = lunr.tokenizer(tags)
test('array of strings', function () {
var tokens = lunr.tokenizer(['foo', 'bar', 'baz'])
.map(toString)
deepEqual(tokens, tags)
})
assert.sameMembers(['foo', 'bar', 'baz'], tokens)
})
test('handling arrays with undefined or null values', function () {
var arr = ['foo', undefined, null, 'bar'],
tokens = lunr.tokenizer(arr)
test('undefined is converted to empty string', function () {
var tokens = lunr.tokenizer(['foo', undefined, 'baz'])
.map(toString)
deepEqual(tokens, ['foo', '', '', 'bar'])
})
assert.sameMembers(['foo', '', 'baz'], tokens)
})
test('handling multiple white spaces', function () {
var testString = ' foo bar ',
tokens = lunr.tokenizer(testString)
test('null is converted to empty string', function () {
var tokens = lunr.tokenizer(['foo', null, 'baz'])
.map(toString)
deepEqual(tokens, ['foo', 'bar'])
})
assert.sameMembers(['foo', '', 'baz'], tokens)
})
test('handling null-like arguments', function () {
deepEqual(lunr.tokenizer(), [])
deepEqual(lunr.tokenizer(null), [])
deepEqual(lunr.tokenizer(undefined), [])
})
test('multiple white space is stripped', function () {
var tokens = lunr.tokenizer(' foo bar baz ')
.map(toString)
test('calling to string on passed val', function () {
var date = new Date (Date.UTC(2013, 0, 1, 12)),
obj = {
toString: function () { return 'custom object' }
}
assert.sameMembers(['foo', 'bar', 'baz'], tokens)
})
equal(lunr.tokenizer(41), '41')
equal(lunr.tokenizer(false), 'false')
deepEqual(lunr.tokenizer(obj), ['custom', 'object'])
test('handling null-like arguments', function () {
assert.lengthOf(lunr.tokenizer(), 0)
assert.lengthOf(lunr.tokenizer(undefined), 0)
assert.lengthOf(lunr.tokenizer(null), 0)
})
// slicing here to avoid asserting on the timezone part of the date
// that will be different whereever the test is run.
deepEqual(lunr.tokenizer(date).slice(0, 4), ['tue', 'jan', '01', '2013'])
})
test('converting a date to tokens', function () {
var date = new Date(Date.UTC(2013, 0, 1, 12))
test("splitting strings with hyphens", function () {
var simpleString = "take the New York-San Francisco flight",
tokens = lunr.tokenizer(simpleString)
// NOTE: slicing here to prevent asserting on parts
// of the date that might be affected by the timezone
// the test is running in.
assert.sameMembers(['tue', 'jan', '01', '2013'], lunr.tokenizer(date).slice(0, 4).map(toString))
})
deepEqual(tokens, ['take', 'the', 'new', 'york', 'san', 'francisco', 'flight'])
})
test('converting a number to tokens', function () {
assert.equal('41', lunr.tokenizer(41).map(toString))
})
test("splitting strings with hyphens and spaces", function () {
var simpleString = "Solve for A - B",
tokens = lunr.tokenizer(simpleString)
test('converting a boolean to tokens', function () {
assert.equal('false', lunr.tokenizer(false).map(toString))
})
deepEqual(tokens, ['solve', 'for', 'a', 'b'])
})
test('converting an object to tokens', function () {
var obj = {
toString: function () { return 'custom object' }
}
test("registering a tokenizer function", function () {
var fn = function () {}
lunr.tokenizer.registerFunction(fn, 'test')
assert.sameMembers(lunr.tokenizer(obj).map(toString), ['custom', 'object'])
})
equal(fn.label, 'test')
equal(lunr.tokenizer.registeredFunctions['test'], fn)
test('splits strings with hyphens', function () {
assert.sameMembers(lunr.tokenizer('foo-bar').map(toString), ['foo', 'bar'])
})
delete lunr.tokenizer.registerFunction['test'] // resetting the state after the test
})
test('splits strings with hyphens and spaces', function () {
assert.sameMembers(lunr.tokenizer('foo - bar').map(toString), ['foo', 'bar'])
})
test("loading a registered tokenizer", function () {
var serialized = 'default', // default tokenizer is already registered
tokenizerFn = lunr.tokenizer.load(serialized)
test('tracking the token index', function () {
var tokens = lunr.tokenizer('foo bar')
assert.equal(tokens[0].metadata.index, 0)
assert.equal(tokens[1].metadata.index, 1)
})
equal(tokenizerFn, lunr.tokenizer)
})
test("loading an un-registered tokenizer", function () {
var serialized = 'un-registered' // default tokenizer is already registered
throws(function () {
lunr.tokenizer.load(serialized)
test('tracking the token position', function () {
var tokens = lunr.tokenizer('foo bar')
assert.deepEqual(tokens[0].metadata.position, [0, 3])
assert.deepEqual(tokens[1].metadata.position, [4, 3])
})
})
test('custom separator', function () {
try {
var defaultSeparator = lunr.tokenizer.separator,
str = 'foo|bar|baz'
lunr.tokenizer.separator = '|'
deepEqual(lunr.tokenizer(str), ['foo', 'bar', 'baz'])
} finally {
lunr.tokenizer.separator = defaultSeparator
}
})
// TODO: this test is only here to test backwards compatibility and
// can and should be removed in 1.0.0
test('custom separator using legacy seperator property', function () {
try {
var defaultSeparator = lunr.tokenizer.seperator,
str = 'foo|bar|baz'
lunr.tokenizer.seperator = '|'
deepEqual(lunr.tokenizer(str), ['foo', 'bar', 'baz'])
} finally {
lunr.tokenizer.seperator = defaultSeparator
}
})

@@ -1,27 +0,29 @@

module('lunr.trimmer')
suite('lunr.trimmer', function () {
test('latin characters', function () {
var token = new lunr.Token ('hello')
assert.equal(lunr.trimmer(token).toString(), token.toString())
})
test('latin characters', function () {
var token = 'hello'
equal(lunr.trimmer(token), token)
})
suite('punctuation', function () {
var trimmerTest = function (description, str, expected) {
test(description, function () {
var token = new lunr.Token(str),
trimmed = lunr.trimmer(token).toString()
test('removing leading and trailing punctuation', function () {
var fullStop = 'hello.',
innerApostrophe = "it's",
trailingApostrophe = "james'",
exclamationMark = 'stop!',
comma = 'first,',
brackets = '[tag]'
assert.equal(expected, trimmed)
})
}
deepEqual(lunr.trimmer(fullStop), 'hello')
deepEqual(lunr.trimmer(innerApostrophe), "it's")
deepEqual(lunr.trimmer(trailingApostrophe), "james")
deepEqual(lunr.trimmer(exclamationMark), 'stop')
deepEqual(lunr.trimmer(comma), 'first')
deepEqual(lunr.trimmer(brackets), 'tag')
})
trimmerTest('full stop', 'hello.', 'hello')
trimmerTest('inner apostrophe', "it's", "it's")
trimmerTest('trailing apostrophe', "james'", 'james')
trimmerTest('exclamation mark', 'stop!', 'stop')
trimmerTest('comma', 'first,', 'first')
trimmerTest('brackets', '[tag]', 'tag')
})
test('should be registered with lunr.Pipeline', function () {
equal(lunr.trimmer.label, 'trimmer')
deepEqual(lunr.Pipeline.registeredFunctions['trimmer'], lunr.trimmer)
test('is a registered pipeline function', function () {
assert.equal(lunr.trimmer.label, 'trimmer')
assert.equal(lunr.Pipeline.registeredFunctions['trimmer'], lunr.trimmer)
})
})

@@ -1,64 +0,59 @@

module("lunr.Vector")
suite('lunr.Vector', function () {
var vectorFromArgs = function () {
var vector = new lunr.Vector
test("calculating the magnitude of a vector", function () {
var vector = new lunr.Vector,
elements = [4,5,6]
Array.prototype.slice.call(arguments)
.forEach(function (el, i) {
vector.insert(i, el)
})
elements.forEach(function (el, i) { vector.insert(i, el) })
return vector
}
equal(vector.magnitude(), Math.sqrt(77))
})
suite('#magnitude', function () {
test('calculates magnitude of a vector', function () {
var vector = vectorFromArgs(4,5,6)
assert.equal(Math.sqrt(77), vector.magnitude())
})
})
test("calculating the dot product with another vector", function () {
var v1 = new lunr.Vector,
v2 = new lunr.Vector,
els1 = [1, 3, -5],
els2 = [4, -2, -1]
suite('#dot', function () {
test('calculates dot product of two vectors', function () {
var v1 = vectorFromArgs(1, 3, -5),
v2 = vectorFromArgs(4, -2, -1)
assert.equal(3, v1.dot(v2))
})
})
els1.forEach(function (el, i) { v1.insert(i, el) })
els2.forEach(function (el, i) { v2.insert(i, el) })
suite('#similarity', function () {
test('calculates the similarity between two vectors', function () {
var v1 = vectorFromArgs(1, 3, -5),
v2 = vectorFromArgs(4, -2, -1)
equal(v1.dot(v2), 3)
})
assert.approximately(v1.similarity(v2), 0.111, 0.001)
})
})
test("calculating the similarity between two vectors", function () {
var v1 = new lunr.Vector,
v2 = new lunr.Vector,
els1 = [1, 3, -5],
els2 = [4, -2, -1]
suite('#insert', function () {
test('invalidates magnitude cache', function () {
var vector = vectorFromArgs(4,5,6)
els1.forEach(function (el, i) { v1.insert(i, el) })
els2.forEach(function (el, i) { v2.insert(i, el) })
assert.equal(Math.sqrt(77), vector.magnitude())
var similarity = v1.similarity(v2),
roundedSimilarity = Math.round(similarity * 1000) / 1000
vector.insert(3, 7)
equal(roundedSimilarity, 0.111)
})
assert.equal(Math.sqrt(126), vector.magnitude())
})
test("inserting an element invalidates the magnitude cache", function () {
var vector = new lunr.Vector,
elements = [4,5,6]
test('keeps items in index specified order', function () {
var vector = new lunr.Vector
elements.forEach(function (el, i) { vector.insert(i, el) })
vector.insert(2, 4)
vector.insert(1, 5)
vector.insert(0, 6)
equal(vector.magnitude(), Math.sqrt(77))
vector.insert(3, 7)
equal(vector.magnitude(), Math.sqrt(126))
assert.deepEqual([6,5,4], vector.toArray())
})
})
})
test("inserted elements are kept in index order", function () {
var vector = new lunr.Vector,
elements = [6,5,4]
vector.insert(2, 4)
vector.insert(1, 5)
vector.insert(0, 6)
equal(vector.list.idx, 0)
equal(vector.list.next.idx, 1)
equal(vector.list.next.next.idx, 2)
})

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is too big to display

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

Sorry, the diff of this file is not supported yet

SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc