text-annotator
Advanced tools
Comparing version 0.6.9 to 0.7.0
{ | ||
"name": "text-annotator", | ||
"version": "0.6.9", | ||
"version": "0.7.0", | ||
"description": "A JavaScript library for locating and annotating plain text in HTML", | ||
@@ -5,0 +5,0 @@ "main": "build/text-annotator.js", |
@@ -12,2 +12,3 @@ import getSentences from './ext/sbd' | ||
const content = options.content | ||
// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false | ||
const isHTML = options.isHTML === undefined || options.isHTML | ||
@@ -20,3 +21,2 @@ | ||
: content | ||
// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false | ||
this.isHTML = isHTML | ||
@@ -27,3 +27,3 @@ | ||
this.tagLocations = [] | ||
// sentences are used in (sentence based) fuzzy search | ||
// sentences are used in sentence-based fuzzy search | ||
this.sentences = [] | ||
@@ -38,3 +38,3 @@ // one highlight can have more than one location because of the potential issue in tag insertion*** | ||
// lastHighlightIndex can be within options*** | ||
// lastHighlightIndex can be within options; it is currently used by searchAll | ||
search(str, options = {}, lastHighlightIndex) { | ||
@@ -98,3 +98,3 @@ let prefix = options.prefix || '' | ||
// only support directly search for now*** | ||
// only support direct search for now | ||
searchAll(str, options = {}) { | ||
@@ -145,2 +145,3 @@ const highlightIndexes = [] | ||
) | ||
// it has to be set after adjustLoc so that it will not be checked | ||
this.highlights[highlightIndex].highlighted = true | ||
@@ -163,8 +164,10 @@ | ||
: content | ||
highlightIndexes.forEach(highlightIndex => { | ||
for (let i = 0; i < highlightIndexes.length; i++) { | ||
options.content = newContent | ||
newContent = this.highlight(highlightIndex, options) | ||
}) | ||
newContent = this.highlight(highlightIndexes[i], options) | ||
} | ||
if (!isBrowser || !containerId || returnContent) { | ||
if (isBrowser && containerId && !returnContent) { | ||
document.getElementById(containerId).innerHTML = newContent | ||
} else { | ||
return newContent | ||
@@ -184,4 +187,2 @@ } | ||
// add searchAndHighlightAll*** | ||
unhighlight(highlightIndex, options = {}) { | ||
@@ -198,2 +199,3 @@ // byStringOperation is used to decide whether the content is changed by string operation or dom operation | ||
// it has to be set before adjustLoc so that it will not be checked | ||
this.highlights[highlightIndex].highlighted = false | ||
@@ -243,4 +245,2 @@ | ||
// add unighlightAll*** | ||
stripAndStoreHTMLTags() { | ||
@@ -276,2 +276,3 @@ let tag | ||
// for searchAll | ||
let offset = 0 | ||
@@ -301,3 +302,3 @@ if (lastHighlightIndex !== undefined) { | ||
let highlightIndex = -1 | ||
// IE will not be considered*** | ||
// IE is not considered | ||
if (window.find) { | ||
@@ -313,2 +314,3 @@ document.designMode = 'on' | ||
// step 2: locate the found within the container where the annotator is applied | ||
// selector may become better | ||
const found = document.querySelector( | ||
@@ -348,15 +350,15 @@ '#' + | ||
// improve later*** | ||
fuzzySearch(prefix, str, postfix, fuzzySearchOptions = {}) { | ||
const caseSensitive = fuzzySearchOptions.caseSensitive | ||
const tokenBased = fuzzySearchOptions.tokenBased | ||
let tbThreshold = fuzzySearchOptions.tbThreshold || 0.68 | ||
const tokenBased = fuzzySearchOptions.tokenBased | ||
// sentence-based fuzzy search is enabled by default | ||
const sentenceBased = | ||
fuzzySearchOptions.sentenceBased === undefined || | ||
fuzzySearchOptions.sentenceBased | ||
let sbThreshold = fuzzySearchOptions.sbThreshold || 0.85 | ||
const lenRatio = fuzzySearchOptions.lenRatio || 2 | ||
const processSentence = fuzzySearchOptions.processSentence | ||
const sentenceBased = | ||
fuzzySearchOptions.sentenceBased === undefined || | ||
fuzzySearchOptions.sentenceBased | ||
@@ -377,7 +379,9 @@ let highlightIndex = -1 | ||
const fragment = prefix + str + postfix | ||
for (const i of strIndexes) { | ||
for (let i = 0; i < strIndexes.length; i++) { | ||
const si = strIndexes[i] | ||
// f can be wider | ||
const f = | ||
text.substring(i - prefix.length, i) + | ||
text.substring(si - prefix.length, si) + | ||
str + | ||
text.substring(i + str.length, i + str.length + postfix.length) | ||
text.substring(si + str.length, si + str.length + postfix.length) | ||
const similarity = TextAnnotator.getSimilarity( | ||
@@ -390,3 +394,3 @@ f, | ||
tbThreshold = similarity | ||
strIndex = i | ||
strIndex = si | ||
} | ||
@@ -414,6 +418,6 @@ } | ||
const filteredSentences = [] | ||
for (const sentence of sentences) { | ||
for (const word of words) { | ||
if (sentence.raw.includes(word)) { | ||
filteredSentences.push(sentence) | ||
for (let i = 0; i < sentences.length; i++) { | ||
for (let j = 0; j < words.length; j++) { | ||
if (sentences[i].raw.includes(words[j])) { | ||
filteredSentences.push(sentences[i]) | ||
break | ||
@@ -424,37 +428,39 @@ } | ||
//step 2.5: remove text that must not be annotated | ||
//step 3 (optional) | ||
if (processSentence) { | ||
const tagLocations = this.tagLocations | ||
const length = tagLocations.length | ||
if (length) { | ||
let index = 0 | ||
for (const fs of filteredSentences) { | ||
let raw = fs.raw | ||
const loc = [fs.index, fs.index + raw.length] | ||
let locInc = 0 | ||
for (let i = index; i < length; i++) { | ||
const tagLoc = tagLocations[i] | ||
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) { | ||
const tag = this.originalContent.substring( | ||
tagLoc[0] + tagLoc[2], | ||
tagLoc[0] + tagLoc[2] + tagLoc[1] | ||
) | ||
const insertIndex = tagLoc[0] + locInc - loc[0] | ||
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex) | ||
locInc += tagLoc[1] | ||
} else if (tagLoc[0] > loc[1]) { | ||
index = i - 1 | ||
break | ||
} | ||
let index = 0 | ||
// for each sentence | ||
for (let i = 0; i < filteredSentences.length; i++) { | ||
const fs = filteredSentences[i] | ||
let raw = fs.raw | ||
// loc without tags | ||
const loc = [fs.index, fs.index + raw.length] | ||
let locInc = 0 | ||
// add loc of all tags before the one being checked so as to derive the actual loc | ||
const tagLocations = this.tagLocations | ||
// for each loc of tag whose loc is larger than the last sentence | ||
for (let j = index; j < tagLocations.length; j++) { | ||
const tagLoc = tagLocations[j] | ||
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) { | ||
const tag = this.originalContent.substring( | ||
tagLoc[0] + tagLoc[2], | ||
tagLoc[0] + tagLoc[2] + tagLoc[1] | ||
) | ||
const insertIndex = tagLoc[0] + locInc - loc[0] | ||
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex) | ||
locInc += tagLoc[1] | ||
} else if (tagLoc[0] > loc[1]) { | ||
index = j // not sure this part | ||
break | ||
} | ||
} | ||
raw = processSentence(raw) | ||
raw = raw.replace(/(<([^>]+)>)/gi, '') | ||
raw = processSentence(raw) | ||
raw = raw.replace(/(<([^>]+)>)/gi, '') | ||
const copy = fs.raw | ||
// update the sentence if it got reduced | ||
if (copy !== raw) { | ||
fs.raw = raw | ||
fs.index = fs.index + copy.indexOf(raw) | ||
} | ||
const copy = fs.raw | ||
// update the sentence if it got reduced | ||
if (copy !== raw) { | ||
fs.raw = raw | ||
fs.index = fs.index + copy.indexOf(raw) | ||
} | ||
@@ -464,3 +470,3 @@ } | ||
// // step 3: find the sentence that includes the most similar str | ||
// // step 4: find the sentence that includes the most similar str | ||
// let bestResult = null | ||
@@ -497,3 +503,3 @@ // let mostPossibleSentence = null | ||
// // step 4: if such sentence is found, derive and return the location of the most similar str | ||
// // step 5: if such sentence is found, derive and return the location of the most similar str | ||
// if (bestResult) { | ||
@@ -507,5 +513,6 @@ // let index = mostPossibleSentence.index | ||
// step 3: find the most possible sentence | ||
// step 4: find the most possible sentence | ||
let mostPossibleSentence = null | ||
filteredSentences.forEach((sentence, index) => { | ||
for (let i = 0; i < filteredSentences.length; i++) { | ||
const sentence = filteredSentences[i] | ||
const similarity = TextAnnotator.getSimilarity( | ||
@@ -519,8 +526,8 @@ sentence.raw, | ||
mostPossibleSentence = sentence | ||
} else if (index !== filteredSentences.length - 1) { | ||
} else if (i !== filteredSentences.length - 1) { | ||
// combine two sentences to reduce the inaccuracy of sentenizing text | ||
const newSentenceRaw = sentence.raw + filteredSentences[index + 1].raw | ||
const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw | ||
const lengthDiff = | ||
Math.abs(newSentenceRaw.length - str.length) / str.length | ||
// whether allowing the customization of length diff threshold**** | ||
// whether allowing the customization of length diff threshold*** | ||
if (lengthDiff <= 0.1) { | ||
@@ -541,5 +548,5 @@ const newSimilarity = TextAnnotator.getSimilarity( | ||
} | ||
}) | ||
} | ||
// step 4: if the most possible sentence is found, derive and return the location of the most similar str from it | ||
// step 5: if the most possible sentence is found, derive and return the location of the most similar str from it | ||
if (mostPossibleSentence) { | ||
@@ -568,3 +575,4 @@ const result = TextAnnotator.getBestSubstring( | ||
adjustLoc(highlightIdPattern, highlightIndex, highlightClass) { | ||
const highlightLoc = this.highlights[highlightIndex].loc | ||
const { highlights } = this | ||
const highlightLoc = highlights[highlightIndex].loc | ||
const locInc = [0, 0] | ||
@@ -637,7 +645,8 @@ | ||
// step 2: check locations of other highlights | ||
this.highlights.forEach((highlight, highlightIndex) => { | ||
for (let i = 0; i < highlights.length; i++) { | ||
const highlight = highlights[i] | ||
if (highlight.highlighted) { | ||
const openTagLength = TextAnnotator.getOpenTagLength( | ||
highlightIdPattern, | ||
highlightIndex, | ||
i, | ||
highlightClass | ||
@@ -670,3 +679,3 @@ ) | ||
} | ||
}) | ||
} | ||
@@ -745,2 +754,3 @@ return [highlightLoc[0] + locInc[0], highlightLoc[1] + locInc[1]] | ||
// step 1: derive best substr | ||
// /s may be better*** | ||
const words = str.split(' ') | ||
@@ -747,0 +757,0 @@ while (words.length) { |
2637684
2097