text-annotator
Advanced tools
Comparing version 0.7.0 to 0.7.1
@@ -19,11 +19,11 @@ "use strict"; | ||
const containerId = options.containerId; | ||
const content = options.content; | ||
const content = options.content; // isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false | ||
const isHTML = options.isHTML === undefined || options.isHTML; // containerId has higher priority over content | ||
this.originalContent = isBrowser && containerId ? document.getElementById(containerId).innerHTML : content; // isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false | ||
this.originalContent = isBrowser && containerId ? document.getElementById(containerId).innerHTML : content; | ||
this.isHTML = isHTML; // stripedHTML and tagLocations are needed only when the content is HTML | ||
this.stripedHTML = ''; | ||
this.tagLocations = []; // sentences are used in (sentence based) fuzzy search | ||
this.tagLocations = []; // sentences are used in sentence-based fuzzy search | ||
@@ -37,3 +37,3 @@ this.sentences = []; // one highlight can have more than one location because of the potential issue in tag insertion*** | ||
} | ||
} // lastHighlightIndex can be within options*** | ||
} // lastHighlightIndex can be within options; it is currently used by searchAll | ||
@@ -83,3 +83,3 @@ | ||
return highlightIndex; | ||
} // only support directly search for now*** | ||
} // only support direct search for now | ||
@@ -119,3 +119,4 @@ | ||
let newContent = TextAnnotator.insert(content, openTag, loc[0]); | ||
newContent = TextAnnotator.insert(newContent, TextAnnotator.createCloseTag(), loc[1] + openTag.length); | ||
newContent = TextAnnotator.insert(newContent, TextAnnotator.createCloseTag(), loc[1] + openTag.length); // it has to be set after adjustLoc so that it will not be checked | ||
this.highlights[highlightIndex].highlighted = true; | ||
@@ -138,8 +139,11 @@ | ||
let newContent = isBrowser && containerId ? document.getElementById(containerId).innerHTML : content; | ||
highlightIndexes.forEach(highlightIndex => { | ||
for (let i = 0; i < highlightIndexes.length; i++) { | ||
options.content = newContent; | ||
newContent = this.highlight(highlightIndex, options); | ||
}); | ||
newContent = this.highlight(highlightIndexes[i], options); | ||
} | ||
if (!isBrowser || !containerId || returnContent) { | ||
if (isBrowser && containerId && !returnContent) { | ||
document.getElementById(containerId).innerHTML = newContent; | ||
} else { | ||
return newContent; | ||
@@ -158,5 +162,4 @@ } | ||
} | ||
} // add searchAndHighlightAll*** | ||
} | ||
unhighlight(highlightIndex, options = {}) { | ||
@@ -171,3 +174,4 @@ // byStringOperation is used to decide whether the content is changed by string operation or dom operation | ||
const returnContent = options.returnContent; | ||
const returnContent = options.returnContent; // it has to be set before adjustLoc so that it will not be checked | ||
this.highlights[highlightIndex].highlighted = false; | ||
@@ -200,5 +204,4 @@ | ||
} | ||
} // add unighlightAll*** | ||
} | ||
stripAndStoreHTMLTags() { | ||
@@ -227,4 +230,5 @@ let tag; | ||
text = text.toLowerCase(); | ||
} | ||
} // for searchAll | ||
let offset = 0; | ||
@@ -256,3 +260,3 @@ | ||
const strWithFixes = prefix + str + postfix; | ||
let highlightIndex = -1; // IE will not be considered*** | ||
let highlightIndex = -1; // IE is not considered | ||
@@ -268,2 +272,3 @@ if (window.find) { | ||
sel.collapseToEnd(); // step 2: locate the found within the container where the annotator is applied | ||
// the selector used here could be improved | ||
@@ -297,13 +302,13 @@ const found = document.querySelector('#' + containerId + ' [style="background-color: rgba(255, 255, 255, 0);"]'); | ||
return highlightIndex; | ||
} // improve later*** | ||
} | ||
fuzzySearch(prefix, str, postfix, fuzzySearchOptions = {}) { | ||
const caseSensitive = fuzzySearchOptions.caseSensitive; | ||
let tbThreshold = fuzzySearchOptions.tbThreshold || 0.68; | ||
const tokenBased = fuzzySearchOptions.tokenBased; | ||
let tbThreshold = fuzzySearchOptions.tbThreshold || 0.68; // sentence-based fuzzy search is enabled by default | ||
const sentenceBased = fuzzySearchOptions.sentenceBased === undefined || fuzzySearchOptions.sentenceBased; | ||
let sbThreshold = fuzzySearchOptions.sbThreshold || 0.85; | ||
const lenRatio = fuzzySearchOptions.lenRatio || 2; | ||
const processSentence = fuzzySearchOptions.processSentence; | ||
const sentenceBased = fuzzySearchOptions.sentenceBased === undefined || fuzzySearchOptions.sentenceBased; | ||
let highlightIndex = -1; | ||
@@ -325,4 +330,6 @@ const text = this.isHTML ? this.stripedHTML : this.originalContent; // token-based | ||
for (const i of strIndexes) { | ||
const f = text.substring(i - prefix.length, i) + str + text.substring(i + str.length, i + str.length + postfix.length); | ||
for (let i = 0; i < strIndexes.length; i++) { | ||
const si = strIndexes[i]; // f can be wider | ||
const f = text.substring(si - prefix.length, si) + str + text.substring(si + str.length, si + str.length + postfix.length); | ||
const similarity = TextAnnotator.getSimilarity(f, fragment, caseSensitive); | ||
@@ -332,3 +339,3 @@ | ||
tbThreshold = similarity; | ||
strIndex = i; | ||
strIndex = si; | ||
} | ||
@@ -358,49 +365,49 @@ } // step 3: check whether the most similar enough "fragment" is found, if yes return its location | ||
for (const sentence of sentences) { | ||
for (const word of words) { | ||
if (sentence.raw.includes(word)) { | ||
filteredSentences.push(sentence); | ||
for (let i = 0; i < sentences.length; i++) { | ||
for (let j = 0; j < words.length; j++) { | ||
if (sentences[i].raw.includes(words[j])) { | ||
filteredSentences.push(sentences[i]); | ||
break; | ||
} | ||
} | ||
} //step 2.5: remove text that must not be annotated | ||
} //step 3 (optional) | ||
if (processSentence) { | ||
const tagLocations = this.tagLocations; | ||
const length = tagLocations.length; | ||
let index = 0; // for each sentence | ||
if (length) { | ||
let index = 0; | ||
for (let i = 0; i < filteredSentences.length; i++) { | ||
const fs = filteredSentences[i]; | ||
let raw = fs.raw; // loc without tags | ||
for (const fs of filteredSentences) { | ||
let raw = fs.raw; | ||
const loc = [fs.index, fs.index + raw.length]; | ||
let locInc = 0; | ||
const loc = [fs.index, fs.index + raw.length]; | ||
let locInc = 0; // add loc of all tags before the one being checked so as to derive the actual loc | ||
for (let i = index; i < length; i++) { | ||
const tagLoc = tagLocations[i]; | ||
const tagLocations = this.tagLocations; // for each loc of tag whose loc is larger than the last sentence | ||
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) { | ||
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); | ||
const insertIndex = tagLoc[0] + locInc - loc[0]; | ||
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex); | ||
locInc += tagLoc[1]; | ||
} else if (tagLoc[0] > loc[1]) { | ||
index = i - 1; | ||
break; | ||
} | ||
} | ||
for (let j = index; j < tagLocations.length; j++) { | ||
const tagLoc = tagLocations[j]; | ||
raw = processSentence(raw); | ||
raw = raw.replace(/(<([^>]+)>)/gi, ''); | ||
const copy = fs.raw; // update the sentence if it got reduced | ||
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) { | ||
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); | ||
const insertIndex = tagLoc[0] + locInc - loc[0]; | ||
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex); | ||
locInc += tagLoc[1]; | ||
} else if (tagLoc[0] > loc[1]) { | ||
index = j; // not sure this part | ||
if (copy !== raw) { | ||
fs.raw = raw; | ||
fs.index = fs.index + copy.indexOf(raw); | ||
break; | ||
} | ||
} | ||
raw = processSentence(raw); | ||
raw = raw.replace(/(<([^>]+)>)/gi, ''); | ||
const copy = fs.raw; // update the sentence if it got reduced | ||
if (copy !== raw) { | ||
fs.raw = raw; | ||
fs.index = fs.index + copy.indexOf(raw); | ||
} | ||
} | ||
} // // step 3: find the sentence that includes the most similar str | ||
} // // step 4: find the sentence that includes the most similar str | ||
// let bestResult = null | ||
@@ -436,3 +443,3 @@ // let mostPossibleSentence = null | ||
// }) | ||
// // step 4: if such sentence is found, derive and return the location of the most similar str | ||
// // step 5: if such sentence is found, derive and return the location of the most similar str | ||
// if (bestResult) { | ||
@@ -445,7 +452,9 @@ // let index = mostPossibleSentence.index | ||
// } | ||
// step 3: find the most possible sentence | ||
// step 4: find the most possible sentence | ||
let mostPossibleSentence = null; | ||
filteredSentences.forEach((sentence, index) => { | ||
for (let i = 0; i < filteredSentences.length; i++) { | ||
const sentence = filteredSentences[i]; | ||
const similarity = TextAnnotator.getSimilarity(sentence.raw, str, caseSensitive); | ||
@@ -456,6 +465,6 @@ | ||
mostPossibleSentence = sentence; | ||
} else if (index !== filteredSentences.length - 1) { | ||
} else if (i !== filteredSentences.length - 1) { | ||
// combine two adjacent sentences to reduce the inaccuracy of splitting text into sentences | ||
const newSentenceRaw = sentence.raw + filteredSentences[index + 1].raw; | ||
const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length; // whether allowing the customization of length diff threshold**** | ||
const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw; | ||
const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length; // whether allowing the customization of length diff threshold*** | ||
@@ -474,4 +483,5 @@ if (lengthDiff <= 0.1) { | ||
} | ||
}); // step 4: if the most possible sentence is found, derive and return the location of the most similar str from it | ||
} // step 5: if the most possible sentence is found, derive and return the location of the most similar str from it | ||
if (mostPossibleSentence) { | ||
@@ -494,3 +504,6 @@ const result = TextAnnotator.getBestSubstring(mostPossibleSentence.raw, str, sbThreshold, lenRatio, caseSensitive, true); | ||
adjustLoc(highlightIdPattern, highlightIndex, highlightClass) { | ||
const highlightLoc = this.highlights[highlightIndex].loc; | ||
const { | ||
highlights | ||
} = this; | ||
const highlightLoc = highlights[highlightIndex].loc; | ||
const locInc = [0, 0]; // step 1: check locations of tags | ||
@@ -558,5 +571,7 @@ | ||
this.highlights.forEach((highlight, highlightIndex) => { | ||
for (let i = 0; i < highlights.length; i++) { | ||
const highlight = highlights[i]; | ||
if (highlight.highlighted) { | ||
const openTagLength = TextAnnotator.getOpenTagLength(highlightIdPattern, highlightIndex, highlightClass); | ||
const openTagLength = TextAnnotator.getOpenTagLength(highlightIdPattern, i, highlightClass); | ||
const closeTagLength = TextAnnotator.getCloseTagLength(); | ||
@@ -580,3 +595,4 @@ const loc = highlight.loc; | ||
} | ||
}); | ||
} | ||
return [highlightLoc[0] + locInc[0], highlightLoc[1] + locInc[1]]; | ||
@@ -649,2 +665,3 @@ } | ||
// step 1: derive best substr | ||
// splitting on whitespace (/\s+/) may be better than splitting on a single space*** | ||
const words = str.split(' '); | ||
@@ -651,0 +668,0 @@ |
{ | ||
"name": "text-annotator", | ||
"version": "0.7.0", | ||
"version": "0.7.1", | ||
"description": "A JavaScript library for locating and annotating plain text in HTML", | ||
@@ -5,0 +5,0 @@ "main": "build/text-annotator.js", |
2638315
2105