@@ -8,722 +8,265 @@ "use strict";

		var _htmlEntities = require("html-entities");

		var _sbd = _interopRequireDefault(require("./ext/sbd"));

		function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

		// div inside span is a bad idea
		const blockElements = ['address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main', 'nav', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video'];

		class TextAnnotator {
		constructor(options = {}) {
		const content = options.content; // isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false
		constructor(html) {
		this.html = html;

		const isHTML = options.isHTML === undefined \|\| options.isHTML; // annotatedContent is introduced in order to avoid passing content in the methods
		const {
		text,
		tags
		} = this._stripHTMLTags(html);

		this.originalContent = this.annotatedContent = content;
		this.isHTML = isHTML; // stripedHTML and tagLocations are needed only when the content is HTML
		this.text = text; // [{ index, length, isCloseTag, annotationIndex* }]; ordered by index

		this.stripedHTML = '';
		this.tagLocations = []; // sentences are used in sentence-based fuzzy search
		this.tags = tags; // [{ index, length }]; unordered

		this.sentences = []; // future work: one highlight can have more than one location because of the potential issue in tag insertion
		this.annotations = [];
		}

		this.highlights = [];
		search(searchText, {
		prefix = '',
		postfix = '',
		trim = true,
		caseSensitive = false,
		offset = 0
		} = {}) {
		const {
		text,
		annotations
		} = this;
		let str = prefix + searchText + postfix;
		str = trim ? str.trim() : str;
		str = caseSensitive ? str : str.toLowerCase();
		const index = (caseSensitive ? text : text.toLowerCase()).indexOf(str, offset);
		const prefixLength = trim ? prefix.replace(/^\s+/, '').length : prefix.length;
		const postfixLength = trim ? postfix.replace(/\s+$/, '').length : postfix.length;
		return index === -1 ? -1 : annotations.push({
		index: index + prefixLength,
		length: str.substring(prefixLength, str.length - postfixLength).length
		}) - 1;
		}

		if (isHTML) {
		this.stripAndStoreHTMLTags();
		}
		} // the order of directSearch => fuzzy search => eager search is tailored for specific feature, it is now the default way of search but it can be customized via options. More customizations can be done by composing functions
		searchAll(searchText, options) {
		let offset = 0;
		const annotationIndexes = [];
		let annotationIndex = -1; // do not mutate param

		const newOptions = Object.assign({}, options);

		search(str, options = {}) {
		let prefix = options.prefix \|\| '';
		let postfix = options.postfix \|\| '';
		const directSearchOptions = options.directSearchOptions \|\| {};
		const fuzzySearchOptions = options.fuzzySearchOptions;
		const eagerSearchOptions = options.eagerSearchOptions; // trim by default
		do {
		annotationIndex = this.search(searchText, newOptions);

		const trim = options.trim === undefined \|\| options.trim; // used unless overwritten

		const caseSensitive = options.caseSensitive;

		if (trim) {
		const res = TextAnnotator.trim(prefix, str, postfix);
		prefix = res.prefix;
		str = res.str;
		postfix = res.postfix;
		}

		let highlightIndex = -1; // direct search will always be performed

		highlightIndex = this.directSearch(prefix, str, postfix, Object.assign({
		caseSensitive
		}, directSearchOptions));

		if (highlightIndex !== -1) {
		return highlightIndex;
		} // experimental feature


		if (fuzzySearchOptions) {
		highlightIndex = this.fuzzySearch(prefix, str, postfix, Object.assign({
		caseSensitive
		}, fuzzySearchOptions));

		if (highlightIndex !== -1) {
		return highlightIndex;
		if (annotationIndex !== -1) {
		offset = this.annotations[annotationIndex].index + 1;
		newOptions.offset = offset;
		annotationIndexes.push(annotationIndex);
		}
		} // experimental feature
		// eager search only works in (particular) browsers
		} while (annotationIndex !== -1);

		return annotationIndexes;
		}

		if (eagerSearchOptions) {
		highlightIndex = this.eagerSearch(prefix, str, postfix, Object.assign({
		caseSensitive
		}, eagerSearchOptions));
		annotate(annotationIndex, {
		tagName = 'span',
		baseClassName = 'annotation',
		classPattern = 'annotation-'
		} = {}) {
		const {
		tags,
		annotations,
		_insert,
		_binaryInsert
		} = this;
		const annotation = annotations[annotationIndex]; // [start, end, offset]

		if (highlightIndex !== -1) {
		return highlightIndex;
		}
		}
		const annotationLocation = [annotation.index, annotation.index + annotation.length, 0]; // partition

		return highlightIndex;
		} // experimental feature
		// only support direct search for now
		const annotatorLocations = [[...annotationLocation]];

		for (let i = 0; i < tags.length; i++) {
		const {
		index: tagIndex,
		length: tagLength
		} = tags[i];

		searchAll(str, options = {}) {
		const highlightIndexes = [];
		if (tagIndex <= annotationLocation[0]) {
		annotatorLocations[0][2] += tagLength;
		} else if (tagIndex < annotationLocation[1]) {
		const lastTagIndex = i === 0 ? 0 : tags[i - 1].index;
		const lastAnnotatorLocation = annotatorLocations[annotatorLocations.length - 1];

		const continueSearch = (str, options) => {
		const highlightIndex = this.search(str, options);

		if (highlightIndex !== -1) {
		highlightIndexes.push(highlightIndex);
		options.directSearchOptions = options.directSearchOptions \|\| {};
		options.directSearchOptions.lastHighlightIndex = highlightIndex;
		continueSearch(str, options);
		if (tagIndex === lastTagIndex) {
		lastAnnotatorLocation[2] += tagLength;
		} else {
		const annotatorLocationEnd = lastAnnotatorLocation[1];
		lastAnnotatorLocation[1] = tagIndex;
		annotatorLocations.push([lastAnnotatorLocation[1], annotatorLocationEnd, lastAnnotatorLocation[2] + tagLength]);
		}
		} else {
		break;
		}
		};
		} // insert annotator tags into tag list and html

		continueSearch(str, options);
		return highlightIndexes;
		}

		highlight(highlightIndex, options = {}) {
		const highlightTagName = options.highlightTagName \|\| 'span';
		const highlightClass = options.highlightClass \|\| 'highlight';
		const highlightIdPattern = options.highlightIdPattern \|\| 'highlight-';
		const openTag = TextAnnotator.createOpenTag(highlightTagName, highlightIdPattern, highlightIndex, highlightClass);
		const loc = this.adjustLoc(highlightTagName, highlightIdPattern, highlightIndex, highlightClass);
		this.annotatedContent = TextAnnotator.insert(this.annotatedContent, openTag, loc[0]);
		this.annotatedContent = TextAnnotator.insert(this.annotatedContent, TextAnnotator.createCloseTag(highlightTagName), loc[1] + openTag.length); // it has to be set after adjustLoc so that it will not be checked
		const annotatorOpenTag = `<${tagName} class="${baseClassName} ${classPattern}${annotationIndex}">`;
		const annotatorCloseTag = `</${tagName}>`;
		const annotatorOpenTagLength = annotatorOpenTag.length;
		const annotatorCloseTagLength = annotatorCloseTag.length;
		let locInc = 0;

		this.highlights[highlightIndex].highlighted = true;
		return this.annotatedContent;
		} // experimental feature
		for (let i = 0; i < annotatorLocations.length; i++) {
		const annotatorLocation = annotatorLocations[i];

		_binaryInsert(tags, {
		index: annotatorLocation[0],
		length: annotatorOpenTagLength,
		isCloseTag: false,
		annotationIndex
		}, (a, b) => {
		return a.index <= b.index ? -1 : 1;
		});

		highlightAll(highlightIndexes, options = {}) {
		for (let i = 0; i < highlightIndexes.length; i++) {
		this.annotatedContent = this.highlight(highlightIndexes[i], options);
		}
		_binaryInsert(tags, {
		index: annotatorLocation[1],
		length: annotatorCloseTagLength,
		isCloseTag: true,
		annotationIndex
		}, (a, b) => a.index - b.index);

		return this.annotatedContent;
		}

		searchAndHighlight(str, options = {}) {
		const highlightIndex = this.search(str, options.searchOptions);

		if (highlightIndex !== -1) {
		return {
		highlightIndex,
		content: this.highlight(highlightIndex, options.highlightOptions)
		};
		this.html = _insert(this.html, annotatorOpenTag, annotatorLocation[0] + annotatorLocation[2] + locInc);
		this.html = _insert(this.html, annotatorCloseTag, annotatorLocation[1] + annotatorLocation[2] + locInc + annotatorOpenTagLength);
		locInc += annotatorOpenTagLength + annotatorCloseTagLength;
		}
		}

		unhighlight(highlightIndex, options = {}) {
		const highlightTagName = options.highlightTagName \|\| 'span';
		const highlightClass = options.highlightClass \|\| 'highlight';
		const highlightIdPattern = options.highlightIdPattern \|\| 'highlight-'; // it has to be set before adjustLoc so that it will not be checked

		this.highlights[highlightIndex].highlighted = false; // need to change when one annotation => more than one highlight

		const loc = this.adjustLoc(highlightTagName, highlightIdPattern, highlightIndex, highlightClass);
		const openTagLength = TextAnnotator.getOpenTagLength(highlightTagName, highlightIdPattern, highlightIndex, highlightClass);
		const substr1 = this.annotatedContent.substring(loc[0], loc[1] + openTagLength + TextAnnotator.getCloseTagLength(highlightTagName));
		const substr2 = this.annotatedContent.substring(loc[0] + openTagLength, loc[1] + openTagLength);
		this.annotatedContent = this.annotatedContent.replace(substr1, substr2);
		return this.annotatedContent;
		return this.html;
		}

		stripAndStoreHTMLTags() {
		let tag;
		this.stripedHTML = this.originalContent;
		const tagRegEx = /<[^>]+>/;
		let indexInc = 0;

		while (tag = this.stripedHTML.match(tagRegEx)) {
		this.stripedHTML = this.stripedHTML.replace(tag, '');
		const tagLength = tag[0].length; // tagLocations will be used in adjustLoc

		this.tagLocations.push([tag.index, tagLength, indexInc]);
		indexInc += tagLength;
		}
		annotateAll(annotationIndexes, options) {
		annotationIndexes.forEach(annotationIndex => {
		this.annotate(annotationIndex, options);
		});
		return this.html;
		}

		directSearch(prefix, str, postfix, directSearchOptions = {}) {
		const caseSensitive = directSearchOptions.caseSensitive; // experimental option; used for specific feature

		const ifEncode = directSearchOptions.encode;
		const lastHighlightIndex = directSearchOptions.lastHighlightIndex;
		let strWithFixes = prefix + str + postfix;
		let text = this.isHTML ? this.stripedHTML : this.originalContent;

		if (!caseSensitive) {
		strWithFixes = strWithFixes.toLowerCase();
		text = text.toLowerCase();
		} // for searchAll


		let offset = 0;

		if (lastHighlightIndex !== undefined) {
		offset = this.highlights[lastHighlightIndex].loc[1] + 1;
		}

		let highlightIndex = -1;
		const index = text.indexOf(strWithFixes, offset); // experimental feature: if the text to be searched does not work, try to encode it

		if (ifEncode && index === -1) {
		const encodedStrWithFixes = (0, _htmlEntities.encode)(strWithFixes);
		const index = text.indexOf(encodedStrWithFixes, offset);

		if (index !== -1) {
		const loc = [];
		loc[0] = index + (0, _htmlEntities.encode)(prefix).length;
		loc[1] = loc[0] + (0, _htmlEntities.encode)(str).length;
		highlightIndex = this.highlights.push({
		loc
		}) - 1;
		unannotate(annotationIndex) {
		// annotatorIndexesInTags amd annotators have the same size
		const annotatorIndexesInTags = [];
		const annotators = this.tags.filter((tag, index) => {
		if (tag.annotationIndex === annotationIndex) {
		annotatorIndexesInTags.push(index);
		}
		} else if (index !== -1) {
		const loc = [];
		loc[0] = index + prefix.length;
		loc[1] = loc[0] + str.length;
		highlightIndex = this.highlights.push({
		loc
		}) - 1;
		}

		return highlightIndex;
		}
		return tag.annotationIndex === annotationIndex;
		});
		const otherTags = this.tags.filter(tag => tag.annotationIndex !== annotationIndex); // find index difference

		eagerSearch(prefix, str, postfix, eagerSearchOptions = {}) {
		const caseSensitive = eagerSearchOptions.caseSensitive;
		const containerId = eagerSearchOptions.containerId;
		const threshold = eagerSearchOptions.threshold \|\| 0.74;
		const strWithFixes = prefix + str + postfix;
		let highlightIndex = -1; // IE is not considered
		for (let i = 0; i < annotators.length; i++) {
		const annotator = annotators[i];
		let indexInc = 0;

		if (window.find) {
		document.designMode = 'on'; // step 1: ask the browser to highlight the found
		for (let j = 0; j < otherTags.length; j++) {
		const otherTag = otherTags[j];

		const sel = window.getSelection();
		sel.collapse(document.body, 0);

		while (window.find(strWithFixes, caseSensitive)) {
		document.execCommand('hiliteColor', true, 'rgba(255, 255, 255, 0)');
		sel.collapseToEnd(); // step 2: locate the found within the container where the annotator is applied
		// selector may become better

		const found = document.querySelector('#' + containerId + ' [style="background-color: rgba(255, 255, 255, 0);"]');

		if (found) {
		const foundStr = found.innerHTML.replace(/<[^>]*>/g, '');
		const result = TextAnnotator.getBestSubstring(foundStr, str, threshold);

		if (result.similarity) {
		const text = this.isHTML ? this.stripedHTML : this.originalContent;
		const index = text.indexOf(foundStr);

		if (index !== -1) {
		highlightIndex = this.highlights.push({
		loc: [index + result.loc[0], index + result.loc[1]]
		}) - 1;
		}
		}

		if (annotator.index < otherTag.index) {
		break;
		}
		} // step 3: remove the highlights created by the browser


		document.execCommand('undo');
		document.designMode = 'off';
		}

		return highlightIndex;
		}

		fuzzySearch(prefix, str, postfix, fuzzySearchOptions = {}) {
		const caseSensitive = fuzzySearchOptions.caseSensitive;
		const tokenBased = fuzzySearchOptions.tokenBased;
		let tbThreshold = fuzzySearchOptions.tbThreshold \|\| 0.68; // sentence-based fuzzy search is enabled by default

		const sentenceBased = fuzzySearchOptions.sentenceBased === undefined \|\| fuzzySearchOptions.sentenceBased;
		let sbThreshold = fuzzySearchOptions.sbThreshold \|\| 0.85;
		const maxLengthDiff = fuzzySearchOptions.maxLengthDiff \|\| 0.1;
		const lenRatio = fuzzySearchOptions.lenRatio \|\| 2;
		const processSentence = fuzzySearchOptions.processSentence;
		let highlightIndex = -1;
		const text = this.isHTML ? this.stripedHTML : this.originalContent; // token-based

		if (tokenBased \|\| prefix \|\| postfix) {
		// step 1: find all indexes of str
		const strIndexes = [];
		let i = -1;

		while ((i = text.indexOf(str, i + 1)) !== -1) {
		strIndexes.push(i);
		} // step 2: find the index of the most similar "fragment" - the str with pre- and post- fixes


		let strIndex = -1;
		const fragment = prefix + str + postfix;

		for (let i = 0; i < strIndexes.length; i++) {
		const si = strIndexes[i]; // f can be wider

		const f = text.substring(si - prefix.length, si) + str + text.substring(si + str.length, si + str.length + postfix.length);
		const similarity = TextAnnotator.getSimilarity(f, fragment, caseSensitive);

		if (similarity >= tbThreshold) {
		tbThreshold = similarity;
		strIndex = si;
		}
		} // step 3: check whether the most similar enough "fragment" is found, if yes return its location


		if (strIndex !== -1) {
		highlightIndex = this.highlights.push({
		loc: [strIndex, strIndex + str.length]
		}) - 1;
		}
		} // sentence-based
		else if (sentenceBased) {
		// step 1: sentenize the text if has not done so
		let sentences = [];

		if (this.sentences.length) {
		sentences = this.sentences;
		} else if (annotator.index > otherTag.index) {
		indexInc += otherTag.length;
		} else {
		sentences = this.sentences = TextAnnotator.sentenize(text);
		} // step 2 (for efficiency only): filter sentences by words of the str


		const words = str.split(/\s/);
		const filteredSentences = [];

		for (let i = 0; i < sentences.length; i++) {
		for (let j = 0; j < words.length; j++) {
		if (sentences[i].raw.includes(words[j])) {
		filteredSentences.push(sentences[i]);
		break;
		if (otherTag.annotationIndex === undefined) {
		if (!annotator.isCloseTag) {
		indexInc += otherTag.length;
		}
		}
		} //step 3 (optional)


		if (processSentence) {
		let index = 0; // for each sentence

		for (let i = 0; i < filteredSentences.length; i++) {
		const fs = filteredSentences[i];
		let raw = fs.raw; // loc without tags

		const loc = [fs.index, fs.index + raw.length];
		let locInc = 0; // add loc of all tags before the one being checked so as to derive the actual loc

		const tagLocations = this.tagLocations; // for each loc of tag whose loc is larger than the last sentence

		for (let j = index; j < tagLocations.length; j++) {
		const tagLoc = tagLocations[j];

		if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]);
		const insertIndex = tagLoc[0] + locInc - loc[0];
		raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex);
		locInc += tagLoc[1];
		} else if (tagLoc[0] > loc[1]) {
		index = j; // not sure this part

		break;
		} else {
		if (annotator.annotationIndex < otherTag.annotationIndex) {
		if (otherTag.isCloseTag) {
		indexInc += otherTag.length;
		}
		}

		raw = processSentence(raw);
		raw = raw.replace(/(<([^>]+)>)/gi, '');
		const copy = fs.raw; // update the sentence if it got reduced

		if (copy !== raw) {
		fs.raw = raw;
		fs.index = fs.index + copy.indexOf(raw);
		}
		}
		} // step 4: find the most possible sentence


		let mostPossibleSentence = null;

		for (let i = 0; i < filteredSentences.length; i++) {
		const sentence = filteredSentences[i];
		const similarity = TextAnnotator.getSimilarity(sentence.raw, str, caseSensitive);

		if (similarity >= sbThreshold) {
		sbThreshold = similarity;
		mostPossibleSentence = sentence;
		} else if (i !== filteredSentences.length - 1) {
		// combine two sentences to reduce the inaccuracy of sentenizing text
		const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw;
		const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length;

		if (lengthDiff <= maxLengthDiff) {
		const newSimilarity = TextAnnotator.getSimilarity(newSentenceRaw, str, caseSensitive);

		if (newSimilarity >= sbThreshold) {
		sbThreshold = newSimilarity;
		mostPossibleSentence = {
		raw: newSentenceRaw,
		index: sentence.index
		};
		} else {
		if (!annotator.isCloseTag) {
		indexInc += otherTag.length;
		}
		}
		}
		} // step 5: if the most possible sentence is found, derive and return the location of the most similar str from it


		if (mostPossibleSentence) {
		const result = TextAnnotator.getBestSubstring(mostPossibleSentence.raw, str, sbThreshold, lenRatio, caseSensitive, true);

		if (result.loc) {
		let index = mostPossibleSentence.index;
		highlightIndex = this.highlights.push({
		loc: [index + result.loc[0], index + result.loc[1]]
		}) - 1;
		}
		}
		}
		} // remove annotators one by one

		return highlightIndex;
		} // future work: further improvement when one annotation binds with more than one highlight
		// includeRequiredTag used in = condition only


		includeRequiredTag(i, highlightLoc, tag) {
		const isCloseTag = tag.startsWith('</');
		const tagName = isCloseTag ? tag.split('</')[1].split('>')[0] : tag.split(' ')[0].split('<')[1].split('>')[0];
		let included = false;
		let requiredTagNumber = 1;
		let requiredTagCount = 0; // if both the start tag and the end tag are at the borders, place the tags outside the borders
		// if the close tag is at the border, check backwards until the start of the highlight

		if (isCloseTag) {
		for (let i2 = i - 1; i2 >= 0; i2--) {
		const tagLoc2 = this.tagLocations[i2];

		if (highlightLoc[0] > tagLoc2[0]) {
		break;
		} else {
		const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]);

		if (tag2.startsWith('</' + tagName)) {
		requiredTagNumber++;
		} else if (tag2.startsWith('<' + tagName)) {
		requiredTagCount++;
		}

		if (requiredTagNumber === requiredTagCount) {
		included = true;
		break;
		}
		}
		}
		} // if the start tag is at the border, check forwards until the end of the highlight
		else {
		for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) {
		const tagLoc2 = this.tagLocations[i2];

		if (highlightLoc[1] < tagLoc2[0]) {
		break;
		} else {
		const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]);

		if (tag2.startsWith('<' + tagName)) {
		requiredTagNumber++;
		} else if (tag2.startsWith('</' + tagName)) {
		requiredTagCount++;
		}

		if (requiredTagNumber === requiredTagCount) {
		included = true;
		break;
		}
		}
		}
		}

		return included;
		}

		adjustLoc(highlightTagName = 'span', highlightIdPattern, highlightIndex, highlightClass) {
		const highlightLoc = this.highlights[highlightIndex].loc;
		const locInc = [0, 0]; // step 1: check locations of tags

		const length = this.tagLocations.length;

		for (let i = 0; i < length; i++) {
		const tagLoc = this.tagLocations[i]; // start end tag

		if (highlightLoc[1] < tagLoc[0]) {
		break;
		} // start end&tag
		else if (highlightLoc[1] === tagLoc[0]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if end tag, not block element and include the required close tag, add right to the tag

		if (!tag.endsWith('/>') && tag.startsWith('</') && !blockElements.includes(tag.split('</')[1].split('>')[0]) && this.includeRequiredTag(i, highlightLoc, tag)) {
		locInc[1] += tagLoc[1];
		}
		} // start tag end
		else if (highlightLoc[1] > tagLoc[0]) {
		locInc[1] += tagLoc[1]; // start&tag end

		if (highlightLoc[0] === tagLoc[0]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if self close tag or end tag or block element or not include the required close tag, add right to the tag

		if (tag.startsWith('</') \|\| tag.endsWith('/>') \|\| blockElements.includes(tag.split(' ')[0].split('<')[1].split('>')[0]) \|\| !this.includeRequiredTag(i, highlightLoc, tag)) {
		locInc[0] += tagLoc[1];
		}
		} // tag start end
		else if (highlightLoc[0] > tagLoc[0]) {
		locInc[0] += tagLoc[1];
		}
		}
		} // step 2: check locations of other highlights
		// all span (no blocks)
		// stored in a different array than tags
		// can intersect


		for (let i = 0; i < this.highlights.length; i++) {
		const highlight = this.highlights[i]; // only check the highlighted

		if (highlight.highlighted) {
		const openTagLength = TextAnnotator.getOpenTagLength(highlightTagName, highlightIdPattern, i, highlightClass);
		const closeTagLength = TextAnnotator.getCloseTagLength(highlightTagName);
		const loc = highlight.loc;

		if (highlightLoc[0] >= loc[1]) {
		locInc[0] += openTagLength + closeTagLength;
		locInc[1] += openTagLength + closeTagLength;
		} // syntactical correct but semantical incorrect
		else if (highlightLoc[0] < loc[1] && highlightLoc[0] > loc[0] && highlightLoc[1] > loc[1]) {
		locInc[0] += openTagLength;
		locInc[1] += openTagLength + closeTagLength;
		} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) {
		locInc[1] += openTagLength + closeTagLength;
		} // syntactical correct but semantical incorrect
		else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) {
		locInc[1] += openTagLength;
		} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) {
		locInc[0] += openTagLength;
		locInc[1] += openTagLength;
		}
		}
		this.html = this.html.slice(0, annotator.index + indexInc) + this.html.slice(annotator.index + indexInc + annotator.length);
		this.tags.splice(annotatorIndexesInTags[i] - i, 1);
		}

		return [highlightLoc[0] + locInc[0], highlightLoc[1] + locInc[1]];
		return this.html;
		}

		static createOpenTag(highlightTagName = 'span', highlightIdPattern, highlightIndex, highlightClass) {
		return `<${highlightTagName} id="${highlightIdPattern + highlightIndex}" class="${highlightClass}">`;
		}
		unannotateAll(annotationIndexes) {
		annotationIndexes.forEach(annotationIndex => {
		this.unannotate(annotationIndex);
		});
		return this.html;
		} // pure function

		static createCloseTag(highlightTagName = 'span') {
		return `</${highlightTagName}>`;
		}

		static getOpenTagLength(highlightTagName = 'span', highlightIdPattern, highlightIndex, highlightClass) {
		return TextAnnotator.createOpenTag(highlightTagName, highlightIdPattern, highlightIndex, highlightClass).length;
		}
		_stripHTMLTags(html) {
		let text = html;
		const tags = [];
		let tag;
		const tagRegEx = /<[^>]+>/;

		static getCloseTagLength(highlightTagName = 'span') {
		return TextAnnotator.createCloseTag(highlightTagName).length;
		}

		static trim(prefix, str, postfix) {
		prefix = prefix.replace(/^\s+/, '');
		postfix = postfix.replace(/\s+$/, '');

		if (!prefix) {
		str = str.replace(/^\s+/, '');
		while (tag = text.match(tagRegEx)) {
		text = text.replace(tag, '');
		tags.push({
		index: tag.index,
		length: tag[0].length,
		isCloseTag: tag[0].startsWith('</')
		});
		}

		if (!postfix) {
		str = str.replace(/\s+$/, '');
		}

		return {
		prefix,
		str,
		postfix
		text,
		tags
		};
		}
		} // pure function

		static insert(str1, str2, index) {

		_insert(str1, str2, index) {
		return str1.slice(0, index) + str2 + str1.slice(index);
		}
		} // pure function

		static sentenize(text) {
		const options = {
		newline_boundaries: false,
		html_boundaries: false,
		sanitize: false,
		allowed_tags: false,
		preserve_whitespace: true,
		abbreviations: null
		};
		return (0, _sbd.default)(text, options).map(raw => {
		// future work: can tokenizer return location directly
		const index = text.indexOf(raw);
		return {
		raw,
		index
		};
		});
		}

		static getBestSubstring(str, substr, threshold, lenRatio, caseSensitive, skipFirstRun) {
		let result = {};
		let similarity = skipFirstRun ? threshold : TextAnnotator.getSimilarity(str, substr, caseSensitive);

		if (similarity >= threshold) {
		// step 1: derive best substr
		// future work: /s may be better
		const words = str.split(' ');

		while (words.length) {
		const firstWord = words.shift();
		const newStr = words.join(' ');
		let newSimilarity = TextAnnotator.getSimilarity(newStr, substr, caseSensitive);

		if (newSimilarity < similarity) {
		words.unshift(firstWord);
		const lastWord = words.pop();
		newSimilarity = TextAnnotator.getSimilarity(words.join(' '), substr, caseSensitive);

		if (newSimilarity < similarity) {
		words.push(lastWord);
		break;
		} else {
		similarity = newSimilarity;
		}
		} else {
		similarity = newSimilarity;
		}
		}

		const bestSubstr = words.join(' '); // step 2: return the best substr and its loc if found and if it meets the threshold and the length ratio

		if (!lenRatio \|\| bestSubstr.length / substr.length <= lenRatio) {
		const loc = [];
		loc[0] = str.indexOf(bestSubstr);
		loc[1] = loc[0] + bestSubstr.length;
		result = {
		similarity,
		loc
		};
		}
		_binaryInsert(arr, val, comparator) {
		if (arr.length === 0 \|\| comparator(arr[0], val) >= 0) {
		arr.splice(0, 0, val);
		return arr;
		} else if (arr.length > 0 && comparator(arr[arr.length - 1], val) <= 0) {
		arr.splice(arr.length, 0, val);
		return arr;
		}

		return result;
		}
		let left = 0,
		right = arr.length;
		let leftLast = 0,
		rightLast = right;

		static getSimilarity(str1, str2, caseSensitive) {
		if (!caseSensitive) {
		str1 = str1.toLowerCase();
		str2 = str2.toLowerCase();
		}
		while (left < right) {
		const inPos = Math.floor((right + left) / 2);
		const compared = comparator(arr[inPos], val);

		if (str1 === str2) return 1; // set str2 to denominator

		return TextAnnotator.lcsLength(str1, str2) / str2.length;
		} // copy from the code in https://www.npmjs.com/package/longest-common-subsequence


		static lcsLength(firstSequence, secondSequence, caseSensitive) {
		function createArray(dimension) {
		const array = [];

		for (let i = 0; i < dimension; i++) {
		array[i] = [];
		if (compared < 0) {
		left = inPos;
		} else if (compared > 0) {
		right = inPos;
		} else {
		right = inPos;
		left = inPos;
		}

		return array;
		}

		const firstString = caseSensitive ? firstSequence : firstSequence.toLowerCase();
		const secondString = caseSensitive ? secondSequence : secondSequence.toLowerCase();

		if (firstString === secondString) {
		return firstString.length;
		}

		if ((firstString \|\| secondString) === '') {
		return ''.length;
		}

		const firstStringLength = firstString.length;
		const secondStringLength = secondString.length;
		const lcsMatrix = createArray(secondStringLength + 1);
		let i;
		let j;

		for (i = 0; i <= firstStringLength; i++) {
		lcsMatrix[0][i] = 0;
		}

		for (i = 0; i <= secondStringLength; i++) {
		lcsMatrix[i][0] = 0;
		}

		for (i = 1; i <= secondStringLength; i++) {
		for (j = 1; j <= firstStringLength; j++) {
		if (firstString[j - 1] === secondString[i - 1]) {
		lcsMatrix[i][j] = lcsMatrix[i - 1][j - 1] + 1;
		} else {
		lcsMatrix[i][j] = Math.max(lcsMatrix[i - 1][j], lcsMatrix[i][j - 1]);
		}
		if (leftLast === left && rightLast === right) {
		break;
		}
		}

		let lcs = '';
		i = secondStringLength;
		j = firstStringLength;

		while (i > 0 && j > 0) {
		if (firstString[j - 1] === secondString[i - 1]) {
		lcs = firstString[j - 1] + lcs;
		i--;
		j--;
		} else if (Math.max(lcsMatrix[i - 1][j], lcsMatrix[i][j - 1]) === lcsMatrix[i - 1][j]) {
		i--;
		} else {
		j--;
		}
		leftLast = left;
		rightLast = right;
		}

		return lcs.length;
		arr.splice(right, 0, val);
		return arr;
		}
		@@ -730,0 +273,0 @@

package.json

		{
		"name": "text-annotator",
		"version": "0.8.8",
		"version": "1.0.0",
		"description": "A JavaScript library for locating and annotating plain text in HTML",
		@@ -34,3 +34,2 @@ "main": "build/text-annotator.js",
		"babel-jest": "^26.6.3",
		"dotenv": "^8.0.0",
		"eslint": "^7.21.0",
		@@ -44,2 +43,3 @@ "eslint-config-prettier": "^8.1.0",
		"sync-directory": "^2.2.17",
		"typescript": "^4.3.2",
		"webpack": "^4.46.0",
		@@ -90,6 +90,3 @@ "webpack-cli": "^3.3.12"
		"test"
		],
		"dependencies": {
		"html-entities": "^2.1.0"
		}
		]
		}

105

README.md

		@@ -1,2 +0,2 @@
		# text-annotator
		# text-annotator-v2
		A JavaScript library for annotating plain text in the HTML<br />
		@@ -6,10 +6,11 @@ The annotation process is:
		2. Annotate: Annotate the found text given its index<br />
		It can be seen that in order to annotate a piece of text, two steps, search and annotate, are taken. The idea of decomposing the annotation process into the two steps is to allow more flexibility, e.g., the user can search for all pieces of text first, and then annotate (some of) them later when required (e.g., when clicking a button). There is also a function combining the two steps, as can be seen in the An example of the usage section.<br />
		text-annotator can be used in the browser or the Node.js server.
		It can be seen that in order to annotate a piece of text, two steps, search and annotate, are taken. The idea of decomposing the annotation process into the two steps is to allow more flexibility, e.g., the user can search for all pieces of text first, and then annotate (some of) them later when required (e.g., when clicking a button). <br />
		text-annotator-v2 can be used in the browser or the Node.js server.<br />
		text-annotator-v2 evolved from [text-annotator](https://www.npmjs.com/package/text-annotator). See Comparing text-annotator-v2 and text-annotator at the end of this document.

		## Import
		### install it via npm
		`npm install --save text-annotator`
		`npm install --save text-annotator-v2`
		```javascript
		import TextAnnotator from 'text-annotator'
		import TextAnnotator from 'text-annotator-v2'
		```
		@@ -23,58 +24,34 @@ ### include it into the head tag
		```javascript
		// below is the HTML
		// <div id="content"><p><b>Europe PMC</b> is an <i>open science platform</i> that enables access to a worldwide collection of life science publications and preprints from trusted sources around the globe.</p></p>Europe PMC is <i>developed by <b>EMBL-EBI</b></i>. It is a partner of <b>PubMed Central</b> and a repository of choice for many international science funders.</p></div>
		/*
		below is the HTML
		<div id="content"><p><i>JavaScript</i> is the <b>world's most popular programming language</b>.</p><p><i>JavaScript</i> is the programming language of the Web. JavaScript is easy to learn.</p></div>
		*/

		// create an instance of TextAnnotator
		// content is the HTML string within which a piece of text can be annotated
		var annotator = new TextAnnotator({content: document.getElementById('content').innerHTML})
		// create an instance of TextAnnotator by passing the html to be annotated
		var annotator = new TextAnnotator(document.getElementById('content').innerHTML)

		// search for 'EMBL-EBI' in the HTML
		// if found, store the location of 'EMBL-EBI' and then return the index; otherwise return -1
		var highlightIndex = annotator.search('EMBL-EBI')
		// highlightIndex = 0

		// annotate 'EMBL-EBI' in the HTML
		if (highlightIndex !== -1) {
		document.getElementById('content').innerHTML = annotator.highlight(highlightIndex)
		// <span id="highlight-0" class="highlight"> is used to annotate 'EMBL-EBI', see below
		// <div id="content"><p><b>Europe PMC</b> is an <i>open science platform</i> that enables access to a worldwide collection of life science publications and preprints from trusted sources around the globe.</p></p>Europe PMC is <i>developed by <span id="highlight-0" class="highlight"><b>EMBL-EBI</b></span></i>. It is a partner of <b>PubMed Central</b> and a repository of choice for many international science funders.</p></div>
		// search for text "JavaScript is the world's most popular programming language.JavaScript is the programming language of the Web." within the HTML
		var annotationIndex = annotator.search('JavaScript is the world\'s most popular programming language.JavaScript is the programming language of the Web.')
		// annotate the text if finding it
		if (annotationIndex !== -1) {
		document.getElementById('content').innerHTML = annotator.annotate(annotationIndex)
		}

		// search for all occurances of 'Europe PMC' in the HTML
		var highlightIndexes = annotator.searchAll('Europe PMC')
		// highlightIndexes = [1, 2]

		// annotate all the found occurances of 'Europe PMC' given their indexes
		if (highlightIndexes.length) {
		document.getElementById('content').innerHTML = annotator.highlightAll(highlightIndexes)
		// <span id="highlight-1" class="highlight"> and <span id="highlight-2" class="highlight"> are used to annotate 'Europe PMC', see below
		// <div id="content"><p><span id="highlight-1" class="highlight"><b>Europe PMC</b><span> is an <i>open science platform</i> that enables access to a worldwide collection of life science publications and preprints from trusted sources around the globe.</p><p><span id="highlight-2" class="highlight">Europe PMC</span> is <i>developed by <span id="highlight-0" class="highlight"><b>EMBL-EBI</b></span></i>. It is a partner of <b>PubMed Central</b> and a repository of choice for many international science funders.</p></div>
		// search for all occurances of "JavaScript" in the HTML
		var annotationIndexes = annotator.searchAll('JavaScript')
		// annotate all the found occurances of 'Javascript' given their indexes
		if (annotationIndexes.length) {
		document.getElementById('content').innerHTML = annotator.annotateAll(annotationIndexes)
		}

		// search for and then annotate 'a partner of PubMed Central'
		document.getElementById('content').innerHTML = annotator.searchAndHighlight('a partner of PubMed Central').content
		// searchAndHighlight returns { content, highlightIndex }
		// <span id="highlight-3" class="highlight"> is used to annotate 'a partner of PubMed Central', see below
		// <div id="content"><p><span id="highlight-1" class="highlight"><b>Europe PMC</b><span> is an <i>open science platform</i> that enables access to a worldwide collection of life science publications and preprints from trusted sources around the globe.</p><p><span id="highlight-2" class="highlight">Europe PMC</span> is <i>developed by <span id="highlight-0" class="highlight"><b>EMBL-EBI</b></span></i>. It is <span id="highlight-3" class="highlight">a partner of <b>PubMed Central</b></span> and a repository of choice for many international science funders.</p></div>

		// remove annotation 'EMBL-EBI' given its index
		// the index is 0 as shown above
		document.getElementById('content').innerHTML = annotator.unhighlight(highlightIndex)
		// annotation <span id="highlight-0" class="highlight"> is removed, see below
		// <div id="content"><p><span id="highlight-1" class="highlight"><b>Europe PMC</b><span> is an <i>open science platform</i> that enables access to a worldwide collection of life science publications and preprints from trusted sources around the globe.</p><p><span id="highlight-2" class="highlight">Europe PMC</span> is <i>developed by <b>EMBL-EBI</b></i>. It is <span id="highlight-3" class="highlight">a partner of <b>PubMed Central</b></span> and a repository of choice for many international science funders.</p></div>

		// help annotate one occurance of 'science' - the one within 'international science funders', by providing the prefix and postfix of 'Europe PMC'
		var highlightIndex = annotator.search('science', { prefix: 'international ', postfix: ' funders' })
		if (highlightIndex !== -1) {
		document.getElementById('content').innerHTML = annotator.highlight(highlightIndex)
		}
		// <span id="highlight-4" class="highlight"> is used to annotate 'science' within 'international science funders', see below
		// <div id="content"><p><span id="highlight-1" class="highlight"><b>Europe PMC</b><span> is an <i>open science platform</i> that enables access to a worldwide collection of life science publications and preprints from trusted sources around the globe.</p><p><span id="highlight-2" class="highlight">Europe PMC</span> is <i>developed by <b>EMBL-EBI</b></i>. It is <span id="highlight-3" class="highlight">a partner of <b>PubMed Central</b></span> and a repository of choice for many international <span id="highlight-4" class="highlight">science</span> funders.</p></div>
		// unannotate all the previously annotated text
		document.getElementById('content').innerHTML = annotator.unannotate(annotationIndex)
		document.getElementById('content').innerHTML = annotator.unannotateAll(annotationIndexes)
		```

		## Constructor options
		#### new TextAnnotator(options)
		#### new TextAnnotator(html)
		\| Prop \| Type \| Description \|
		\| ---- \| ---- \| ---- \|
		\| content \| string \| The HTML string within which a piece of text can be annotated. \|
		\| html \| string \| The HTML string within which a piece of text can be annotated. \|

		@@ -92,26 +69,16 @@ ## Search options
		## Annotate options
		#### highlight(highlightIndex, options)
		#### highlightAll(highlightIndexes, options)
		#### unhighlight(highlightIndex, options)
		#### annotate(annotationIndex, options)
		#### annotationAll(annotationIndexes, options)
		#### unannotate(annotationIndex)
		#### unannotateAll(annotationIndexes)
		\| Prop \| Type \| Description \|
		\| ---- \| ---- \| ---- \|
		\| highlightTagName \| string \| The name of the annotation tag. Default is span so that the tag is <span ...>. \|
		\| highlightClass \| string \| The class name of the annotation tag. Default is highlight so that the tag is <span class="highlight" ...>. \|
		\| highlightIdPattern \| string \| The ID pattern of the annotation tag. Default is highlight- so that the tag is <span id="highlight-[highlightIndex]" ...>. \|
		\| tagName \| string \| The tag name of the annotation tag. Default is span so that the tag is <span ...>. \|
		\| baseClassName \| string \| The base class name of the annotation tag. Default is annotation so that the tag is <span class="annotation" ...>. \|
		\| classPattern \| string \| The pattern of the class used as the ID of the annotation. Default is annotation- so that the tag is <span class="annotation-[annotation]" ...>. \|

		## searchAndHighlight options
		searchAndHighlight(str, options), where options = { searchOptions, highlightOptions }, searchOptions and highlightOptions are described above in the Annotate options table.
		## Comparing text-annotator-v2 and text-annotator
		TBC

		## Examples from Europe PMC
		text-annotator has been widely used in [Europe PMC](https://europepmc.org "Europe PMC"), an open science platform that enables access to a worldwide collection of life science publications. Here is a list of examples:
		1. Article title highlighting: https://europepmc.org/search?query=cancer
		!["Article title highlighting" "Article title highlighting"](https://raw.githubusercontent.com/zhan-huang/assets/master/text-annotator/example.JPG)
		2. Snippets: https://europepmc.org/article/PPR/PPR158972 (Visit from https://europepmc.org/search?query=cancer)
		!["Snippets" "Snippets"](https://raw.githubusercontent.com/zhan-huang/assets/master/text-annotator/example2.JPG)
		3. SciLite: https://europepmc.org/article/PPR/PPR158972 (Click the Annotations link in the right panel)
		!["SciLite" "SciLite"](https://raw.githubusercontent.com/zhan-huang/assets/master/text-annotator/example3.JPG)
		4. Linkback: https://europepmc.org/article/PPR/PPR158972#europepmc-6e6312219dcad15c9a7dda8f71dce9af (In the popup shown in Example 3, click "Share" to get this linkback URL)
		!["Linkback" "Linkback"](https://raw.githubusercontent.com/zhan-huang/assets/master/text-annotator/example4.JPG)

		## Contact
		[Zhan Huang](mailto:z2hm@outlook.com "Zhan Huang")

1065

src/text-annotator.js

		@@ -1,487 +0,201 @@
		import { encode } from 'html-entities'
		import getSentences from './ext/sbd'

		// div inside span is a bad idea
		const blockElements = [
		'address',
		'article',
		'aside',
		'blockquote',
		'canvas',
		'dd',
		'div',
		'dl',
		'dt',
		'fieldset',
		'figcaption',
		'figure',
		'footer',
		'form',
		'h1',
		'h2',
		'h3',
		'h4',
		'h5',
		'h6',
		'header',
		'hgroup',
		'hr',
		'li',
		'main',
		'nav',
		'noscript',
		'ol',
		'output',
		'p',
		'pre',
		'section',
		'table',
		'tfoot',
		'ul',
		'video',
		]

		class TextAnnotator {
		constructor(options = {}) {
		const content = options.content
		// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false
		const isHTML = options.isHTML === undefined \|\| options.isHTML

		// annotatedContent is introduced in order to avoid passing content in the methods
		this.originalContent = this.annotatedContent = content
		this.isHTML = isHTML

		// stripedHTML and tagLocations are needed only when the content is HTML
		this.stripedHTML = ''
		this.tagLocations = []
		// sentences are used in sentence-based fuzzy search
		this.sentences = []
		// future work: one highlight can have more than one location because of the potential issue in tag insertion
		this.highlights = []

		if (isHTML) {
		this.stripAndStoreHTMLTags()
		}
		constructor(html) {
		this.html = html
		const { text, tags } = this._stripHTMLTags(html)
		this.text = text
		// [{ index, length, isCloseTag, annotationIndex* }]; ordered by index
		this.tags = tags
		// [{ index, length }]; unordered
		this.annotations = []
		}

		// the order of directSearch => fuzzy search => eager search is tailored for specific feature, it is now the default way of search but it can be customized via options. More customizations can be done by composing functions
		search(str, options = {}) {
		let prefix = options.prefix \|\| ''
		let postfix = options.postfix \|\| ''
		const directSearchOptions = options.directSearchOptions \|\| {}
		const fuzzySearchOptions = options.fuzzySearchOptions
		const eagerSearchOptions = options.eagerSearchOptions
		// trim by default
		const trim = options.trim === undefined \|\| options.trim
		// used unless overwritten
		const caseSensitive = options.caseSensitive
		search(
		searchText,
		{
		prefix = '',
		postfix = '',
		trim = true,
		caseSensitive = false,
		offset = 0,
		} = {}
		) {
		const { text, annotations } = this
		let str = prefix + searchText + postfix
		str = trim ? str.trim() : str
		str = caseSensitive ? str : str.toLowerCase()

		if (trim) {
		const res = TextAnnotator.trim(prefix, str, postfix)
		prefix = res.prefix
		str = res.str
		postfix = res.postfix
		}

		let highlightIndex = -1

		// direct search will always be performed
		highlightIndex = this.directSearch(
		prefix,
		const index = (caseSensitive ? text : text.toLowerCase()).indexOf(
		str,
		postfix,
		Object.assign({ caseSensitive }, directSearchOptions)
		offset
		)
		if (highlightIndex !== -1) {
		return highlightIndex
		}

		// experimental feature
		if (fuzzySearchOptions) {
		highlightIndex = this.fuzzySearch(
		prefix,
		str,
		postfix,
		Object.assign({ caseSensitive }, fuzzySearchOptions)
		)
		if (highlightIndex !== -1) {
		return highlightIndex
		}
		}
		const prefixLength = trim
		? prefix.replace(/^\s+/, '').length
		: prefix.length
		const postfixLength = trim
		? postfix.replace(/\s+$/, '').length
		: postfix.length
		return index === -1
		? -1
		: annotations.push({
		index: index + prefixLength,
		length: str.substring(prefixLength, str.length - postfixLength)
		.length,
		}) - 1
		}

		// experimental feature
		// eager search only works in (particular) browsers
		if (eagerSearchOptions) {
		highlightIndex = this.eagerSearch(
		prefix,
		str,
		postfix,
		Object.assign({ caseSensitive }, eagerSearchOptions)
		)
		if (highlightIndex !== -1) {
		return highlightIndex
		searchAll(searchText, options) {
		let offset = 0
		const annotationIndexes = []
		let annotationIndex = -1
		// do not mutate param
		const newOptions = Object.assign({}, options)
		do {
		annotationIndex = this.search(searchText, newOptions)
		if (annotationIndex !== -1) {
		offset = this.annotations[annotationIndex].index + 1
		newOptions.offset = offset
		annotationIndexes.push(annotationIndex)
		}
		}

		return highlightIndex
		} while (annotationIndex !== -1)
		return annotationIndexes
		}

		// experimental feature
		// only support direct search for now
		searchAll(str, options = {}) {
		const highlightIndexes = []
		annotate(
		annotationIndex,
		{
		tagName = 'span',
		baseClassName = 'annotation',
		classPattern = 'annotation-',
		} = {}
		) {
		const { tags, annotations, _insert, _binaryInsert } = this
		const annotation = annotations[annotationIndex]
		// [start, end, offset]
		const annotationLocation = [
		annotation.index,
		annotation.index + annotation.length,
		0,
		]

		const continueSearch = (str, options) => {
		const highlightIndex = this.search(str, options)
		if (highlightIndex !== -1) {
		highlightIndexes.push(highlightIndex)
		options.directSearchOptions = options.directSearchOptions \|\| {}
		options.directSearchOptions.lastHighlightIndex = highlightIndex
		continueSearch(str, options)
		// partition
		const annotatorLocations = [[...annotationLocation]]
		for (let i = 0; i < tags.length; i++) {
		const { index: tagIndex, length: tagLength } = tags[i]
		if (tagIndex <= annotationLocation[0]) {
		annotatorLocations[0][2] += tagLength
		} else if (tagIndex < annotationLocation[1]) {
		const lastTagIndex = i === 0 ? 0 : tags[i - 1].index
		const lastAnnotatorLocation =
		annotatorLocations[annotatorLocations.length - 1]
		if (tagIndex === lastTagIndex) {
		lastAnnotatorLocation[2] += tagLength
		} else {
		const annotatorLocationEnd = lastAnnotatorLocation[1]
		lastAnnotatorLocation[1] = tagIndex
		annotatorLocations.push([
		lastAnnotatorLocation[1],
		annotatorLocationEnd,
		lastAnnotatorLocation[2] + tagLength,
		])
		}
		} else {
		break
		}
		}

		continueSearch(str, options)
		// insert annotator tags into tag list and html
		const annotatorOpenTag = `<${tagName} class="${baseClassName} ${classPattern}${annotationIndex}">`
		const annotatorCloseTag = `</${tagName}>`
		const annotatorOpenTagLength = annotatorOpenTag.length
		const annotatorCloseTagLength = annotatorCloseTag.length
		let locInc = 0
		for (let i = 0; i < annotatorLocations.length; i++) {
		const annotatorLocation = annotatorLocations[i]
		_binaryInsert(
		tags,
		{
		index: annotatorLocation[0],
		length: annotatorOpenTagLength,
		isCloseTag: false,
		annotationIndex,
		},
		(a, b) => {
		return a.index <= b.index ? -1 : 1
		}
		)
		_binaryInsert(
		tags,
		{
		index: annotatorLocation[1],
		length: annotatorCloseTagLength,
		isCloseTag: true,
		annotationIndex,
		},
		(a, b) => a.index - b.index
		)

		return highlightIndexes
		}
		this.html = _insert(
		this.html,
		annotatorOpenTag,
		annotatorLocation[0] + annotatorLocation[2] + locInc
		)
		this.html = _insert(
		this.html,
		annotatorCloseTag,
		annotatorLocation[1] +
		annotatorLocation[2] +
		locInc +
		annotatorOpenTagLength
		)
		locInc += annotatorOpenTagLength + annotatorCloseTagLength
		}

		highlight(highlightIndex, options = {}) {
		const highlightTagName = options.highlightTagName \|\| 'span'
		const highlightClass = options.highlightClass \|\| 'highlight'
		const highlightIdPattern = options.highlightIdPattern \|\| 'highlight-'

		const openTag = TextAnnotator.createOpenTag(
		highlightTagName,
		highlightIdPattern,
		highlightIndex,
		highlightClass
		)
		const loc = this.adjustLoc(
		highlightTagName,
		highlightIdPattern,
		highlightIndex,
		highlightClass
		)
		this.annotatedContent = TextAnnotator.insert(
		this.annotatedContent,
		openTag,
		loc[0]
		)
		this.annotatedContent = TextAnnotator.insert(
		this.annotatedContent,
		TextAnnotator.createCloseTag(highlightTagName),
		loc[1] + openTag.length
		)
		// it has to be set after adjustLoc so that it will not be checked
		this.highlights[highlightIndex].highlighted = true

		return this.annotatedContent
		return this.html
		}

		// experimental feature
		highlightAll(highlightIndexes, options = {}) {
		for (let i = 0; i < highlightIndexes.length; i++) {
		this.annotatedContent = this.highlight(highlightIndexes[i], options)
		}
		return this.annotatedContent
		annotateAll(annotationIndexes, options) {
		annotationIndexes.forEach((annotationIndex) => {
		this.annotate(annotationIndex, options)
		})
		return this.html
		}

		searchAndHighlight(str, options = {}) {
		const highlightIndex = this.search(str, options.searchOptions)
		if (highlightIndex !== -1) {
		return {
		highlightIndex,
		content: this.highlight(highlightIndex, options.highlightOptions),
		unannotate(annotationIndex) {
		// annotatorIndexesInTags amd annotators have the same size
		const annotatorIndexesInTags = []
		const annotators = this.tags.filter((tag, index) => {
		if (tag.annotationIndex === annotationIndex) {
		annotatorIndexesInTags.push(index)
		}
		}
		}

		unhighlight(highlightIndex, options = {}) {
		const highlightTagName = options.highlightTagName \|\| 'span'
		const highlightClass = options.highlightClass \|\| 'highlight'
		const highlightIdPattern = options.highlightIdPattern \|\| 'highlight-'

		// it has to be set before adjustLoc so that it will not be checked
		this.highlights[highlightIndex].highlighted = false

		// need to change when one annotation => more than one highlight
		const loc = this.adjustLoc(
		highlightTagName,
		highlightIdPattern,
		highlightIndex,
		highlightClass
		return tag.annotationIndex === annotationIndex
		})
		const otherTags = this.tags.filter(
		(tag) => tag.annotationIndex !== annotationIndex
		)
		const openTagLength = TextAnnotator.getOpenTagLength(
		highlightTagName,
		highlightIdPattern,
		highlightIndex,
		highlightClass
		)
		const substr1 = this.annotatedContent.substring(
		loc[0],
		loc[1] + openTagLength + TextAnnotator.getCloseTagLength(highlightTagName)
		)
		const substr2 = this.annotatedContent.substring(
		loc[0] + openTagLength,
		loc[1] + openTagLength
		)
		this.annotatedContent = this.annotatedContent.replace(substr1, substr2)

		return this.annotatedContent
		}

		stripAndStoreHTMLTags() {
		let tag
		this.stripedHTML = this.originalContent
		const tagRegEx = /<[^>]+>/
		let indexInc = 0
		while ((tag = this.stripedHTML.match(tagRegEx))) {
		this.stripedHTML = this.stripedHTML.replace(tag, '')
		const tagLength = tag[0].length
		// tagLocations will be used in adjustLoc
		this.tagLocations.push([tag.index, tagLength, indexInc])
		indexInc += tagLength
		}
		}

		directSearch(prefix, str, postfix, directSearchOptions = {}) {
		const caseSensitive = directSearchOptions.caseSensitive
		// experimental option; used for specific feature
		const ifEncode = directSearchOptions.encode
		const lastHighlightIndex = directSearchOptions.lastHighlightIndex

		let strWithFixes = prefix + str + postfix
		let text = this.isHTML ? this.stripedHTML : this.originalContent
		if (!caseSensitive) {
		strWithFixes = strWithFixes.toLowerCase()
		text = text.toLowerCase()
		}

		// for searchAll
		let offset = 0
		if (lastHighlightIndex !== undefined) {
		offset = this.highlights[lastHighlightIndex].loc[1] + 1
		}

		let highlightIndex = -1
		const index = text.indexOf(strWithFixes, offset)
		// experimental feature: if the text to be searched does not work, try to encode it
		if (ifEncode && index === -1) {
		const encodedStrWithFixes = encode(strWithFixes)
		const index = text.indexOf(encodedStrWithFixes, offset)
		if (index !== -1) {
		const loc = []
		loc[0] = index + encode(prefix).length
		loc[1] = loc[0] + encode(str).length
		highlightIndex = this.highlights.push({ loc }) - 1
		}
		} else if (index !== -1) {
		const loc = []
		loc[0] = index + prefix.length
		loc[1] = loc[0] + str.length
		highlightIndex = this.highlights.push({ loc }) - 1
		}
		return highlightIndex
		}

		eagerSearch(prefix, str, postfix, eagerSearchOptions = {}) {
		const caseSensitive = eagerSearchOptions.caseSensitive
		const containerId = eagerSearchOptions.containerId
		const threshold = eagerSearchOptions.threshold \|\| 0.74

		const strWithFixes = prefix + str + postfix

		let highlightIndex = -1
		// IE is not considered
		if (window.find) {
		document.designMode = 'on'

		// step 1: ask the browser to highlight the found
		const sel = window.getSelection()
		sel.collapse(document.body, 0)
		while (window.find(strWithFixes, caseSensitive)) {
		document.execCommand('hiliteColor', true, 'rgba(255, 255, 255, 0)')
		sel.collapseToEnd()
		// step 2: locate the found within the container where the annotator is applied
		// selector may become better
		const found = document.querySelector(
		'#' +
		containerId +
		' [style="background-color: rgba(255, 255, 255, 0);"]'
		)
		if (found) {
		const foundStr = found.innerHTML.replace(/<[^>]*>/g, '')
		const result = TextAnnotator.getBestSubstring(
		foundStr,
		str,
		threshold
		)
		if (result.similarity) {
		const text = this.isHTML ? this.stripedHTML : this.originalContent
		const index = text.indexOf(foundStr)
		if (index !== -1) {
		highlightIndex =
		this.highlights.push({
		loc: [index + result.loc[0], index + result.loc[1]],
		}) - 1
		}
		}
		// find index difference
		for (let i = 0; i < annotators.length; i++) {
		const annotator = annotators[i]
		let indexInc = 0
		for (let j = 0; j < otherTags.length; j++) {
		const otherTag = otherTags[j]
		if (annotator.index < otherTag.index) {
		break
		}
		}

		// step 3: remove the highlights created by the browser
		document.execCommand('undo')

		document.designMode = 'off'
		}
		return highlightIndex
		}

		fuzzySearch(prefix, str, postfix, fuzzySearchOptions = {}) {
		const caseSensitive = fuzzySearchOptions.caseSensitive

		const tokenBased = fuzzySearchOptions.tokenBased
		let tbThreshold = fuzzySearchOptions.tbThreshold \|\| 0.68

		// sentence-based fuzzy search is enabled by default
		const sentenceBased =
		fuzzySearchOptions.sentenceBased === undefined \|\|
		fuzzySearchOptions.sentenceBased
		let sbThreshold = fuzzySearchOptions.sbThreshold \|\| 0.85
		const maxLengthDiff = fuzzySearchOptions.maxLengthDiff \|\| 0.1
		const lenRatio = fuzzySearchOptions.lenRatio \|\| 2
		const processSentence = fuzzySearchOptions.processSentence

		let highlightIndex = -1
		const text = this.isHTML ? this.stripedHTML : this.originalContent
		// token-based
		if (tokenBased \|\| prefix \|\| postfix) {
		// step 1: find all indexes of str
		const strIndexes = []
		let i = -1
		while ((i = text.indexOf(str, i + 1)) !== -1) {
		strIndexes.push(i)
		}

		// step 2: find the index of the most similar "fragment" - the str with pre- and post- fixes
		let strIndex = -1
		const fragment = prefix + str + postfix
		for (let i = 0; i < strIndexes.length; i++) {
		const si = strIndexes[i]
		// f can be wider
		const f =
		text.substring(si - prefix.length, si) +
		str +
		text.substring(si + str.length, si + str.length + postfix.length)
		const similarity = TextAnnotator.getSimilarity(
		f,
		fragment,
		caseSensitive
		)
		if (similarity >= tbThreshold) {
		tbThreshold = similarity
		strIndex = si
		}
		}

		// step 3: check whether the most similar enough "fragment" is found, if yes return its location
		if (strIndex !== -1) {
		highlightIndex =
		this.highlights.push({ loc: [strIndex, strIndex + str.length] }) - 1
		}
		}
		// sentence-based
		else if (sentenceBased) {
		// step 1: sentenize the text if has not done so
		let sentences = []
		if (this.sentences.length) {
		sentences = this.sentences
		} else {
		sentences = this.sentences = TextAnnotator.sentenize(text)
		}

		// step 2 (for efficiency only): filter sentences by words of the str
		const words = str.split(/\s/)
		const filteredSentences = []
		for (let i = 0; i < sentences.length; i++) {
		for (let j = 0; j < words.length; j++) {
		if (sentences[i].raw.includes(words[j])) {
		filteredSentences.push(sentences[i])
		break
		}
		}
		}

		//step 3 (optional)
		if (processSentence) {
		let index = 0
		// for each sentence
		for (let i = 0; i < filteredSentences.length; i++) {
		const fs = filteredSentences[i]
		let raw = fs.raw
		// loc without tags
		const loc = [fs.index, fs.index + raw.length]
		let locInc = 0
		// add loc of all tags before the one being checked so as to derive the actual loc
		const tagLocations = this.tagLocations
		// for each loc of tag whose loc is larger than the last sentence
		for (let j = index; j < tagLocations.length; j++) {
		const tagLoc = tagLocations[j]
		if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
		const tag = this.originalContent.substring(
		tagLoc[0] + tagLoc[2],
		tagLoc[0] + tagLoc[2] + tagLoc[1]
		)
		const insertIndex = tagLoc[0] + locInc - loc[0]
		raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex)
		locInc += tagLoc[1]
		} else if (tagLoc[0] > loc[1]) {
		index = j // not sure this part
		break
		} else if (annotator.index > otherTag.index) {
		indexInc += otherTag.length
		} else {
		if (otherTag.annotationIndex === undefined) {
		if (!annotator.isCloseTag) {
		indexInc += otherTag.length
		}
		}

		raw = processSentence(raw)
		raw = raw.replace(/(<([^>]+)>)/gi, '')

		const copy = fs.raw
		// update the sentence if it got reduced
		if (copy !== raw) {
		fs.raw = raw
		fs.index = fs.index + copy.indexOf(raw)
		}
		}
		}

		// step 4: find the most possible sentence
		let mostPossibleSentence = null
		for (let i = 0; i < filteredSentences.length; i++) {
		const sentence = filteredSentences[i]
		const similarity = TextAnnotator.getSimilarity(
		sentence.raw,
		str,
		caseSensitive
		)
		if (similarity >= sbThreshold) {
		sbThreshold = similarity
		mostPossibleSentence = sentence
		} else if (i !== filteredSentences.length - 1) {
		// combine two sentences to reduce the inaccuracy of sentenizing text
		const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw
		const lengthDiff =
		Math.abs(newSentenceRaw.length - str.length) / str.length
		if (lengthDiff <= maxLengthDiff) {
		const newSimilarity = TextAnnotator.getSimilarity(
		newSentenceRaw,
		str,
		caseSensitive
		)
		if (newSimilarity >= sbThreshold) {
		sbThreshold = newSimilarity
		mostPossibleSentence = {
		raw: newSentenceRaw,
		index: sentence.index,
		} else {
		if (annotator.annotationIndex < otherTag.annotationIndex) {
		if (otherTag.isCloseTag) {
		indexInc += otherTag.length
		}
		} else {
		if (!annotator.isCloseTag) {
		indexInc += otherTag.length
		}
		}
		@@ -492,403 +206,80 @@ }

		// step 5: if the most possible sentence is found, derive and return the location of the most similar str from it
		if (mostPossibleSentence) {
		const result = TextAnnotator.getBestSubstring(
		mostPossibleSentence.raw,
		str,
		sbThreshold,
		lenRatio,
		caseSensitive,
		true
		)
		if (result.loc) {
		let index = mostPossibleSentence.index
		highlightIndex =
		this.highlights.push({
		loc: [index + result.loc[0], index + result.loc[1]],
		}) - 1
		}
		}
		// remove annotators one by one
		this.html =
		this.html.slice(0, annotator.index + indexInc) +
		this.html.slice(annotator.index + indexInc + annotator.length)
		this.tags.splice(annotatorIndexesInTags[i] - i, 1)
		}
		return highlightIndex
		}

		// future work: further improvement when one annotation binds with more than one highlight
		// includeRequiredTag used in = condition only
		includeRequiredTag(i, highlightLoc, tag) {
		const isCloseTag = tag.startsWith('</')
		const tagName = isCloseTag
		? tag.split('</')[1].split('>')[0]
		: tag.split(' ')[0].split('<')[1].split('>')[0]

		let included = false

		let requiredTagNumber = 1
		let requiredTagCount = 0
		// if both the start tag and the end tag are at the borders, place the tags outside the borders
		// if the close tag is at the border, check backwards until the start of the highlight
		if (isCloseTag) {
		for (let i2 = i - 1; i2 >= 0; i2--) {
		const tagLoc2 = this.tagLocations[i2]
		if (highlightLoc[0] > tagLoc2[0]) {
		break
		} else {
		const tag2 = this.originalContent.substring(
		tagLoc2[0] + tagLoc2[2],
		tagLoc2[0] + tagLoc2[2] + tagLoc2[1]
		)
		if (tag2.startsWith('</' + tagName)) {
		requiredTagNumber++
		} else if (tag2.startsWith('<' + tagName)) {
		requiredTagCount++
		}
		if (requiredTagNumber === requiredTagCount) {
		included = true
		break
		}
		}
		}
		}
		// if the start tag is at the border, check forwards until the end of the highlight
		else {
		for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) {
		const tagLoc2 = this.tagLocations[i2]
		if (highlightLoc[1] < tagLoc2[0]) {
		break
		} else {
		const tag2 = this.originalContent.substring(
		tagLoc2[0] + tagLoc2[2],
		tagLoc2[0] + tagLoc2[2] + tagLoc2[1]
		)
		if (tag2.startsWith('<' + tagName)) {
		requiredTagNumber++
		} else if (tag2.startsWith('</' + tagName)) {
		requiredTagCount++
		}
		if (requiredTagNumber === requiredTagCount) {
		included = true
		break
		}
		}
		}
		}

		return included
		return this.html
		}

		adjustLoc(
		highlightTagName = 'span',
		highlightIdPattern,
		highlightIndex,
		highlightClass
		) {
		const highlightLoc = this.highlights[highlightIndex].loc
		const locInc = [0, 0]

		// step 1: check locations of tags
		const length = this.tagLocations.length
		for (let i = 0; i < length; i++) {
		const tagLoc = this.tagLocations[i]
		// start end tag
		if (highlightLoc[1] < tagLoc[0]) {
		break
		}
		// start end&tag
		else if (highlightLoc[1] === tagLoc[0]) {
		const tag = this.originalContent.substring(
		tagLoc[0] + tagLoc[2],
		tagLoc[0] + tagLoc[2] + tagLoc[1]
		)
		// if end tag, not block element and include the required close tag, add right to the tag
		if (
		!tag.endsWith('/>') &&
		tag.startsWith('</') &&
		!blockElements.includes(tag.split('</')[1].split('>')[0]) &&
		this.includeRequiredTag(i, highlightLoc, tag)
		) {
		locInc[1] += tagLoc[1]
		}
		}
		// start tag end
		else if (highlightLoc[1] > tagLoc[0]) {
		locInc[1] += tagLoc[1]
		// start&tag end
		if (highlightLoc[0] === tagLoc[0]) {
		const tag = this.originalContent.substring(
		tagLoc[0] + tagLoc[2],
		tagLoc[0] + tagLoc[2] + tagLoc[1]
		)
		// if self close tag or end tag or block element or not include the required close tag, add right to the tag
		if (
		tag.startsWith('</') \|\|
		tag.endsWith('/>') \|\|
		blockElements.includes(
		tag.split(' ')[0].split('<')[1].split('>')[0]
		) \|\|
		!this.includeRequiredTag(i, highlightLoc, tag)
		) {
		locInc[0] += tagLoc[1]
		}
		}
		// tag start end
		else if (highlightLoc[0] > tagLoc[0]) {
		locInc[0] += tagLoc[1]
		}
		}
		}

		// step 2: check locations of other highlights
		// all span (no blocks)
		// stored in a different array than tags
		// can intersect
		for (let i = 0; i < this.highlights.length; i++) {
		const highlight = this.highlights[i]
		// only check the highlighted
		if (highlight.highlighted) {
		const openTagLength = TextAnnotator.getOpenTagLength(
		highlightTagName,
		highlightIdPattern,
		i,
		highlightClass
		)
		const closeTagLength = TextAnnotator.getCloseTagLength(highlightTagName)
		const loc = highlight.loc
		if (highlightLoc[0] >= loc[1]) {
		locInc[0] += openTagLength + closeTagLength
		locInc[1] += openTagLength + closeTagLength
		}
		// syntactical correct but semantical incorrect
		else if (
		highlightLoc[0] < loc[1] &&
		highlightLoc[0] > loc[0] &&
		highlightLoc[1] > loc[1]
		) {
		locInc[0] += openTagLength
		locInc[1] += openTagLength + closeTagLength
		} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) {
		locInc[1] += openTagLength + closeTagLength
		}
		// syntactical correct but semantical incorrect
		else if (
		highlightLoc[0] < loc[0] &&
		highlightLoc[1] > loc[0] &&
		highlightLoc[1] < loc[1]
		) {
		locInc[1] += openTagLength
		} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) {
		locInc[0] += openTagLength
		locInc[1] += openTagLength
		}
		}
		}

		return [highlightLoc[0] + locInc[0], highlightLoc[1] + locInc[1]]
		unannotateAll(annotationIndexes) {
		annotationIndexes.forEach((annotationIndex) => {
		this.unannotate(annotationIndex)
		})
		return this.html
		}

		static createOpenTag(
		highlightTagName = 'span',
		highlightIdPattern,
		highlightIndex,
		highlightClass
		) {
		return `<${highlightTagName} id="${
		highlightIdPattern + highlightIndex
		}" class="${highlightClass}">`
		}
		// pure function
		_stripHTMLTags(html) {
		let text = html
		const tags = []

		static createCloseTag(highlightTagName = 'span') {
		return `</${highlightTagName}>`
		}

		static getOpenTagLength(
		highlightTagName = 'span',
		highlightIdPattern,
		highlightIndex,
		highlightClass
		) {
		return TextAnnotator.createOpenTag(
		highlightTagName,
		highlightIdPattern,
		highlightIndex,
		highlightClass
		).length
		}

		static getCloseTagLength(highlightTagName = 'span') {
		return TextAnnotator.createCloseTag(highlightTagName).length
		}

		static trim(prefix, str, postfix) {
		prefix = prefix.replace(/^\s+/, '')
		postfix = postfix.replace(/\s+$/, '')
		if (!prefix) {
		str = str.replace(/^\s+/, '')
		let tag
		const tagRegEx = /<[^>]+>/
		while ((tag = text.match(tagRegEx))) {
		text = text.replace(tag, '')
		tags.push({
		index: tag.index,
		length: tag[0].length,
		isCloseTag: tag[0].startsWith('</'),
		})
		}
		if (!postfix) {
		str = str.replace(/\s+$/, '')
		}

		return { prefix, str, postfix }
		return { text, tags }
		}

		static insert(str1, str2, index) {
		// pure function
		_insert(str1, str2, index) {
		return str1.slice(0, index) + str2 + str1.slice(index)
		}

		static sentenize(text) {
		const options = {
		newline_boundaries: false,
		html_boundaries: false,
		sanitize: false,
		allowed_tags: false,
		preserve_whitespace: true,
		abbreviations: null,
		// pure function
		_binaryInsert(arr, val, comparator) {
		if (arr.length === 0 \|\| comparator(arr[0], val) >= 0) {
		arr.splice(0, 0, val)
		return arr
		} else if (arr.length > 0 && comparator(arr[arr.length - 1], val) <= 0) {
		arr.splice(arr.length, 0, val)
		return arr
		}
		return getSentences(text, options).map((raw) => {
		// future work: can tokenizer return location directly
		const index = text.indexOf(raw)
		return { raw, index }
		})
		}

		static getBestSubstring(
		str,
		substr,
		threshold,
		lenRatio,
		caseSensitive,
		skipFirstRun
		) {
		let result = {}

		let similarity = skipFirstRun
		? threshold
		: TextAnnotator.getSimilarity(str, substr, caseSensitive)
		if (similarity >= threshold) {
		// step 1: derive best substr
		// future work: /s may be better
		const words = str.split(' ')
		while (words.length) {
		const firstWord = words.shift()
		const newStr = words.join(' ')
		let newSimilarity = TextAnnotator.getSimilarity(
		newStr,
		substr,
		caseSensitive
		)
		if (newSimilarity < similarity) {
		words.unshift(firstWord)
		const lastWord = words.pop()
		newSimilarity = TextAnnotator.getSimilarity(
		words.join(' '),
		substr,
		caseSensitive
		)
		if (newSimilarity < similarity) {
		words.push(lastWord)
		break
		} else {
		similarity = newSimilarity
		}
		} else {
		similarity = newSimilarity
		}
		let left = 0,
		right = arr.length
		let leftLast = 0,
		rightLast = right
		while (left < right) {
		const inPos = Math.floor((right + left) / 2)
		const compared = comparator(arr[inPos], val)
		if (compared < 0) {
		left = inPos
		} else if (compared > 0) {
		right = inPos
		} else {
		right = inPos
		left = inPos
		}
		const bestSubstr = words.join(' ')

		// step 2: return the best substr and its loc if found and if it meets the threshold and the length ratio
		if (!lenRatio \|\| bestSubstr.length / substr.length <= lenRatio) {
		const loc = []
		loc[0] = str.indexOf(bestSubstr)
		loc[1] = loc[0] + bestSubstr.length
		result = { similarity, loc }
		if (leftLast === left && rightLast === right) {
		break
		}
		leftLast = left
		rightLast = right
		}

		return result
		arr.splice(right, 0, val)
		return arr
		}

		static getSimilarity(str1, str2, caseSensitive) {
		if (!caseSensitive) {
		str1 = str1.toLowerCase()
		str2 = str2.toLowerCase()
		}
		if (str1 === str2) return 1
		// set str2 to denominator
		return TextAnnotator.lcsLength(str1, str2) / str2.length
		}

		// copy from the code in https://www.npmjs.com/package/longest-common-subsequence
		static lcsLength(firstSequence, secondSequence, caseSensitive) {
		function createArray(dimension) {
		const array = []

		for (let i = 0; i < dimension; i++) {
		array[i] = []
		}

		return array
		}

		const firstString = caseSensitive
		? firstSequence
		: firstSequence.toLowerCase()
		const secondString = caseSensitive
		? secondSequence
		: secondSequence.toLowerCase()

		if (firstString === secondString) {
		return firstString.length
		}

		if ((firstString \|\| secondString) === '') {
		return ''.length
		}

		const firstStringLength = firstString.length
		const secondStringLength = secondString.length
		const lcsMatrix = createArray(secondStringLength + 1)

		let i
		let j
		for (i = 0; i <= firstStringLength; i++) {
		lcsMatrix[0][i] = 0
		}

		for (i = 0; i <= secondStringLength; i++) {
		lcsMatrix[i][0] = 0
		}

		for (i = 1; i <= secondStringLength; i++) {
		for (j = 1; j <= firstStringLength; j++) {
		if (firstString[j - 1] === secondString[i - 1]) {
		lcsMatrix[i][j] = lcsMatrix[i - 1][j - 1] + 1
		} else {
		lcsMatrix[i][j] = Math.max(lcsMatrix[i - 1][j], lcsMatrix[i][j - 1])
		}
		}
		}

		let lcs = ''
		i = secondStringLength
		j = firstStringLength

		while (i > 0 && j > 0) {
		if (firstString[j - 1] === secondString[i - 1]) {
		lcs = firstString[j - 1] + lcs
		i--
		j--
		} else if (
		Math.max(lcsMatrix[i - 1][j], lcsMatrix[i][j - 1]) ===
		lcsMatrix[i - 1][j]
		) {
		i--
		} else {
		j--
		}
		}

		return lcs.length
		}
		}

		export default TextAnnotator

285

test/index.test.js

		import TextAnnotator from '../src/text-annotator'

		const content =
		'"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan Huang'
		const html =
		'"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'

		const closeTag = '</span>'
		test('annotate text without surrounding/inside tags', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'EMBL-EBI'
		const annotatedHtml = textAnnotator.annotate(textAnnotator.search(searchText))
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in <span class="annotation annotation-0">EMBL-EBI</span>. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		describe('test main scenarios', () => {
		test('direct search', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndex = annotator.search('I')
		const newContent = annotator.highlight(highlightIndex)
		const openTag = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndex,
		'highlight'
		)
		expect(newContent).toBe(
		`"${openTag}I${closeTag} am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate text with surrounding tags', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'frontend developer'
		const annotatedHtml = textAnnotator.annotate(textAnnotator.search(searchText))
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, a <b><span class="annotation annotation-0">frontend developer</span></b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('search all', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndexes = annotator.searchAll('Zhan Huang')
		const newContent = annotator.highlightAll(highlightIndexes)
		const openTag1 = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndexes[0],
		'highlight'
		)
		const openTag2 = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndexes[1],
		'highlight'
		)
		expect(newContent).toBe(
		`"I am ${openTag1}<b><i>Zhan Huang</i></b>${closeTag}, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - ${openTag2}Zhan Huang${closeTag}`
		)
		})
		test('annotate text with inside open tag', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'a frontend developer'
		const annotatedHtml = textAnnotator.annotate(textAnnotator.search(searchText))
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, <span class="annotation annotation-0">a </span><b><span class="annotation annotation-0">frontend developer</span></b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('token-based fuzzy search', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndex = annotator.search('frontend developer', {
		prefix: 'a ',
		postfix: ' in EMBLEBI',
		fuzzySearchOptions: {},
		})
		const newContent = annotator.highlight(highlightIndex)
		const openTag = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndex,
		'highlight'
		)
		expect(newContent).toBe(
		`"I am <b><i>Zhan Huang</i></b>, a ${openTag}<b>frontend developer</b>${closeTag} in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate text with inside close tag', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'frontend developer in EMBL-EBI'
		const annotatedHtml = textAnnotator.annotate(textAnnotator.search(searchText))
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, a <b><span class="annotation annotation-0">frontend developer</span></b><span class="annotation annotation-0"> in EMBL-EBI</span>. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('sentence-based fuzzy search', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndex = annotator.search('I like fool', {
		fuzzySearchOptions: {},
		})
		const newContent = annotator.highlight(highlightIndex)
		const openTag = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndex,
		'highlight'
		)
		expect(newContent).toBe(
		`"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. ${openTag}I like food${closeTag} and sports. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate text with multiple tags surrounding/inside', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'Zhan Huang, a frontend developer in EMBL-EBI'
		const annotatedHtml = textAnnotator.annotate(textAnnotator.search(searchText))
		expect(annotatedHtml).toBe(
		'"I am <b><i><span class="annotation annotation-0">Zhan Huang</span></i></b><span class="annotation annotation-0">, a </span><b><span class="annotation annotation-0">frontend developer</span></b><span class="annotation annotation-0"> in EMBL-EBI</span>. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('combination of searching and highlighting', () => {
		const annotator = new TextAnnotator({ content })
		const result = annotator.searchAndHighlight('sports')
		const openTag = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		result.highlightIndex,
		'highlight'
		)
		expect(result.content).toBe(
		`"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and ${openTag}sports${closeTag}. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate all occurrences of the text', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'Zhan Huang'
		const annotationIndexes = textAnnotator.searchAll(searchText)
		const annotatedHtml = textAnnotator.annotateAll(annotationIndexes)
		expect(annotatedHtml).toBe(
		'"I am <b><i><span class="annotation annotation-0">Zhan Huang</span></i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - <span class="annotation annotation-1">Zhan HUANG</span>'
		)
		})

		test('removal of a highlight', () => {
		const annotator = new TextAnnotator({ content })
		const result = annotator.searchAndHighlight('udon noodles')
		expect(
		annotator.unhighlight(result.highlightIndex, {
		content: result.content,
		})
		).toBe(content)
		})
		test('annotate multiple pieces of text', () => {
		const textAnnotator = new TextAnnotator(html)
		textAnnotator.annotate(textAnnotator.search('Zhan Huang'))
		textAnnotator.annotate(textAnnotator.search('food and sports'))
		const annotatedHtml = textAnnotator.annotate(
		textAnnotator.search('a frontend developer in EMBL-EBI')
		)
		expect(annotatedHtml).toBe(
		'"I am <b><i><span class="annotation annotation-0">Zhan Huang</span></i></b>, <span class="annotation annotation-2">a </span><b><span class="annotation annotation-2">frontend developer</span></b><span class="annotation annotation-2"> in EMBL-EBI</span>. I like <span class="annotation annotation-1">food and sports</span>. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('use <mark> for highlight', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndex = annotator.search('I')
		const newContent = annotator.highlight(highlightIndex, {
		highlightTagName: 'mark',
		})
		const openTag = TextAnnotator.createOpenTag(
		'mark',
		'highlight-',
		highlightIndex,
		'highlight'
		)
		const closeTag = '</mark>'
		expect(newContent).toBe(
		`"${openTag}I${closeTag} am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate text with multiple tags inside/surrounding', () => {
		const textAnnotator = new TextAnnotator(
		'<p><span>I am <b>Zhan Huang</b></span></p>. <p><i>I like apples.<i></p>'
		)
		const annotatedHtml = textAnnotator.annotate(
		textAnnotator.search('I am Zhan Huang. I like')
		)
		expect(annotatedHtml).toBe(
		'<p><span><span class="annotation annotation-0">I am </span><b><span class="annotation annotation-0">Zhan Huang</span></b></span></p><span class="annotation annotation-0">. </span><p><i><span class="annotation annotation-0">I like</span> apples.<i></p>'
		)
		})

		describe('test edge cases', () => {
		test('ec1', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndex = annotator.search('I am Zhan Huang')
		const newContent = annotator.highlight(highlightIndex)
		const openTag = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndex,
		'highlight'
		)
		expect(newContent).toBe(
		`"${openTag}I am <b><i>Zhan Huang</i></b>${closeTag}, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate text with prefix/postfix', () => {
		const textAnnotator = new TextAnnotator(html)
		const annotatedHtml = textAnnotator.annotate(
		textAnnotator.search('Zhan Huang', { prefix: '- ', postfix: '' })
		)
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - <span class="annotation annotation-0">Zhan HUANG</span>'
		)
		})

		test('ec2', () => {
		const annotator = new TextAnnotator({ content })
		const highlightIndex = annotator.search('frontend developer in EMBL-EBI')
		const newContent = annotator.highlight(highlightIndex)
		const openTag = TextAnnotator.createOpenTag(
		'span',
		'highlight-',
		highlightIndex,
		'highlight'
		)
		expect(newContent).toBe(
		`"I am <b><i>Zhan Huang</i></b>, a ${openTag}<b>frontend developer</b> in EMBL-EBI${closeTag}. I like food and sports. My favourite food is udon noodles." - Zhan Huang`
		)
		})
		test('annotate text with trimming', () => {
		const textAnnotator = new TextAnnotator(html)
		const annotatedHtml = textAnnotator.annotate(
		textAnnotator.search('Zhan Huang', { prefix: ' - ', postfix: '' })
		)
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - <span class="annotation annotation-0">Zhan HUANG</span>'
		)
		})

		test('annotate text with case sensitive', () => {
		const textAnnotator = new TextAnnotator(html)
		const annotatedHtml = textAnnotator.annotate(
		textAnnotator.search('Zhan HUANG', { caseSensitive: true })
		)
		expect(annotatedHtml).toBe(
		'"I am <b><i>Zhan Huang</i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - <span class="annotation annotation-0">Zhan HUANG</span>'
		)
		})

		test('unannotate the text', () => {
		const textAnnotator = new TextAnnotator(html)
		const searchText = 'Zhan Huang, a frontend developer in EMBL-EBI'
		const annotationIndex = textAnnotator.search(searchText)
		const annotatedHtml = textAnnotator.annotate(annotationIndex)
		expect(annotatedHtml).toBe(
		'"I am <b><i><span class="annotation annotation-0">Zhan Huang</span></i></b><span class="annotation annotation-0">, a </span><b><span class="annotation annotation-0">frontend developer</span></b><span class="annotation annotation-0"> in EMBL-EBI</span>. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		const unannotatedHtml = textAnnotator.unannotate(annotationIndex)
		expect(unannotatedHtml).toBe(html)
		})

		test('unnotatate the text with tags inside', () => {
		const textAnnotator = new TextAnnotator(html)
		textAnnotator.annotate(
		textAnnotator.search('Zhan Huang, a frontend developer in EMBL-EBI')
		)
		textAnnotator.annotate(textAnnotator.search('Zhan Huang'))
		const annotatedHtml = textAnnotator.unannotate(0)
		expect(annotatedHtml).toBe(
		'"I am <b><i><span class="annotation annotation-1">Zhan Huang</span></i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('annotate the text again after unannotating it', () => {
		const textAnnotator = new TextAnnotator(html)
		textAnnotator.annotate(textAnnotator.search('Zhan Huang'))
		textAnnotator.annotate(textAnnotator.search('food and sports'))
		textAnnotator.unannotateAll([0, 1])
		console.log(textAnnotator.html)
		const annotatedHtml = textAnnotator.annotate(0)
		expect(annotatedHtml).toBe(
		'"I am <b><i><span class="annotation annotation-0">Zhan Huang</span></i></b>, a <b>frontend developer</b> in EMBL-EBI. I like food and sports. My favourite food is udon noodles." - Zhan HUANG'
		)
		})

		test('annotate document without html tags', () => {
		const textAnnotator = new TextAnnotator('I am Zhan Huang')
		const annotatedHtml = textAnnotator.annotate(
		textAnnotator.search('Zhan Huang')
		)
		expect(annotatedHtml).toBe(
		'I am <span class="annotation annotation-0">Zhan Huang</span>'
		)
		})

build/ext/sbd.js

public/index.html

src/ext/sbd.js

sync.js

LICENSE

Sorry, the diff of this file is not supported yet

public/js/text-annotator.min.js

Sorry, the diff of this file is too big to display

text-annotator - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes