text-annotator - npm Package Compare versions

Comparing version 0.6.9 to 0.7.0

package.json

		{
		"name": "text-annotator",
		"version": "0.6.9",
		"version": "0.7.0",
		"description": "A JavaScript library for locating and annotating plain text in HTML",
		@@ -5,0 +5,0 @@ "main": "build/text-annotator.js",

150

src/text-annotator.js

		@@ -12,2 +12,3 @@ import getSentences from './ext/sbd'
		const content = options.content
		// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false
		const isHTML = options.isHTML === undefined || options.isHTML
		@@ -20,3 +21,2 @@
		: content
		// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false
		this.isHTML = isHTML
		@@ -27,3 +27,3 @@
		this.tagLocations = []
		// sentences are used in (sentence based) fuzzy search
		// sentences are used in sentence-based fuzzy search
		this.sentences = []
		@@ -38,3 +38,3 @@ // one highlight can have more than one location because of the potential issue in tag insertion***

		// lastHighlightIndex can be within options***
		// lastHighlightIndex can be within options; it is currently used by searchAll
		search(str, options = {}, lastHighlightIndex) {
		@@ -98,3 +98,3 @@ let prefix = options.prefix || ''

		// only support directly search for now***
		// only support direct search for now
		searchAll(str, options = {}) {
		@@ -145,2 +145,3 @@ const highlightIndexes = []
		)
		// it has to be set after adjustLoc so that it will not be checked
		this.highlights[highlightIndex].highlighted = true
		@@ -163,8 +164,10 @@
		: content
		highlightIndexes.forEach(highlightIndex => {
		for (let i = 0; i < highlightIndexes.length; i++) {
		options.content = newContent
		newContent = this.highlight(highlightIndex, options)
		})
		newContent = this.highlight(highlightIndexes[i], options)
		}

		if (!isBrowser || !containerId || returnContent) {
		if (isBrowser && containerId && !returnContent) {
		document.getElementById(containerId).innerHTML = newContent
		} else {
		return newContent
		@@ -184,4 +187,2 @@ }

		// add searchAndHighlightAll***

		unhighlight(highlightIndex, options = {}) {
		@@ -198,2 +199,3 @@ // byStringOperation is used to decide whether the content is changed by string operation or dom operation

		// it has to be set before adjustLoc so that it will not be checked
		this.highlights[highlightIndex].highlighted = false
		@@ -243,4 +245,2 @@

		// add unhighlightAll***

		stripAndStoreHTMLTags() {
		@@ -276,2 +276,3 @@ let tag

		// for searchAll
		let offset = 0
		@@ -301,3 +302,3 @@ if (lastHighlightIndex !== undefined) {
		let highlightIndex = -1
		// IE will not be considered***
		// IE is not considered
		if (window.find) {
		@@ -313,2 +314,3 @@ document.designMode = 'on'
		// step 2: locate the found within the container where the annotator is applied
		// selector may become better
		const found = document.querySelector(
		@@ -348,15 +350,15 @@ '#' +

		// improve later***
		fuzzySearch(prefix, str, postfix, fuzzySearchOptions = {}) {
		const caseSensitive = fuzzySearchOptions.caseSensitive

		const tokenBased = fuzzySearchOptions.tokenBased
		let tbThreshold = fuzzySearchOptions.tbThreshold || 0.68
		const tokenBased = fuzzySearchOptions.tokenBased

		// sentence-based fuzzy search is enabled by default
		const sentenceBased =
		fuzzySearchOptions.sentenceBased === undefined ||
		fuzzySearchOptions.sentenceBased
		let sbThreshold = fuzzySearchOptions.sbThreshold || 0.85
		const lenRatio = fuzzySearchOptions.lenRatio || 2
		const processSentence = fuzzySearchOptions.processSentence
		const sentenceBased =
		fuzzySearchOptions.sentenceBased === undefined ||
		fuzzySearchOptions.sentenceBased

		@@ -377,7 +379,9 @@ let highlightIndex = -1
		const fragment = prefix + str + postfix
		for (const i of strIndexes) {
		for (let i = 0; i < strIndexes.length; i++) {
		const si = strIndexes[i]
		// f can be wider
		const f =
		text.substring(i - prefix.length, i) +
		text.substring(si - prefix.length, si) +
		str +
		text.substring(i + str.length, i + str.length + postfix.length)
		text.substring(si + str.length, si + str.length + postfix.length)
		const similarity = TextAnnotator.getSimilarity(
		@@ -390,3 +394,3 @@ f,
		tbThreshold = similarity
		strIndex = i
		strIndex = si
		}
		@@ -414,6 +418,6 @@ }
		const filteredSentences = []
		for (const sentence of sentences) {
		for (const word of words) {
		if (sentence.raw.includes(word)) {
		filteredSentences.push(sentence)
		for (let i = 0; i < sentences.length; i++) {
		for (let j = 0; j < words.length; j++) {
		if (sentences[i].raw.includes(words[j])) {
		filteredSentences.push(sentences[i])
		break
		@@ -424,37 +428,39 @@ }

		//step 2.5: remove text that must not be annotated
		//step 3 (optional)
		if (processSentence) {
		const tagLocations = this.tagLocations
		const length = tagLocations.length
		if (length) {
		let index = 0
		for (const fs of filteredSentences) {
		let raw = fs.raw
		const loc = [fs.index, fs.index + raw.length]
		let locInc = 0
		for (let i = index; i < length; i++) {
		const tagLoc = tagLocations[i]
		if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
		const tag = this.originalContent.substring(
		tagLoc[0] + tagLoc[2],
		tagLoc[0] + tagLoc[2] + tagLoc[1]
		)
		const insertIndex = tagLoc[0] + locInc - loc[0]
		raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex)
		locInc += tagLoc[1]
		} else if (tagLoc[0] > loc[1]) {
		index = i - 1
		break
		}
		let index = 0
		// for each sentence
		for (let i = 0; i < filteredSentences.length; i++) {
		const fs = filteredSentences[i]
		let raw = fs.raw
		// loc without tags
		const loc = [fs.index, fs.index + raw.length]
		let locInc = 0
		// add loc of all tags before the one being checked so as to derive the actual loc
		const tagLocations = this.tagLocations
		// for each loc of tag whose loc is larger than the last sentence
		for (let j = index; j < tagLocations.length; j++) {
		const tagLoc = tagLocations[j]
		if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
		const tag = this.originalContent.substring(
		tagLoc[0] + tagLoc[2],
		tagLoc[0] + tagLoc[2] + tagLoc[1]
		)
		const insertIndex = tagLoc[0] + locInc - loc[0]
		raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex)
		locInc += tagLoc[1]
		} else if (tagLoc[0] > loc[1]) {
		index = j // not sure this part
		break
		}
		}

		raw = processSentence(raw)
		raw = raw.replace(/(<([^>]+)>)/gi, '')
		raw = processSentence(raw)
		raw = raw.replace(/(<([^>]+)>)/gi, '')

		const copy = fs.raw
		// update the sentence if it got reduced
		if (copy !== raw) {
		fs.raw = raw
		fs.index = fs.index + copy.indexOf(raw)
		}
		const copy = fs.raw
		// update the sentence if it got reduced
		if (copy !== raw) {
		fs.raw = raw
		fs.index = fs.index + copy.indexOf(raw)
		}
		@@ -464,3 +470,3 @@ }

		// // step 3: find the sentence that includes the most similar str
		// // step 4: find the sentence that includes the most similar str
		// let bestResult = null
		@@ -497,3 +503,3 @@ // let mostPossibleSentence = null

		// // step 4: if such sentence is found, derive and return the location of the most similar str
		// // step 5: if such sentence is found, derive and return the location of the most similar str
		// if (bestResult) {
		@@ -507,5 +513,6 @@ // let index = mostPossibleSentence.index

		// step 3: find the most possible sentence
		// step 4: find the most possible sentence
		let mostPossibleSentence = null
		filteredSentences.forEach((sentence, index) => {
		for (let i = 0; i < filteredSentences.length; i++) {
		const sentence = filteredSentences[i]
		const similarity = TextAnnotator.getSimilarity(
		@@ -519,8 +526,8 @@ sentence.raw,
		mostPossibleSentence = sentence
		} else if (index !== filteredSentences.length - 1) {
		} else if (i !== filteredSentences.length - 1) {
		// combine two sentences to reduce the inaccuracy of sentenizing text
		const newSentenceRaw = sentence.raw + filteredSentences[index + 1].raw
		const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw
		const lengthDiff =
		Math.abs(newSentenceRaw.length - str.length) / str.length
		// whether allowing the customization of length diff threshold****
		// whether allowing the customization of length diff threshold***
		if (lengthDiff <= 0.1) {
		@@ -541,5 +548,5 @@ const newSimilarity = TextAnnotator.getSimilarity(
		}
		})
		}

		// step 4: if the most possible sentence is found, derive and return the location of the most similar str from it
		// step 5: if the most possible sentence is found, derive and return the location of the most similar str from it
		if (mostPossibleSentence) {
		@@ -568,3 +575,4 @@ const result = TextAnnotator.getBestSubstring(
		adjustLoc(highlightIdPattern, highlightIndex, highlightClass) {
		const highlightLoc = this.highlights[highlightIndex].loc
		const { highlights } = this
		const highlightLoc = highlights[highlightIndex].loc
		const locInc = [0, 0]
		@@ -637,7 +645,8 @@
		// step 2: check locations of other highlights
		this.highlights.forEach((highlight, highlightIndex) => {
		for (let i = 0; i < highlights.length; i++) {
		const highlight = highlights[i]
		if (highlight.highlighted) {
		const openTagLength = TextAnnotator.getOpenTagLength(
		highlightIdPattern,
		highlightIndex,
		i,
		highlightClass
		@@ -670,3 +679,3 @@ )
		}
		})
		}

		@@ -745,2 +754,3 @@ return [highlightLoc[0] + locInc[0], highlightLoc[1] + locInc[1]]
		// step 1: derive best substr
		// /s may be better***
		const words = str.split(' ')
		@@ -747,0 +757,0 @@ while (words.length) {

text-annotator - npm Package Compare versions

Improved metrics