Huge News! Announcing our $40M Series B led by Abstract Ventures. Learn More
Socket
Sign in | Demo | Install
Socket

text-annotator

Package Overview
Dependencies
Maintainers
1
Versions
38
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

text-annotator - npm Package Compare versions

Comparing version 0.6.9 to 0.7.0

2

package.json
{
"name": "text-annotator",
"version": "0.6.9",
"version": "0.7.0",
"description": "A JavaScript library for locating and annotating plain text in HTML",

@@ -5,0 +5,0 @@ "main": "build/text-annotator.js",

@@ -12,2 +12,3 @@ import getSentences from './ext/sbd'

const content = options.content
// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false
const isHTML = options.isHTML === undefined || options.isHTML

@@ -20,3 +21,2 @@

: content
// isHTML is used to reduce the memory used: stripedHTML is empty if isHTML is false
this.isHTML = isHTML

@@ -27,3 +27,3 @@

this.tagLocations = []
// sentences are used in (sentence based) fuzzy search
// sentences are used in sentence-based fuzzy search
this.sentences = []

@@ -38,3 +38,3 @@ // one highlight can have more than one location because of the potential issue in tag insertion***

// lastHighlightIndex can be within options***
// lastHighlightIndex can be within options; it is currently used by searchAll
search(str, options = {}, lastHighlightIndex) {

@@ -98,3 +98,3 @@ let prefix = options.prefix || ''

// only support directly search for now***
// only support direct search for now
searchAll(str, options = {}) {

@@ -145,2 +145,3 @@ const highlightIndexes = []

)
// it has to be set after adjustLoc so that it will not be checked
this.highlights[highlightIndex].highlighted = true

@@ -163,8 +164,10 @@

: content
highlightIndexes.forEach(highlightIndex => {
for (let i = 0; i < highlightIndexes.length; i++) {
options.content = newContent
newContent = this.highlight(highlightIndex, options)
})
newContent = this.highlight(highlightIndexes[i], options)
}
if (!isBrowser || !containerId || returnContent) {
if (isBrowser && containerId && !returnContent) {
document.getElementById(containerId).innerHTML = newContent
} else {
return newContent

@@ -184,4 +187,2 @@ }

// add searchAndHighlightAll***
unhighlight(highlightIndex, options = {}) {

@@ -198,2 +199,3 @@ // byStringOperation is used to decide whether the content is changed by string operation or dom operation

// it has to be set before adjustLoc so that it will not be checked
this.highlights[highlightIndex].highlighted = false

@@ -243,4 +245,2 @@

// add unighlightAll***
stripAndStoreHTMLTags() {

@@ -276,2 +276,3 @@ let tag

// for searchAll
let offset = 0

@@ -301,3 +302,3 @@ if (lastHighlightIndex !== undefined) {

let highlightIndex = -1
// IE will not be considered***
// IE is not considered
if (window.find) {

@@ -313,2 +314,3 @@ document.designMode = 'on'

// step 2: locate the found within the container where the annotator is applied
// selector may become better
const found = document.querySelector(

@@ -348,15 +350,15 @@ '#' +

// improve later***
fuzzySearch(prefix, str, postfix, fuzzySearchOptions = {}) {
const caseSensitive = fuzzySearchOptions.caseSensitive
const tokenBased = fuzzySearchOptions.tokenBased
let tbThreshold = fuzzySearchOptions.tbThreshold || 0.68
const tokenBased = fuzzySearchOptions.tokenBased
// sentence-based fuzzy search is enabled by default
const sentenceBased =
fuzzySearchOptions.sentenceBased === undefined ||
fuzzySearchOptions.sentenceBased
let sbThreshold = fuzzySearchOptions.sbThreshold || 0.85
const lenRatio = fuzzySearchOptions.lenRatio || 2
const processSentence = fuzzySearchOptions.processSentence
const sentenceBased =
fuzzySearchOptions.sentenceBased === undefined ||
fuzzySearchOptions.sentenceBased

@@ -377,7 +379,9 @@ let highlightIndex = -1

const fragment = prefix + str + postfix
for (const i of strIndexes) {
for (let i = 0; i < strIndexes.length; i++) {
const si = strIndexes[i]
// f can be wider
const f =
text.substring(i - prefix.length, i) +
text.substring(si - prefix.length, si) +
str +
text.substring(i + str.length, i + str.length + postfix.length)
text.substring(si + str.length, si + str.length + postfix.length)
const similarity = TextAnnotator.getSimilarity(

@@ -390,3 +394,3 @@ f,

tbThreshold = similarity
strIndex = i
strIndex = si
}

@@ -414,6 +418,6 @@ }

const filteredSentences = []
for (const sentence of sentences) {
for (const word of words) {
if (sentence.raw.includes(word)) {
filteredSentences.push(sentence)
for (let i = 0; i < sentences.length; i++) {
for (let j = 0; j < words.length; j++) {
if (sentences[i].raw.includes(words[j])) {
filteredSentences.push(sentences[i])
break

@@ -424,37 +428,39 @@ }

//step 2.5: remove text that must not be annotated
//step 3 (optional)
if (processSentence) {
const tagLocations = this.tagLocations
const length = tagLocations.length
if (length) {
let index = 0
for (const fs of filteredSentences) {
let raw = fs.raw
const loc = [fs.index, fs.index + raw.length]
let locInc = 0
for (let i = index; i < length; i++) {
const tagLoc = tagLocations[i]
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
const tag = this.originalContent.substring(
tagLoc[0] + tagLoc[2],
tagLoc[0] + tagLoc[2] + tagLoc[1]
)
const insertIndex = tagLoc[0] + locInc - loc[0]
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex)
locInc += tagLoc[1]
} else if (tagLoc[0] > loc[1]) {
index = i - 1
break
}
let index = 0
// for each sentence
for (let i = 0; i < filteredSentences.length; i++) {
const fs = filteredSentences[i]
let raw = fs.raw
// loc without tags
const loc = [fs.index, fs.index + raw.length]
let locInc = 0
// add loc of all tags before the one being checked so as to derive the actual loc
const tagLocations = this.tagLocations
// for each loc of tag whose loc is larger than the last sentence
for (let j = index; j < tagLocations.length; j++) {
const tagLoc = tagLocations[j]
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
const tag = this.originalContent.substring(
tagLoc[0] + tagLoc[2],
tagLoc[0] + tagLoc[2] + tagLoc[1]
)
const insertIndex = tagLoc[0] + locInc - loc[0]
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex)
locInc += tagLoc[1]
} else if (tagLoc[0] > loc[1]) {
index = j // not sure this part
break
}
}
raw = processSentence(raw)
raw = raw.replace(/(<([^>]+)>)/gi, '')
raw = processSentence(raw)
raw = raw.replace(/(<([^>]+)>)/gi, '')
const copy = fs.raw
// update the sentence if it got reduced
if (copy !== raw) {
fs.raw = raw
fs.index = fs.index + copy.indexOf(raw)
}
const copy = fs.raw
// update the sentence if it got reduced
if (copy !== raw) {
fs.raw = raw
fs.index = fs.index + copy.indexOf(raw)
}

@@ -464,3 +470,3 @@ }

// // step 3: find the sentence that includes the most similar str
// // step 4: find the sentence that includes the most similar str
// let bestResult = null

@@ -497,3 +503,3 @@ // let mostPossibleSentence = null

// // step 4: if such sentence is found, derive and return the location of the most similar str
// // step 5: if such sentence is found, derive and return the location of the most similar str
// if (bestResult) {

@@ -507,5 +513,6 @@ // let index = mostPossibleSentence.index

// step 3: find the most possible sentence
// step 4: find the most possible sentence
let mostPossibleSentence = null
filteredSentences.forEach((sentence, index) => {
for (let i = 0; i < filteredSentences.length; i++) {
const sentence = filteredSentences[i]
const similarity = TextAnnotator.getSimilarity(

@@ -519,8 +526,8 @@ sentence.raw,

mostPossibleSentence = sentence
} else if (index !== filteredSentences.length - 1) {
} else if (i !== filteredSentences.length - 1) {
// combine two sentences to reduce the inaccuracy of sentenizing text
const newSentenceRaw = sentence.raw + filteredSentences[index + 1].raw
const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw
const lengthDiff =
Math.abs(newSentenceRaw.length - str.length) / str.length
// whether allowing the customization of length diff threshold****
// whether allowing the customization of length diff threshold***
if (lengthDiff <= 0.1) {

@@ -541,5 +548,5 @@ const newSimilarity = TextAnnotator.getSimilarity(

}
})
}
// step 4: if the most possible sentence is found, derive and return the location of the most similar str from it
// step 5: if the most possible sentence is found, derive and return the location of the most similar str from it
if (mostPossibleSentence) {

@@ -568,3 +575,4 @@ const result = TextAnnotator.getBestSubstring(

adjustLoc(highlightIdPattern, highlightIndex, highlightClass) {
const highlightLoc = this.highlights[highlightIndex].loc
const { highlights } = this
const highlightLoc = highlights[highlightIndex].loc
const locInc = [0, 0]

@@ -637,7 +645,8 @@

// step 2: check locations of other highlights
this.highlights.forEach((highlight, highlightIndex) => {
for (let i = 0; i < highlights.length; i++) {
const highlight = highlights[i]
if (highlight.highlighted) {
const openTagLength = TextAnnotator.getOpenTagLength(
highlightIdPattern,
highlightIndex,
i,
highlightClass

@@ -670,3 +679,3 @@ )

}
})
}

@@ -745,2 +754,3 @@ return [highlightLoc[0] + locInc[0], highlightLoc[1] + locInc[1]]

// step 1: derive best substr
// /s may be better***
const words = str.split(' ')

@@ -747,0 +757,0 @@ while (words.length) {

Socket | Socket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc