New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

text-annotator

Package Overview
Dependencies
Maintainers
1
Versions
38
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

text-annotator - npm Package Compare versions

Comparing version 0.9.5 to 0.9.6

242

build/text-annotator.js

@@ -8,4 +8,2 @@ "use strict";

var _htmlEntities = require("html-entities");
var _sbd = _interopRequireDefault(require("./ext/sbd"));

@@ -15,3 +13,7 @@

// div inside span is a bad idea
/**
 * Escape the five HTML-significant characters in a string.
 *
 * Replaces `&`, `"`, `'`, `<` and `>` with `&amp;`, `&quot;`, `&#39;`,
 * `&lt;` and `&gt;` respectively; every other character is left untouched.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 */
const encode = str => {
  // One scan over the input. None of the inserted entities contain a
  // character from the match set other than the leading "&", which is never
  // re-scanned, so the result matches escaping "&" first and the rest after.
  const entities = {
    '&': '&amp;',
    '"': '&quot;',
    "'": '&#39;',
    '<': '&lt;',
    '>': '&gt;'
  };
  return str.replace(/[&"'<>]/g, ch => entities[ch]);
}; // div inside span is a bad idea
const blockElements = ['address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main', 'nav', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video'];

@@ -202,3 +204,3 @@

if (ifEncode && index === -1) {
const encodedStrWithFixes = (0, _htmlEntities.encode)(strWithFixes);
const encodedStrWithFixes = encode(strWithFixes);
const index = text.indexOf(encodedStrWithFixes, offset);

@@ -208,4 +210,4 @@

const loc = [];
loc[0] = index + (0, _htmlEntities.encode)(prefix).length;
loc[1] = loc[0] + (0, _htmlEntities.encode)(str).length;
loc[0] = index + encode(prefix).length;
loc[1] = loc[0] + encode(str).length;
highlightIndex = this.highlights.push({

@@ -320,104 +322,104 @@ loc

else if (sentenceBased) {
// step 1: sentenize the text if has not done so
let sentences = [];
// step 1: sentenize the text if has not done so
let sentences = [];
if (this.sentences.length) {
sentences = this.sentences;
} else {
sentences = this.sentences = TextAnnotator.sentenize(text);
} // step 2 (for efficiency only): filter sentences by words of the str
if (this.sentences.length) {
sentences = this.sentences;
} else {
sentences = this.sentences = TextAnnotator.sentenize(text);
} // step 2 (for efficiency only): filter sentences by words of the str
const words = str.split(/\s/);
const filteredSentences = [];
const words = str.split(/\s/);
const filteredSentences = [];
for (let i = 0; i < sentences.length; i++) {
for (let j = 0; j < words.length; j++) {
if (sentences[i].raw.includes(words[j])) {
filteredSentences.push(sentences[i]);
break;
}
for (let i = 0; i < sentences.length; i++) {
for (let j = 0; j < words.length; j++) {
if (sentences[i].raw.includes(words[j])) {
filteredSentences.push(sentences[i]);
break;
}
} //step 3 (optional)
}
} //step 3 (optional)
if (processSentence) {
let index = 0; // for each sentence
if (processSentence) {
let index = 0; // for each sentence
for (let i = 0; i < filteredSentences.length; i++) {
const fs = filteredSentences[i];
let raw = fs.raw; // loc without tags
for (let i = 0; i < filteredSentences.length; i++) {
const fs = filteredSentences[i];
let raw = fs.raw; // loc without tags
const loc = [fs.index, fs.index + raw.length];
let locInc = 0; // add loc of all tags before the one being checked so as to derive the actual loc
const loc = [fs.index, fs.index + raw.length];
let locInc = 0; // add loc of all tags before the one being checked so as to derive the actual loc
const tagLocations = this.tagLocations; // for each loc of tag whose loc is larger than the last sentence
const tagLocations = this.tagLocations; // for each loc of tag whose loc is larger than the last sentence
for (let j = index; j < tagLocations.length; j++) {
const tagLoc = tagLocations[j];
for (let j = index; j < tagLocations.length; j++) {
const tagLoc = tagLocations[j];
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]);
const insertIndex = tagLoc[0] + locInc - loc[0];
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex);
locInc += tagLoc[1];
} else if (tagLoc[0] > loc[1]) {
index = j; // not sure this part
if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]);
const insertIndex = tagLoc[0] + locInc - loc[0];
raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex);
locInc += tagLoc[1];
} else if (tagLoc[0] > loc[1]) {
index = j; // not sure this part
break;
}
break;
}
}
raw = processSentence(raw);
raw = raw.replace(/(<([^>]+)>)/gi, '');
const copy = fs.raw; // update the sentence if it got reduced
raw = processSentence(raw);
raw = raw.replace(/(<([^>]+)>)/gi, '');
const copy = fs.raw; // update the sentence if it got reduced
if (copy !== raw) {
fs.raw = raw;
fs.index = fs.index + copy.indexOf(raw);
}
if (copy !== raw) {
fs.raw = raw;
fs.index = fs.index + copy.indexOf(raw);
}
} // step 4: find the most possible sentence
}
} // step 4: find the most possible sentence
let mostPossibleSentence = null;
let mostPossibleSentence = null;
for (let i = 0; i < filteredSentences.length; i++) {
const sentence = filteredSentences[i];
const similarity = TextAnnotator.getSimilarity(sentence.raw, str, caseSensitive);
for (let i = 0; i < filteredSentences.length; i++) {
const sentence = filteredSentences[i];
const similarity = TextAnnotator.getSimilarity(sentence.raw, str, caseSensitive);
if (similarity >= sbThreshold) {
sbThreshold = similarity;
mostPossibleSentence = sentence;
} else if (i !== filteredSentences.length - 1) {
// combine two sentences to reduce the inaccuracy of sentenizing text
const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw;
const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length;
if (similarity >= sbThreshold) {
sbThreshold = similarity;
mostPossibleSentence = sentence;
} else if (i !== filteredSentences.length - 1) {
// combine two sentences to reduce the inaccuracy of sentenizing text
const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw;
const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length;
if (lengthDiff <= maxLengthDiff) {
const newSimilarity = TextAnnotator.getSimilarity(newSentenceRaw, str, caseSensitive);
if (lengthDiff <= maxLengthDiff) {
const newSimilarity = TextAnnotator.getSimilarity(newSentenceRaw, str, caseSensitive);
if (newSimilarity >= sbThreshold) {
sbThreshold = newSimilarity;
mostPossibleSentence = {
raw: newSentenceRaw,
index: sentence.index
};
}
if (newSimilarity >= sbThreshold) {
sbThreshold = newSimilarity;
mostPossibleSentence = {
raw: newSentenceRaw,
index: sentence.index
};
}
}
} // step 5: if the most possible sentence is found, derive and return the location of the most similar str from it
}
} // step 5: if the most possible sentence is found, derive and return the location of the most similar str from it
if (mostPossibleSentence) {
const result = TextAnnotator.getBestSubstring(mostPossibleSentence.raw, str, sbThreshold, lenRatio, caseSensitive, true);
if (mostPossibleSentence) {
const result = TextAnnotator.getBestSubstring(mostPossibleSentence.raw, str, sbThreshold, lenRatio, caseSensitive, true);
if (result.loc) {
let index = mostPossibleSentence.index;
highlightIndex = this.highlights.push({
loc: [index + result.loc[0], index + result.loc[1]]
}) - 1;
}
if (result.loc) {
let index = mostPossibleSentence.index;
highlightIndex = this.highlights.push({
loc: [index + result.loc[0], index + result.loc[1]]
}) - 1;
}
}
}

@@ -460,23 +462,23 @@ return highlightIndex;

else {
for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) {
const tagLoc2 = this.tagLocations[i2];
for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) {
const tagLoc2 = this.tagLocations[i2];
if (highlightLoc[1] < tagLoc2[0]) {
break;
} else {
const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]);
if (highlightLoc[1] < tagLoc2[0]) {
break;
} else {
const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]);
if (tag2.startsWith('<' + tagName)) {
requiredTagNumber++;
} else if (tag2.startsWith('</' + tagName)) {
requiredTagCount++;
}
if (tag2.startsWith('<' + tagName)) {
requiredTagNumber++;
} else if (tag2.startsWith('</' + tagName)) {
requiredTagCount++;
}
if (requiredTagNumber === requiredTagCount) {
included = true;
break;
}
if (requiredTagNumber === requiredTagCount) {
included = true;
break;
}
}
}
}

@@ -499,22 +501,22 @@ return included;

else if (highlightLoc[1] === tagLoc[0]) {
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if end tag, not block element and include the required close tag, add right to the tag
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if end tag, not block element and include the required close tag, add right to the tag
if (!tag.endsWith('/>') && tag.startsWith('</') && !blockElements.includes(tag.split('</')[1].split('>')[0]) && this.includeRequiredTag(i, highlightLoc, tag)) {
locInc[1] += tagLoc[1];
}
} // start tag end
else if (highlightLoc[1] > tagLoc[0]) {
locInc[1] += tagLoc[1]; // start&tag end
if (!tag.endsWith('/>') && tag.startsWith('</') && !blockElements.includes(tag.split('</')[1].split('>')[0]) && this.includeRequiredTag(i, highlightLoc, tag)) {
locInc[1] += tagLoc[1];
}
} // start tag end
else if (highlightLoc[1] > tagLoc[0]) {
locInc[1] += tagLoc[1]; // start&tag end
if (highlightLoc[0] === tagLoc[0]) {
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if self close tag or end tag or block element or not include the required close tag, add right to the tag
if (highlightLoc[0] === tagLoc[0]) {
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if self close tag or end tag or block element or not include the required close tag, add right to the tag
if (tag.startsWith('</') || tag.endsWith('/>') || blockElements.includes(tag.split(' ')[0].split('<')[1].split('>')[0]) || !this.includeRequiredTag(i, highlightLoc, tag)) {
locInc[0] += tagLoc[1];
}
} // tag start end
else if (highlightLoc[0] > tagLoc[0]) {
locInc[0] += tagLoc[1];
}
if (tag.startsWith('</') || tag.endsWith('/>') || blockElements.includes(tag.split(' ')[0].split('<')[1].split('>')[0]) || !this.includeRequiredTag(i, highlightLoc, tag)) {
locInc[0] += tagLoc[1];
}
} // tag start end
else if (highlightLoc[0] > tagLoc[0]) {
locInc[0] += tagLoc[1];
}
}
} // step 2: check locations of other highlights

@@ -539,13 +541,13 @@ // all span (no blocks)

else if (highlightLoc[0] < loc[1] && highlightLoc[0] > loc[0] && highlightLoc[1] > loc[1]) {
locInc[0] += openTagLength;
locInc[1] += openTagLength + closeTagLength;
} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) {
locInc[1] += openTagLength + closeTagLength;
} // syntactical correct but semantical incorrect
else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) {
locInc[1] += openTagLength;
} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) {
locInc[0] += openTagLength;
locInc[1] += openTagLength;
}
locInc[0] += openTagLength;
locInc[1] += openTagLength + closeTagLength;
} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) {
locInc[1] += openTagLength + closeTagLength;
} // syntactical correct but semantical incorrect
else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) {
locInc[1] += openTagLength;
} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) {
locInc[0] += openTagLength;
locInc[1] += openTagLength;
}
}

@@ -552,0 +554,0 @@ }

{
"name": "text-annotator",
"version": "0.9.5",
"version": "0.9.6",
"description": "A JavaScript library for locating and annotating plain text in HTML",

@@ -8,6 +8,4 @@ "main": "build/text-annotator.js",

"lint": "./node_modules/.bin/eslint src/** test/** --fix",
"lint:nofix": "./node_modules/.bin/eslint src/** test/** --max-warnings 0",
"build": "babel src -d build",
"build-min": "webpack --config webpack.config.js",
"sync": "node sync.js",
"test": "jest"

@@ -31,17 +29,14 @@ },

"devDependencies": {
"@babel/cli": "^7.13.0",
"@babel/core": "^7.13.8",
"@babel/preset-env": "^7.13.9",
"babel-jest": "^26.6.3",
"dotenv": "^8.0.0",
"eslint": "^7.21.0",
"eslint-config-prettier": "^8.1.0",
"eslint-plugin-jest": "^24.1.5",
"eslint-plugin-prettier": "^3.3.1",
"jest": "^26.6.3",
"pre-commit": "^1.2.2",
"prettier": "2.2.1",
"sync-directory": "^2.2.17",
"webpack": "^4.46.0",
"webpack-cli": "^3.3.12"
"@babel/cli": "^7.17.10",
"@babel/core": "^7.18.5",
"@babel/preset-env": "^7.18.2",
"babel-jest": "^28.1.1",
"eslint": "^8.18.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-jest": "^26.5.3",
"eslint-plugin-prettier": "^4.0.0",
"jest": "^28.1.1",
"prettier": "2.7.1",
"webpack": "^5.73.0",
"webpack-cli": "^4.10.0"
},

@@ -86,10 +81,3 @@ "babel": {

"singleQuote": true
},
"pre-commit": [
"lint:nofix",
"test"
],
"dependencies": {
"html-entities": "^2.1.0"
}
}

@@ -148,2 +148,2 @@ # text-annotator

## Contact
[Zhan Huang](mailto:z2hm@outlook.com "Zhan Huang")
[Zhan Huang](mailto:z2hm@outlook.com "Zhan Huang")

@@ -1,4 +0,12 @@

import { encode } from 'html-entities'
import getSentences from './ext/sbd'
/**
 * Escape the five HTML-significant characters in a string.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with `&`, `"`, `'`, `<` and `>` replaced by
 *   `&amp;`, `&quot;`, `&#39;`, `&lt;` and `&gt;`.
 */
const encode = (str) => {
  // Applied in order; "&" must come first so that the ampersands introduced
  // by the later entities are not escaped a second time.
  const substitutions = [
    [/&/g, '&amp;'],
    [/"/g, '&quot;'],
    [/'/g, '&#39;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
  ]
  return substitutions.reduce(
    (escaped, [pattern, entity]) => escaped.replace(pattern, entity),
    str
  )
}
// div inside span is a bad idea

@@ -5,0 +13,0 @@ const blockElements = [

const path = require('path')
module.exports = {
entry: './src/index.js',
entry: './build/index.js',
target: 'web',
mode: 'production',
mode: 'development',
output: {

@@ -8,0 +8,0 @@ path: path.join(__dirname, 'public/js'),

Sorry, the diff of this file is too big to display

Socket | SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc