@@ -8,4 +8,2 @@ "use strict";

		var _htmlEntities = require("html-entities");

		var _sbd = _interopRequireDefault(require("./ext/sbd"));
		@@ -15,3 +13,7 @@

		// div inside span is a bad idea
		/**
		 * Escape the five HTML-significant characters (& " ' < >) in a string.
		 *
		 * The ampersand is replaced first so that the ampersands introduced by the
		 * later replacements are not escaped a second time.
		 *
		 * NOTE(review): the replacement strings were lost when this text was
		 * extracted (the entities had been decoded back to the raw characters).
		 * They are restored here as the conventional entities; the apostrophe is
		 * assumed to be the numeric form `&#39;` - confirm against the published
		 * package.
		 *
		 * @param {string} str - Raw text to escape.
		 * @returns {string} The text with & " ' < > replaced by HTML entities.
		 */
		const encode = str => {
			return str
				.replace(/&/g, '&amp;')
				.replace(/"/g, '&quot;')
				.replace(/'/g, '&#39;')
				.replace(/</g, '&lt;')
				.replace(/>/g, '&gt;');
		}; // div inside span is a bad idea


		// Tag names that are looked up with blockElements.includes(...) when a tag
		// sits exactly at a highlight boundary; membership changes whether the tag's
		// length is added to the highlight's start/end offset.
		const blockElements = ['address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main', 'nav', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video'];
		@@ -202,3 +204,3 @@
		if (ifEncode && index === -1) {
		const encodedStrWithFixes = (0, _htmlEntities.encode)(strWithFixes);
		const encodedStrWithFixes = encode(strWithFixes);
		const index = text.indexOf(encodedStrWithFixes, offset);
		@@ -208,4 +210,4 @@
		const loc = [];
		loc[0] = index + (0, _htmlEntities.encode)(prefix).length;
		loc[1] = loc[0] + (0, _htmlEntities.encode)(str).length;
		loc[0] = index + encode(prefix).length;
		loc[1] = loc[0] + encode(str).length;
		highlightIndex = this.highlights.push({
		@@ -320,104 +322,104 @@ loc
		else if (sentenceBased) {
		// step 1: sentenize the text if has not done so
		let sentences = [];
		// step 1: sentenize the text if has not done so
		let sentences = [];

		if (this.sentences.length) {
		sentences = this.sentences;
		} else {
		sentences = this.sentences = TextAnnotator.sentenize(text);
		} // step 2 (for efficiency only): filter sentences by words of the str
		if (this.sentences.length) {
		sentences = this.sentences;
		} else {
		sentences = this.sentences = TextAnnotator.sentenize(text);
		} // step 2 (for efficiency only): filter sentences by words of the str


		const words = str.split(/\s/);
		const filteredSentences = [];
		const words = str.split(/\s/);
		const filteredSentences = [];

		for (let i = 0; i < sentences.length; i++) {
		for (let j = 0; j < words.length; j++) {
		if (sentences[i].raw.includes(words[j])) {
		filteredSentences.push(sentences[i]);
		break;
		}
		for (let i = 0; i < sentences.length; i++) {
		for (let j = 0; j < words.length; j++) {
		if (sentences[i].raw.includes(words[j])) {
		filteredSentences.push(sentences[i]);
		break;
		}
		} //step 3 (optional)
		}
		} //step 3 (optional)


		if (processSentence) {
		let index = 0; // for each sentence
		if (processSentence) {
		let index = 0; // for each sentence

		for (let i = 0; i < filteredSentences.length; i++) {
		const fs = filteredSentences[i];
		let raw = fs.raw; // loc without tags
		for (let i = 0; i < filteredSentences.length; i++) {
		const fs = filteredSentences[i];
		let raw = fs.raw; // loc without tags

		const loc = [fs.index, fs.index + raw.length];
		let locInc = 0; // add loc of all tags before the one being checked so as to derive the actual loc
		const loc = [fs.index, fs.index + raw.length];
		let locInc = 0; // add loc of all tags before the one being checked so as to derive the actual loc

		const tagLocations = this.tagLocations; // for each loc of tag whose loc is larger than the last sentence
		const tagLocations = this.tagLocations; // for each loc of tag whose loc is larger than the last sentence

		for (let j = index; j < tagLocations.length; j++) {
		const tagLoc = tagLocations[j];
		for (let j = index; j < tagLocations.length; j++) {
		const tagLoc = tagLocations[j];

		if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]);
		const insertIndex = tagLoc[0] + locInc - loc[0];
		raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex);
		locInc += tagLoc[1];
		} else if (tagLoc[0] > loc[1]) {
		index = j; // not sure this part
		if (tagLoc[0] >= loc[0] && tagLoc[0] <= loc[1]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]);
		const insertIndex = tagLoc[0] + locInc - loc[0];
		raw = raw.slice(0, insertIndex) + tag + raw.slice(insertIndex);
		locInc += tagLoc[1];
		} else if (tagLoc[0] > loc[1]) {
		index = j; // not sure this part

		break;
		}
		break;
		}
		}

		raw = processSentence(raw);
		raw = raw.replace(/(<([^>]+)>)/gi, '');
		const copy = fs.raw; // update the sentence if it got reduced
		raw = processSentence(raw);
		raw = raw.replace(/(<([^>]+)>)/gi, '');
		const copy = fs.raw; // update the sentence if it got reduced

		if (copy !== raw) {
		fs.raw = raw;
		fs.index = fs.index + copy.indexOf(raw);
		}
		if (copy !== raw) {
		fs.raw = raw;
		fs.index = fs.index + copy.indexOf(raw);
		}
		} // step 4: find the most possible sentence
		}
		} // step 4: find the most possible sentence


		let mostPossibleSentence = null;
		let mostPossibleSentence = null;

		for (let i = 0; i < filteredSentences.length; i++) {
		const sentence = filteredSentences[i];
		const similarity = TextAnnotator.getSimilarity(sentence.raw, str, caseSensitive);
		for (let i = 0; i < filteredSentences.length; i++) {
		const sentence = filteredSentences[i];
		const similarity = TextAnnotator.getSimilarity(sentence.raw, str, caseSensitive);

		if (similarity >= sbThreshold) {
		sbThreshold = similarity;
		mostPossibleSentence = sentence;
		} else if (i !== filteredSentences.length - 1) {
		// combine two sentences to reduce the inaccuracy of sentenizing text
		const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw;
		const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length;
		if (similarity >= sbThreshold) {
		sbThreshold = similarity;
		mostPossibleSentence = sentence;
		} else if (i !== filteredSentences.length - 1) {
		// combine two sentences to reduce the inaccuracy of sentenizing text
		const newSentenceRaw = sentence.raw + filteredSentences[i + 1].raw;
		const lengthDiff = Math.abs(newSentenceRaw.length - str.length) / str.length;

		if (lengthDiff <= maxLengthDiff) {
		const newSimilarity = TextAnnotator.getSimilarity(newSentenceRaw, str, caseSensitive);
		if (lengthDiff <= maxLengthDiff) {
		const newSimilarity = TextAnnotator.getSimilarity(newSentenceRaw, str, caseSensitive);

		if (newSimilarity >= sbThreshold) {
		sbThreshold = newSimilarity;
		mostPossibleSentence = {
		raw: newSentenceRaw,
		index: sentence.index
		};
		}
		if (newSimilarity >= sbThreshold) {
		sbThreshold = newSimilarity;
		mostPossibleSentence = {
		raw: newSentenceRaw,
		index: sentence.index
		};
		}
		}
		} // step 5: if the most possible sentence is found, derive and return the location of the most similar str from it
		}
		} // step 5: if the most possible sentence is found, derive and return the location of the most similar str from it


		if (mostPossibleSentence) {
		const result = TextAnnotator.getBestSubstring(mostPossibleSentence.raw, str, sbThreshold, lenRatio, caseSensitive, true);
		if (mostPossibleSentence) {
		const result = TextAnnotator.getBestSubstring(mostPossibleSentence.raw, str, sbThreshold, lenRatio, caseSensitive, true);

		if (result.loc) {
		let index = mostPossibleSentence.index;
		highlightIndex = this.highlights.push({
		loc: [index + result.loc[0], index + result.loc[1]]
		}) - 1;
		}
		if (result.loc) {
		let index = mostPossibleSentence.index;
		highlightIndex = this.highlights.push({
		loc: [index + result.loc[0], index + result.loc[1]]
		}) - 1;
		}
		}
		}

		@@ -460,23 +462,23 @@ return highlightIndex;
		else {
		for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) {
		const tagLoc2 = this.tagLocations[i2];
		for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) {
		const tagLoc2 = this.tagLocations[i2];

		if (highlightLoc[1] < tagLoc2[0]) {
		break;
		} else {
		const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]);
		if (highlightLoc[1] < tagLoc2[0]) {
		break;
		} else {
		const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]);

		if (tag2.startsWith('<' + tagName)) {
		requiredTagNumber++;
		} else if (tag2.startsWith('</' + tagName)) {
		requiredTagCount++;
		}
		if (tag2.startsWith('<' + tagName)) {
		requiredTagNumber++;
		} else if (tag2.startsWith('</' + tagName)) {
		requiredTagCount++;
		}

		if (requiredTagNumber === requiredTagCount) {
		included = true;
		break;
		}
		if (requiredTagNumber === requiredTagCount) {
		included = true;
		break;
		}
		}
		}
		}

		@@ -499,22 +501,22 @@ return included;
		else if (highlightLoc[1] === tagLoc[0]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if end tag, not block element and include the required close tag, add right to the tag
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if end tag, not block element and include the required close tag, add right to the tag

		if (!tag.endsWith('/>') && tag.startsWith('</') && !blockElements.includes(tag.split('</')[1].split('>')[0]) && this.includeRequiredTag(i, highlightLoc, tag)) {
		locInc[1] += tagLoc[1];
		}
		} // start tag end
		else if (highlightLoc[1] > tagLoc[0]) {
		locInc[1] += tagLoc[1]; // start&tag end
		if (!tag.endsWith('/>') && tag.startsWith('</') && !blockElements.includes(tag.split('</')[1].split('>')[0]) && this.includeRequiredTag(i, highlightLoc, tag)) {
		locInc[1] += tagLoc[1];
		}
		} // start tag end
		else if (highlightLoc[1] > tagLoc[0]) {
		locInc[1] += tagLoc[1]; // start&tag end

		if (highlightLoc[0] === tagLoc[0]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if self close tag or end tag or block element or not include the required close tag, add right to the tag
		if (highlightLoc[0] === tagLoc[0]) {
		const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if self close tag or end tag or block element or not include the required close tag, add right to the tag

		if (tag.startsWith('</') \|\| tag.endsWith('/>') \|\| blockElements.includes(tag.split(' ')[0].split('<')[1].split('>')[0]) \|\| !this.includeRequiredTag(i, highlightLoc, tag)) {
		locInc[0] += tagLoc[1];
		}
		} // tag start end
		else if (highlightLoc[0] > tagLoc[0]) {
		locInc[0] += tagLoc[1];
		}
		if (tag.startsWith('</') \|\| tag.endsWith('/>') \|\| blockElements.includes(tag.split(' ')[0].split('<')[1].split('>')[0]) \|\| !this.includeRequiredTag(i, highlightLoc, tag)) {
		locInc[0] += tagLoc[1];
		}
		} // tag start end
		else if (highlightLoc[0] > tagLoc[0]) {
		locInc[0] += tagLoc[1];
		}
		}
		} // step 2: check locations of other highlights
		@@ -539,13 +541,13 @@ // all span (no blocks)
		else if (highlightLoc[0] < loc[1] && highlightLoc[0] > loc[0] && highlightLoc[1] > loc[1]) {
		locInc[0] += openTagLength;
		locInc[1] += openTagLength + closeTagLength;
		} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) {
		locInc[1] += openTagLength + closeTagLength;
		} // syntactical correct but semantical incorrect
		else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) {
		locInc[1] += openTagLength;
		} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) {
		locInc[0] += openTagLength;
		locInc[1] += openTagLength;
		}
		locInc[0] += openTagLength;
		locInc[1] += openTagLength + closeTagLength;
		} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) {
		locInc[1] += openTagLength + closeTagLength;
		} // syntactical correct but semantical incorrect
		else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) {
		locInc[1] += openTagLength;
		} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) {
		locInc[0] += openTagLength;
		locInc[1] += openTagLength;
		}
		}
		@@ -552,0 +554,0 @@ }

package.json

		{
		"name": "text-annotator",
		"version": "0.9.5",
		"version": "0.9.6",
		"description": "A JavaScript library for locating and annotating plain text in HTML",
		@@ -8,6 +8,4 @@ "main": "build/text-annotator.js",
		"lint": "./node_modules/.bin/eslint src/ test/ --fix",
		"lint:nofix": "./node_modules/.bin/eslint src/ test/ --max-warnings 0",
		"build": "babel src -d build",
		"build-min": "webpack --config webpack.config.js",
		"sync": "node sync.js",
		"test": "jest"
		@@ -31,17 +29,14 @@ },
		"devDependencies": {
		"@babel/cli": "^7.13.0",
		"@babel/core": "^7.13.8",
		"@babel/preset-env": "^7.13.9",
		"babel-jest": "^26.6.3",
		"dotenv": "^8.0.0",
		"eslint": "^7.21.0",
		"eslint-config-prettier": "^8.1.0",
		"eslint-plugin-jest": "^24.1.5",
		"eslint-plugin-prettier": "^3.3.1",
		"jest": "^26.6.3",
		"pre-commit": "^1.2.2",
		"prettier": "2.2.1",
		"sync-directory": "^2.2.17",
		"webpack": "^4.46.0",
		"webpack-cli": "^3.3.12"
		"@babel/cli": "^7.17.10",
		"@babel/core": "^7.18.5",
		"@babel/preset-env": "^7.18.2",
		"babel-jest": "^28.1.1",
		"eslint": "^8.18.0",
		"eslint-config-prettier": "^8.5.0",
		"eslint-plugin-jest": "^26.5.3",
		"eslint-plugin-prettier": "^4.0.0",
		"jest": "^28.1.1",
		"prettier": "2.7.1",
		"webpack": "^5.73.0",
		"webpack-cli": "^4.10.0"
		},
		@@ -86,10 +81,3 @@ "babel": {
		"singleQuote": true
		},
		"pre-commit": [
		"lint:nofix",
		"test"
		],
		"dependencies": {
		"html-entities": "^2.1.0"
		}
		}

README.md

		@@ -148,2 +148,2 @@ # text-annotator
		## Contact
		[Zhan Huang](mailto:z2hm@outlook.com "Zhan Huang")
		[Zhan Huang](mailto:z2hm@outlook.com "Zhan Huang")

src/text-annotator.js

		@@ -1,4 +0,12 @@
		import { encode } from 'html-entities'
		import getSentences from './ext/sbd'

		/**
		 * Escape the five HTML-significant characters (& " ' < >) in a string.
		 *
		 * The ampersand is replaced first so that the ampersands introduced by the
		 * later replacements are not escaped a second time.
		 *
		 * NOTE(review): the replacement strings were lost when this text was
		 * extracted (the entities had been decoded back to the raw characters).
		 * They are restored here as the conventional entities; the apostrophe is
		 * assumed to be the numeric form `&#39;` - confirm against the published
		 * package.
		 *
		 * @param {string} str - Raw text to escape.
		 * @returns {string} The text with & " ' < > replaced by HTML entities.
		 */
		const encode = (str) => {
			return str
				.replace(/&/g, '&amp;')
				.replace(/"/g, '&quot;')
				.replace(/'/g, '&#39;')
				.replace(/</g, '&lt;')
				.replace(/>/g, '&gt;')
		}

		// div inside span is a bad idea
		@@ -5,0 +13,0 @@ const blockElements = [

webpack.config.js

		const path = require('path')

		module.exports = {
		entry: './src/index.js',
		entry: './build/index.js',
		target: 'web',
		mode: 'production',
		mode: 'development',
		output: {
		@@ -8,0 +8,0 @@ path: path.join(__dirname, 'public/js'),

sync.js

public/js/text-annotator.min.js

Sorry, the diff of this file is too big to display

text-annotator - npm Package Compare versions

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes