text-annotator
Advanced tools
Comparing version 0.7.3 to 0.7.4
@@ -13,3 +13,5 @@ "use strict"; | ||
// used to distinguish between browser and Node.js environments | ||
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined'; | ||
// is it possible to relax so as to allow jsdom | ||
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined'; // div inside span is a bad idea | ||
const blockElements = ['address', 'article', 'aside', 'blockquote', 'canvas', 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'main', 'nav', 'noscript', 'ol', 'output', 'p', 'pre', 'section', 'table', 'tfoot', 'ul', 'video']; | ||
@@ -31,3 +33,3 @@ | ||
this.sentences = []; // one highlight can have more than one location because of the potential issue in tag insertion*** | ||
this.sentences = []; // future work: one highlight can have more than one location because of the potential issue in tag insertion | ||
@@ -64,4 +66,5 @@ this.highlights = []; | ||
return highlightIndex; | ||
} | ||
} // experimental feature | ||
if (fuzzySearchOptions) { | ||
@@ -73,3 +76,4 @@ highlightIndex = this.fuzzySearch(prefix, str, postfix, fuzzySearchOptions); | ||
} | ||
} // eager search only works in (particular) browsers | ||
} // experimental feature | ||
// eager search only works in (particular) browsers | ||
@@ -86,3 +90,4 @@ | ||
return highlightIndex; | ||
} // only support direct search for now | ||
} // experimental feature | ||
// only support direct search for now | ||
@@ -131,4 +136,5 @@ | ||
} | ||
} | ||
} // experimental feature | ||
highlightAll(highlightIndexes, options = {}) { | ||
@@ -162,2 +168,3 @@ // either containerId or content is required | ||
if (highlightIndex !== -1) { | ||
// content is undefined if containerId and returnContent are falsy | |
return { | ||
@@ -188,3 +195,4 @@ highlightIndex, | ||
let newContent = content; | ||
let newContent = content; // need to change when one annotation => more than one highlight | ||
const loc = this.adjustLoc(highlightIdPattern, highlightIndex, highlightClass); | ||
@@ -494,4 +502,4 @@ const openTagLength = TextAnnotator.getOpenTagLength(highlightIdPattern, highlightIndex, highlightClass); | ||
return highlightIndex; | ||
} // further improvement when one annotation binds with more than one highlight*** | ||
// block elements are only used to check in the = condition for now | ||
} // future work: further improvement when one annotation binds with more than one highlight | ||
// includeRequiredTag used in = condition only | ||
@@ -501,6 +509,7 @@ | ||
const isCloseTag = tag.startsWith('</'); | ||
const tagType = isCloseTag ? tag.split('</')[1].split('>')[0] : tag.split(' ')[0].split('<')[1].split('>')[0]; | ||
const tagName = isCloseTag ? tag.split('</')[1].split('>')[0] : tag.split(' ')[0].split('<')[1].split('>')[0]; | ||
let included = false; | ||
let requiredTagNumber = 1; | ||
let requiredTagCount = 0; // outer | ||
let requiredTagCount = 0; // if both the start tag and the end tag are at the borders, place the tags outside the borders | ||
// if the close tag is at the border, check backwards until the start of the highlight | ||
@@ -516,5 +525,5 @@ if (isCloseTag) { | ||
if (tag2.startsWith('</' + tagType)) { | ||
if (tag2.startsWith('</' + tagName)) { | ||
requiredTagNumber++; | ||
} else if (tag2.startsWith('<' + tagType)) { | ||
} else if (tag2.startsWith('<' + tagName)) { | ||
requiredTagCount++; | ||
@@ -529,24 +538,25 @@ } | ||
} | ||
} else { | ||
for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) { | ||
const tagLoc2 = this.tagLocations[i2]; | ||
} // if the start tag is at the border, check forwards until the end of the highlight | ||
else { | ||
for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) { | ||
const tagLoc2 = this.tagLocations[i2]; | ||
if (highlightLoc[1] < tagLoc2[0]) { | ||
break; | ||
} else { | ||
const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]); | ||
if (highlightLoc[1] < tagLoc2[0]) { | ||
break; | ||
} else { | ||
const tag2 = this.originalContent.substring(tagLoc2[0] + tagLoc2[2], tagLoc2[0] + tagLoc2[2] + tagLoc2[1]); | ||
if (tag2.startsWith('<' + tagType)) { | ||
requiredTagNumber++; | ||
} else if (tag2.startsWith('</' + tagType)) { | ||
requiredTagCount++; | ||
} | ||
if (tag2.startsWith('<' + tagName)) { | ||
requiredTagNumber++; | ||
} else if (tag2.startsWith('</' + tagName)) { | ||
requiredTagCount++; | ||
} | ||
if (requiredTagNumber === requiredTagCount) { | ||
included = true; | ||
break; | ||
if (requiredTagNumber === requiredTagCount) { | ||
included = true; | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
@@ -569,3 +579,3 @@ return included; | ||
else if (highlightLoc[1] === tagLoc[0]) { | ||
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); | ||
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if end tag, not block element and include the required close tag, add right to the tag | ||
@@ -580,3 +590,3 @@ if (!tag.endsWith('/>') && tag.startsWith('</') && !blockElements.includes(tag.split('</')[1].split('>')[0]) && this.includeRequiredTag(i, highlightLoc, tag)) { | ||
if (highlightLoc[0] === tagLoc[0]) { | ||
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); | ||
const tag = this.originalContent.substring(tagLoc[0] + tagLoc[2], tagLoc[0] + tagLoc[2] + tagLoc[1]); // if self close tag or end tag or block element or not include the required close tag, add right to the tag | ||
@@ -592,6 +602,9 @@ if (tag.startsWith('</') || tag.endsWith('/>') || blockElements.includes(tag.split(' ')[0].split('<')[1].split('>')[0]) || !this.includeRequiredTag(i, highlightLoc, tag)) { | ||
} // step 2: check locations of other highlights | ||
// all span (no blocks) | ||
// stored in a different array than tags | ||
// can intersect | ||
for (let i = 0; i < this.highlights.length; i++) { | ||
const highlight = this.highlights[i]; | ||
const highlight = this.highlights[i]; // only check the highlighted | ||
@@ -606,13 +619,15 @@ if (highlight.highlighted) { | ||
locInc[1] += openTagLength + closeTagLength; | ||
} else if (highlightLoc[0] < loc[1] && highlightLoc[0] > loc[0] && highlightLoc[1] > loc[1]) { | ||
locInc[0] += openTagLength; | ||
locInc[1] += openTagLength + closeTagLength; | ||
} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) { | ||
locInc[1] += openTagLength + closeTagLength; | ||
} else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) { | ||
locInc[1] += openTagLength; | ||
} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) { | ||
locInc[0] += openTagLength; | ||
locInc[1] += openTagLength; | ||
} | ||
} // syntactically correct but semantically incorrect | |
else if (highlightLoc[0] < loc[1] && highlightLoc[0] > loc[0] && highlightLoc[1] > loc[1]) { | ||
locInc[0] += openTagLength; | ||
locInc[1] += openTagLength + closeTagLength; | ||
} else if (highlightLoc[0] <= loc[0] && highlightLoc[1] >= loc[1]) { | ||
locInc[1] += openTagLength + closeTagLength; | ||
} // syntactically correct but semantically incorrect | |
else if (highlightLoc[0] < loc[0] && highlightLoc[1] > loc[0] && highlightLoc[1] < loc[1]) { | ||
locInc[1] += openTagLength; | ||
} else if (highlightLoc[0] >= loc[0] && highlightLoc[1] <= loc[1]) { | ||
locInc[0] += openTagLength; | ||
locInc[1] += openTagLength; | ||
} | ||
} | ||
@@ -673,3 +688,3 @@ } | ||
return (0, _sbd.default)(text, options).map(raw => { | ||
// can tokenizer return location directly*** | ||
// future work: can tokenizer return location directly | ||
const index = text.indexOf(raw); | ||
@@ -689,3 +704,3 @@ return { | ||
// step 1: derive best substr | ||
// /s may be better*** | ||
// future work: /s may be better | ||
const words = str.split(' '); | ||
@@ -692,0 +707,0 @@ |
{ | ||
"name": "text-annotator", | ||
"version": "0.7.3", | ||
"version": "0.7.4", | ||
"description": "A JavaScript library for locating and annotating plain text in HTML", | ||
@@ -5,0 +5,0 @@ "main": "build/text-annotator.js", |
import getSentences from './ext/sbd' | ||
// used to distinguish between browser and Node.js environments | ||
// is it possible to relax so as to allow jsdom | ||
const isBrowser = | ||
typeof window !== 'undefined' && typeof window.document !== 'undefined' | ||
// div inside span is a bad idea | ||
const blockElements = [ | ||
@@ -66,3 +68,3 @@ 'address', | ||
this.sentences = [] | ||
// one highlight can have more than one location because of the potential issue in tag insertion*** | ||
// future work: one highlight can have more than one location because of the potential issue in tag insertion | ||
this.highlights = [] | ||
@@ -106,2 +108,3 @@ | ||
// experimental feature | ||
if (fuzzySearchOptions) { | ||
@@ -119,2 +122,3 @@ highlightIndex = this.fuzzySearch( | ||
// experimental feature | ||
// eager search only works in (particular) browsers | ||
@@ -136,2 +140,3 @@ if (isBrowser && eagerSearchOptions) { | ||
// experimental feature | ||
// only support direct search for now | ||
@@ -193,2 +198,3 @@ searchAll(str, options = {}) { | ||
// experimental feature | ||
highlightAll(highlightIndexes, options = {}) { | ||
@@ -220,2 +226,3 @@ // either containerId or content is required | ||
if (highlightIndex !== -1) { | ||
// content is undefined if containerId and returnContent are falsy | |
return { | ||
@@ -248,2 +255,3 @@ highlightIndex, | ||
let newContent = content | ||
// need to change when one annotation => more than one highlight | ||
const loc = this.adjustLoc( | ||
@@ -599,7 +607,7 @@ highlightIdPattern, | ||
// further improvement when one annotation binds with more than one highlight*** | ||
// block elements are only used to check in the = condition for now | ||
// future work: further improvement when one annotation binds with more than one highlight | ||
// includeRequiredTag used in = condition only | ||
includeRequiredTag(i, highlightLoc, tag) { | ||
const isCloseTag = tag.startsWith('</') | ||
const tagType = isCloseTag | ||
const tagName = isCloseTag | ||
? tag.split('</')[1].split('>')[0] | ||
@@ -615,3 +623,4 @@ : tag | ||
let requiredTagCount = 0 | ||
// outer | ||
// if both the start tag and the end tag are at the borders, place the tags outside the borders | ||
// if the close tag is at the border, check backwards until the start of the highlight | ||
if (isCloseTag) { | ||
@@ -627,5 +636,5 @@ for (let i2 = i - 1; i2 >= 0; i2--) { | ||
) | ||
if (tag2.startsWith('</' + tagType)) { | ||
if (tag2.startsWith('</' + tagName)) { | ||
requiredTagNumber++ | ||
} else if (tag2.startsWith('<' + tagType)) { | ||
} else if (tag2.startsWith('<' + tagName)) { | ||
requiredTagCount++ | ||
@@ -639,3 +648,5 @@ } | ||
} | ||
} else { | ||
} | ||
// if the start tag is at the border, check forwards until the end of the highlight | ||
else { | ||
for (let i2 = i + 1; i2 < this.tagLocations.length; i2++) { | ||
@@ -650,5 +661,5 @@ const tagLoc2 = this.tagLocations[i2] | ||
) | ||
if (tag2.startsWith('<' + tagType)) { | ||
if (tag2.startsWith('<' + tagName)) { | ||
requiredTagNumber++ | ||
} else if (tag2.startsWith('</' + tagType)) { | ||
} else if (tag2.startsWith('</' + tagName)) { | ||
requiredTagCount++ | ||
@@ -685,2 +696,3 @@ } | ||
) | ||
// if end tag, not block element and include the required close tag, add right to the tag | ||
if ( | ||
@@ -704,2 +716,3 @@ !tag.endsWith('/>') && | ||
) | ||
// if self close tag or end tag or block element or not include the required close tag, add right to the tag | ||
if ( | ||
@@ -727,4 +740,8 @@ tag.startsWith('</') || | ||
// step 2: check locations of other highlights | ||
// all span (no blocks) | ||
// stored in a different array than tags | ||
// can intersect | ||
for (let i = 0; i < this.highlights.length; i++) { | ||
const highlight = this.highlights[i] | ||
// only check the highlighted | ||
if (highlight.highlighted) { | ||
@@ -741,3 +758,5 @@ const openTagLength = TextAnnotator.getOpenTagLength( | ||
locInc[1] += openTagLength + closeTagLength | ||
} else if ( | ||
} | ||
// syntactically correct but semantically incorrect | |
else if ( | ||
highlightLoc[0] < loc[1] && | ||
@@ -751,3 +770,5 @@ highlightLoc[0] > loc[0] && | ||
locInc[1] += openTagLength + closeTagLength | ||
} else if ( | ||
} | ||
// syntactically correct but semantically incorrect | |
else if ( | ||
highlightLoc[0] < loc[0] && | ||
@@ -816,3 +837,3 @@ highlightLoc[1] > loc[0] && | ||
return getSentences(text, options).map(raw => { | ||
// can tokenizer return location directly*** | ||
// future work: can tokenizer return location directly | ||
const index = text.indexOf(raw) | ||
@@ -838,3 +859,3 @@ return { raw, index } | ||
// step 1: derive best substr | ||
// /s may be better*** | ||
// future work: /s may be better | ||
const words = str.split(' ') | ||
@@ -841,0 +862,0 @@ while (words.length) { |
2649184
2367