text-annotator
Advanced tools
Comparing version 0.6.0 to 0.6.1
{ | ||
"name": "text-annotator", | ||
"version": "0.6.0", | ||
"version": "0.6.1", | ||
"description": "A library for locating and annotating plain text in HTML", | ||
"main": "public/js/text-annotator.min.js", | ||
"main": "src/text-annotator.js", | ||
"scripts": { | ||
@@ -7,0 +7,0 @@ "lint": "./node_modules/.bin/eslint src/** test/** --fix", |
@@ -1,1 +0,1 @@ | ||
var TextAnnotator=function(t){var e={};function n(i){if(e[i])return e[i].exports;var s=e[i]={i:i,l:!1,exports:{}};return t[i].call(s.exports,s,s.exports,n),s.l=!0,s.exports}return n.m=t,n.c=e,n.d=function(t,e,i){n.o(t,e)||Object.defineProperty(t,e,{enumerable:!0,get:i})},n.r=function(t){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})},n.t=function(t,e){if(1&e&&(t=n(t)),8&e)return t;if(4&e&&"object"==typeof t&&t&&t.__esModule)return t;var i=Object.create(null);if(n.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:t}),2&e&&"string"!=typeof t)for(var s in t)n.d(i,s,function(e){return t[e]}.bind(null,s));return i},n.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return n.d(e,"a",e),e},n.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},n.p="",n(n.s=0)}([function(t,e,n){t.exports=n(1).default},function(t,e,n){"use strict";n.r(e);let i=[];const s=["al","adj","assn","Ave","BSc","MSc","Cell","Ch","Co","cc","Corp","Dem","Dept","ed","eg","Eq","Eqs","est","est","etc","Ex","ext","Fig","fig","Figs","figs","i.e","ie","Inc","inc","Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec","jr","mi","Miss","Mrs","Mr","Ms","Mol","mt","mts","no","Nos","PhD","MD","BA","MA","MM","pl","pop","pp","Prof","Dr","pt","Ref","Refs","Rep","repr","rev","Sec","Secs","Sgt","Col","Gen","Rep","Sen","Gov","Lt","Maj","Capt","St","Sr","sr","Jr","jr","Rev","Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat","trans","Univ","Viz","Vol","vs","v"],r=t=>/^[A-Z][a-z].*/.test(t)||g(t),o=t=>r(t)||/``|"|'/.test(t.substring(0,2)),l=(t,e)=>{if("a.m."===t||"p.m."===t){if("day"===e.replace(/\W+/g,"").slice(-3).toLowerCase())return!0}return!1},c=t=>{const e=t.replace(/[()[\]{}]/g,"").match(/(.\.)*/);return e&&e[0].length>0},h=t=>t.length<=3||r(t),a=(t,e)=>{if(e.length>0){if(t<5&&e[0].length<6&&r(e[0]))return!0;return e.filter(t=>/[A-Z]/.test(t.charAt(0))).length>=3}return!1},g=(t,e)=>(e&&(t=t.slice(e-1,e+2)),!isNaN(t)),u=t=>t.match(/^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/),f=t=>t.match(/[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_+.~#?&//=]*)/),d=t=>{let e=0;if((e=t.indexOf("."))>-1||(e=t.indexOf("!"))>-1||(e=t.indexOf("?"))>-1){if(t.charAt(e+1).match(/[a-zA-Z].*/))return[t.slice(0,e+1),t.slice(e+1)]}return!1},p=(t,e)=>e.length>1?e.indexOf(t.slice(-1))>-1:t.slice(-1)===e,m=(t,e)=>t.slice(t.length-e.length)===e;function b(t,e){const n=" @~@ ".trim(),r=new RegExp("\\S",""),b=new RegExp("\\n+|[-#=_+*]{4,}","g"),x=new RegExp("\\S+|\\n","g");if(!t||"string"!=typeof t||!t.length)return[];if(!r.test(t))return[];const T={newline_boundaries:!1,html_boundaries:!1,html_boundaries_tags:["p","div","ul","ol"],sanitize:!1,allowed_tags:!1,preserve_whitespace:!1,abbreviations:null};if("boolean"==typeof e)T.newline_boundaries=!0;else for(let t in e)T[t]=e[t];var S;if(S=T.abbreviations,i=S||s,T.newline_boundaries&&(t=t.replace(b," @~@ ")),T.html_boundaries){const e="(<br\\s*\\/?>|<\\/("+T.html_boundaries_tags.join("|")+")>)",n=new RegExp(e,"g");t=t.replace(n,"$1 @~@ ")}let y,L;(T.sanitize||T.allowed_tags)&&(T.allowed_tags||(T.allowed_tags=[""]),t=(t=>{if(("string"==typeof t||t instanceof String)&&"undefined"!=typeof document){const e=document.createElement("DIV");e.innerHTML=t,t=(e.textContent||"").trim()}else"object"==typeof t&&t.textContent&&(t=(t.textContent||"").trim());return t})(t,T.allowed_tags)),T.preserve_whitespace?(L=t.split(/(<br\s*\/?>|\S+|\n+)/),y=L.filter((t,e)=>e%2)):y=t.trim().match(x);let M=0,w=0,O=[],v=[],C=[];if(!y||!y.length)return[];let _=0;for(let t=0,e=y.length;t<e;t++){if(M++,C.push(y[t]),~y[t].indexOf(",")&&(M=0),"."===(j=y[t])||"!"===j||"?"===j||p(y[t],"?!")||y[t]===n){(T.newline_boundaries||T.html_boundaries)&&y[t]===n&&C.pop(),v.push(C),M=0,C=[];continue}if((p(y[t],'"')||p(y[t],"”"))&&(y[t]=y[t].slice(0,-1),w=2*t+1),p(y[t],".")){if(t+1<e){if(2===y[t].length&&isNaN(y[t].charAt(0)))continue;if(H=y[t],~i.indexOf(H.replace(/\W+/g,"")))continue;if(o(y[t+1])){if(l(y[t],y[t+1]))continue;if(a(M,y.slice(t,6)))continue;if(g(y[t+1])&&h(y[t]))continue}else{if(m(y[t],".."))continue;if(c(y[t]))continue;if(a(M,y.slice(t,5)))continue}}v.push(C),C=[],M=0;continue}if((w=y[t].indexOf("."))>-1){if(g(y[t],w))continue;if(c(y[t]))continue;if(f(y[t])||u(y[t]))continue}const s=d(y[t]);s&&(O=s,C.pop(),C.push(O[0]),v.push(C),T.preserve_whitespace&&(L.splice(2*t+1+_,1,O[0],"",O[1]),_+=2),C=[],M=0,C.push(O[1]))}var H,j;C.length&&v.push(C);const A=[];let E="";v=v.filter((function(t){return t.length>0}));for(let t=0;t<v.length;t++){if(!T.preserve_whitespace||T.newline_boundaries||T.html_boundaries)E=v[t].join(" ");else{let e=2*v[t].length;0===t&&(e+=1),E=L.splice(0,e).join("")}1===v[t].length&&v[t][0].length<4&&v[t][0].indexOf(".")>-1&&v[t+1]&&v[t+1][0].indexOf(".")<0&&(E+=" "+v[t+1].join(" "),t++),A.push(E)}return A}const x="undefined"!=typeof window&&void 0!==window.document;class T{constructor(t={}){const e=t.containerId,n=t.content,i=t.isHTML;this.originalContent=x&&e?document.getElementById(e).innerHTML:n,this.isHTML=i,this.stripedHTML="",this.tagLocations=[],this.sentences=[],this.highlights=[],i&&this.stripAndStoreHTMLTags()}search(t,e={}){let n=e.prefix||"",i=e.postfix||"";const s=e.directSearchOptions,r=e.fuzzySearchOptions,o=e.eagerSearchOptions;if(void 0===e.trim||e.trim){const e=T.trim(n,t,i);n=e.prefix,t=e.str,i=e.postfix}let l=-1;return l=this.directSearch(n,t,i,s),-1!==l||r&&(l=this.fuzzySearch(n,t,i,r),-1!==l)||x&&o&&(l=this.eagerSearch(n,t,i,o)),l}searchAll(t,e={}){const n=[],i=(t,e,s)=>{const r=this.search(t,Object.assign({directSearchOptions:Object.assign({lastHighlightIndex:s},e.directSearchOptions)},e));-1!==r&&(n.push(r),i(t,e,r))};return i(t,e),n}highlight(t,e={}){const n=e.containerId;let i=e.content;const s=e.highlightClass||"highlight",r=e.highlightIdPattern||"highlight-",o=e.returnContent;x&&n&&(i=document.getElementById(n).innerHTML);const l=T.createOpenTag(r,t,s),c=this.adjustLoc(r,t,s);let h=T.insert(i,l,c[0]);if(h=T.insert(h,T.createCloseTag(),c[1]+l.length),this.highlights[t].highlighted=!0,!x||!n||o)return h;document.getElementById(n).innerHTML=h}highlightAll(t,e={}){const{containerId:n,content:i,returnContent:s}=e;let r=x&&n?document.getElementById(n).innerHTML:i;if(t.forEach(t=>{e.content=r,r=this.highlight(t,e)}),!x||!n||s)return r}searchAndHighlight(t,e){const n=this.search(t,e.searchOptions);if(-1!==n)return{highlightIndex:n,content:this.highlight(n,e.highlightOptions)}}unhighlight(t,e={}){const n=e.byStringOperation,i=e.containerId;let s=e.content;const r=e.highlightClass||"highlight",o=e.highlightIdPattern||"highlight-",l=e.returnContent;if(this.highlights[t].highlighted=!1,n){x&&i&&(s=document.getElementById(i).innerHTML);let e=s;const n=this.adjustLoc(o,t,r),c=T.getOpenTagLength(o,t,r),h=e.substring(n[0],n[1]+c+T.getCloseTagLength()),a=e.substring(n[0]+c,n[1]+c);if(e=e.replace(h,a),l)return e;document.getElementById(i).innerHTML=e}else if(x){const e=o+t;if(document.getElementById(e).outerHTML=document.getElementById(e).innerHTML,l)return document.getElementById(i).innerHTML}}stripAndStoreHTMLTags(){let t;this.stripedHTML=this.originalContent;const e=/<[^>]+>/;let n=0;for(;t=this.stripedHTML.match(e);){this.stripedHTML=this.stripedHTML.replace(t,"");const e=t[0].length;this.tagLocations.push([t.index,e,n]),n+=e}}directSearch(t,e,n,i={}){const s=void 0===i.caseSensitive||i.caseSensitive,r=i.lastHighlightIndex;let o=t+e+n,l=this.isHTML?this.stripedHTML:this.originalContent;s||(o=o.toLowerCase(),l=l.toLowerCase());let c=0;void 0!==r&&(c=this.highlights[r].loc[1]+1);let h=-1;const a=l.indexOf(o,c);if(-1!==a){const n=[];n[0]=a+t.length,n[1]=n[0]+e.length,h=this.highlights.push({loc:n})-1}return h}eagerSearch(t,e,n,i={}){const s=i.caseSensitive,r=i.containerId,o=i.threshold||.74,l=t+e+n;let c=-1;if(window.find){document.designMode="on";const t=window.getSelection();for(t.collapse(document.body,0);window.find(l,s);){document.execCommand("hiliteColor",!0,"rgba(255, 255, 255, 0)"),t.collapseToEnd();const n=document.querySelector("#"+r+' [style="background-color: rgba(255, 255, 255, 0);"]');if(n){const t=n.innerHTML.replace(/<[^>]*>/g,""),i=T.getBestSubstring(t,e,o);if(i.similarity){const e=this.isHTML?this.stripedHTML.indexOf(t):this.originalContent.indexOf(t);-1!==e&&(c=this.highlights.push({loc:[e+i.loc[0],e+i.loc[1]]})-1)}break}}document.execCommand("undo"),document.designMode="off"}return c}fuzzySearch(t,e,n,i={}){const s=i.caseSensitive;let r=i.tbThreshold||.68;const o=i.tokenBased;let l=i.sbThreshold||.85;const c=i.lenRatio||1.2,h=i.processSentence,a=void 0===i.sentenceBased||i.sentenceBased;let g=-1;const u=this.isHTML?this.stripedHTML:this.originalContent;if(o||t||n){const i=[];let o=-1;for(;-1!==(o=u.indexOf(e,o+1));)i.push(o);let l=-1;const c=t+e+n;for(const o of i){const i=u.substring(o-t.length,o)+e+u.substring(o+e.length,o+e.length+n.length),h=T.getSimilarity(i,c,s);h>=r&&(r=h,l=o)}-1!==l&&(g=this.highlights.push({loc:[l,l+e.length]})-1)}else if(a){let t=[];t=this.sentences.length?this.sentences:this.sentences=T.sentenize(u);const n=e.split(/\s/),i=[];for(const e of t)for(const t of n)if(e.raw.includes(t)){i.push(e);break}if(h){const t=this.tagLocations,e=t.length;if(e){let n=0;for(const s of i){let i=s.raw;const r=[s.index,s.index+i.length];let o=0;for(let s=n;s<e;s++){const e=t[s];if(e[0]>=r[0]&&e[0]<=r[1]){const t=this.originalContent.substring(e[0]+e[2],e[0]+e[2]+e[1]),n=e[0]+o-r[0];i=i.slice(0,n)+t+i.slice(n),o+=e[1]}else if(e[0]>r[1]){n=s-1;break}}i=h(i),i=i.replace(/(<([^>]+)>)/gi,"");const l=s.raw;l!==i&&(s.raw=i,s.index=s.index+l.indexOf(i))}}}let r=null,o=null;if(i.forEach((t,n)=>{let h=T.getBestSubstring(t.raw,e,l,c,s);h.similarity?(l=h.similarity,r=h,o=t):n!==i.length-1&&(h=T.getBestSubstring(t.raw+i[n+1].raw,e,l,c,s),h.similarity&&(l=h.similarity,r=h,o=i[n]))}),r){let t=o.index;g=this.highlights.push({loc:[t+r.loc[0],t+r.loc[1]]})-1}}return g}adjustLoc(t,e,n){const i=this.highlights[e].loc,s=[0,0],r=this.tagLocations,o=r.length;for(let t=0;t<o;t++){const e=r[t];if(i[1]<e[0])break;if(i[1]===e[0]){this.originalContent.substring(e[0]+e[2],e[0]+e[2]+e[1]).startsWith("</")&&(s[1]+=e[1])}else if(i[1]>e[0])if(s[1]+=e[1],i[0]===e[0]){const n=this.originalContent.substring(e[0]+e[2],e[0]+e[2]+e[1]);if(n.startsWith("</"))s[0]+=e[1];else{let o=!1,l=1,c=0;for(let e=t+1;e<r.length;e++){const t=r[e];if(i[1]<=t[0])break;{const e=this.originalContent.substring(t[0]+t[2],t[0]+t[2]+t[1]),i=n.split(" ")[0].split("<")[1].split(">")[0];if(e.startsWith("<"+i)?l++:e.startsWith("</"+i)&&c++,l===c){o=!0;break}}}o||(s[0]+=e[1])}}else i[0]>e[0]&&(s[0]+=e[1])}return this.highlights.forEach((e,r)=>{if(e.highlighted){const o=T.getOpenTagLength(t,r,n),l=T.getCloseTagLength(),c=e.loc;i[0]>=c[1]?(s[0]+=o+l,s[1]+=o+l):i[0]<c[1]&&i[0]>c[0]&&i[1]>c[1]?(s[0]+=o,s[1]+=o+l):i[0]<=c[0]&&i[1]>=c[1]?s[1]+=o+l:i[0]<c[0]&&i[1]>c[0]&&i[1]<c[1]?s[1]+=o:i[0]>=c[0]&&i[1]<=c[1]&&(s[0]+=o,s[1]+=o)}}),[i[0]+s[0],i[1]+s[1]]}static createOpenTag(t,e,n){return`<span id="${t+e}" class="${n}">`}static createCloseTag(){return"</span>"}static getOpenTagLength(t,e,n){return T.createOpenTag(t,e,n).length}static getCloseTagLength(){return T.createCloseTag().length}static trim(t,e,n){return t=t.replace(/^\s+/,""),n=n.replace(/\s+$/,""),t||(e=e.replace(/^\s+/,"")),n||(e=e.replace(/\s+$/,"")),{prefix:t,str:e,postfix:n}}static insert(t,e,n){return t.slice(0,n)+e+t.slice(n)}static sentenize(t){return b(t,{newline_boundaries:!1,html_boundaries:!1,sanitize:!1,allowed_tags:!1,preserve_whitespace:!0,abbreviations:null}).map(e=>({raw:e,index:t.indexOf(e)}))}static getBestSubstring(t,e,n,i,s){let r={},o=T.getSimilarity(t,e,s);if(o>=n){const n=t.split(" ");for(;n.length;){const t=n.shift(),i=n.join(" ");let r=T.getSimilarity(i,e,s);if(r<o){n.unshift(t);const i=n.pop();if(r=T.getSimilarity(n.join(" "),e,s),r<o){n.push(i);break}o=r}else o=r}const l=n.join(" ");if(!i||l.length/e.length<=i){const e=[];e[0]=t.indexOf(l),e[1]=e[0]+l.length,r={similarity:o,loc:e}}}return r}static getSimilarity(t,e,n){return n||(t=t.toLowerCase(),e=e.toLowerCase()),t===e?1:T.lcsLength(t,e)/e.length}static lcsLength(t,e){const n=t.length,i=e.length,s=t.split(""),r=e.split(""),o=Array(n+1).fill(Array(i+1).fill(0));for(let t=1;t<=n;t++)for(let e=1;e<=i;e++)o[t][e]=s[t-1]===r[e-1]?o[t-1][e-1]+1:Math.max(o[t][e-1],o[t-1][e]);return o[n][i]}}e.default=T}]); | ||
var TextAnnotator=function(t){var e={};function n(i){if(e[i])return e[i].exports;var s=e[i]={i:i,l:!1,exports:{}};return t[i].call(s.exports,s,s.exports,n),s.l=!0,s.exports}return n.m=t,n.c=e,n.d=function(t,e,i){n.o(t,e)||Object.defineProperty(t,e,{enumerable:!0,get:i})},n.r=function(t){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(t,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(t,"__esModule",{value:!0})},n.t=function(t,e){if(1&e&&(t=n(t)),8&e)return t;if(4&e&&"object"==typeof t&&t&&t.__esModule)return t;var i=Object.create(null);if(n.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:t}),2&e&&"string"!=typeof t)for(var s in t)n.d(i,s,function(e){return t[e]}.bind(null,s));return i},n.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return n.d(e,"a",e),e},n.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},n.p="",n(n.s=0)}([function(t,e,n){t.exports=n(1).default},function(t,e,n){"use strict";n.r(e);let i=[];const s=["al","adj","assn","Ave","BSc","MSc","Cell","Ch","Co","cc","Corp","Dem","Dept","ed","eg","Eq","Eqs","est","est","etc","Ex","ext","Fig","fig","Figs","figs","i.e","ie","Inc","inc","Jan","Feb","Mar","Apr","Jun","Jul","Aug","Sep","Sept","Oct","Nov","Dec","jr","mi","Miss","Mrs","Mr","Ms","Mol","mt","mts","no","Nos","PhD","MD","BA","MA","MM","pl","pop","pp","Prof","Dr","pt","Ref","Refs","Rep","repr","rev","Sec","Secs","Sgt","Col","Gen","Rep","Sen","Gov","Lt","Maj","Capt","St","Sr","sr","Jr","jr","Rev","Sun","Mon","Tu","Tue","Tues","Wed","Th","Thu","Thur","Thurs","Fri","Sat","trans","Univ","Viz","Vol","vs","v"],r=t=>/^[A-Z][a-z].*/.test(t)||g(t),o=t=>r(t)||/``|"|'/.test(t.substring(0,2)),l=(t,e)=>{if("a.m."===t||"p.m."===t){if("day"===e.replace(/\W+/g,"").slice(-3).toLowerCase())return!0}return!1},c=t=>{const e=t.replace(/[()[\]{}]/g,"").match(/(.\.)*/);return e&&e[0].length>0},h=t=>t.length<=3||r(t),a=(t,e)=>{if(e.length>0){if(t<5&&e[0].length<6&&r(e[0]))return!0;return e.filter(t=>/[A-Z]/.test(t.charAt(0))).length>=3}return!1},g=(t,e)=>(e&&(t=t.slice(e-1,e+2)),!isNaN(t)),u=t=>t.match(/^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/),f=t=>t.match(/[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_+.~#?&//=]*)/),d=t=>{let e=0;if((e=t.indexOf("."))>-1||(e=t.indexOf("!"))>-1||(e=t.indexOf("?"))>-1){if(t.charAt(e+1).match(/[a-zA-Z].*/))return[t.slice(0,e+1),t.slice(e+1)]}return!1},p=(t,e)=>e.length>1?e.indexOf(t.slice(-1))>-1:t.slice(-1)===e,m=(t,e)=>t.slice(t.length-e.length)===e;function b(t,e){const n=" @~@ ".trim(),r=new RegExp("\\S",""),b=new RegExp("\\n+|[-#=_+*]{4,}","g"),x=new RegExp("\\S+|\\n","g");if(!t||"string"!=typeof t||!t.length)return[];if(!r.test(t))return[];const T={newline_boundaries:!1,html_boundaries:!1,html_boundaries_tags:["p","div","ul","ol"],sanitize:!1,allowed_tags:!1,preserve_whitespace:!1,abbreviations:null};if("boolean"==typeof e)T.newline_boundaries=!0;else for(let t in e)T[t]=e[t];var S;if(S=T.abbreviations,i=S||s,T.newline_boundaries&&(t=t.replace(b," @~@ ")),T.html_boundaries){const e="(<br\\s*\\/?>|<\\/("+T.html_boundaries_tags.join("|")+")>)",n=new RegExp(e,"g");t=t.replace(n,"$1 @~@ ")}let y,L;(T.sanitize||T.allowed_tags)&&(T.allowed_tags||(T.allowed_tags=[""]),t=(t=>{if(("string"==typeof t||t instanceof String)&&"undefined"!=typeof document){const e=document.createElement("DIV");e.innerHTML=t,t=(e.textContent||"").trim()}else"object"==typeof t&&t.textContent&&(t=(t.textContent||"").trim());return t})(t,T.allowed_tags)),T.preserve_whitespace?(L=t.split(/(<br\s*\/?>|\S+|\n+)/),y=L.filter((t,e)=>e%2)):y=t.trim().match(x);let M=0,w=0,O=[],v=[],C=[];if(!y||!y.length)return[];let _=0;for(let t=0,e=y.length;t<e;t++){if(M++,C.push(y[t]),~y[t].indexOf(",")&&(M=0),"."===(j=y[t])||"!"===j||"?"===j||p(y[t],"?!")||y[t]===n){(T.newline_boundaries||T.html_boundaries)&&y[t]===n&&C.pop(),v.push(C),M=0,C=[];continue}if((p(y[t],'"')||p(y[t],"”"))&&(y[t]=y[t].slice(0,-1),w=2*t+1),p(y[t],".")){if(t+1<e){if(2===y[t].length&&isNaN(y[t].charAt(0)))continue;if(H=y[t],~i.indexOf(H.replace(/\W+/g,"")))continue;if(o(y[t+1])){if(l(y[t],y[t+1]))continue;if(a(M,y.slice(t,6)))continue;if(g(y[t+1])&&h(y[t]))continue}else{if(m(y[t],".."))continue;if(c(y[t]))continue;if(a(M,y.slice(t,5)))continue}}v.push(C),C=[],M=0;continue}if((w=y[t].indexOf("."))>-1){if(g(y[t],w))continue;if(c(y[t]))continue;if(f(y[t])||u(y[t]))continue}const s=d(y[t]);s&&(O=s,C.pop(),C.push(O[0]),v.push(C),T.preserve_whitespace&&(L.splice(2*t+1+_,1,O[0],"",O[1]),_+=2),C=[],M=0,C.push(O[1]))}var H,j;C.length&&v.push(C);const A=[];let E="";v=v.filter((function(t){return t.length>0}));for(let t=0;t<v.length;t++){if(!T.preserve_whitespace||T.newline_boundaries||T.html_boundaries)E=v[t].join(" ");else{let e=2*v[t].length;0===t&&(e+=1),E=L.splice(0,e).join("")}1===v[t].length&&v[t][0].length<4&&v[t][0].indexOf(".")>-1&&v[t+1]&&v[t+1][0].indexOf(".")<0&&(E+=" "+v[t+1].join(" "),t++),A.push(E)}return A}const x="undefined"!=typeof window&&void 0!==window.document;class T{constructor(t={}){const e=t.containerId,n=t.content,i=t.isHTML;this.originalContent=x&&e?document.getElementById(e).innerHTML:n,this.isHTML=i,this.stripedHTML="",this.tagLocations=[],this.sentences=[],this.highlights=[],i&&this.stripAndStoreHTMLTags()}search(t,e={}){let n=e.prefix||"",i=e.postfix||"";const s=e.directSearchOptions,r=e.fuzzySearchOptions,o=e.eagerSearchOptions;if(void 0===e.trim||e.trim){const e=T.trim(n,t,i);n=e.prefix,t=e.str,i=e.postfix}let l=-1;return l=this.directSearch(n,t,i,s),-1!==l||r&&(l=this.fuzzySearch(n,t,i,r),-1!==l)||x&&o&&(l=this.eagerSearch(n,t,i,o)),l}searchAll(t,e={}){const n=[],i=(t,e,s)=>{const r=this.search(t,Object.assign({directSearchOptions:Object.assign({lastHighlightIndex:s},e.directSearchOptions)},e));-1!==r&&(n.push(r),i(t,e,r))};return i(t,e),n}highlight(t,e={}){const n=e.containerId;let i=e.content;const s=e.highlightClass||"highlight",r=e.highlightIdPattern||"highlight-",o=e.returnContent;x&&n&&(i=document.getElementById(n).innerHTML);const l=T.createOpenTag(r,t,s),c=this.adjustLoc(r,t,s);let h=T.insert(i,l,c[0]);if(h=T.insert(h,T.createCloseTag(),c[1]+l.length),this.highlights[t].highlighted=!0,!x||!n||o)return h;document.getElementById(n).innerHTML=h}highlightAll(t,e={}){const{containerId:n,content:i,returnContent:s}=e;let r=x&&n?document.getElementById(n).innerHTML:i;if(t.forEach(t=>{e.content=r,r=this.highlight(t,e)}),!x||!n||s)return r}searchAndHighlight(t,e){const n=this.search(t,e.searchOptions);if(-1!==n)return{highlightIndex:n,content:this.highlight(n,e.highlightOptions)}}unhighlight(t,e={}){const n=e.byStringOperation,i=e.containerId;let s=e.content;const r=e.highlightClass||"highlight",o=e.highlightIdPattern||"highlight-",l=e.returnContent;if(this.highlights[t].highlighted=!1,n){x&&i&&(s=document.getElementById(i).innerHTML);let e=s;const n=this.adjustLoc(o,t,r),c=T.getOpenTagLength(o,t,r),h=e.substring(n[0],n[1]+c+T.getCloseTagLength()),a=e.substring(n[0]+c,n[1]+c);if(e=e.replace(h,a),l)return e;document.getElementById(i).innerHTML=e}else if(x){const e=o+t;if(document.getElementById(e).outerHTML=document.getElementById(e).innerHTML,l)return document.getElementById(i).innerHTML}}stripAndStoreHTMLTags(){let t;this.stripedHTML=this.originalContent;const e=/<[^>]+>/;let n=0;for(;t=this.stripedHTML.match(e);){this.stripedHTML=this.stripedHTML.replace(t,"");const e=t[0].length;this.tagLocations.push([t.index,e,n]),n+=e}}directSearch(t,e,n,i={}){const s=void 0===i.caseSensitive||i.caseSensitive,r=i.lastHighlightIndex;let o=t+e+n,l=this.isHTML?this.stripedHTML:this.originalContent;s||(o=o.toLowerCase(),l=l.toLowerCase());let c=0;void 0!==r&&(c=this.highlights[r].loc[1]+1);let h=-1;const a=l.indexOf(o,c);if(-1!==a){const n=[];n[0]=a+t.length,n[1]=n[0]+e.length,h=this.highlights.push({loc:n})-1}return h}eagerSearch(t,e,n,i={}){const s=i.caseSensitive,r=i.containerId,o=i.threshold||.74,l=t+e+n;let c=-1;if(window.find){document.designMode="on";const t=window.getSelection();for(t.collapse(document.body,0);window.find(l,s);){document.execCommand("hiliteColor",!0,"rgba(255, 255, 255, 0)"),t.collapseToEnd();const n=document.querySelector("#"+r+' [style="background-color: rgba(255, 255, 255, 0);"]');if(n){const t=n.innerHTML.replace(/<[^>]*>/g,""),i=T.getBestSubstring(t,e,o);if(i.similarity){const e=(this.isHTML?this.stripedHTML:this.originalContent).indexOf(t);-1!==e&&(c=this.highlights.push({loc:[e+i.loc[0],e+i.loc[1]]})-1)}break}}document.execCommand("undo"),document.designMode="off"}return c}fuzzySearch(t,e,n,i={}){const s=i.caseSensitive;let r=i.tbThreshold||.68;const o=i.tokenBased;let l=i.sbThreshold||.85;const c=i.lenRatio||1.2,h=i.processSentence,a=void 0===i.sentenceBased||i.sentenceBased;let g=-1;const u=this.isHTML?this.stripedHTML:this.originalContent;if(o||t||n){const i=[];let o=-1;for(;-1!==(o=u.indexOf(e,o+1));)i.push(o);let l=-1;const c=t+e+n;for(const o of i){const i=u.substring(o-t.length,o)+e+u.substring(o+e.length,o+e.length+n.length),h=T.getSimilarity(i,c,s);h>=r&&(r=h,l=o)}-1!==l&&(g=this.highlights.push({loc:[l,l+e.length]})-1)}else if(a){let t=[];t=this.sentences.length?this.sentences:this.sentences=T.sentenize(u);const n=e.split(/\s/),i=[];for(const e of t)for(const t of n)if(e.raw.includes(t)){i.push(e);break}if(h){const t=this.tagLocations,e=t.length;if(e){let n=0;for(const s of i){let i=s.raw;const r=[s.index,s.index+i.length];let o=0;for(let s=n;s<e;s++){const e=t[s];if(e[0]>=r[0]&&e[0]<=r[1]){const t=this.originalContent.substring(e[0]+e[2],e[0]+e[2]+e[1]),n=e[0]+o-r[0];i=i.slice(0,n)+t+i.slice(n),o+=e[1]}else if(e[0]>r[1]){n=s-1;break}}i=h(i),i=i.replace(/(<([^>]+)>)/gi,"");const l=s.raw;l!==i&&(s.raw=i,s.index=s.index+l.indexOf(i))}}}let r=null,o=null;if(i.forEach((t,n)=>{let h=T.getBestSubstring(t.raw,e,l,c,s);h.similarity?(l=h.similarity,r=h,o=t):n!==i.length-1&&(h=T.getBestSubstring(t.raw+i[n+1].raw,e,l,c,s),h.similarity&&(l=h.similarity,r=h,o=i[n]))}),r){let t=o.index;g=this.highlights.push({loc:[t+r.loc[0],t+r.loc[1]]})-1}}return g}adjustLoc(t,e,n){const i=this.highlights[e].loc,s=[0,0],r=this.tagLocations,o=r.length;for(let t=0;t<o;t++){const e=r[t];if(i[1]<e[0])break;if(i[1]===e[0]){this.originalContent.substring(e[0]+e[2],e[0]+e[2]+e[1]).startsWith("</")&&(s[1]+=e[1])}else if(i[1]>e[0])if(s[1]+=e[1],i[0]===e[0]){const n=this.originalContent.substring(e[0]+e[2],e[0]+e[2]+e[1]);if(n.startsWith("</"))s[0]+=e[1];else{let o=!1,l=1,c=0;for(let e=t+1;e<r.length;e++){const t=r[e];if(i[1]<=t[0])break;{const e=this.originalContent.substring(t[0]+t[2],t[0]+t[2]+t[1]),i=n.split(" ")[0].split("<")[1].split(">")[0];if(e.startsWith("<"+i)?l++:e.startsWith("</"+i)&&c++,l===c){o=!0;break}}}o||(s[0]+=e[1])}}else i[0]>e[0]&&(s[0]+=e[1])}return this.highlights.forEach((e,r)=>{if(e.highlighted){const o=T.getOpenTagLength(t,r,n),l=T.getCloseTagLength(),c=e.loc;i[0]>=c[1]?(s[0]+=o+l,s[1]+=o+l):i[0]<c[1]&&i[0]>c[0]&&i[1]>c[1]?(s[0]+=o,s[1]+=o+l):i[0]<=c[0]&&i[1]>=c[1]?s[1]+=o+l:i[0]<c[0]&&i[1]>c[0]&&i[1]<c[1]?s[1]+=o:i[0]>=c[0]&&i[1]<=c[1]&&(s[0]+=o,s[1]+=o)}}),[i[0]+s[0],i[1]+s[1]]}static createOpenTag(t,e,n){return`<span id="${t+e}" class="${n}">`}static createCloseTag(){return"</span>"}static getOpenTagLength(t,e,n){return T.createOpenTag(t,e,n).length}static getCloseTagLength(){return T.createCloseTag().length}static trim(t,e,n){return t=t.replace(/^\s+/,""),n=n.replace(/\s+$/,""),t||(e=e.replace(/^\s+/,"")),n||(e=e.replace(/\s+$/,"")),{prefix:t,str:e,postfix:n}}static insert(t,e,n){return t.slice(0,n)+e+t.slice(n)}static sentenize(t){return b(t,{newline_boundaries:!1,html_boundaries:!1,sanitize:!1,allowed_tags:!1,preserve_whitespace:!0,abbreviations:null}).map(e=>({raw:e,index:t.indexOf(e)}))}static getBestSubstring(t,e,n,i,s){let r={},o=T.getSimilarity(t,e,s);if(o>=n){const n=t.split(" ");for(;n.length;){const t=n.shift(),i=n.join(" ");let r=T.getSimilarity(i,e,s);if(r<o){n.unshift(t);const i=n.pop();if(r=T.getSimilarity(n.join(" "),e,s),r<o){n.push(i);break}o=r}else o=r}const l=n.join(" ");if(!i||l.length/e.length<=i){const e=[];e[0]=t.indexOf(l),e[1]=e[0]+l.length,r={similarity:o,loc:e}}}return r}static getSimilarity(t,e,n){return n||(t=t.toLowerCase(),e=e.toLowerCase()),t===e?1:T.lcsLength(t,e)/e.length}static lcsLength(t,e){const n=t.length,i=e.length,s=t.split(""),r=e.split(""),o=Array(n+1).fill(Array(i+1).fill(0));for(let t=1;t<=n;t++)for(let e=1;e<=i;e++)o[t][e]=s[t-1]===r[e-1]?o[t-1][e-1]+1:Math.max(o[t][e-1],o[t-1][e]);return o[n][i]}}e.default=T}]); |
@@ -316,5 +316,4 @@ import getSentences from './ext/sbd' | ||
if (result.similarity) { | ||
const index = this.isHTML | ||
? this.stripedHTML.indexOf(foundStr) | ||
: this.originalContent.indexOf(foundStr) | ||
const text = this.isHTML ? this.stripedHTML : this.originalContent | ||
const index = text.indexOf(foundStr) | ||
if (index !== -1) { | ||
@@ -321,0 +320,0 @@ highlightIndex = |
80969
15
1199