fast-text-encoding
Advanced tools
Comparing version 1.0.2 to 1.0.3
{ | ||
"name": "fast-text-encoding", | ||
"version": "1.0.2", | ||
"version": "1.0.3", | ||
"description": "Fast polyfill for TextEncoder and TextDecoder, only supports utf-8", | ||
@@ -5,0 +5,0 @@ "main": "text.min.js", |
@@ -6,2 +6,3 @@ [![Build](https://api.travis-ci.org/samthor/fast-text-encoding.svg?branch=master)](https://travis-ci.org/samthor/fast-text-encoding) | ||
It is fast partly because it does not support any encodings other than UTF-8 (and note that natively, only `TextDecoder` supports alternative encodings anyway). | ||
See [some benchmarks](https://github.com/samthor/fast-text-encoding/tree/master/bench). | ||
@@ -45,2 +46,4 @@ [1]: https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder | ||
In Node v5.10 and above (where `Buffer.from` is available), this polyfill uses `Buffer` to implement `TextDecoder`. | ||
# Release | ||
@@ -47,0 +50,0 @@ |
161
text.js
@@ -58,4 +58,4 @@ /* | ||
let at = 0; // output position | ||
let tlen = Math.max(32, len + (len >> 1) + 7); // 1.5x size | ||
let target = new Uint8Array((tlen >> 3) << 3); // ... but at 8 byte offset | ||
let tlen = Math.max(32, len + (len >>> 1) + 7); // 1.5x size | ||
let target = new Uint8Array((tlen >>> 3) << 3); // ... but at 8 byte offset | ||
@@ -82,3 +82,3 @@ while (pos < len) { | ||
tlen *= (1.0 + (pos / string.length) * 2); // take 2x the remaining | ||
tlen = (tlen >> 3) << 3; // 8 byte offset | ||
tlen = (tlen >>> 3) << 3; // 8 byte offset | ||
@@ -94,10 +94,10 @@ const update = new Uint8Array(tlen); | ||
} else if ((value & 0xfffff800) === 0) { // 2-byte | ||
target[at++] = ((value >> 6) & 0x1f) | 0xc0; | ||
target[at++] = ((value >>> 6) & 0x1f) | 0xc0; | ||
} else if ((value & 0xffff0000) === 0) { // 3-byte | ||
target[at++] = ((value >> 12) & 0x0f) | 0xe0; | ||
target[at++] = ((value >> 6) & 0x3f) | 0x80; | ||
target[at++] = ((value >>> 12) & 0x0f) | 0xe0; | ||
target[at++] = ((value >>> 6) & 0x3f) | 0x80; | ||
} else if ((value & 0xffe00000) === 0) { // 4-byte | ||
target[at++] = ((value >> 18) & 0x07) | 0xf0; | ||
target[at++] = ((value >> 12) & 0x3f) | 0x80; | ||
target[at++] = ((value >> 6) & 0x3f) | 0x80; | ||
target[at++] = ((value >>> 18) & 0x07) | 0xf0; | ||
target[at++] = ((value >>> 12) & 0x3f) | 0x80; | ||
target[at++] = ((value >>> 6) & 0x3f) | 0x80; | ||
} else { | ||
@@ -137,33 +137,59 @@ continue; // out of range | ||
/** | ||
* @param {(!ArrayBuffer|!ArrayBufferView)} buffer | ||
* @param {{stream: boolean}=} options | ||
* @param {!Uint8Array} bytes | ||
* @return {string} | ||
*/ | ||
FastTextDecoder.prototype['decode'] = function(buffer, options={stream: false}) { | ||
if (options['stream']) { | ||
throw new Error(`Failed to decode: the 'stream' option is unsupported.`); | ||
} | ||
function decodeBuffer(bytes) {
  // Hand Buffer the exact window this view covers (offset + length), not the
  // whole backing ArrayBuffer, then let it perform the UTF-8 decode.
  const {buffer, byteOffset, byteLength} = bytes;
  const wrapped = Buffer.from(buffer, byteOffset, byteLength);
  return wrapped.toString('utf-8');
}
// Accept Uint8Array's as-is. | ||
let bytes = buffer; | ||
/** | ||
* @param {!Uint8Array} bytes | ||
* @return {string} | ||
*/ | ||
function decodeSyncXHR(bytes) { | ||
const b = new Blob([bytes], {type: 'text/plain;charset=UTF-8'}); | ||
const u = URL.createObjectURL(b); | ||
// Look for ArrayBufferView, which isn't a real type, but basically represents | ||
// all the valid TypedArray types plus DataView. They all have ".buffer" as | ||
// an instance of ArrayBuffer. | ||
if (!(bytes instanceof Uint8Array) && bytes.buffer instanceof ArrayBuffer) { | ||
bytes = new Uint8Array(buffer.buffer); | ||
// This hack will fail in non-Edgium Edge because sync XHRs are disabled (and | ||
// possibly in other places), so ensure there's a fallback call. | ||
try { | ||
const x = new XMLHttpRequest(); | ||
x.open('GET', u, false); | ||
x.send(); | ||
return x.responseText; | ||
} catch (e) { | ||
return decodeFallback(bytes); | ||
} finally { | ||
URL.revokeObjectURL(u); | ||
} | ||
} | ||
let pos = 0; | ||
let pending = []; | ||
/** | ||
* @param {!Uint8Array} bytes | ||
* @return {string} | ||
*/ | ||
function decodeFallback(bytes) { | ||
let inputIndex = 0; | ||
// Create a working buffer for UTF-16 code units, but don't generate one | ||
// which is too large for small input sizes. UTF-8 to UTF-16 conversion is | ||
// going to be at most 1:1, if all code points are ASCII. The other extreme | ||
// is 4-byte UTF-8, which results in two UTF-16 code units (a surrogate pair), | ||
// but this is still 50% fewer entries in the output than bytes in the input. | ||
const pendingSize = Math.min(256 * 256, bytes.length + 1); | ||
const pending = new Uint16Array(pendingSize); | ||
const chunks = []; | ||
let pendingIndex = 0; | ||
for (;;) { | ||
const more = pos < bytes.length; | ||
const more = inputIndex < bytes.length; | ||
// If there's no more data or we're >65k bytes, create a chunk. | ||
// This isn't done at the end by simply slicing the data into equal sized | ||
// chunks as we might hit a surrogate pair. | ||
if (!more || (pos & 0x10000)) { | ||
chunks.push(String.fromCharCode.apply(null, pending)); | ||
// If there's no more data or there'd be no room for two UTF-16 values, | ||
// create a chunk. This isn't done at the end by simply slicing the data | ||
// into equal sized chunks as we might hit a surrogate pair. | ||
if (!more || (pendingIndex >= pendingSize - 1)) { | ||
// nb. .apply and friends are *really slow*. Low-hanging fruit is to | ||
// expand this to literally pass pending[0], pending[1], ... etc, but | ||
// the output code expands pretty fast in this case. | ||
chunks.push(String.fromCharCode.apply(null, pending.subarray(0, pendingIndex))); | ||
@@ -175,5 +201,5 @@ if (!more) { | ||
// Move the buffer forward and create another chunk. | ||
pending = []; | ||
bytes = bytes.subarray(pos); | ||
pos = 0; | ||
bytes = bytes.subarray(inputIndex); | ||
inputIndex = 0; | ||
pendingIndex = 0; | ||
} | ||
@@ -185,18 +211,16 @@ | ||
const byte1 = bytes[pos++]; | ||
if (byte1 === 0) { | ||
pending.push(0); | ||
} else if ((byte1 & 0x80) === 0) { // 1-byte | ||
pending.push(byte1); | ||
const byte1 = bytes[inputIndex++]; | ||
if ((byte1 & 0x80) === 0) { // 1-byte or null | ||
pending[pendingIndex++] = byte1; | ||
} else if ((byte1 & 0xe0) === 0xc0) { // 2-byte | ||
const byte2 = bytes[pos++] & 0x3f; | ||
pending.push(((byte1 & 0x1f) << 6) | byte2); | ||
const byte2 = bytes[inputIndex++] & 0x3f; | ||
pending[pendingIndex++] = ((byte1 & 0x1f) << 6) | byte2; | ||
} else if ((byte1 & 0xf0) === 0xe0) { // 3-byte | ||
const byte2 = bytes[pos++] & 0x3f; | ||
const byte3 = bytes[pos++] & 0x3f; | ||
pending.push(((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3); | ||
const byte2 = bytes[inputIndex++] & 0x3f; | ||
const byte3 = bytes[inputIndex++] & 0x3f; | ||
pending[pendingIndex++] = ((byte1 & 0x1f) << 12) | (byte2 << 6) | byte3; | ||
} else if ((byte1 & 0xf8) === 0xf0) { // 4-byte | ||
const byte2 = bytes[pos++] & 0x3f; | ||
const byte3 = bytes[pos++] & 0x3f; | ||
const byte4 = bytes[pos++] & 0x3f; | ||
const byte2 = bytes[inputIndex++] & 0x3f; | ||
const byte3 = bytes[inputIndex++] & 0x3f; | ||
const byte4 = bytes[inputIndex++] & 0x3f; | ||
@@ -208,6 +232,6 @@ // this can be > 0xffff, so possibly generate surrogates | ||
codepoint -= 0x10000; | ||
pending.push((codepoint >>> 10) & 0x3ff | 0xd800); | ||
pending[pendingIndex++] = (codepoint >>> 10) & 0x3ff | 0xd800; | ||
codepoint = 0xdc00 | codepoint & 0x3ff; | ||
} | ||
pending.push(codepoint); | ||
pending[pendingIndex++] = codepoint; | ||
} else { | ||
@@ -219,2 +243,43 @@ // invalid initial byte | ||
// Pick the decoder once, up front. The hand-written fallback is slow, so a
// platform facility is preferred whenever one is detected:
//   1. Node's Buffer, when Buffer.from exists (added in Node v5.10.0).
//   2. A synchronous XHR over a Blob URL, when Blob and URL.createObjectURL
//      exist (IE10, Safari 6, Chrome 19, Firefox 19 and later).
//   3. Otherwise the pure-JS decodeFallback.
let decodeImpl = (typeof Buffer === 'function' && Buffer.from)
    ? decodeBuffer
    : (typeof Blob === 'function' &&
       typeof URL === 'function' &&
       typeof URL.createObjectURL === 'function')
        ? decodeSyncXHR
        : decodeFallback;
/**
 * Decodes the passed bytes as UTF-8 by normalising the argument to a
 * Uint8Array and handing it to the selected decodeImpl.
 *
 * @param {(!ArrayBuffer|!ArrayBufferView)=} buffer bytes to decode; when
 *     omitted, an empty input is decoded
 * @param {{stream: boolean}=} options only `stream: false` is supported
 * @return {string}
 * @throws {Error} if `options.stream` is truthy
 */
FastTextDecoder.prototype['decode'] = function(buffer, options={stream: false}) {
  if (options['stream']) {
    throw new Error(`Failed to decode: the 'stream' option is unsupported.`);
  }

  let bytes;

  if (buffer === undefined) {
    // No input at all: decode zero bytes rather than failing on the
    // ".buffer" property lookup below.
    bytes = new Uint8Array(0);
  } else if (buffer instanceof Uint8Array) {
    // Accept Uint8Array instances as-is.
    bytes = buffer;
  } else if (buffer.buffer instanceof ArrayBuffer) {
    // Look for ArrayBufferView, which isn't a real type, but basically
    // represents all the valid TypedArray types plus DataView. They all have
    // ".buffer" as an instance of ArrayBuffer. A view may cover only part of
    // its backing buffer, so carry over its offset and length; otherwise bytes
    // outside the view would be decoded too.
    bytes = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength);
  } else {
    // The only other valid argument here is that "buffer" is an ArrayBuffer.
    // We also try to convert anything else passed to a Uint8Array, as this
    // catches anything that's array-like. Native code would throw here.
    bytes = new Uint8Array(buffer);
  }

  return decodeImpl(/** @type {!Uint8Array} */ (bytes));
};
scope['TextEncoder'] = FastTextEncoder; | ||
@@ -221,0 +286,0 @@ scope['TextDecoder'] = FastTextDecoder; |
@@ -1,5 +0,6 @@ | ||
(function(l){function m(){}function k(c,a){c=void 0===c?"utf-8":c;a=void 0===a?{fatal:!1}:a;if(-1===n.indexOf(c.toLowerCase()))throw new RangeError("Failed to construct 'TextDecoder': The encoding label provided ('"+c+"') is invalid.");if(a.fatal)throw Error("Failed to construct 'TextDecoder': the 'fatal' option is unsupported.");}if(l.TextEncoder&&l.TextDecoder)return!1;var n=["utf-8","utf8","unicode-1-1-utf-8"];Object.defineProperty(m.prototype,"encoding",{value:"utf-8"});m.prototype.encode=function(c, | ||
a){a=void 0===a?{stream:!1}:a;if(a.stream)throw Error("Failed to encode: the 'stream' option is unsupported.");a=0;for(var g=c.length,f=0,b=Math.max(32,g+(g>>1)+7),e=new Uint8Array(b>>3<<3);a<g;){var d=c.charCodeAt(a++);if(55296<=d&&56319>=d){if(a<g){var h=c.charCodeAt(a);56320===(h&64512)&&(++a,d=((d&1023)<<10)+(h&1023)+65536)}if(55296<=d&&56319>=d)continue}f+4>e.length&&(b+=8,b*=1+a/c.length*2,b=b>>3<<3,h=new Uint8Array(b),h.set(e),e=h);if(0===(d&4294967168))e[f++]=d;else{if(0===(d&4294965248))e[f++]= | ||
d>>6&31|192;else if(0===(d&4294901760))e[f++]=d>>12&15|224,e[f++]=d>>6&63|128;else if(0===(d&4292870144))e[f++]=d>>18&7|240,e[f++]=d>>12&63|128,e[f++]=d>>6&63|128;else continue;e[f++]=d&63|128}}return e.slice?e.slice(0,f):e.subarray(0,f)};Object.defineProperty(k.prototype,"encoding",{value:"utf-8"});Object.defineProperty(k.prototype,"fatal",{value:!1});Object.defineProperty(k.prototype,"ignoreBOM",{value:!1});k.prototype.decode=function(c,a){a=void 0===a?{stream:!1}:a;if(a.stream)throw Error("Failed to decode: the 'stream' option is unsupported."); | ||
a=c;!(a instanceof Uint8Array)&&a.buffer instanceof ArrayBuffer&&(a=new Uint8Array(c.buffer));c=0;for(var g=[],f=[];;){var b=c<a.length;if(!b||c&65536){f.push(String.fromCharCode.apply(null,g));if(!b)return f.join("");g=[];a=a.subarray(c);c=0}b=a[c++];if(0===b)g.push(0);else if(0===(b&128))g.push(b);else if(192===(b&224)){var e=a[c++]&63;g.push((b&31)<<6|e)}else if(224===(b&240)){e=a[c++]&63;var d=a[c++]&63;g.push((b&31)<<12|e<<6|d)}else if(240===(b&248)){e=a[c++]&63;d=a[c++]&63;var h=a[c++]&63;b= | ||
(b&7)<<18|e<<12|d<<6|h;65535<b&&(b-=65536,g.push(b>>>10&1023|55296),b=56320|b&1023);g.push(b)}}};l.TextEncoder=m;l.TextDecoder=k})("undefined"!==typeof window?window:"undefined"!==typeof global?global:this); | ||
(function(l){function m(){}function k(a,c){a=void 0===a?"utf-8":a;c=void 0===c?{fatal:!1}:c;if(-1===r.indexOf(a.toLowerCase()))throw new RangeError("Failed to construct 'TextDecoder': The encoding label provided ('"+a+"') is invalid.");if(c.fatal)throw Error("Failed to construct 'TextDecoder': the 'fatal' option is unsupported.");}function t(a){return Buffer.from(a.buffer,a.byteOffset,a.byteLength).toString("utf-8")}function u(a){var c=URL.createObjectURL(new Blob([a],{type:"text/plain;charset=UTF-8"})); | ||
try{var f=new XMLHttpRequest;f.open("GET",c,!1);f.send();return f.responseText}catch(e){return q(a)}finally{URL.revokeObjectURL(c)}}function q(a){for(var c=0,f=Math.min(65536,a.length+1),e=new Uint16Array(f),h=[],d=0;;){var b=c<a.length;if(!b||d>=f-1){h.push(String.fromCharCode.apply(null,e.subarray(0,d)));if(!b)return h.join("");a=a.subarray(c);d=c=0}b=a[c++];if(0===(b&128))e[d++]=b;else if(192===(b&224)){var g=a[c++]&63;e[d++]=(b&31)<<6|g}else if(224===(b&240)){g=a[c++]&63;var n=a[c++]&63;e[d++]= | ||
(b&31)<<12|g<<6|n}else if(240===(b&248)){g=a[c++]&63;n=a[c++]&63;var v=a[c++]&63;b=(b&7)<<18|g<<12|n<<6|v;65535<b&&(b-=65536,e[d++]=b>>>10&1023|55296,b=56320|b&1023);e[d++]=b}}}if(l.TextEncoder&&l.TextDecoder)return!1;var r=["utf-8","utf8","unicode-1-1-utf-8"];Object.defineProperty(m.prototype,"encoding",{value:"utf-8"});m.prototype.encode=function(a,c){c=void 0===c?{stream:!1}:c;if(c.stream)throw Error("Failed to encode: the 'stream' option is unsupported.");c=0;for(var f=a.length,e=0,h=Math.max(32, | ||
f+(f>>>1)+7),d=new Uint8Array(h>>>3<<3);c<f;){var b=a.charCodeAt(c++);if(55296<=b&&56319>=b){if(c<f){var g=a.charCodeAt(c);56320===(g&64512)&&(++c,b=((b&1023)<<10)+(g&1023)+65536)}if(55296<=b&&56319>=b)continue}e+4>d.length&&(h+=8,h*=1+c/a.length*2,h=h>>>3<<3,g=new Uint8Array(h),g.set(d),d=g);if(0===(b&4294967168))d[e++]=b;else{if(0===(b&4294965248))d[e++]=b>>>6&31|192;else if(0===(b&4294901760))d[e++]=b>>>12&15|224,d[e++]=b>>>6&63|128;else if(0===(b&4292870144))d[e++]=b>>>18&7|240,d[e++]=b>>>12& | ||
63|128,d[e++]=b>>>6&63|128;else continue;d[e++]=b&63|128}}return d.slice?d.slice(0,e):d.subarray(0,e)};Object.defineProperty(k.prototype,"encoding",{value:"utf-8"});Object.defineProperty(k.prototype,"fatal",{value:!1});Object.defineProperty(k.prototype,"ignoreBOM",{value:!1});var p=q;"function"===typeof Buffer&&Buffer.from?p=t:"function"===typeof Blob&&"function"===typeof URL&&"function"===typeof URL.createObjectURL&&(p=u);k.prototype.decode=function(a,c){c=void 0===c?{stream:!1}:c;if(c.stream)throw Error("Failed to decode: the 'stream' option is unsupported."); | ||
a=a instanceof Uint8Array?a:a.buffer instanceof ArrayBuffer?new Uint8Array(a.buffer):new Uint8Array(a);return p(a)};l.TextEncoder=m;l.TextDecoder=k})("undefined"!==typeof window?window:"undefined"!==typeof global?global:this); |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
33704
9
271
59