fast-myers-diff
Advanced tools
Comparing version 2.0.0 to 2.0.1
@@ -1,8 +0,8 @@ | ||
export declare type GenericIndexable = { | ||
[key: number]: unknown; | ||
export declare type GenericIndexable<T> = { | ||
[key: number]: T; | ||
readonly length: number; | ||
}; | ||
declare type TypedArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array | Float32Array | Float64Array; | ||
export declare type Indexable = string | unknown[] | TypedArray | GenericIndexable; | ||
export interface Sliceable extends GenericIndexable { | ||
export declare type Indexable<T> = string | T[] | TypedArray | GenericIndexable<T>; | ||
export interface Sliceable<T> extends GenericIndexable<T> { | ||
slice(start: number, end?: number): this; | ||
@@ -12,7 +12,7 @@ } | ||
declare type Vec3 = [number, number, number]; | ||
export declare function diff_rec<T extends Indexable>(xs: T, i: number, N: number, ys: T, j: number, M: number): Generator<Vec4>; | ||
export declare function diff<T extends Indexable>(xs: T, ys: T): Generator<Vec4>; | ||
export declare function lcs<T extends Indexable>(xs: T, ys: T): Generator<Vec3>; | ||
export declare function calcPatch<T extends Sliceable>(xs: T, ys: T): Generator<[number, number, T]>; | ||
export declare function applyPatch<T extends Sliceable>(xs: T, patch: Iterable<[number, number, T]>): Generator<T>; | ||
export declare function diff_rec<T extends Indexable<unknown>>(xs: T, i: number, N: number, ys: T, j: number, M: number): Generator<Vec4>; | ||
export declare function diff<T extends Indexable<unknown>>(xs: T, ys: T): Generator<Vec4>; | ||
export declare function lcs<T extends Indexable<unknown>>(xs: T, ys: T): Generator<Vec3>; | ||
export declare function calcPatch<T, S extends Sliceable<T>>(xs: S, ys: S): Generator<[number, number, S]>; | ||
export declare function applyPatch<T, S extends Sliceable<T>>(xs: S, patch: Iterable<[number, number, S]>): Generator<S>; | ||
export {}; |
@@ -1,17 +0,23 @@ | ||
"use strict";Object.defineProperty(exports,"__esModule",{value:true});exports.applyPatch=exports.calcPatch=exports.lcs=exports.diff=exports.diff_rec=void 0;function*diff_rec(xs,i,N,ys,j,M){let Z=(Math.min(N,M)+1)<<1;const b=new ArrayBuffer(Z<<1);const g=new Uint8Array(b,0,Z);const p=new Uint8Array(b,Z,Z);let[u,v,s]=[0,0,0];const stack=[];for(;;){Z_block:while(N>0&&M>0){Z=(Math.min(N,M)+1)<<1;g.fill(0,0,Z);p.fill(0,0,Z);const W=N-M;const L=N+M;const parity=L&1;const hmax=(L>>>1)+parity;h_loop:for(let h=0;h<=hmax;h++){const kmin=2*Math.max(0,h-M)-h;const kmax=h-2*Math.max(0,h-N);{for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;const gkm=g[(Zk-1)%Z];const gkp=g[(Zk+1)%Z];let x=u=(k===-h||(k!==h&&gkm<gkp))?gkp:gkm+1;let y=v=x-k;while(x<N&&y<M&&xs[i+x]===ys[j+y]) | ||
x++,y++;g[Zk%Z]=x;const z=W-k;if(parity===1&&z>=1-h&&z<h&&x+p[(Z+z%Z)%Z]>=N){if(h>1||x!==u){stack[s++]=[i+x,N-x,j+y,M-y];[N,M]=[u,v];continue Z_block;} | ||
"use strict";Object.defineProperty(exports,"__esModule",{value:true});exports.applyPatch=exports.calcPatch=exports.lcs=exports.diff=exports.diff_rec=void 0;function select_type(L){if(L<256) | ||
return[Uint8Array,1];if(L<65536) | ||
return[Uint16Array,2];return[Uint32Array,4];} | ||
function*diff_rec(xs,i,N,ys,j,M){let Z=(Math.min(N,M)+1)*2;const[cons,m]=select_type(N+M);const b=new ArrayBuffer(2*m*Z);const g=new cons(b,0,Z);const p=new cons(b,m*Z,Z);let[pxs,pxe,pys,pye]=[-1,-1,-1,-1];let[x,y,u,v,z,s]=[0,0,0,0,0,0];const stack=[];for(;;){Z_block:while(N>0&&M>0){g.fill(0,0,Z);p.fill(0,0,Z);const W=N-M;const L=N+M;const parity=L&1;const offsetx=i+N-1;const offsety=j+M-1;const hmax=(L+parity)/2;h_loop:for(let h=0;h<=hmax;h++){const kmin=2*Math.max(0,h-M)-h;const kmax=h-2*Math.max(0,h-N);for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;const gkm=g[(Zk-1)%Z];const gkp=g[(Zk+1)%Z];x=u=(k===-h||(k!==h&&gkm<gkp))?gkp:gkm+1;y=v=x-k;while(x<N&&y<M&&xs[i+x]===ys[j+y]) | ||
x++,y++;g[Zk%Z]=x;if(parity===1&&(z=W-k)>=1-h&&z<h&&x+p[(Z+z%Z)%Z]>=N){if(h>1||x!==u){stack[s++]=[i+x,N-x,j+y,M-y];[N,M]=[u,v];Z=2*(Math.min(N,M)+1);continue Z_block;} | ||
else | ||
break h_loop;}} | ||
for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;const pkm=p[(Zk-1)%Z];const pkp=p[(Zk+1)%Z];x=u=(k===-h||(k!==h&&pkm<pkp))?pkp:pkm+1;y=v=x-k;while(x<N&&y<M&&xs[offsetx-x]===ys[offsety-y]) | ||
x++,y++;p[Zk%Z]=x;if(parity===0&&(z=W-k)>=-h&&z<=h&&x+g[(Z+z%Z)%Z]>=N){if(h>0||x!==u){stack[s++]=[i+N-u,u,j+M-v,v];[N,M]=[N-x,M-y];Z=2*(Math.min(N,M)+1);continue Z_block;} | ||
else | ||
break h_loop;}}} | ||
{const offsetx=i+N-1;const offsety=j+M-1;for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;const pkm=p[(Zk-1)%Z];const pkp=p[(Zk+1)%Z];let x=u=(k===-h||(k!==h&&pkm<pkp))?pkp:pkm+1;let y=v=x-k;while(x<N&&y<M&&xs[offsetx-x]===ys[offsety-y]) | ||
x++,y++;p[Zk%Z]=x;const z=W-k;if(parity===0&&z>=-h&&z<=h&&x+g[(Z+z%Z)%Z]>=N){if(h>0||x!==u){stack[s++]=[i+N-u,u,j+M-v,v];[N,M]=[N-x,M-y];continue Z_block;} | ||
else | ||
break Z_block;}}}} | ||
if(N===M) | ||
continue Z_block;if(M>N) | ||
[i,N,j,M]=[i+N,0,j+N,M-N];else if(M<N) | ||
[i,N,j,M]=[i+M,N-M,j+M,0];break Z_block;} | ||
if(N>0) | ||
yield[i,i+N,j,j];else if(M>0) | ||
yield[i,i,j,j+M];if(s===0) | ||
return;[i,N,j,M]=stack[--s];}} | ||
continue;if(M>N) | ||
[i,N,j,M]=[i+N,0,j+N,M-N];else | ||
[i,N,j,M]=[i+M,N-M,j+M,0];break;} | ||
if(N+M!==0){if(pxe===i||pye===j){[pxe,pye]=[i+N,j+M];} | ||
else{if(pxs>=0) | ||
yield[pxs,pxe,pys,pye];[pxs,pxe,pys,pye]=[i,i+N,j,j+M];}} | ||
if(s===0) | ||
break;[i,N,j,M]=stack[--s];Z=2*(Math.min(N,M)+1);} | ||
if(pxs>=0) | ||
yield[pxs,pxe,pys,pye];} | ||
exports.diff_rec=diff_rec;function*diff(xs,ys){let[i,N,M]=[0,xs.length,ys.length];while(i<N&&i<M&&xs[i]===ys[i]) | ||
@@ -18,0 +24,0 @@ i++;if(i===N&&i===M) |
{ | ||
"name": "fast-myers-diff", | ||
"version": "2.0.0", | ||
"version": "2.0.1", | ||
"description": "A fast, minimal, memory-efficient diff algorithm on strings, arrays, and typed arrays.", | ||
@@ -13,4 +13,6 @@ "main": "bin/index.js", | ||
"build": "tsc", | ||
"lint": "eslint . --ext .ts --fix", | ||
"minify": "jsmin -o bin/index.min.js bin/index.js && del bin\\index.js && move bin\\index.min.js bin\\index.js", | ||
"prepare": "tsc && npm run minify" | ||
"prepare": "tsc && npm run minify", | ||
"benchmark": "tsc && node test/benchmark.js" | ||
}, | ||
@@ -25,11 +27,22 @@ "keywords": [ | ||
"devDependencies": { | ||
"@types/benchmark": "2.1.0", | ||
"@types/chai": "^4.2.14", | ||
"@types/mocha": "^8.0.4", | ||
"@types/seedrandom": "^2.4.28", | ||
"@typescript-eslint/eslint-plugin": "^4.10.0", | ||
"@typescript-eslint/parser": "^4.10.0", | ||
"benchmark": "^2.1.4", | ||
"benchtable": "^0.1.0", | ||
"chai": "^4.2.0", | ||
"eslint": "^7.15.0", | ||
"fast-diff": "1.2.0", | ||
"fast-myers-diff": "2.0.0", | ||
"jsmin": "^1.0.1", | ||
"microtime": "^3.0.0", | ||
"mocha": "^8.2.1", | ||
"myers-diff": "^2.0.1", | ||
"seedrandom": "^3.0.5", | ||
"ts-node": "^9.1.1", | ||
"typescript": "^3.9.7" | ||
}, | ||
"dependencies": {}, | ||
"repository": { | ||
@@ -36,0 +49,0 @@ "type": "git", |
@@ -14,2 +14,3 @@ Fast-Myers-Diff | ||
* Buffers are allocated contiguously (using typed arrays) to improve cache locality. | ||
* Buffers use the smallest numeric type possible for the input length; note that this results in discontinuous bumps in memory usage at input sizes of 256 and 65536. | ||
@@ -20,2 +21,11 @@ Because the core algorithm does not slice or copy data, it depends only on being able to compare elements of the inputs at arbitrary indices. | ||
### Comparison With Other Lbraries | ||
- [myers-diff](https://www.npmjs.com/package/myers-diff/v/2.0.1) is focused on strings and does the tokenization internally, supporting `'words'`, `'chars'` or `'line'` compare modes as well as custom regular expressions. | ||
- [fast-diff](https://www.npmjs.com/package/fast-diff/v/1.2.1) is specialized on character mode, using substrings instead of comparing characters one by one. | ||
- **fast-myers-diff**: is type agnostic and uses an iterative implementation. | ||
All three libraries have the ability to compute character differences between strings. | ||
### Interface | ||
The library exports the following interface: | ||
@@ -33,8 +43,8 @@ | ||
function diff_rec<T extends Indexable>(xs: T, i: number, N: number, ys: T, j: number, M: number): Generator<Vec4>; | ||
function diff<T extends Indexable>(xs: T, ys: T): Generator<[number, number, number, number]>; | ||
function lcs<T extends Indexable>(xs: T, ys: T): Generator<[number, number, number]>; | ||
declare function diff_rec<T extends Indexable>(xs: T, i: number, N: number, ys: T, j: number, M: number): Generator<Vec4>; | ||
declare function diff<T extends Indexable>(xs: T, ys: T): Generator<[number, number, number, number]>; | ||
declare function lcs<T extends Indexable>(xs: T, ys: T): Generator<[number, number, number]>; | ||
function calcPatch<T extends Sliceable>(xs: T, ys: T): Generator<[number, number, T]>; | ||
function applyPatch<T extends Sliceable>(xs: T, patch: Iterable<[number, number, T]>): Generator<T>; | ||
declare function calcPatch<T extends Sliceable>(xs: T, ys: T): Generator<[number, number, T]>; | ||
declare function applyPatch<T extends Sliceable>(xs: T, patch: Iterable<[number, number, T]>): Generator<T>; | ||
``` | ||
@@ -44,3 +54,3 @@ | ||
`diff(xs, ys)` is a wrapper around `diff_rec` which checks for common affixes and calculates `i`, `j`, `N`, and `M` automatically. | ||
`diff(xs, ys)` is a wrapper around `diff_rec` which checks for common affixes (reducing the memory consumption and time spent in the core diff algorithm) and calculates `i`, `j`, `N`, and `M` automatically. | ||
@@ -55,2 +65,33 @@ `lcs(xs, ys)` calls `diff` internally, but pre-processes the output to produce triples of the form `[sx, sy, l]`, where `sx` and `sy` are the starting idices in `xs` and `ys` respectively of an aligned common substring, and `l` is the length of said substring. Indexing into the original input sequences can be used to retrieve the actual Longest Common Subsequence from this information, but the `lcs` function itself does not attempt to take slices of the inputs. | ||
`diff_rec`, `diff` and `lcs` will also work with custom container types, as long as your container objects have a numeric `length` property. `calcPatch` and `applyPatch` will work with custom types provided that they also implement a suitable `slice(start[, end])` method. | ||
`diff_rec`, `diff` and `lcs` will also work with custom container types, as long as your container objects have a numeric `length` property. `calcPatch` and `applyPatch` will work with custom types provided that they also implement a suitable `slice(start[, end])` method. | ||
### Empirical results | ||
The table below gives the number of operations per second reported by | ||
[benchmark](https://www.npmjs.com/package/benchmark/v/2.1.4) on a | ||
Windows 10 with Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz. | ||
| input | fast-myers-diff | fast-diff-1.2.0 | myers-diff-2.0.1 | fast-myers-diff-2.0.0 | | ||
| ------ | ----------- | ---------- | -----------------|-------------------| | ||
| 10, +100, -100 | 1,139 ops/sec | 2,724 ops/sec | 768 ops/sec | 1,115 ops/sec | | ||
| 10, +4, -200 | 4,217 ops/sec | 9,094 ops/sec | 875 ops/sec | 4,119 ops/sec | | ||
| 100, +10, -10 | 40,825 ops/sec | 14,531 ops/sec | 1,049 ops/sec | 42,327 ops/sec | | ||
| 100, +20, -0 | 43,265 ops/sec | 18,649 ops/sec | 976 ops/sec | 44,582 ops/sec | | ||
| 100, +0, -20 | 45,387 ops/sec | 15,867 ops/sec | 988 ops/sec | 48,545 ops/sec | | ||
| 10, +1000, -1000 | 12.06 ops/sec | 32.86 ops/sec | 7.23 ops/sec | Not supported | | ||
| 10000, +100, -100 | 587 ops/sec | 99.70 ops/sec | 0.23 ops/sec | Not supported | | ||
| 10000, +200, -0 | 685 ops/sec | 95.26 ops/sec | 0.23 ops/sec | Not supported | | ||
| 10000, +0, -200 | 705 ops/sec | 106 ops/sec | 0.24 ops/sec | Not supported | | ||
| 10000, +10, -10 | 2,905 ops/sec | 64.11 ops/sec | 0.28 ops/sec | Not supported | | ||
| 10000, +20, -0 | 3,378 ops/sec | 68.45 ops/sec | 0.26 ops/sec | Not supported | | ||
| 10000, +0, -20 | 3,730 ops/sec | 59.50 ops/sec | 0.27 ops/sec | Not supported | | ||
The `fast-myers-diff: 2.0.0` used `Uint8Array` to save indices, so it can handle correctly only | ||
inputs with added length less than 256. | ||
The `fast-diff` is faster than `fast-myers-diff` for inputs in which the longest common string is a | ||
small portion of the sequences. For differences of 20% `fast-myers-diff` is about 6x faster, for differences of 2% about 50x faster. | ||
13497
54
92
19