fast-myers-diff
Advanced tools
Comparing version 1.0.4 to 2.0.0
@@ -7,7 +7,8 @@ export declare type GenericIndexable = { | ||
export declare type Indexable = string | unknown[] | TypedArray | GenericIndexable; | ||
export declare type Sliceable = TypedArray | (GenericIndexable & { | ||
slice(start: number, end?: number): Indexable; | ||
}); | ||
export interface Sliceable extends GenericIndexable { | ||
slice(start: number, end?: number): this; | ||
} | ||
declare type Vec4 = [number, number, number, number]; | ||
declare type Vec3 = [number, number, number]; | ||
export declare function diff_rec<T extends Indexable>(xs: T, i: number, N: number, ys: T, j: number, M: number): Generator<Vec4>; | ||
export declare function diff<T extends Indexable>(xs: T, ys: T): Generator<Vec4>; | ||
@@ -14,0 +15,0 @@ export declare function lcs<T extends Indexable>(xs: T, ys: T): Generator<Vec3>; |
@@ -1,36 +0,30 @@ | ||
"use strict";Object.defineProperty(exports,"__esModule",{value:true});exports.applyPatch=exports.calcPatch=exports.lcs=exports.diff=void 0;function*diff_rec(xs,i,N,ys,j,M){const stack=[];let s=0;let Z=2*Math.min(N,M)+2;const b=new Uint32Array(2*Z);let[x,y,u,v]=[0,0,0,0,0];for(;;){Z_block:{if(N>0&&M>0){const L=N+M;const parity=L&1;const delta=N-M;const hmax=(L>>>1)+parity;const xoffset=i+N-1;const yoffset=j+M-1;hloop:for(let h=0;h<=hmax;h++){const kmin=2*Math.max(0,h-M)-h;const kmax=h-2*Math.max(0,h-N);for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;let ckm,ckp=b[(Zk+1)%Z];x=u=(k===-h||(ckm=b[(Zk-1)%Z],k<h&&ckm<ckp))?ckp:ckm+1;y=v=u-k;while(u<N&&v<M&&xs[i+u]===ys[j+v]) | ||
u++,v++;b[Zk%Z]=u;const z=delta-k;if(parity===1&&z<h&&z>-h&&u+b[Z+(Z+z%Z)%Z]>=N){if(2*h-1>1||x!==u){stack[s++]=[i+u,N-u,j+v,M-v];[N,M]=[x,y];} | ||
break hloop;}} | ||
for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;let ckm,ckp=b[Z+(Zk+1)%Z];x=u=(k===-h||(ckm=b[Z+(Zk-1)%Z],k<h&&ckm<ckp))?ckp:ckm+1;y=v=u-k;while(x<N&&y<M&&xs[xoffset-x]===ys[yoffset-y]) | ||
x++,y++;b[Z+Zk%Z]=x;const z=delta-k;if(parity===0&&z<=h&&z>=-h&&x+b[(Z+z%Z)%Z]>=N){if(h>0||x!==u){stack[s++]=[i+N-u,u,j+M-v,v];[N,M]=[N-x,M-y];} | ||
break hloop;}}}} | ||
if(N===0) | ||
yield[i,i,j,j+M];else if(M===0) | ||
yield[i,i+N,j,j];else | ||
break Z_block;if(s===0) | ||
return;[i,N,j,M]=stack[--s];} | ||
Z=2*Math.min(N,M)+2;b[1]=b[Z+1]=0;}} | ||
function*diff(xs,ys){let[i,N,M]=[0,xs.length,ys.length];while(i<N&&i<M&&xs[i]===ys[i]) | ||
i++;if(i===N&&N===M) | ||
return;while(N>i&&M>i&&xs[--N]===ys[--M]);if(N===0){yield[0,0,0,M];return;} | ||
if(M===0){yield[0,N,M,M];return;} | ||
const iter=diff_rec(xs,i,N+1-i,ys,i,M+1-i);let last=iter.next().value;for(const next of iter){const[nds,nde,nis,nie]=next;if(nds===nde){if(nis===last[3]){last[3]=nie;continue;} | ||
else if(nds>=last[0]&&nds<=last[1]&&last[2]===last[3]){[last[2],last[3]]=[nis,nie];continue;}} | ||
else if(nds===last[1]){last[1]=nde;continue;} | ||
yield last;last=next;} | ||
yield last;} | ||
exports.diff=diff;function*lcs(xs,ys){let[i,N,M]=[0,xs.length,ys.length];console.log('lcs',xs,ys,N,M);while(i<N&&i<M&&xs[i]===ys[i]) | ||
i++;if(i>0) | ||
yield[0,0,i];if(i===N||i===M) | ||
return;let[n,m]=[N,M];while(n>i&&m>i&&xs[--n]===ys[--m]);if(n===0||m===0){yield[n,m,Math.min(N,M)];return;} | ||
const iter=diff_rec(xs,i,n+1-i,ys,i,m+1-i);let[sx,ex,sy,ey]=iter.next().value;let j=i+ey-sy;i+=ex-sx;for(const rec of iter){[sx,ex,,ey]=rec;if(i!==sx){rec.length--;[rec[0],rec[1],rec[2]]=[i,j,sx-i];yield rec;} | ||
"use strict";Object.defineProperty(exports,"__esModule",{value:true});exports.applyPatch=exports.calcPatch=exports.lcs=exports.diff=exports.diff_rec=void 0;function*diff_rec(xs,i,N,ys,j,M){let Z=(Math.min(N,M)+1)<<1;const b=new ArrayBuffer(Z<<1);const g=new Uint8Array(b,0,Z);const p=new Uint8Array(b,Z,Z);let[u,v,s]=[0,0,0];const stack=[];for(;;){Z_block:while(N>0&&M>0){Z=(Math.min(N,M)+1)<<1;g.fill(0,0,Z);p.fill(0,0,Z);const W=N-M;const L=N+M;const parity=L&1;const hmax=(L>>>1)+parity;h_loop:for(let h=0;h<=hmax;h++){const kmin=2*Math.max(0,h-M)-h;const kmax=h-2*Math.max(0,h-N);{for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;const gkm=g[(Zk-1)%Z];const gkp=g[(Zk+1)%Z];let x=u=(k===-h||(k!==h&&gkm<gkp))?gkp:gkm+1;let y=v=x-k;while(x<N&&y<M&&xs[i+x]===ys[j+y]) | ||
x++,y++;g[Zk%Z]=x;const z=W-k;if(parity===1&&z>=1-h&&z<h&&x+p[(Z+z%Z)%Z]>=N){if(h>1||x!==u){stack[s++]=[i+x,N-x,j+y,M-y];[N,M]=[u,v];continue Z_block;} | ||
else | ||
break h_loop;}}} | ||
{const offsetx=i+N-1;const offsety=j+M-1;for(let k=kmin;k<=kmax;k+=2){const Zk=(k%Z)+Z;const pkm=p[(Zk-1)%Z];const pkp=p[(Zk+1)%Z];let x=u=(k===-h||(k!==h&&pkm<pkp))?pkp:pkm+1;let y=v=x-k;while(x<N&&y<M&&xs[offsetx-x]===ys[offsety-y]) | ||
x++,y++;p[Zk%Z]=x;const z=W-k;if(parity===0&&z>=-h&&z<=h&&x+g[(Z+z%Z)%Z]>=N){if(h>0||x!==u){stack[s++]=[i+N-u,u,j+M-v,v];[N,M]=[N-x,M-y];continue Z_block;} | ||
else | ||
break Z_block;}}}} | ||
if(N===M) | ||
continue Z_block;if(M>N) | ||
[i,N,j,M]=[i+N,0,j+N,M-N];else if(M<N) | ||
[i,N,j,M]=[i+M,N-M,j+M,0];break Z_block;} | ||
if(N>0) | ||
yield[i,i+N,j,j];else if(M>0) | ||
yield[i,i,j,j+M];if(s===0) | ||
return;[i,N,j,M]=stack[--s];}} | ||
exports.diff_rec=diff_rec;function*diff(xs,ys){let[i,N,M]=[0,xs.length,ys.length];while(i<N&&i<M&&xs[i]===ys[i]) | ||
i++;if(i===N&&i===M) | ||
return;while(xs[--N]===ys[--M]&&N>i&&M>i);yield*diff_rec(xs,i,N+1-i,ys,i,M+1-i);} | ||
exports.diff=diff;function*lcs(xs,ys){const N=xs.length;let[i,j,sx,ex,ey]=[0,0,0,0,0];for(const rec of diff(xs,ys)){[sx,ex,,ey]=rec;if(i!==sx){rec.length--;[rec[0],rec[1],rec[2]]=[i,j,sx-i];yield rec;} | ||
[i,j]=[ex,ey];} | ||
if(i<N) | ||
yield[i,j,N-i];} | ||
exports.lcs=lcs;function*calcPatch(xs,ys){const slice=ArrayBuffer.isView(ys)?Uint8Array.prototype.subarray:ys.slice;for(const[ds,de,is,ie]of diff(xs,ys)){yield[ds,de,slice.call(ys,is,ie)];}} | ||
exports.calcPatch=calcPatch;function*applyPatch(xs,patch){let i=0;const slice=ArrayBuffer.isView(xs)?Uint8Array.prototype.subarray:xs.slice;for(const[ds,de,ins]of patch){if(i<ds) | ||
yield slice.call(xs,i,ds);if(ins.length>0) | ||
yield ins;i=de;} | ||
exports.lcs=lcs;function*calcPatch(xs,ys){const slice=ArrayBuffer.isView(xs)?Uint8Array.prototype.subarray:xs.slice;for(const[dels,dele,inss,inse]of diff_rec(xs,0,xs.length,ys,0,ys.length)){yield[dels,dele,slice.call(ys,inss,inse)];}} | ||
exports.calcPatch=calcPatch;function*applyPatch(xs,patch){let i=0;const slice=ArrayBuffer.isView(xs)?Uint8Array.prototype.subarray:xs.slice;for(const[dels,dele,ins]of patch){if(i<dels) | ||
yield slice.call(xs,i,dels);if(ins.length>0) | ||
yield ins;i=dele;} | ||
if(i<xs.length) | ||
yield slice.call(xs,i);} | ||
exports.applyPatch=applyPatch; |
{ | ||
"name": "fast-myers-diff", | ||
"version": "1.0.4", | ||
"version": "2.0.0", | ||
"description": "A fast, minimal, memory-efficient diff algorithm on strings, arrays, and typed arrays.", | ||
@@ -5,0 +5,0 @@ "main": "bin/index.js", |
@@ -5,4 +5,3 @@ Fast-Myers-Diff | ||
This is a fast, compact, memory efficient implementation of the O(ND) Myers diff algorithm. | ||
The core diff algorithm, including blank lines and comment, is only 117 lines. With LCS and patch features added, the total library is still less than 200 lines. | ||
Minified and including type definitions, the published library is less than 3KB. | ||
Minified and including type definitions, the published library is less than 4KB. | ||
@@ -14,3 +13,3 @@ This implementation improves on a naive implementation of Myers recursive algorithm in several ways: | ||
All other recursive calls are tail calls replaced with simple jumps (via `break` or `continue`). Huge inputs may blow the heap, but you'll never overflow the stack! | ||
* Allocation is minimized by pre-allocating buffer space to be re-used by each simulated recursive call, and tracking indices into the original inputs. The core diff algorithm performs no slice operations or other copying of data. This also minimizes garbage production and GC pause time. | ||
* Allocation is minimized by pre-allocating buffer space to be re-used by each simulated recursive call, re-using stack slots, and tracking indices into the original inputs. The core diff algorithm performs no slice operations or other copying of data. This also minimizes garbage production and GC pause time. | ||
* Buffers are allocated contiguously (using typed arrays) to improve cache locality. | ||
@@ -30,6 +29,7 @@ | ||
type Indexable = string | unknown[] | TypedArray | GenericIndexable; | ||
type Sliceable = TypedArray | (GenericIndexable & { | ||
slice(start: number, end?: number): Indexable; | ||
}); | ||
interface Sliceable extends GenericIndexable { | ||
slice(start: number, end?: number): this; | ||
} | ||
function diff_rec<T extends Indexable>(xs: T, i: number, N: number, ys: T, j: number, M: number): Generator<Vec4>; | ||
function diff<T extends Indexable>(xs: T, ys: T): Generator<[number, number, number, number]>; | ||
@@ -42,6 +42,8 @@ function lcs<T extends Indexable>(xs: T, ys: T): Generator<[number, number, number]>; | ||
`diff(xs, ys)` is the core of the library; given two indexable sequences, `xs` and `ys`, it produces a sequence of quadruples `[sx, ex, sy, ey]`, where [sx, ex) indicates a range to delete from `xs` and [sy, ey) indicates a range from `ys` to replace the deleted material with. Simple deletions are indicated when `sy === ey` and simple insertions when `sx === ex`. | ||
`diff_rec(xs, i, N, ys, j, M)` is the core of the library; given two indexable sequences, `xs` and `ys`, starting indices `i` and `j`, and slice-lengths `N` and `M` (i.e., the remaining length after the starting index), it produces a sequence of quadruples `[sx, ex, sy, ey]`, where [sx, ex) indicates a range to delete from `xs` and [sy, ey) indicates a range from `ys` to replace the deleted material with. Simple deletions are indicated when `sy === ey` and simple insertions when `sx === ex`. | ||
`lcs(xs, ys)` uses the same underlying diff implementation, but instead produces output in the form of triples `[sx, sy, l]`, where `sx` and `sy` are the starting idices in `xs` and `ys` respectively of an aligned common substring, and `l` is the length of said substring. Indexing into the original input sequences can be used to retrieve the actual LCS from this information. | ||
`diff(xs, ys)` is a wrapper around `diff_rec` which checks for common affixes and calculates `i`, `j`, `N`, and `M` automatically. | ||
`lcs(xs, ys)` calls `diff` internally, but pre-processes the output to produce triples of the form `[sx, sy, l]`, where `sx` and `sy` are the starting idices in `xs` and `ys` respectively of an aligned common substring, and `l` is the length of said substring. Indexing into the original input sequences can be used to retrieve the actual Longest Common Subsequence from this information, but the `lcs` function itself does not attempt to take slices of the inputs. | ||
`calcPatch(xs, ys)` is a thin wrapper over `diff(xs, ys)` which replaces the [sy, ey) indices with the relevant slice of `ys`. This can be used to reconstitute `ys` given `xs`. Once again, pure insertions are indicated when `sx === ex`, but pure deletions are indicated by an empty slice--i.e., an empty string, a zero-length array, etc. The insert slices are of the same type as the original `ys`. If `ys` is a string or an array, they are produced with the `slice` methods of strings or arrays, which will result in a shallow copy. If `ys` is a typed array, slices will be produced with `TypedArray.prototype.subarray`, which re-uses the existing underlying memory. | ||
@@ -53,2 +55,2 @@ | ||
This `diff(xs, ys)` and `lcs(xs, ys)` will also work with custom container types, as long as your container objects have a numeric `length` property. `calcPatch(xs, ys)` and `applyPatch(xs, ys)` will also with with custom types, provided that they also implement a suitable `slice(start[, end])` method. | ||
`diff_rec`, `diff` and `lcs` will also work with custom container types, as long as your container objects have a numeric `length` property. `calcPatch` and `applyPatch` will work with custom types provided that they also implement a suitable `slice(start[, end])` method. |
9884
51
48