bop
Advanced tools
Comparing version 2.3.1 to 3.0.0
/* | ||
* Bop is a very fast Boyer-Moore parser/matcher for string or buffer patterns. | ||
* It is optimized for using with pattern strings/buffers <= 255 bytes; | ||
* for example, it is ideal for parsing multipart/form-data streams, that have a | ||
* pattern / boundary length < ~70 bytes. | ||
* It is optimized for use with pattern strings/buffers <= 255 bytes. | ||
* | ||
@@ -147,3 +145,3 @@ * Copyright(c) 2013-present Guglielmo Ferri <44gatti@gmail.com> | ||
if ( i < 0 ) { | ||
// don't collect overlapping patterns | ||
// don't collect overlapping sequences | ||
if ( ( ilen = ixs.length ) && ( j < ixs[ ilen - 1 ] + m ) && ( j += gs[ 0 ] ) ) continue; | ||
@@ -157,4 +155,37 @@ if ( ixs.push( j ) >= l ) break; | ||
/*
 * Count the occurrences of the current pattern in data.
 *
 * - data     : the String or Buffer to scan; it is passed through convert()
 *              before being indexed.
 * - start    : index at which to begin scanning (a falsy value means 0).
 * - sparse   : when truthy, a match that overlaps the previously counted
 *              match is skipped instead of counted.
 * - distance : when truthy, the result carries a second element (see below).
 *
 * Returns [ cnt ], or [ cnt, mdist - m ] when distance is truthy, where m is
 * the pattern length and mdist is the largest difference found between the
 * start index of a counted match and the start index of the previously
 * counted one. For the first counted match the difference is taken from
 * index 0 of data. With no match at all mdist stays 0, so the second
 * element is -m.
 */
bproto.count = function ( data, start, sparse, distance ) {
    var me = this
        , d = convert( data )
        , p = me.p
        , m = p.length
        , n = d.length
        , i = 0
        , j = start || 0
        // shift tables, presumably the Boyer-Moore good-suffix (gs) and
        // bad-character (bc) tables built when the pattern was set
        , gs = me.gs
        , bc = me.bc
        // last index at which a whole pattern still fits into data
        , end = n - m
        // start index of the previously counted match
        , last = 0
        // true once a match has been counted; kept apart from `last`
        // because 0 is a legitimate match index
        , found = false
        , cnt = 0
        , mdist = 0
        , dist = 0
        ;
    // search
    while ( j <= end ) {
        // compare the pattern right-to-left against the window at j
        for ( i = m - 1; ( i >= 0 ) && ( p[ i ] === d[ i + j ] ); --i );
        if ( i < 0 ) {
            dist = j - last;
            // if the sparse switch is on, skip a match that overlaps the
            // previously counted one
            if ( sparse && found && ( m > dist ) ) {
                j += gs[ 0 ];
                continue;
            }
            if ( distance && ( mdist < dist ) ) mdist = dist;
            cnt++;
            last = j;
            found = true;
            j += gs[ 0 ];
        } else j += max( gs[ i ], bc[ d[ i + j ] ] - m + 1 + i );
    }
    return distance ? [ cnt, mdist - m ] : [ cnt ];
};
return Bop; | ||
} )(); |
{ | ||
"name" : "bop" | ||
, "version" : "2.3.1" | ||
, "description" : "Bop, an ultra fast Boyer-Moore parser/matcher optimized for string and buffer patterns (<= 255 bytes), then it is ideal for parsing multipart/form-data streams, that have a pattern / boundary length < ~70 bytes." | ||
, "version" : "3.0.0" | ||
, "description" : "Bop, an ultra fast Boyer-Moore parser/matcher, optimized for string and buffer patterns (<= 255 bytes)." | ||
, "homepage" : "https://github.com/rootslab/bop" | ||
@@ -6,0 +6,0 @@ , "private": false |
@@ -80,18 +80,31 @@ ### Bop | ||
```javascript | ||
// Change the current pattern to search. | ||
// Change the pattern to search. | ||
Bop#set( Buffer pattern | String pattern ) : Buffer | ||
/* | ||
* List all pattern occurrences into a String or Buffer data. | ||
* It returns a new array of indexes, or populates an array | ||
* passed as the last argument. | ||
* Count matches, optionally without overlapping sequences, | ||
* starting from a particular index (default is 0). | ||
* It returns an Array whose first element is the number of matches; | ||
* if the distance switch is on, a second element holds the maximum | ||
* distance found between patterns. | ||
*/ | ||
Bop#count( Buffer data [, Number start_from [, Boolean sparse [, Boolean distance ] ] ] ) : Array | ||
/* | ||
* Collect all indexes of pattern occurrences. | ||
* | ||
* NOTE: it is faster using Buffers. | ||
* As options you can: | ||
* | ||
* - start parsing from a particular index, | ||
* - limit the number of results to parse | ||
* - fill your array with resulting indexes. | ||
* | ||
* NOTE: use Buffers when possible (faster). | ||
* | ||
*/ | ||
Bop#parse( String data | Buffer data [, Number startFromIndex [, Number limitResultsTo [, Array array ] ] ] ) : Array | ||
Bop#parse( Buffer data | String data [, Number start_from [, Number limit_results [, Array my_array ] ] ] ) : Array | ||
/* | ||
* Strict parse, it's the same as parse, but it parses data | ||
* without collecting overlapping sequences. | ||
* Strict parse: it is the same as parse, but it does not collect | ||
* any overlapping sequences. | ||
* | ||
@@ -127,3 +140,3 @@ * Example with CRLF sequence: | ||
*/ | ||
Bop#sparse( String data | Buffer data [, Number startFromIndex [, Number limitResultsTo [, Array array ] ] ] ) : Array | ||
Bop#sparse( Buffer data | String data [, Number start_from [, Number limit_results [, Array my_array ] ] ] ) : Array | ||
``` | ||
@@ -143,3 +156,5 @@ | ||
``` | ||
> See __[examples](example/)__. | ||
#### Benchmark for a short pattern ( length <= 255 bytes ) | ||
@@ -171,2 +186,3 @@ | ||
#### Benchmark for a big pattern ( length > 255 bytes ) | ||
@@ -173,0 +189,0 @@ |
@@ -6,3 +6,2 @@ var log = console.log | ||
, splen = spattern.length | ||
// 2^n times | ||
, n = 8 | ||
@@ -20,5 +19,3 @@ , tlen = splen * n | ||
for ( ; i < n; ++i ) { | ||
spattern += spattern; | ||
}; | ||
for ( ; i < n; ++i ) spattern += spattern; | ||
@@ -37,5 +34,8 @@ log( '- resulting pattern length is ' + spattern.length / 1024 + ' KB' ); | ||
log( '- parse data for patterns and get results' ); | ||
var bop = Bop( bpattern ), | ||
results = bop.parse( data ); | ||
var bop = Bop( bpattern ) | ||
, results = bop.parse( data ) | ||
, cnt = bop.count( data ) | ||
; | ||
log( '- check results length, it should be equal to ' + n ); | ||
@@ -46,1 +46,4 @@ assert.equal( results.length, n, 'results length is wrong, must be ' + n + ', now it\'s ' + results.length ); | ||
assert.deepEqual( results, indexes, 'results don\'t match : ' + indexes + ' !== ' + results ); | ||
log( '- check #count results, it should be: %d', results.length ); | ||
assert.ok( cnt[ 0 ] === results.length, 'erroneous #count result!' ); |
@@ -10,3 +10,4 @@ var log = console.log | ||
, indexes = [] | ||
, data = new Buffer( 256 ) | ||
, offset = 27 | ||
, data = new Buffer( offset + 256 ) | ||
, dlen = data.length | ||
@@ -23,20 +24,30 @@ , bpattern = null | ||
bpattern = new Buffer( spattern ); | ||
bpattern.copy( data, bpattern.length ); | ||
bpattern.copy( data, ( bpattern.length * 2 ) ); | ||
bpattern.copy( data, offset + bpattern.length ); | ||
bpattern.copy( data, offset + ( bpattern.length * 2 ) ); | ||
log( '- parse data for patterns and get results' ); | ||
log( '- parse data (with overlapping sequences)' ); | ||
var bop = Bop( bpattern ) | ||
, results = bop.parse( data ) | ||
// count with overlapping sequences | ||
, cnt = bop.count( data ) | ||
; | ||
log( '- counting matches (with overlapping sequences), they should be: %d', results.length ); | ||
assert.ok( cnt[ 0 ] === results.length, 'erroneous #count result!' ); | ||
log( '- check if the parse method returns exactly 3 results' ); | ||
assert.equal( results.length, 3, 'results length is wrong, must be 3, now it\'s ' + results.length ); | ||
assert.deepEqual( results, [ 20, 30, 40 ], 'results don\'t match' ); | ||
assert.deepEqual( results, [ offset + 20, offset + 30, offset + 40 ], 'results don\'t match' ); | ||
log( '- now parse data with Bop#sparse, it doesn\'t collect overlapping patterns' ); | ||
var bop = Bop( bpattern ) | ||
, results = bop.sparse( data ) | ||
; | ||
log( '- check if the parse method returns exactly 2 results this time' ); | ||
log( '- now parse data with Bop#sparse (it doesn\'t collect overlapping sequences)' ); | ||
results = bop.sparse( data ) | ||
log( '- results should be: %d', 2 ); | ||
assert.equal( results.length, 2, 'results length is wrong, must be 2, now it\'s ' + results.length ); | ||
assert.deepEqual( results, [ 20, 40 ], 'results don\'t match' ); | ||
log( '- check resulting indexes' ); | ||
assert.deepEqual( results, [ offset + 20, offset + 40 ], 'results don\'t match' ); | ||
log( '- counting matches (with overlapping sequences), they should be: %d', results.length ); | ||
cnt = bop.count( data, 0, true ); | ||
assert.ok( cnt[ 0 ] === results.length, 'erroneous #count result!' ); |
45635
29
915
223