'use strict'
/**
* Copyright Brian White. All rights reserved.
*
* @see https://github.com/mscdex/streamsearch
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation
* by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
*/
const EventEmitter = require('node:events').EventEmitter
const inherits = require('node:util').inherits
function SBMH (needle) {
if (typeof needle === 'string') {
needle = Buffer.from(needle)
}
if (!Buffer.isBuffer(needle)) {
throw new TypeError('The needle has to be a String or a Buffer.')
}
const needleLength = needle.length
if (needleLength === 0) {
throw new Error('The needle cannot be an empty String/Buffer.')
}
if (needleLength > 256) {
throw new Error('The needle cannot have a length bigger than 256.')
}
this.maxMatches = Infinity
this.matches = 0
this._occ = new Array(256)
.fill(needleLength) // Initialize occurrence table.
this._lookbehind_size = 0
this._needle = needle
this._bufpos = 0
this._lookbehind = Buffer.alloc(needleLength)
// Populate occurrence table with analysis of the needle,
// ignoring last letter.
for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var
this._occ[needle[i]] = needleLength - 1 - i
}
}
inherits(SBMH, EventEmitter)
SBMH.prototype.reset = function () {
this._lookbehind_size = 0
this.matches = 0
this._bufpos = 0
}
SBMH.prototype.push = function (chunk, pos) {
if (!Buffer.isBuffer(chunk)) {
chunk = Buffer.from(chunk, 'binary')
}
const chlen = chunk.length
this._bufpos = pos || 0
let r
while (r !== chlen && this.matches < this.maxMatches) { r = this._sbmh_feed(chunk) }
return r
}
SBMH.prototype._sbmh_feed = function (data) {
const len = data.length
const needle = this._needle
const needleLength = needle.length
const lastNeedleChar = needle[needleLength - 1]
// Positive: points to a position in `data`
// pos == 3 points to data[3]
// Negative: points to a position in the lookbehind buffer
// pos == -2 points to lookbehind[lookbehind_size - 2]
let pos = -this._lookbehind_size
let ch
if (pos < 0) {
// Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
// search with character lookup code that considers both the
// lookbehind buffer and the current round's haystack data.
//
// Loop until
// there is a match.
// or until
// we've moved past the position that requires the
// lookbehind buffer. In this case we switch to the
// optimized loop.
// or until
// the character to look at lies outside the haystack.
while (pos < 0 && pos <= len - needleLength) {
ch = this._sbmh_lookup_char(data, pos + needleLength - 1)
if (
ch === lastNeedleChar &&
this._sbmh_memcmp(data, pos, needleLength - 1)
) {
this._lookbehind_size = 0
++this.matches
this.emit('info', true)
return (this._bufpos = pos + needleLength)
}
pos += this._occ[ch]
}
// No match.
if (pos < 0) {
// There's too few data for Boyer-Moore-Horspool to run,
// so let's use a different algorithm to skip as much as
// we can.
// Forward pos until
// the trailing part of lookbehind + data
// looks like the beginning of the needle
// or until
// pos == 0
while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos }
}
if (pos >= 0) {
// Discard lookbehind buffer.
this.emit('info', false, this._lookbehind, 0, this._lookbehind_size)
this._lookbehind_size = 0
} else {
// Cut off part of the lookbehind buffer that has
// been processed and append the entire haystack
// into it.
const bytesToCutOff = this._lookbehind_size + pos
if (bytesToCutOff > 0) {
// The cut off data is guaranteed not to contain the needle.
this.emit('info', false, this._lookbehind, 0, bytesToCutOff)
}
this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff,
this._lookbehind_size - bytesToCutOff)
this._lookbehind_size -= bytesToCutOff
data.copy(this._lookbehind, this._lookbehind_size)
this._lookbehind_size += len
this._bufpos = len
return len
}
}
pos += (pos >= 0) * this._bufpos
// Lookbehind buffer is now empty. We only need to check if the
// needle is in the haystack.
if (data.indexOf(needle, pos) !== -1) {
pos = data.indexOf(needle, pos)
++this.matches
if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) }
return (this._bufpos = pos + needleLength)
} else {
pos = len - needleLength
}
// There was no match. If there's trailing haystack data that we cannot
// match yet using the Boyer-Moore-Horspool algorithm (because the trailing
// data is less than the needle size) then match using a modified
// algorithm that starts matching from the beginning instead of the end.
// Whatever trailing data is left after running this algorithm is added to
// the lookbehind buffer.
while (
pos < len &&
(
data[pos] !== needle[0] ||
(
(Buffer.compare(
data.subarray(pos, pos + len - pos),
needle.subarray(0, len - pos)
) !== 0)
)
)
) {
++pos
}
if (pos < len) {
data.copy(this._lookbehind, 0, pos, pos + (len - pos))
this._lookbehind_size = len - pos
}
// Everything until pos is guaranteed not to contain needle data.
if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) }
this._bufpos = len
return len
}
SBMH.prototype._sbmh_lookup_char = function (data, pos) {
return (pos < 0)
? this._lookbehind[this._lookbehind_size + pos]
: data[pos]
}
SBMH.prototype._sbmh_memcmp = function (data, pos, len) {
for (var i = 0; i < len; ++i) { // eslint-disable-line no-var
if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i]) { return false }
}
return true
}
module.exports = SBMH