Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Rabin-Karp #110

Merged
merged 3 commits into from
Jul 30, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions src/algorithms/string/rabin-karp/__test__/rabinKarp.test.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,20 @@
import { rabinKarp, hashWord, reHashWord } from '../rabinKarp';
import rabinKarp from '../rabinKarp';

describe('rabinKarp', () => {
  it('should correctly calculates hash and re-hash', () => {
    // [word, expected hash] pairs checked against hashWord().
    const knownHashes = [
      ['a', 97],
      ['b', 98],
      ['abc', 941094],
      ['bcd', 950601],
    ];

    knownHashes.forEach(([word, expectedHash]) => {
      expect(hashWord(word)).toBe(expectedHash);
    });

    // Re-hashing 'abc' -> 'bcd' must agree with hashing 'bcd' from scratch.
    expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(950601);
    expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(hashWord('bcd'));
  });

  it('should find substring in a string', () => {
    // [text, word, expected index of the first match (-1 when absent)].
    const searchCases = [
      ['', '', 0],
      ['a', '', 0],
      ['a', 'a', 0],
      ['ab', 'b', 1],
      ['abcbcglx', 'abca', -1],
      ['abcbcglx', 'bcgl', 3],
      ['abcxabcdabxabcdabcdabcy', 'abcdabcy', 15],
      ['abcxabcdabxabcdabcdabcy', 'abcdabca', -1],
      ['abcxabcdabxaabcdabcabcdabcdabcy', 'abcdabca', 12],
      ['abcxabcdabxaabaabaaaabcdabcdabcy', 'aabaabaaa', 11],
      ['^ !/\'#\'pp', ' !/\'#\'pp', 1],
      ['a\u{ffff}', '\u{ffff}', 1],
      ['a\u{10000}', '\u{10000}', 1],
      ['\u0000耀\u0000', '耀\u0000', 1],
    ];

    searchCases.forEach(([text, word, expectedIndex]) => {
      expect(rabinKarp(text, word)).toBe(expectedIndex);
    });
  });
});
101 changes: 23 additions & 78 deletions src/algorithms/string/rabin-karp/rabinKarp.js
Original file line number Diff line number Diff line change
@@ -1,88 +1,33 @@
/**
 * A prime number used as the base of the hash representation of a word:
 * hashWord() weights the character at index i by PRIME ** i, and
 * reHashWord() divides by PRIME when dropping the leading character.
 *
 * The bigger the prime number,
 * the bigger the hash value.
 */
const PRIME = 97;

/**
 * Computes the polynomial hash of a word: the UTF-16 code unit at index i
 * contributes `code * PRIME ** i`, and contributions are summed from the
 * first index to the last.
 *
 * @param {string} word
 * @return {number} 0 for an empty word, otherwise the sum described above.
 */
export function hashWord(word) {
  return word
    .split('')
    .reduce((sum, unit, position) => sum + (unit.charCodeAt(0) * (PRIME ** position)), 0);
}

/**
 * Derives the hash of `newWord` from the hash of `prevWord`, where `newWord`
 * is the previous window shifted one character to the right.
 *
 * The first character of `prevWord` is subtracted, the remainder is divided
 * by PRIME (lowering every remaining exponent by one), and the last character
 * of `newWord` is added with the highest exponent. This avoids traversing the
 * whole word again.
 *
 * @param {number} prevHash - Hash of `prevWord`.
 * @param {string} prevWord - Previous window; its first character is dropped.
 * @param {string} newWord - Current window; its last character is added.
 * @return {number}
 */
export function reHashWord(prevHash, prevWord, newWord) {
  const lastIndex = newWord.length - 1;
  const droppedCode = prevWord[0].charCodeAt(0);
  const addedCode = newWord[lastIndex].charCodeAt(0);

  return ((prevHash - droppedCode) / PRIME) + (addedCode * (PRIME ** lastIndex));
}
import RabinFingerprint from '../../../utils/hash/rolling/Rabin_Fingerprint';

/**
* @param {string} text
* @param {string} word
* @return {number}
*/
export function rabinKarp(text, word) {
// Calculate word hash that we will use for comparison with other substring hashes.
const wordHash = hashWord(word);

let prevSegment = null;
let currentSegmentHash = null;

// Go through all substring of the text that may match
for (let charIndex = 0; charIndex <= text.length - word.length; charIndex += 1) {
const currentSegment = text.substring(charIndex, charIndex + word.length);

// Calculate the hash of current substring.
if (currentSegmentHash === null) {
currentSegmentHash = hashWord(currentSegment);
} else {
currentSegmentHash = reHashWord(currentSegmentHash, prevSegment, currentSegment);
}

prevSegment = currentSegment;

// Compare the hash of current substring and seeking string.
if (wordHash === currentSegmentHash) {
// In case if hashes match let's check substring char by char.
let numberOfMatches = 0;

for (let deepCharIndex = 0; deepCharIndex < word.length; deepCharIndex += 1) {
if (word[deepCharIndex] === text[charIndex + deepCharIndex]) {
numberOfMatches += 1;
}
}
export default function rabinKarp(text, word) {
const toNum = function toNum(character) {
const surrogate = character.codePointAt(1);
return ((surrogate === undefined) ? 0 : surrogate) + (character.codePointAt(0) * (2 ** 16));
};
const arrEq = (a1, a2) => ((a1.length === a2.length) && a1.every((val, idx) => val === a2[idx]));

const wordArr = [...word].map(toNum);
const textArr = [...text].map(toNum);

// The prime generation function could depend on the inputs for collision guarantees.
const hasher = new RabinFingerprint(() => 229);
const cmpVal = hasher.init(wordArr);

let currHash = hasher.init(textArr.slice(0, wordArr.length));
if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(0, wordArr.length))) {
return 0;
}

if (numberOfMatches === word.length) {
return charIndex;
}
for (let i = 0; i < (textArr.length - wordArr.length); i += 1) {
currHash = hasher.roll(textArr[i], textArr[i + wordArr.length]);
if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(i + 1, i + wordArr.length + 1))) {
return i + 1;
}
}

51 changes: 51 additions & 0 deletions src/utils/hash/rolling/Rabin_Fingerprint.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/**
 * Computes (factor * multiplier) % modulus without ever forming an intermediate
 * value above Number.MAX_SAFE_INTEGER.
 *
 * When the plain product is exactly representable it is used directly;
 * otherwise the product is built by binary double-and-add, where every
 * intermediate stays below 2 * modulus.
 *
 * @param {number} factor - Residue to scale; expected to satisfy 0 <= factor < modulus.
 * @param {number} multiplier - Non-negative integer multiplier.
 * @param {number} modulus - Positive integer less than Number.MAX_SAFE_INTEGER / 2.
 * @returns {number}
 */
function mulMod(factor, multiplier, modulus) {
  const product = factor * multiplier;
  if (Number.isSafeInteger(product)) {
    return product % modulus;
  }

  let result = 0;
  let addend = factor % modulus;
  let remaining = multiplier;
  while (remaining > 0) {
    if (remaining % 2 === 1) {
      result = (result + addend) % modulus;
    }
    addend = (addend * 2) % modulus;
    remaining = Math.floor(remaining / 2);
  }

  return result;
}

/**
 * Generates fingerprints using Rabin scheme with x = 2 (for potential compiler optimizations).
 * Guaranteed not to over or underflow if function assumptions are met.
 *
 * The fingerprint of a window [v0, v1, ..., vk] (v0 oldest) is
 * (v0 * 2^k + v1 * 2^(k-1) + ... + vk) mod prime.
 */
export default class RabinFingerprint {
  /**
   * @param {function(): number} primeGenerator - Called once; its result is used as the modulus.
   * @assumes Output from any function call is prime less than Number.MAX_SAFE_INTEGER / 2.
   */
  constructor(primeGenerator) {
    this.prime = primeGenerator();
  }

  /**
   * Resets the hasher and digests a full window of values.
   *
   * @param {number[]} values
   * @returns {number} - The hash value after digesting input.
   * @assumes All array elements are non-negative.
   * @note First element in array is considered to be oldest value.
   */
  init(values) {
    this.val = 0;
    this.len = values.length;
    // Weight carried by the oldest value in the window: 2^(len - 1) mod prime.
    // Cached here so that roll() does not have to rebuild it on every call.
    this.oldestWeight = 1;

    for (let i = 0; i < values.length; i += 1) {
      this.val = (((this.val * 2) % this.prime) + (values[i] % this.prime)) % this.prime;
      if (i > 0) {
        this.oldestWeight = (this.oldestWeight * 2) % this.prime;
      }
    }

    return this.val;
  }

  /**
   * Slides the window by one position.
   *
   * @param {number} oldValue
   * @param {number} newValue
   * @returns {number} - The hash value after removing the oldest value & inserting the newest.
   * @assumes Instance has already been initialized.
   * @assumes oldValue is the oldest value still processed by the hash.
   * @assumes newValue is non-negative.
   */
  roll(oldValue, newValue) {
    // Contribution of the outgoing value, using the weight cached by init().
    const oldVal = mulMod(oldValue % this.prime, this.oldestWeight, this.prime);
    // Adding prime before subtracting keeps the intermediate non-negative.
    this.val = (this.val + this.prime - oldVal) % this.prime;

    const newVal = newValue % this.prime;
    this.val = (((this.val * 2) % this.prime) + newVal) % this.prime;

    return this.val;
  }
}
59 changes: 59 additions & 0 deletions src/utils/hash/rolling/__test__/Rabin_Fingerprint.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import RabinFingerprint from '../Rabin_Fingerprint';

describe('Rabin fingerprint Hash Family', () => {
  // Moduli exercised by both tests.
  const PRIMES = [3, 5, 19, 53, 97, 401, 7039, 193939];
  const LARGE = Number.MAX_SAFE_INTEGER;

  it('should hash deterministically', () => {
    PRIMES.forEach((prime) => {
      const hasher = new RabinFingerprint(() => prime);

      // Test basic values
      expect(hasher.init([])).toEqual(0);
      expect(hasher.init([1])).toEqual(1);

      // Test overflow
      expect(hasher.init([prime])).toEqual(0);
      expect(hasher.init([LARGE])).toEqual(LARGE % prime);

      const largeCount = 2; // 2 ^ largeCount fits in javascript number
      const repeatedLarge = Array.from({ length: largeCount }, () => LARGE);
      // Hash of [L, L] is 2L + L, i.e. L * (2^2 - 1), reduced modulo the prime.
      const expected = ((LARGE % prime) * ((2 ** largeCount) - 1)) % prime;
      expect(hasher.init(repeatedLarge)).toEqual(expected);

      // Test using Fermat's little theorem: with `prime` elements the leading
      // value is weighted by 2^(prime - 1), and every other element is a
      // multiple of the prime.
      const fermatWindow = Array.from({ length: prime }, () => prime);
      const fermatRounds = 100;
      for (let round = 0; round < fermatRounds; round += 1) {
        const leading = Math.floor(Math.random() * LARGE);
        fermatWindow[0] = leading;
        expect(hasher.init(fermatWindow)).toEqual(leading % prime);
      }
    });
  });

  it('should roll appropriately', () => {
    PRIMES.forEach((prime) => {
      const hasher = new RabinFingerprint(() => prime);

      // Test basic values on a single-element window.
      expect(hasher.init([0])).toEqual(0);
      expect(hasher.roll(0, 1)).toEqual(1);
      expect(hasher.roll(1, prime)).toEqual(0);
      expect(hasher.roll(prime, LARGE)).toEqual(LARGE % prime);

      // Each roll replaces the only value in the window with a random one.
      const rollRounds = 100;
      let outgoing = LARGE;
      for (let round = 0; round < rollRounds; round += 1) {
        const incoming = Math.floor(Math.random() * LARGE);
        expect(hasher.roll(outgoing, incoming)).toEqual(incoming % prime);
        outgoing = incoming;
      }
    });
  });
});