Skip to content

Commit 393980e

Browse files
authoredMay 7, 2019
Merge pull request #2 from jixianu/issue-102-rabin-karp-fix
Refactor Rabin-Karp (trekhleb#110)
2 parents ecc8c65 + c4605ea commit 393980e

File tree

4 files changed

+139
-88
lines changed

4 files changed

+139
-88
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,20 @@
1-
import { rabinKarp, hashWord, reHashWord } from '../rabinKarp';
1+
import rabinKarp from '../rabinKarp';
22

33
describe('rabinKarp', () => {
4-
it('should correctly calculates hash and re-hash', () => {
5-
expect(hashWord('a')).toBe(97);
6-
expect(hashWord('b')).toBe(98);
7-
expect(hashWord('abc')).toBe(941094);
8-
expect(hashWord('bcd')).toBe(950601);
9-
expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(950601);
10-
expect(reHashWord(hashWord('abc'), 'abc', 'bcd')).toBe(hashWord('bcd'));
11-
});
12-
134
it('should find substring in a string', () => {
145
expect(rabinKarp('', '')).toBe(0);
156
expect(rabinKarp('a', '')).toBe(0);
167
expect(rabinKarp('a', 'a')).toBe(0);
8+
expect(rabinKarp('ab', 'b')).toBe(1);
179
expect(rabinKarp('abcbcglx', 'abca')).toBe(-1);
1810
expect(rabinKarp('abcbcglx', 'bcgl')).toBe(3);
1911
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabcy')).toBe(15);
2012
expect(rabinKarp('abcxabcdabxabcdabcdabcy', 'abcdabca')).toBe(-1);
2113
expect(rabinKarp('abcxabcdabxaabcdabcabcdabcdabcy', 'abcdabca')).toBe(12);
2214
expect(rabinKarp('abcxabcdabxaabaabaaaabcdabcdabcy', 'aabaabaaa')).toBe(11);
15+
expect(rabinKarp('^ !/\'#\'pp', ' !/\'#\'pp')).toBe(1);
16+
expect(rabinKarp('a\u{ffff}', '\u{ffff}')).toBe(1);
17+
expect(rabinKarp('a\u{10000}', '\u{10000}')).toBe(1);
18+
expect(rabinKarp('\u0000耀\u0000', '耀\u0000')).toBe(1);
2319
});
2420
});

‎src/algorithms/string/rabin-karp/rabinKarp.js

+23-78
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,33 @@
1-
/**
2-
* A prime number used to create
3-
* the hash representation of a word
4-
*
5-
* Bigger the prime number,
6-
* bigger the hash value
7-
*/
8-
const PRIME = 97;
9-
10-
/**
11-
* Function that creates hash representation of the word.
12-
*
13-
* @param {string} word
14-
* @return {number}
15-
*/
16-
export function hashWord(word) {
17-
let hash = 0;
18-
19-
for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
20-
hash += word[charIndex].charCodeAt(0) * (PRIME ** charIndex);
21-
}
22-
23-
return hash;
24-
}
25-
26-
/**
27-
* Function that creates hash representation of the word
28-
* based on previous word (shifted by one character left) hash value.
29-
*
30-
* Recalculates the hash representation of a word so that it isn't
31-
* necessary to traverse the whole word again
32-
*
33-
* @param {number} prevHash
34-
* @param {string} prevWord
35-
* @param {string} newWord
36-
* @return {number}
37-
*/
38-
export function reHashWord(prevHash, prevWord, newWord) {
39-
const newWordLastIndex = newWord.length - 1;
40-
let newHash = prevHash - prevWord[0].charCodeAt(0);
41-
newHash /= PRIME;
42-
newHash += newWord[newWordLastIndex].charCodeAt(0) * (PRIME ** newWordLastIndex);
43-
44-
return newHash;
45-
}
1+
import RabinFingerprint from '../../../utils/hash/rolling/Rabin_Fingerprint';
462

473
/**
484
* @param {string} text
495
* @param {string} word
506
* @return {number}
517
*/
52-
export function rabinKarp(text, word) {
53-
// Calculate word hash that we will use for comparison with other substring hashes.
54-
const wordHash = hashWord(word);
55-
56-
let prevSegment = null;
57-
let currentSegmentHash = null;
58-
59-
// Go through all substring of the text that may match
60-
for (let charIndex = 0; charIndex <= text.length - word.length; charIndex += 1) {
61-
const currentSegment = text.substring(charIndex, charIndex + word.length);
62-
63-
// Calculate the hash of current substring.
64-
if (currentSegmentHash === null) {
65-
currentSegmentHash = hashWord(currentSegment);
66-
} else {
67-
currentSegmentHash = reHashWord(currentSegmentHash, prevSegment, currentSegment);
68-
}
69-
70-
prevSegment = currentSegment;
71-
72-
// Compare the hash of current substring and seeking string.
73-
if (wordHash === currentSegmentHash) {
74-
// In case if hashes match let's check substring char by char.
75-
let numberOfMatches = 0;
76-
77-
for (let deepCharIndex = 0; deepCharIndex < word.length; deepCharIndex += 1) {
78-
if (word[deepCharIndex] === text[charIndex + deepCharIndex]) {
79-
numberOfMatches += 1;
80-
}
81-
}
8+
export default function rabinKarp(text, word) {
9+
const toNum = function toNum(character) {
10+
const surrogate = character.codePointAt(1);
11+
return ((surrogate === undefined) ? 0 : surrogate) + (character.codePointAt(0) * (2 ** 16));
12+
};
13+
const arrEq = (a1, a2) => ((a1.length === a2.length) && a1.every((val, idx) => val === a2[idx]));
14+
15+
const wordArr = [...word].map(toNum);
16+
const textArr = [...text].map(toNum);
17+
18+
// The prime generation function could depend on the inputs for collision guarantees.
19+
const hasher = new RabinFingerprint(() => 229);
20+
const cmpVal = hasher.init(wordArr);
21+
22+
let currHash = hasher.init(textArr.slice(0, wordArr.length));
23+
if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(0, wordArr.length))) {
24+
return 0;
25+
}
8226

83-
if (numberOfMatches === word.length) {
84-
return charIndex;
85-
}
27+
for (let i = 0; i < (textArr.length - wordArr.length); i += 1) {
28+
currHash = hasher.roll(textArr[i], textArr[i + wordArr.length]);
29+
if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(i + 1, i + wordArr.length + 1))) {
30+
return i + 1;
8631
}
8732
}
8833

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
 * Rolling hash that generates fingerprints using the Rabin scheme with x = 2.
 *
 * A window [v0, v1, ..., v(n-1)] (v0 being the oldest value) hashes to
 *   (v0 * 2^(n-1) + v1 * 2^(n-2) + ... + v(n-1)) mod prime.
 *
 * Guaranteed not to over or underflow if function assumptions are met.
 */
export default class RabinFingerprint {
  /**
   * @param {function(): number} primeGenerator - Called once; the returned value is the modulus.
   * @assumes Output from any function call is prime less than Number.MAX_SAFE_INTEGER / 2.
   */
  constructor(primeGenerator) {
    this.prime = primeGenerator();
  }

  /**
   * Resets the hasher and digests a complete window of values.
   *
   * @param {number[]} values
   * @returns {number} - The hash value after digesting input.
   * @assumes All array elements are non-negative.
   * @note First element in array is considered to be oldest value.
   */
  init(values) {
    const { prime } = this;
    let hash = 0;
    // 2^(len - 1) mod prime: the factor the oldest value carries inside the hash.
    // Built by repeated doubling so that it never exceeds 2 * prime on the way.
    let weight = 1;

    for (let i = 0; i < values.length; i += 1) {
      hash = (((hash * 2) % prime) + (values[i] % prime)) % prime;
      if (i > 0) {
        weight = (weight * 2) % prime;
      }
    }

    this.val = hash;
    this.len = values.length;
    // Cached so that roll() does not have to recompute it on every call.
    this.weight = weight;

    return this.val;
  }

  /**
   * Slides the window forward by one value.
   *
   * @param {number} oldValue
   * @param {number} newValue
   * @returns {number} - The hash value after removing the oldest value & inserting the newest.
   * @assumes Instance has already been initialized.
   * @assumes oldValue is the oldest value still processed by the hash.
   * @assumes newValue is non-negative.
   */
  roll(oldValue, newValue) {
    const { prime } = this;
    let expired = oldValue % prime;

    if ((prime - 1) * (prime - 1) <= Number.MAX_SAFE_INTEGER) {
      // Both factors are below prime, so the product is exactly representable
      // and a single multiplication by the cached weight is safe.
      expired = (expired * this.weight) % prime;
    } else {
      // For very large primes a direct product could lose precision; fall back
      // to repeated doubling, which never leaves the range [0, 2 * prime).
      for (let i = 1; i < this.len; i += 1) {
        expired = (expired * 2) % prime;
      }
    }

    // Adding prime before subtracting keeps the intermediate value non-negative.
    const withoutOldest = (this.val + prime - expired) % prime;
    this.val = (((withoutOldest * 2) % prime) + (newValue % prime)) % prime;

    return this.val;
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import RabinFingerprint from '../Rabin_Fingerprint';
2+
3+
// Test suite for the RabinFingerprint rolling hash: checks init() against
// closed-form expectations and roll() against both single-value and
// multi-value windows.
describe('Rabin fingerprint Hash Family', () => {
  it('should hash deterministically', () => {
    const primeVals = [3, 5, 19, 53, 97, 401, 7039, 193939];
    for (let primeIdx = 0; primeIdx < primeVals.length; primeIdx += 1) {
      const primeVal = primeVals[primeIdx];
      const hasher = new RabinFingerprint(() => primeVal);

      // Test basic values
      expect(hasher.init([])).toEqual(0);
      expect(hasher.init([1])).toEqual(1);

      // Test overflow
      const largeVal = Number.MAX_SAFE_INTEGER;
      expect(hasher.init([primeVal])).toEqual(0);
      expect(hasher.init([largeVal])).toEqual(largeVal % primeVal);

      const numLargeVal = 2; // 2 ^ numLargeVal fits in javascript number
      const largeValues = new Array(numLargeVal).fill(largeVal);

      // Hash of n copies of v is v * (2^n - 1) mod prime (geometric series in 2).
      const expVal = ((largeVal % primeVal) * ((2 ** numLargeVal) - 1)) % primeVal;
      expect(hasher.init(largeValues)).toEqual(expVal);

      // Test using Fermat's little theorem: in a window of primeVal values the
      // oldest one is weighted by 2^(primeVal - 1), which is 1 mod primeVal,
      // while every other value is a multiple of primeVal and contributes 0.
      const fermatValues = new Array(primeVal).fill(primeVal);
      const numFermatTests = 100;
      for (let i = 0; i < numFermatTests; i += 1) {
        const randomValue = Math.floor(Math.random() * largeVal);
        fermatValues[0] = randomValue;
        expect(hasher.init(fermatValues)).toEqual(randomValue % primeVal);
      }
    }
  });

  it('should roll appropriately', () => {
    const primeVals = [3, 5, 19, 53, 97, 401, 7039, 193939];

    for (let primeIdx = 0; primeIdx < primeVals.length; primeIdx += 1) {
      const primeVal = primeVals[primeIdx];
      const hasher = new RabinFingerprint(() => primeVal);

      // Test basic values (window of a single value, so each roll replaces it)
      const largeVal = Number.MAX_SAFE_INTEGER;
      expect(hasher.init([0])).toEqual(0);
      expect(hasher.roll(0, 1)).toEqual(1);
      expect(hasher.roll(1, primeVal)).toEqual(0);
      expect(hasher.roll(primeVal, largeVal)).toEqual(largeVal % primeVal);

      const numRollTest = 100;
      let previousValue = largeVal;
      for (let i = 0; i < numRollTest; i += 1) {
        const randomVal = Math.floor(Math.random() * largeVal);
        expect(hasher.roll(previousValue, randomVal)).toEqual(randomVal % primeVal);
        previousValue = randomVal;
      }
    }
  });

  it('should agree with init when rolling multi-value windows', () => {
    const primeVals = [3, 5, 19, 53, 97, 401, 7039, 193939];
    const values = [7, 0, 250, 65536, 3, 3, 99991, 1, 42, 1024];
    const windowLengths = [2, 3, 5];

    primeVals.forEach((primeVal) => {
      windowLengths.forEach((windowLength) => {
        const rollingHasher = new RabinFingerprint(() => primeVal);
        const referenceHasher = new RabinFingerprint(() => primeVal);

        rollingHasher.init(values.slice(0, windowLength));

        // Each roll must yield the same hash as digesting the slid window from scratch.
        for (let start = 1; start + windowLength <= values.length; start += 1) {
          const rolled = rollingHasher.roll(
            values[start - 1],
            values[(start + windowLength) - 1],
          );
          const expected = referenceHasher.init(values.slice(start, start + windowLength));
          expect(rolled).toEqual(expected);
        }
      });
    });
  });
});

0 commit comments

Comments
 (0)
Please sign in to comment.