|
1 |
/**
 * Base of the polynomial hash: character `i` of a word is weighted
 * by `PRIME ** i`.
 */
const PRIME = 97;

/**
 * Modulus applied after every arithmetic step (2 ** 26 - 5).
 *
 * It is below 2 ** 26, so the product of any two residues stays below
 * 2 ** 52 and is therefore represented exactly by a JavaScript number.
 * It is not a multiple of PRIME, so PRIME has a modular inverse.
 */
const MODULUS = 67108859;

/**
 * Computes `(base ** exponent) % modulus` by repeated squaring so that
 * no intermediate value leaves the exactly-representable integer range.
 *
 * @param {number} base
 * @param {number} exponent - non-negative integer
 * @param {number} modulus
 * @return {number}
 */
function modPow(base, exponent, modulus) {
  let result = 1;
  let factor = base % modulus;
  let remaining = exponent;

  while (remaining > 0) {
    if (remaining % 2 === 1) {
      result = (result * factor) % modulus;
    }
    factor = (factor * factor) % modulus;
    remaining = Math.floor(remaining / 2);
  }

  return result;
}

/**
 * Computes the multiplicative inverse of `value` modulo `modulus`
 * using the extended Euclidean algorithm.
 * Requires `value` and `modulus` to be coprime.
 *
 * @param {number} value
 * @param {number} modulus
 * @return {number}
 */
function modInverse(value, modulus) {
  let prevRemainder = value;
  let remainder = modulus;
  let prevCoefficient = 1;
  let coefficient = 0;

  while (remainder !== 0) {
    const quotient = Math.floor(prevRemainder / remainder);
    [prevRemainder, remainder] = [remainder, prevRemainder - quotient * remainder];
    [prevCoefficient, coefficient] = [coefficient, prevCoefficient - quotient * coefficient];
  }

  // The Bezout coefficient may be negative; bring it into [0, modulus).
  return ((prevCoefficient % modulus) + modulus) % modulus;
}

/**
 * Multiplying by this value modulo MODULUS undoes a multiplication by PRIME.
 * It replaces the floating-point division used for "shifting" the hash.
 */
const PRIME_INVERSE = modInverse(PRIME, MODULUS);

/**
 * Function that creates hash representation of the word.
 *
 * The hash is the sum of `charCode(i) * PRIME ** i` over all characters,
 * reduced modulo MODULUS at every step. The reduction keeps every
 * intermediate value an exact integer, so hashes of long words do not
 * suffer from floating-point rounding. For words whose unreduced sum is
 * smaller than MODULUS the result equals the plain polynomial sum.
 *
 * @param {string} word
 * @return {number} integer in the range [0, MODULUS); 0 for an empty word
 */
export function hashWord(word) {
  let hash = 0;
  let power = 1; // PRIME ** charIndex (mod MODULUS)

  for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
    const term = (word.charCodeAt(charIndex) * power) % MODULUS;
    hash = (hash + term) % MODULUS;
    power = (power * PRIME) % MODULUS;
  }

  return hash;
}

/**
 * Function that creates hash representation of the word
 * based on previous word (shifted by one character left) hash value.
 *
 * Recalculates the hash representation of a word so that it isn't
 * necessary to traverse the whole word again: the contribution of the
 * first character of `prevWord` is removed, the remaining terms are
 * shifted down by one power of PRIME, and the last character of
 * `newWord` is added with the highest power.
 *
 * @param {number} prevHash - hash of `prevWord` as returned by `hashWord`/`reHashWord`
 * @param {string} prevWord - non-empty previous window
 * @param {string} newWord - non-empty window shifted one character to the right
 * @return {number} integer in the range [0, MODULUS), equal to `hashWord(newWord)`
 * @throws {TypeError} when `prevWord` or `newWord` is empty
 */
export function reHashWord(prevHash, prevWord, newWord) {
  if (prevWord.length === 0 || newWord.length === 0) {
    throw new TypeError('reHashWord requires non-empty prevWord and newWord');
  }

  const newWordLastIndex = newWord.length - 1;

  // Remove the leading character; `%` keeps the sign of the dividend,
  // so add MODULUS before reducing again to stay non-negative.
  const withoutFirst = (((prevHash - prevWord.charCodeAt(0)) % MODULUS) + MODULUS) % MODULUS;

  // Exact modular equivalent of dividing by PRIME.
  const shifted = (withoutFirst * PRIME_INVERSE) % MODULUS;

  const highestPower = modPow(PRIME, newWordLastIndex, MODULUS);
  const lastTerm = (newWord.charCodeAt(newWordLastIndex) * highestPower) % MODULUS;

  return (shifted + lastTerm) % MODULUS;
}
| 1 | +import RabinFingerprint from '../../../utils/hash/rolling/Rabin_Fingerprint'; |
46 | 2 |
|
47 | 3 | /**
|
48 | 4 | * @param {string} text
|
49 | 5 | * @param {string} word
|
50 | 6 | * @return {number}
|
51 | 7 | */
|
52 |
| -export function rabinKarp(text, word) { |
53 |
| - // Calculate word hash that we will use for comparison with other substring hashes. |
54 |
| - const wordHash = hashWord(word); |
55 |
| - |
56 |
| - let prevSegment = null; |
57 |
| - let currentSegmentHash = null; |
58 |
| - |
59 |
| - // Go through all substring of the text that may match |
60 |
| - for (let charIndex = 0; charIndex <= text.length - word.length; charIndex += 1) { |
61 |
| - const currentSegment = text.substring(charIndex, charIndex + word.length); |
62 |
| - |
63 |
| - // Calculate the hash of current substring. |
64 |
| - if (currentSegmentHash === null) { |
65 |
| - currentSegmentHash = hashWord(currentSegment); |
66 |
| - } else { |
67 |
| - currentSegmentHash = reHashWord(currentSegmentHash, prevSegment, currentSegment); |
68 |
| - } |
69 |
| - |
70 |
| - prevSegment = currentSegment; |
71 |
| - |
72 |
| - // Compare the hash of current substring and seeking string. |
73 |
| - if (wordHash === currentSegmentHash) { |
74 |
| - // In case if hashes match let's check substring char by char. |
75 |
| - let numberOfMatches = 0; |
76 |
| - |
77 |
| - for (let deepCharIndex = 0; deepCharIndex < word.length; deepCharIndex += 1) { |
78 |
| - if (word[deepCharIndex] === text[charIndex + deepCharIndex]) { |
79 |
| - numberOfMatches += 1; |
80 |
| - } |
81 |
| - } |
| 8 | +export default function rabinKarp(text, word) { |
| 9 | + const toNum = function toNum(character) { |
| 10 | + const surrogate = character.codePointAt(1); |
| 11 | + return ((surrogate === undefined) ? 0 : surrogate) + (character.codePointAt(0) * (2 ** 16)); |
| 12 | + }; |
| 13 | + const arrEq = (a1, a2) => ((a1.length === a2.length) && a1.every((val, idx) => val === a2[idx])); |
| 14 | + |
| 15 | + const wordArr = [...word].map(toNum); |
| 16 | + const textArr = [...text].map(toNum); |
| 17 | + |
| 18 | + // The prime generation function could depend on the inputs for collision guarantees. |
| 19 | + const hasher = new RabinFingerprint(() => 229); |
| 20 | + const cmpVal = hasher.init(wordArr); |
| 21 | + |
| 22 | + let currHash = hasher.init(textArr.slice(0, wordArr.length)); |
| 23 | + if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(0, wordArr.length))) { |
| 24 | + return 0; |
| 25 | + } |
82 | 26 |
|
83 |
| - if (numberOfMatches === word.length) { |
84 |
| - return charIndex; |
85 |
| - } |
| 27 | + for (let i = 0; i < (textArr.length - wordArr.length); i += 1) { |
| 28 | + currHash = hasher.roll(textArr[i], textArr[i + wordArr.length]); |
| 29 | + if ((currHash === cmpVal) && arrEq(wordArr, textArr.slice(i + 1, i + wordArr.length + 1))) { |
| 30 | + return i + 1; |
86 | 31 | }
|
87 | 32 | }
|
88 | 33 |
|
|
0 commit comments