From e48f70db2b78434cbb0548575281f1f78ca7268f Mon Sep 17 00:00:00 2001
From: JackyYin <jjyyg1123@gmail.com>
Date: Fri, 18 Dec 2020 11:25:48 +0800
Subject: [PATCH 1/2] fix: implementing Pearson Hashing in Hash Table (#529)

---
 src/data-structures/hash-table/HashTable.js   | 47 ++++++++++++++++---
 .../hash-table/__test__/HashTable.test.js     | 21 ++++-----
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/src/data-structures/hash-table/HashTable.js b/src/data-structures/hash-table/HashTable.js
index b8b523ea85..742c19b200 100644
--- a/src/data-structures/hash-table/HashTable.js
+++ b/src/data-structures/hash-table/HashTable.js
@@ -6,6 +6,27 @@ import LinkedList from '../linked-list/LinkedList';
 // are being handled.
 const defaultHashTableSize = 32;
 
+// Lookup table used by the Pearson hashing algorithm.
+// It contains the numbers from 0 to 255 in shuffled order.
+const table = [
+  84, 124, 126, 180, 160, 221, 178, 97, 71, 13, 102, 167, 209, 153, 232, 237,
+  243, 24, 205, 228, 182, 83, 111, 245, 86, 99, 59, 26, 172, 225, 9, 94,
+  37, 179, 138, 250, 154, 142, 132, 36, 134, 233, 188, 100, 29, 57, 161, 17,
+  41, 136, 198, 61, 241, 46, 201, 137, 214, 10, 45, 109, 145, 204, 15, 119,
+  21, 16, 252, 170, 47, 12, 95, 246, 249, 116, 7, 64, 149, 219, 171, 120,
+  212, 216, 80, 74, 186, 192, 210, 190, 85, 78, 196, 181, 150, 110, 176, 127,
+  140, 22, 98, 82, 18, 53, 230, 240, 139, 62, 70, 151, 157, 159, 213, 207,
+  56, 81, 147, 168, 49, 229, 189, 235, 152, 130, 222, 215, 141, 6, 175, 2,
+  244, 28, 224, 96, 91, 27, 129, 227, 199, 251, 69, 242, 223, 158, 8, 23,
+  19, 31, 236, 25, 68, 254, 112, 247, 208, 148, 38, 114, 40, 55, 88, 231,
+  32, 93, 163, 253, 90, 30, 89, 146, 113, 200, 177, 101, 135, 169, 128, 48,
+  174, 66, 4, 155, 14, 166, 60, 193, 162, 0, 58, 79, 92, 239, 72, 203,
+  123, 115, 67, 217, 106, 52, 173, 20, 35, 50, 5, 44, 11, 143, 206, 73,
+  42, 34, 75, 131, 184, 220, 125, 156, 202, 238, 164, 103, 77, 248, 195, 183,
+  211, 65, 226, 105, 43, 191, 3, 54, 234, 187, 194, 117, 121, 107, 1, 218,
+  185, 76, 133, 33, 39, 255, 197, 63, 51, 144, 122, 87, 104, 118, 165, 108,
+];
+
 export default class HashTable {
   /**
    * @param {number} hashTableSize
@@ -25,20 +46,32 @@ export default class HashTable {
    * @return {number}
    */
   hash(key) {
-    // For simplicity reasons we will just use character codes sum of all characters of the key
-    // to calculate the hash.
+    // Use the Pearson hashing algorithm to generate the hash number.
     //
-    // But you may also use more sophisticated approaches like polynomial string hash to reduce the
+    // But you may also use other approaches like polynomial string hash to reduce the
     // number of collisions:
     //
     // hash = charCodeAt(0) * PRIME^(n-1) + charCodeAt(1) * PRIME^(n-2) + ... + charCodeAt(n-1)
     //
     // where charCodeAt(i) is the i-th character code of the key, n is the length of the key and
     // PRIME is just any prime number like 31.
-    const hash = Array.from(key).reduce(
-      (hashAccumulator, keySymbol) => (hashAccumulator + keySymbol.charCodeAt(0)),
-      0,
-    );
+    let hash = 0;
+
+    // Produce a 32-bit hash number by concatenating four 8-bit Pearson rounds.
+    for (let i = 0; i < 4; i += 1) {
+      // charCodeAt returns a UTF-16 code unit in the range 0 to 65535,
+      // so it is reduced modulo 256 to index the 256-entry table.
+      let hashPerRound = table[(key.charCodeAt(0) + i) % 256];
+      for (let j = 1; j < key.length; j += 1) {
+        hashPerRound = table[hashPerRound ^ (key.charCodeAt(j) % 256)];
+      }
+
+      // Shift the accumulated hash left by 8 bits and OR in this round's 8-bit hash.
+      hash = (hash << 8) | hashPerRound;
+    }
+
+    // Unsigned right shift to avoid negative number.
+    hash >>>= 1;
 
     // Reduce hash number so it would fit hash table size.
     return hash % this.buckets.length;
diff --git a/src/data-structures/hash-table/__test__/HashTable.test.js b/src/data-structures/hash-table/__test__/HashTable.test.js
index 86bbf3adbd..0b6ec9c3da 100644
--- a/src/data-structures/hash-table/__test__/HashTable.test.js
+++ b/src/data-structures/hash-table/__test__/HashTable.test.js
@@ -12,18 +12,18 @@ describe('HashTable', () => {
   it('should generate proper hash for specified keys', () => {
     const hashTable = new HashTable();
 
-    expect(hashTable.hash('a')).toBe(1);
-    expect(hashTable.hash('b')).toBe(2);
-    expect(hashTable.hash('abc')).toBe(6);
+    expect(hashTable.hash('a')).toBe(9);
+    expect(hashTable.hash('b')).toBe(26);
+    expect(hashTable.hash('abc')).toBe(26);
   });
 
   it('should set, read and delete data with collisions', () => {
     const hashTable = new HashTable(3);
 
-    expect(hashTable.hash('a')).toBe(1);
+    expect(hashTable.hash('a')).toBe(2);
     expect(hashTable.hash('b')).toBe(2);
-    expect(hashTable.hash('c')).toBe(0);
-    expect(hashTable.hash('d')).toBe(1);
+    expect(hashTable.hash('c')).toBe(1);
+    expect(hashTable.hash('d')).toBe(2);
 
     hashTable.set('a', 'sky-old');
     hashTable.set('a', 'sky');
@@ -37,9 +37,9 @@ describe('HashTable', () => {
 
     const stringifier = (value) => `${value.key}:${value.value}`;
 
-    expect(hashTable.buckets[0].toString(stringifier)).toBe('c:earth');
-    expect(hashTable.buckets[1].toString(stringifier)).toBe('a:sky,d:ocean');
-    expect(hashTable.buckets[2].toString(stringifier)).toBe('b:sea');
+    expect(hashTable.buckets[0].toString(stringifier)).toBe('');
+    expect(hashTable.buckets[1].toString(stringifier)).toBe('c:earth');
+    expect(hashTable.buckets[2].toString(stringifier)).toBe('a:sky,b:sea,d:ocean');
 
     expect(hashTable.get('a')).toBe('sky');
     expect(hashTable.get('d')).toBe('ocean');
@@ -105,13 +105,12 @@ describe('HashTable', () => {
   it('should get all the values in case of hash collision', () => {
     const hashTable = new HashTable(3);
 
-    // Keys `ab` and `ba` in current implementation should result in one hash (one bucket).
     // We need to make sure that several items from one bucket will be serialized.
     hashTable.set('ab', 'one');
     hashTable.set('ba', 'two');
 
     hashTable.set('ac', 'three');
 
-    expect(hashTable.getValues()).toEqual(['one', 'two', 'three']);
+    expect(hashTable.getValues()).toEqual(['one', 'three', 'two']);
   });
 });

From a25febeb2535e863861f4c477112c43855f00b78 Mon Sep 17 00:00:00 2001
From: JackyYin <jjyyg1123@gmail.com>
Date: Mon, 21 Dec 2020 00:01:43 +0800
Subject: [PATCH 2/2] fix: replace right shift by using bit mask

---
 src/data-structures/hash-table/HashTable.js   |  5 +++--
 .../hash-table/__test__/HashTable.test.js     | 20 +++++++++----------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/data-structures/hash-table/HashTable.js b/src/data-structures/hash-table/HashTable.js
index 742c19b200..c10a0a9537 100644
--- a/src/data-structures/hash-table/HashTable.js
+++ b/src/data-structures/hash-table/HashTable.js
@@ -70,8 +70,9 @@ export default class HashTable {
       hash = (hash << 8) | hashPerRound;
     }
 
-    // Unsigned right shift to avoid negative number.
-    hash >>>= 1;
+    // Mask off the leftmost (sign) bit,
+    // so the result is always a non-negative number.
+    hash &= 0x7FFFFFFF;
 
     // Reduce hash number so it would fit hash table size.
     return hash % this.buckets.length;
diff --git a/src/data-structures/hash-table/__test__/HashTable.test.js b/src/data-structures/hash-table/__test__/HashTable.test.js
index 0b6ec9c3da..054ad09f2d 100644
--- a/src/data-structures/hash-table/__test__/HashTable.test.js
+++ b/src/data-structures/hash-table/__test__/HashTable.test.js
@@ -12,18 +12,18 @@ describe('HashTable', () => {
   it('should generate proper hash for specified keys', () => {
     const hashTable = new HashTable();
 
-    expect(hashTable.hash('a')).toBe(9);
-    expect(hashTable.hash('b')).toBe(26);
-    expect(hashTable.hash('abc')).toBe(26);
+    expect(hashTable.hash('a')).toBe(18);
+    expect(hashTable.hash('b')).toBe(21);
+    expect(hashTable.hash('abc')).toBe(21);
   });
 
   it('should set, read and delete data with collisions', () => {
     const hashTable = new HashTable(3);
 
-    expect(hashTable.hash('a')).toBe(2);
+    expect(hashTable.hash('a')).toBe(1);
     expect(hashTable.hash('b')).toBe(2);
-    expect(hashTable.hash('c')).toBe(1);
-    expect(hashTable.hash('d')).toBe(2);
+    expect(hashTable.hash('c')).toBe(2);
+    expect(hashTable.hash('d')).toBe(1);
 
     hashTable.set('a', 'sky-old');
     hashTable.set('a', 'sky');
@@ -38,8 +38,8 @@ describe('HashTable', () => {
     const stringifier = (value) => `${value.key}:${value.value}`;
 
     expect(hashTable.buckets[0].toString(stringifier)).toBe('');
-    expect(hashTable.buckets[1].toString(stringifier)).toBe('c:earth');
-    expect(hashTable.buckets[2].toString(stringifier)).toBe('a:sky,b:sea,d:ocean');
+    expect(hashTable.buckets[1].toString(stringifier)).toBe('a:sky,d:ocean');
+    expect(hashTable.buckets[2].toString(stringifier)).toBe('b:sea,c:earth');
 
     expect(hashTable.get('a')).toBe('sky');
     expect(hashTable.get('d')).toBe('ocean');
@@ -94,7 +94,7 @@ describe('HashTable', () => {
     hashTable.set('b', 'beta');
     hashTable.set('c', 'gamma');
 
-    expect(hashTable.getValues()).toEqual(['gamma', 'alpha', 'beta']);
+    expect(hashTable.getValues()).toEqual(['alpha', 'beta', 'gamma']);
   });
 
   it('should get all the values from empty hash table', () => {
@@ -111,6 +111,6 @@ describe('HashTable', () => {
 
     hashTable.set('ac', 'three');
 
-    expect(hashTable.getValues()).toEqual(['one', 'three', 'two']);
+    expect(hashTable.getValues()).toEqual(['three', 'one', 'two']);
   });
 });