Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: implementing Pearson Hashing in Hash Table (#529) #596

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions src/data-structures/hash-table/HashTable.js
Original file line number Diff line number Diff line change
@@ -6,6 +6,27 @@ import LinkedList from '../linked-list/LinkedList';
// are being handled.
const defaultHashTableSize = 32;

// Lookup table used by the Pearson hashing in `hash()`.
// It holds the numbers from 0 to 255 in shuffled order (16 rows of 16 values),
// and is always indexed with a byte-sized value (0 to 255).
const table = [
84, 124, 126, 180, 160, 221, 178, 97, 71, 13, 102, 167, 209, 153, 232, 237,
243, 24, 205, 228, 182, 83, 111, 245, 86, 99, 59, 26, 172, 225, 9, 94,
37, 179, 138, 250, 154, 142, 132, 36, 134, 233, 188, 100, 29, 57, 161, 17,
41, 136, 198, 61, 241, 46, 201, 137, 214, 10, 45, 109, 145, 204, 15, 119,
21, 16, 252, 170, 47, 12, 95, 246, 249, 116, 7, 64, 149, 219, 171, 120,
212, 216, 80, 74, 186, 192, 210, 190, 85, 78, 196, 181, 150, 110, 176, 127,
140, 22, 98, 82, 18, 53, 230, 240, 139, 62, 70, 151, 157, 159, 213, 207,
56, 81, 147, 168, 49, 229, 189, 235, 152, 130, 222, 215, 141, 6, 175, 2,
244, 28, 224, 96, 91, 27, 129, 227, 199, 251, 69, 242, 223, 158, 8, 23,
19, 31, 236, 25, 68, 254, 112, 247, 208, 148, 38, 114, 40, 55, 88, 231,
32, 93, 163, 253, 90, 30, 89, 146, 113, 200, 177, 101, 135, 169, 128, 48,
174, 66, 4, 155, 14, 166, 60, 193, 162, 0, 58, 79, 92, 239, 72, 203,
123, 115, 67, 217, 106, 52, 173, 20, 35, 50, 5, 44, 11, 143, 206, 73,
42, 34, 75, 131, 184, 220, 125, 156, 202, 238, 164, 103, 77, 248, 195, 183,
211, 65, 226, 105, 43, 191, 3, 54, 234, 187, 194, 117, 121, 107, 1, 218,
185, 76, 133, 33, 39, 255, 197, 63, 51, 144, 122, 87, 104, 118, 165, 108,
];

export default class HashTable {
/**
* @param {number} hashTableSize
@@ -25,20 +46,33 @@ export default class HashTable {
* @return {number}
*/
hash(key) {
// For simplicity reasons we will just use character codes sum of all characters of the key
// to calculate the hash.
// Using Pearson hashing algorithm to generate the hash number.
//
// But you may also use more sophisticated approaches like polynomial string hash to reduce the
// But you may also use other approaches like polynomial string hash to reduce the
// number of collisions:
//
// hash = charCodeAt(0) * PRIME^(n-1) + charCodeAt(1) * PRIME^(n-2) + ... + charCodeAt(n-1)
//
// where charCodeAt(i) is the i-th character code of the key, n is the length of the key and
// PRIME is just any prime number like 31.
const hash = Array.from(key).reduce(
(hashAccumulator, keySymbol) => (hashAccumulator + keySymbol.charCodeAt(0)),
0,
);
let hash = 0;

// Produce a 32-bit hash number from four 8-bit rounds.
for (let i = 0; i < 4; i += 1) {
// charCodeAt may return a number from 0 to 65535,
// so it is remapped to the range 0 to 255.
let hashPerRound = table[(key.charCodeAt(0) + i) % 256];
for (let j = 1; j < key.length; j += 1) {
hashPerRound = table[hashPerRound ^ (key.charCodeAt(j) % 256)];
}

// Shift the accumulated hash left by 8 bits and OR in this round's byte.
hash = (hash << 8) | hashPerRound;
}

// Mask off the leftmost (sign) bit,
// so the result is a non-negative number.
hash &= 0x7FFFFFFF;

// Reduce hash number so it would fit hash table size.
return hash % this.buckets.length;
17 changes: 8 additions & 9 deletions src/data-structures/hash-table/__test__/HashTable.test.js
Original file line number Diff line number Diff line change
@@ -12,17 +12,17 @@ describe('HashTable', () => {
it('should generate proper hash for specified keys', () => {
const hashTable = new HashTable();

expect(hashTable.hash('a')).toBe(1);
expect(hashTable.hash('b')).toBe(2);
expect(hashTable.hash('abc')).toBe(6);
expect(hashTable.hash('a')).toBe(18);
expect(hashTable.hash('b')).toBe(21);
expect(hashTable.hash('abc')).toBe(21);
});

it('should set, read and delete data with collisions', () => {
const hashTable = new HashTable(3);

expect(hashTable.hash('a')).toBe(1);
expect(hashTable.hash('b')).toBe(2);
expect(hashTable.hash('c')).toBe(0);
expect(hashTable.hash('c')).toBe(2);
expect(hashTable.hash('d')).toBe(1);

hashTable.set('a', 'sky-old');
@@ -37,9 +37,9 @@ describe('HashTable', () => {

const stringifier = (value) => `${value.key}:${value.value}`;

expect(hashTable.buckets[0].toString(stringifier)).toBe('c:earth');
expect(hashTable.buckets[0].toString(stringifier)).toBe('');
expect(hashTable.buckets[1].toString(stringifier)).toBe('a:sky,d:ocean');
expect(hashTable.buckets[2].toString(stringifier)).toBe('b:sea');
expect(hashTable.buckets[2].toString(stringifier)).toBe('b:sea,c:earth');

expect(hashTable.get('a')).toBe('sky');
expect(hashTable.get('d')).toBe('ocean');
@@ -94,7 +94,7 @@ describe('HashTable', () => {
hashTable.set('b', 'beta');
hashTable.set('c', 'gamma');

expect(hashTable.getValues()).toEqual(['gamma', 'alpha', 'beta']);
expect(hashTable.getValues()).toEqual(['alpha', 'beta', 'gamma']);
});

it('should get all the values from empty hash table', () => {
@@ -105,13 +105,12 @@ describe('HashTable', () => {
it('should get all the values in case of hash collision', () => {
const hashTable = new HashTable(3);

// Keys `ab` and `ba` in current implementation should result in one hash (one bucket).
// We need to make sure that several items from one bucket will be serialized.
hashTable.set('ab', 'one');
hashTable.set('ba', 'two');

hashTable.set('ac', 'three');

expect(hashTable.getValues()).toEqual(['one', 'two', 'three']);
expect(hashTable.getValues()).toEqual(['three', 'one', 'two']);
});
});