Code style fixes for RadixSort.

trekhleb · trekhleb · commit afd561714404 · 2018-05-29T09:01:16.000+03:00
diff --git a/src/algorithms/sorting/SortTester.js b/src/algorithms/sorting/SortTester.js
@@ -2,8 +2,6 @@ export const sortedArr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
 export const reverseArr = [20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1];
 export const notSortedArr = [15, 8, 5, 12, 10, 1, 16, 9, 11, 7, 20, 3, 2, 6, 17, 18, 4, 13, 14, 19];
 export const equalArr = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
-export const stringArr = ['zzz', 'bb', 'a', 'rr', 'rrb', 'rrba'];
-export const intArr = [3, 1, 75, 32, 884, 523, 4343456, 232, 123, 656, 343];
 
 export class SortTester {
   static testSort(SortingClass) {
diff --git a/src/algorithms/sorting/radix-sort/README.md b/src/algorithms/sorting/radix-sort/README.md
@@ -1,13 +1,29 @@
 # Radix Sort
 
-In computer science, radix sort is a non-comparative integer sorting algorithm that sorts data with integer keys by grouping keys by the individual digits which share the same significant position and value. A positional notation is required, but because integers can represent strings of characters (e.g., names or dates) and specially formatted floating point numbers, radix sort is not limited to integers. Radix sort dates back as far as 1887 to the work of Herman Hollerith on tabulating machines.
+In computer science, **radix sort** is a non-comparative integer sorting 
+algorithm that sorts data with integer keys by grouping keys by the individual 
+digits which share the same significant position and value. A positional notation
+is required, but because integers can represent strings of characters 
+(e.g., names or dates) and specially formatted floating point numbers, radix 
+sort is not limited to integers.
 
 ## Efficiency
 
-The topic of the efficiency of radix sort compared to other sorting algorithms is somewhat tricky and subject to quite a lot of misunderstandings. Whether radix sort is equally efficient, less efficient or more efficient than the best comparison-based algorithms depends on the details of the assumptions made. Radix sort complexity is O(wn) for n keys which are integers of word size w. Sometimes w is presented as a constant, which would make radix sort better (for sufficiently large n) than the best comparison-based sorting algorithms, which all perform O(n log n) comparisons to sort n keys. However, in general w cannot be considered a constant: if all n keys are distinct, then w has to be at least log n for a random-access machine to be able to store them in memory, which gives at best a time complexity O(n log n).[2] That would seem to make radix sort at most equally efficient as the best comparison-based sorts (and worse if keys are much longer than log n).
-
-The counter argument is that comparison-based algorithms are measured in number of comparisons, not actual time complexity. Under some assumptions the comparisons will be constant time on average, under others they will not. Comparisons of randomly generated keys takes constant time on average, as keys differ on the very first bit in half the cases, and differ on the second bit in half of the remaining half, and so on, resulting in an average of two bits that need to be compared. In a sorting algorithm the first comparisons made satisfies the randomness condition, but as the sort progresses the keys compared are clearly not randomly chosen anymore. For example, consider a bottom-up merge sort. The first pass will compare pairs of random keys, but the last pass will compare keys that are very close in the sorting order. This makes merge sort, on this class of inputs, take O(n (log n)2) time. That assumes all memory accesses cost the same, which is not a physically reasonable assumption as we scale n to infinity, and not, in practice, how real computers work.
+The topic of the efficiency of radix sort compared to other sorting algorithms is 
+somewhat tricky and subject to quite a lot of misunderstandings. Whether radix 
+sort is equally efficient, less efficient or more efficient than the best 
+comparison-based algorithms depends on the details of the assumptions made. 
+Radix sort complexity is `O(wn)` for `n` keys which are integers of word size `w`. 
+Sometimes `w` is presented as a constant, which would make radix sort better 
+(for sufficiently large `n`) than the best comparison-based sorting algorithms, 
+which all perform `O(n log n)` comparisons to sort `n` keys. However, in 
+general `w` cannot be considered a constant: if all `n` keys are distinct, 
+then `w` has to be at least `log n` for a random-access machine to be able to 
+store them in memory, which gives at best a time complexity of `O(n log n)`. That 
+would seem to make radix sort at most as efficient as the best 
+comparison-based sorts (and worse if keys are much longer than `log n`).
 
 ## References
 
-- [Wikipedia](https://en.wikipedia.org/wiki/Radix_sort)
+- [Wikipedia](https://en.wikipedia.org/wiki/Radix_sort)
+- [YouTube](https://www.youtube.com/watch?v=XiuSW_mEn7g&index=62&t=0s&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8)
diff --git a/src/algorithms/sorting/radix-sort/RadixSort.js b/src/algorithms/sorting/radix-sort/RadixSort.js
@@ -1,96 +1,27 @@
 import Sort from '../Sort';
 
+// Using charCode (a = 97, b = 98, etc), we can map characters to buckets from 0 - 25
+const BASE_CHAR_CODE = 97;
+const NUMBER_OF_DIGITS = 10;
+const ENGLISH_ALPHABET_LENGTH = 26;
+
 export default class RadixSort extends Sort {
+  /**
+   * @param {*[]} originalArray
+   * @return {*[]}
+   */
   sort(originalArray) {
-    const isNumber = (element) => {
-      return Number.isInteger(element);
-    };
-
-    const createBuckets = (numBuckets) => {
-      /**
-       * Mapping buckets to an array instead of filling them with
-       * an array prevents each bucket from containing a reference to the same array
-       */
-      return new Array(numBuckets).fill(null).map(() => []);
-    };
-
-    const placeElementsInNumberBuckets = (array, index) => {
-      // See below. These are used to determine which digit to use for bucket allocation
-      const modded = 10 ** (index + 1);
-      const divided = 10 ** index;
-      const buckets = createBuckets(10);
-
-      array.forEach((element) => {
-        this.callbacks.visitingCallback(element);
-        if (element < divided) {
-          buckets[0].push(element);
-        } else {
-          /**
-           * Say we have element of 1,052 and are currently on index 1 (starting from 0). This means
-           * we want to use '5' as the bucket. `modded` would be 10 ** (1 + 1), which
-           * is 100. So we take 1,052 % 100 (52) and divide it by 10 (5.2) and floor it (5).
-           */
-          const currentDigit = Math.floor((element % modded) / divided);
-          buckets[currentDigit].push(element);
-        }
-      });
-
-      return buckets;
-    };
-
-    const placeElementsInCharacterBuckets = (array, index, numPasses) => {
-      const getCharCodeOfElementAtIndex = (element) => {
-        // Place element in last bucket if not ready to organize
-        if ((numPasses - index) > element.length) return 25;
-        // Using charCode (a = 97, b = 98, etc), we can map characters to buckets from 0 - 25
-        const BASE_CHAR_CODE = 97;
-        /**
-         * If each character has been organized, use first character to determine bucket,
-         * otherwise iterate backwards through element
-         */
-        const charPos = index > element.length - 1 ? 0 : element.length - index - 1;
-
-        return element.toLowerCase().charCodeAt(charPos) - BASE_CHAR_CODE;
-      };
-
-      const buckets = createBuckets(26);
-
-      array.forEach((element) => {
-        this.callbacks.visitingCallback(element);
-        const currentBucket = getCharCodeOfElementAtIndex(element);
-        buckets[currentBucket].push(element);
-      });
-
-      return buckets;
-    };
-
     // Assumes all elements of array are of the same type
-    const isArrayOfNumbers = isNumber(originalArray[0]);
-
-    /** Number of passes is determined by the length of the longest element in the array.
-     * For integers, this log10(num), and for strings, this would be the lenght of the string.
-     */
-    const determineNumPasses = () => {
-      const getLengthOfLongestElement = () => {
-        if (isArrayOfNumbers) {
-          return Math.floor(Math.log10(Math.max(...originalArray))) + 1;
-        }
-
-        return originalArray.reduce((acc, val) => {
-          return val.length > acc ? val.length : acc;
-        }, -Infinity);
-      };
-
-      return getLengthOfLongestElement(originalArray);
-    };
+    const isArrayOfNumbers = this.isArrayOfNumbers(originalArray);
 
     let sortedArray = [...originalArray];
-    const numPasses = determineNumPasses();
+    const numPasses = this.determineNumPasses(sortedArray);
 
     for (let currentIndex = 0; currentIndex < numPasses; currentIndex += 1) {
       const buckets = isArrayOfNumbers ?
-        placeElementsInNumberBuckets(sortedArray, currentIndex) :
-        placeElementsInCharacterBuckets(sortedArray, currentIndex, numPasses);
+        this.placeElementsInNumberBuckets(sortedArray, currentIndex) :
+        this.placeElementsInCharacterBuckets(sortedArray, currentIndex, numPasses);
+
       // Flatten buckets into sortedArray, and repeat at next index
       sortedArray = buckets.reduce((acc, val) => {
         return [...acc, ...val];
@@ -99,4 +30,123 @@ export default class RadixSort extends Sort {
 
     return sortedArray;
   }
+
+  /**
+   * @param {*[]} array
+   * @param {number} index
+   * @return {*[]}
+   */
+  placeElementsInNumberBuckets(array, index) {
+    // See below. These are used to determine which digit to use for bucket allocation
+    const modded = 10 ** (index + 1);
+    const divided = 10 ** index;
+    const buckets = this.createBuckets(NUMBER_OF_DIGITS);
+
+    array.forEach((element) => {
+      this.callbacks.visitingCallback(element);
+      if (element < divided) {
+        buckets[0].push(element);
+      } else {
+        /**
+         * Say we have element of 1,052 and are currently on index 1 (starting from 0). This means
+         * we want to use '5' as the bucket. `modded` would be 10 ** (1 + 1), which
+         * is 100. So we take 1,052 % 100 (52) and divide it by 10 (5.2) and floor it (5).
+         */
+        const currentDigit = Math.floor((element % modded) / divided);
+        buckets[currentDigit].push(element);
+      }
+    });
+
+    return buckets;
+  }
+
+  /**
+   * @param {*[]} array
+   * @param {number} index
+   * @param {number} numPasses
+   * @return {*[]}
+   */
+  placeElementsInCharacterBuckets(array, index, numPasses) {
+    const buckets = this.createBuckets(ENGLISH_ALPHABET_LENGTH);
+
+    array.forEach((element) => {
+      this.callbacks.visitingCallback(element);
+      const currentBucket = this.getCharCodeOfElementAtIndex(element, index, numPasses);
+      buckets[currentBucket].push(element);
+    });
+
+    return buckets;
+  }
+
+  /**
+   * @param {string} element
+   * @param {number} index
+   * @param {number} numPasses
+   * @return {number}
+   */
+  getCharCodeOfElementAtIndex(element, index, numPasses) {
+    // Keep element in the last bucket while more passes remain than it has characters
+    if ((numPasses - index) > element.length) {
+      return ENGLISH_ALPHABET_LENGTH - 1;
+    }
+
+    /**
+     * If each character has been organized, use first character to determine bucket,
+     * otherwise iterate backwards through element
+     */
+    const charPos = index > element.length - 1 ? 0 : element.length - index - 1;
+
+    return element.toLowerCase().charCodeAt(charPos) - BASE_CHAR_CODE;
+  }
+
+  /**
+   * Number of passes is determined by the length of the longest element in the array.
+   * For integers this is floor(log10(max)) + 1; for strings it is the longest string's length.
+   */
+  determineNumPasses(array) {
+    return this.getLengthOfLongestElement(array);
+  }
+
+  /**
+   * @param {*[]} array
+   * @return {number}
+   */
+  getLengthOfLongestElement(array) {
+    if (this.isArrayOfNumbers(array)) {
+      return Math.floor(Math.log10(Math.max(...array))) + 1;
+    }
+
+    return array.reduce((acc, val) => {
+      return val.length > acc ? val.length : acc;
+    }, -Infinity);
+  }
+
+  /**
+   * @param {*[]} array
+   * @return {boolean}
+   */
+  isArrayOfNumbers(array) {
+    // Only the first element is checked; assumes all elements of array are of the same type
+    return this.isNumber(array[0]);
+  }
+
+  /**
+   * @param {number} numBuckets
+   * @return {*[]}
+   */
+  createBuckets(numBuckets) {
+    /**
+     * Mapping buckets to an array instead of filling them with
+     * an array prevents each bucket from containing a reference to the same array
+     */
+    return new Array(numBuckets).fill(null).map(() => []);
+  }
+
+  /**
+   * @param {*} element
+   * @return {boolean}
+   */
+  isNumber(element) {
+    return Number.isInteger(element);
+  }
 }
diff --git a/src/algorithms/sorting/radix-sort/__test__/RadixSort.test.js b/src/algorithms/sorting/radix-sort/__test__/RadixSort.test.js
@@ -1,9 +1,5 @@
 import RadixSort from '../RadixSort';
-import {
-  stringArr,
-  intArr,
-  SortTester,
-} from '../../SortTester';
+import { SortTester } from '../../SortTester';
 
 // Complexity constants.
 const ARRAY_OF_STRINGS_VISIT_COUNT = 24;
@@ -16,15 +12,15 @@ describe('RadixSort', () => {
   it('should visit array of strings n (number of strings) x m (length of longest element) times', () => {
     SortTester.testAlgorithmTimeComplexity(
       RadixSort,
-      stringArr,
+      ['zzz', 'bb', 'a', 'rr', 'rrb', 'rrba'],
       ARRAY_OF_STRINGS_VISIT_COUNT,
     );
   });
 
   it('should visit array of integers n (number of elements) x m (length of longest integer) times', () => {
     SortTester.testAlgorithmTimeComplexity(
       RadixSort,
-      intArr,
+      [3, 1, 75, 32, 884, 523, 4343456, 232, 123, 656, 343],
       ARRAY_OF_INTEGERS_VISIT_COUNT,
     );
   });