Skip to content

Commit 856ba36

Browse files
committed Sep 6, 2018
Fix levenshteinDistance for Unicode characters
1 parent 2a7faeb commit 856ba36

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed
 

‎src/algorithms/string/levenshtein-distance/__test__/levenshteinDistance.test.js

+1-4
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,7 @@ describe('levenshteinDistance', () => {
5151
fc.fullUnicodeString(),
5252
(aBegin, aEnd, bBegin, bEnd, common) =>
5353
levenshteinDistance(aBegin + common + aEnd, bBegin + common + bEnd)
54-
<= Math.max(aBegin.length, bBegin.length) + Math.max(aEnd.length, bEnd.length)
55-
// @TODO: Provide Unicode support for characters outside of the BMP plan.
56-
// @TODO: Replace previous line by the one below:
57-
// <= Math.max([...aBegin].length, [...bBegin].length) + Math.max([...aEnd].length, [...bEnd].length)
54+
<= Math.max([...aBegin].length, [...bBegin].length) + Math.max([...aEnd].length, [...bEnd].length)
5855
)
5956
));
6057
});

‎src/algorithms/string/levenshtein-distance/levenshteinDistance.js

+7-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,13 @@
33
* @param {string} b
44
* @return {number}
55
*/
6-
export default function levenshteinDistance(a, b) {
6+
export default function levenshteinDistance(aRaw, bRaw) {
7+
// Split strings a and b in characters
8+
// In JavaScript '\u{1f431}' has a length of 2 while it is a single character
9+
// Array.from(string) or [...string] can extract those characters properly
10+
const a = Array.from(aRaw);
11+
const b = Array.from(bRaw);
12+
713
// Create empty edit distance matrix for all possible modifications of
814
// substrings of a to substrings of b.
915
const distanceMatrix = Array(b.length + 1).fill(null).map(() => Array(a.length + 1).fill(null));

0 commit comments

Comments
 (0)
Please sign in to comment.