Skip to content

Commit 768e16d

Browse files
wxiaoguangdelvhsilverwind
authored
Use weighted algorithm for string matching when finding files in repo (#21370)
This PR is for: * #20231 Now, when a user searches `word`, they always see `/{word}.txt` before `/{w}e-g{o}t-{r}esult.{d}at` Demo: When searching "a", "a.ext" comes first. Then when searching "at", the longer matched "template" comes first. <details> ![image](https://user-images.githubusercontent.com/2114189/194588738-3644d891-956f-40e4-b79b-b97d34265456.png) ![image](https://user-images.githubusercontent.com/2114189/194588797-9b124670-4e1e-4510-a170-780295ed89b8.png) </details> This PR also makes the frontend tests could import feature JS files by introducing `jestSetup.js` Co-authored-by: delvh <[email protected]> Co-authored-by: silverwind <[email protected]>
1 parent 7bb12d7 commit 768e16d

File tree

6 files changed

+113
-66
lines changed

6 files changed

+113
-66
lines changed

jest.config.js

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
// to run tests with ES6 module, node must run with "--experimental-vm-modules", or see Makefile's "test-frontend" for reference
12
export default {
23
rootDir: 'web_src',
34
setupFilesAfterEnv: ['jest-extended/all'],
@@ -7,6 +8,8 @@ export default {
78
transform: {
89
'\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js',
910
},
11+
setupFiles: [
12+
'./js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config)
13+
],
1014
verbose: false,
1115
};
12-

web_src/js/features/repo-findfile.js

+69-18
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,96 @@
11
import $ from 'jquery';
22

33
import {svg} from '../svg.js';
4-
import {strSubMatch} from '../utils.js';
54
const {csrf} = window.config;
65

76
const threshold = 50;
87
let files = [];
98
let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult;
109

11-
function filterRepoFiles(filter) {
12-
const treeLink = $repoFindFileInput.attr('data-url-tree-link');
13-
$repoFindFileTableBody.empty();
1410

15-
const fileRes = [];
11+
// return the case-insensitive sub-match result as an array: [unmatched, matched, unmatched, matched, ...]
12+
// res[even] is unmatched, res[odd] is matched, see unit tests for examples
13+
// argument subLower must be a lower-cased string.
14+
export function strSubMatch(full, subLower) {
15+
const res = [''];
16+
let i = 0, j = 0;
17+
const fullLower = full.toLowerCase();
18+
while (i < subLower.length && j < fullLower.length) {
19+
if (subLower[i] === fullLower[j]) {
20+
if (res.length % 2 !== 0) res.push('');
21+
res[res.length - 1] += full[j];
22+
j++;
23+
i++;
24+
} else {
25+
if (res.length % 2 === 0) res.push('');
26+
res[res.length - 1] += full[j];
27+
j++;
28+
}
29+
}
30+
if (i !== subLower.length) {
31+
// if the sub string doesn't match the full, only return the full as unmatched.
32+
return [full];
33+
}
34+
if (j < full.length) {
35+
// append remaining chars from full to result as unmatched
36+
if (res.length % 2 === 0) res.push('');
37+
res[res.length - 1] += full.substring(j);
38+
}
39+
return res;
40+
}
41+
42+
export function calcMatchedWeight(matchResult) {
43+
let weight = 0;
44+
for (let i = 0; i < matchResult.length; i++) {
45+
if (i % 2 === 1) { // matches are on odd indices, see strSubMatch
46+
// use a function f(x+x) > f(x) + f(x) to make the longer matched string has higher weight.
47+
weight += matchResult[i].length * matchResult[i].length;
48+
}
49+
}
50+
return weight;
51+
}
52+
53+
export function filterRepoFilesWeighted(files, filter) {
54+
let filterResult = [];
1655
if (filter) {
17-
for (let i = 0; i < files.length && fileRes.length < threshold; i++) {
18-
const subMatch = strSubMatch(files[i], filter);
19-
if (subMatch.length > 1) {
20-
fileRes.push(subMatch);
56+
const filterLower = filter.toLowerCase();
57+
// TODO: for large repo, this loop could be slow, maybe there could be one more limit:
58+
// ... && filterResult.length < threshold * 20, wait for more feedbacks
59+
for (let i = 0; i < files.length; i++) {
60+
const res = strSubMatch(files[i], filterLower);
61+
if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings
62+
filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)});
2163
}
2264
}
65+
filterResult.sort((a, b) => b.matchWeight - a.matchWeight);
66+
filterResult = filterResult.slice(0, threshold);
2367
} else {
2468
for (let i = 0; i < files.length && i < threshold; i++) {
25-
fileRes.push([files[i]]);
69+
filterResult.push({matchResult: [files[i]], matchWeight: 0});
2670
}
2771
}
72+
return filterResult;
73+
}
74+
75+
function filterRepoFiles(filter) {
76+
const treeLink = $repoFindFileInput.attr('data-url-tree-link');
77+
$repoFindFileTableBody.empty();
2878

79+
const filterResult = filterRepoFilesWeighted(files, filter);
2980
const tmplRow = `<tr><td><a></a></td></tr>`;
3081

31-
$repoFindFileNoResult.toggle(fileRes.length === 0);
32-
for (const matchRes of fileRes) {
82+
$repoFindFileNoResult.toggle(filterResult.length === 0);
83+
for (const r of filterResult) {
3384
const $row = $(tmplRow);
3485
const $a = $row.find('a');
35-
$a.attr('href', `${treeLink}/${matchRes.join('')}`);
86+
$a.attr('href', `${treeLink}/${r.matchResult.join('')}`);
3687
const $octiconFile = $(svg('octicon-file')).addClass('mr-3');
3788
$a.append($octiconFile);
38-
// if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz']
39-
// the matchRes[odd] is matched and highlighted to red.
40-
for (let j = 0; j < matchRes.length; j++) {
41-
if (!matchRes[j]) continue;
42-
const $span = $('<span>').text(matchRes[j]);
89+
// if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz']
90+
// the matchResult[odd] is matched and highlighted to red.
91+
for (let j = 0; j < r.matchResult.length; j++) {
92+
if (!r.matchResult[j]) continue;
93+
const $span = $('<span>').text(r.matchResult[j]);
4394
if (j % 2 === 1) $span.addClass('ui text red');
4495
$a.append($span);
4596
}
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js';
2+
3+
describe('Repo Find Files', () => {
4+
test('strSubMatch', () => {
5+
expect(strSubMatch('abc', '')).toEqual(['abc']);
6+
expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
7+
expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
8+
expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
9+
expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
10+
expect(strSubMatch('abc', 'z')).toEqual(['abc']);
11+
expect(strSubMatch('abc', 'az')).toEqual(['abc']);
12+
13+
expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']);
14+
expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);
15+
16+
expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
17+
expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
18+
});
19+
20+
test('calcMatchedWeight', () => {
21+
expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy();
22+
});
23+
24+
test('filterRepoFilesWeighted', () => {
25+
// the first matched result should always be the "word.txt"
26+
let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word');
27+
expect(res).toHaveLength(2);
28+
expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
29+
30+
res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word');
31+
expect(res).toHaveLength(2);
32+
expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
33+
});
34+
});

web_src/js/testUtils/jestSetup.js

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
window.config = {
2+
csrfToken: 'jest-test-csrf-token-123456',
3+
pageData: {},
4+
i18n: {},
5+
};

web_src/js/utils.js

-30
Original file line numberDiff line numberDiff line change
@@ -59,36 +59,6 @@ export function parseIssueHref(href) {
5959
return {owner, repo, type, index};
6060
}
6161

62-
// return the sub-match result as an array: [unmatched, matched, unmatched, matched, ...]
63-
// res[even] is unmatched, res[odd] is matched, see unit tests for examples
64-
export function strSubMatch(full, sub) {
65-
const res = [''];
66-
let i = 0, j = 0;
67-
const subLower = sub.toLowerCase(), fullLower = full.toLowerCase();
68-
while (i < subLower.length && j < fullLower.length) {
69-
if (subLower[i] === fullLower[j]) {
70-
if (res.length % 2 !== 0) res.push('');
71-
res[res.length - 1] += full[j];
72-
j++;
73-
i++;
74-
} else {
75-
if (res.length % 2 === 0) res.push('');
76-
res[res.length - 1] += full[j];
77-
j++;
78-
}
79-
}
80-
if (i !== sub.length) {
81-
// if the sub string doesn't match the full, only return the full as unmatched.
82-
return [full];
83-
}
84-
if (j < full.length) {
85-
// append remaining chars from full to result as unmatched
86-
if (res.length % 2 === 0) res.push('');
87-
res[res.length - 1] += full.substring(j);
88-
}
89-
return res;
90-
}
91-
9262
// pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200
9363
export function prettyNumber(num, locale = 'en-US') {
9464
if (typeof num !== 'number') return '';

web_src/js/utils.test.js

+1-17
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import {
2-
basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch,
2+
basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref,
33
prettyNumber, parseUrl,
44
} from './utils.js';
55

@@ -86,22 +86,6 @@ test('parseIssueHref', () => {
8686
expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined});
8787
});
8888

89-
test('strSubMatch', () => {
90-
expect(strSubMatch('abc', '')).toEqual(['abc']);
91-
expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
92-
expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
93-
expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
94-
expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
95-
expect(strSubMatch('abc', 'z')).toEqual(['abc']);
96-
expect(strSubMatch('abc', 'az')).toEqual(['abc']);
97-
98-
expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']);
99-
expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);
100-
101-
expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
102-
expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
103-
});
104-
10589
test('prettyNumber', () => {
10690
expect(prettyNumber()).toEqual('');
10791
expect(prettyNumber(null)).toEqual('');

0 commit comments

Comments
 (0)