Skip to content

Commit

Permalink
refactor: lift tokenization to before ranking
Browse files Browse the repository at this point in the history
Moves the tokenization step outside of the fuzzy algorithm. It now
expects the query to be tokenized. This allows the highlighting and
ranking algorithms to become separate and increase performance.

This does not have a measurable impact on performance, but it makes
the code easier to use as a library without requiring allocations.
  • Loading branch information
natecraddock committed Dec 9, 2022
1 parent f88c76e commit 426f4d2
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 49 deletions.
67 changes: 42 additions & 25 deletions src/filter.zig
Original file line number Diff line number Diff line change
Expand Up @@ -72,40 +72,31 @@ test "collectCandidates excess whitespace" {
try testing.expectEqualStrings("fourth", candidates[3].str);
}

fn hasUpper(query: []const u8) bool {
for (query) |*c| {
if (std.ascii.isUpper(c.*)) return true;
}
return false;
}

/// rank each candidate against the query
///
/// returns a sorted slice of Candidates that match the query ready for display
/// in a tui or output to stdout
pub fn rankCandidates(
allocator: std.mem.Allocator,
candidates: []Candidate,
query: []const u8,
tokens: [][]const u8,
keep_order: bool,
plain: bool,
smart_case: bool,
) ![]Candidate {
var ranked = ArrayList(Candidate).init(allocator);
const smart_case = !hasUpper(query);

if (query.len == 0) {
if (tokens.len == 0) {
for (candidates) |candidate| {
try ranked.append(candidate);
}
return ranked.toOwnedSlice();
}

var query_tokens = try splitQuery(allocator, query);
defer allocator.free(query_tokens);
for (candidates) |candidate| {
// TODO: is this copy needed?
var c = candidate;
if (rankCandidate(&c, query_tokens, smart_case, plain)) {
if (rankCandidate(&c, tokens, smart_case, plain)) {
try ranked.append(c);
}
}
Expand All @@ -117,18 +108,6 @@ pub fn rankCandidates(
return ranked.toOwnedSlice();
}

/// split the query on spaces and return a slice of query tokens
fn splitQuery(allocator: std.mem.Allocator, query: []const u8) ![][]const u8 {
var tokens = ArrayList([]const u8).init(allocator);

var it = std.mem.tokenize(u8, query, " ");
while (it.next()) |token| {
try tokens.append(token);
}

return tokens.toOwnedSlice();
}

const indexOfCaseSensitive = std.mem.indexOfScalarPos;

fn indexOf(comptime T: type, slice: []const T, start_index: usize, value: T) ?usize {
Expand Down Expand Up @@ -271,6 +250,44 @@ test "rankToken" {
try testing.expect(rankToken("path/to/file.ext", "file.ext", "p/t/f", false) != null);
}

pub fn highlightToken(
str: []const u8,
filenameOrNull: ?[]const u8,
token: []const u8,
smart_case: bool,
) Range {
var best_rank: ?f64 = null;
var range: Range = .{};

// highlight on the filename if requested
if (filenameOrNull) |filename| {
const offs = str.len - filename.len;
var it = IndexIterator.init(filename, token[0], smart_case);
while (it.next()) |start_index| {
if (scanToEnd(filename, token[1..], start_index, smart_case)) |match| {
if (best_rank == null or match.rank < best_rank.?) {
best_rank = match.rank;
range = .{ .start = match.start + offs, .end = match.end + offs };
}
} else break;
}
if (best_rank != null) return range;
}

// highlight the full string if requested or if no match was found on the filename
var it = IndexIterator.init(str, token[0], smart_case);
while (it.next()) |start_index| {
if (scanToEnd(str, token[1..], start_index, smart_case)) |match| {
if (best_rank == null or match.rank < best_rank.?) {
best_rank = match.rank;
range = .{ .start = match.start, .end = match.end };
}
} else break;
}

return range;
}

const Match = struct {
rank: f64,
start: usize,
Expand Down
7 changes: 6 additions & 1 deletion src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,12 @@ pub fn main() anyerror!void {
if (candidates.len == 0) std.process.exit(1);

if (config.skip_ui) {
const filtered = try filter.rankCandidates(allocator, candidates, config.query, config.keep_order, config.plain);
// Use the heap here rather than an array on the stack. Testing showed that this is actually
// faster, likely due to locality with other heap-alloced data used in the algorithm.
var tokens_buf = try allocator.alloc([]const u8, 16);
const tokens = ui.splitQuery(tokens_buf, config.query);
const smart_case = !ui.hasUpper(config.query);
const filtered = try filter.rankCandidates(allocator, candidates, tokens, config.keep_order, config.plain, smart_case);
if (filtered.len == 0) std.process.exit(1);
for (filtered) |candidate| {
try stdout.print("{s}\n", .{candidate.str});
Expand Down
107 changes: 84 additions & 23 deletions src/ui.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const ArrayList = std.ArrayList;
const Candidate = filter.Candidate;
const File = std.fs.File;
const BufferedWriter = std.io.BufferedWriter;
const Range = filter.Range;

const filter = @import("filter.zig");

Expand Down Expand Up @@ -212,17 +213,17 @@ const HighlightSlice = struct {
const Slicer = struct {
index: usize = 0,
str: []const u8,
ranges: []filter.Range,
ranges: []Range,

fn init(str: []const u8, ranges: []filter.Range) Slicer {
fn init(str: []const u8, ranges: []Range) Slicer {
return .{
.str = str,
.ranges = ranges,
};
}

fn nextRange(slicer: *Slicer) ?*filter.Range {
var next_range: ?*filter.Range = null;
fn nextRange(slicer: *Slicer) ?*Range {
var next_range: ?*Range = null;
for (slicer.ranges) |*r| {
if (r.start >= slicer.index) {
if (next_range == null or r.start < next_range.?.start) {
Expand Down Expand Up @@ -260,54 +261,88 @@ const Slicer = struct {
}
};

inline fn drawCandidate(terminal: *Terminal, candidate: Candidate, width: usize, selected: bool) void {
fn computeRanges(
str: []const u8,
filenameOrNull: ?[]const u8,
ranges: []Range,
tokens: [][]const u8,
smart_case: bool,
) []Range {
for (tokens) |token, i| {
ranges[i] = filter.highlightToken(str, filenameOrNull, token, smart_case);
}
return ranges[0..tokens.len];
}

inline fn drawCandidate(
terminal: *Terminal,
candidate: Candidate,
tokens: [][]const u8,
width: usize,
selected: bool,
smart_case: bool,
plain: bool,
) void {
if (selected) terminal.sgr(.REVERSE);
defer terminal.sgr(.RESET);

var ranges_buf: [16]Range = undefined;
const filename = if (plain) null else std.fs.path.basename(candidate.str);
const ranges = computeRanges(candidate.str, filename, &ranges_buf, tokens, smart_case);

const str = candidate.str[0..std.math.min(width, candidate.str.len)];

// no highlights, just draw the string
// if (candidate.ranges == null or terminal.no_color) {
if (ranges.len == 0 or terminal.no_color) {
_ = terminal.writer.write(str) catch unreachable;
// } else {
// var slicer = Slicer.init(str, candidate.ranges.?);
// while (slicer.next()) |slice| {
// if (slice.highlight) {
// terminal.sgr(.FG_CYAN);
// } else {
// terminal.sgr(.FG_DEFAULT);
// }
// terminal.writeBytes(slice.str);
// }
// }
} else {
var slicer = Slicer.init(str, ranges);
while (slicer.next()) |slice| {
if (slice.highlight) {
terminal.sgr(.FG_CYAN);
} else {
terminal.sgr(.FG_DEFAULT);
}
terminal.writeBytes(slice.str);
}
}
}

inline fn numDigits(number: usize) u16 {
if (number == 0) return 1;
return @intCast(u16, std.math.log10(number) + 1);
}

fn draw(terminal: *Terminal, state: *State, query: ArrayList(u8), candidates: []Candidate, total_candidates: usize) !void {
fn draw(
terminal: *Terminal,
state: *State,
query: []const u8,
tokens: [][]const u8,
candidates: []Candidate,
total_candidates: usize,
smart_case: bool,
plain: bool,
) !void {
const width = terminal.windowSize().?.x;

// draw the candidates
var line: usize = 0;
while (line < terminal.height) : (line += 1) {
terminal.cursorDown(1);
terminal.clearLine();
if (line < candidates.len) drawCandidate(terminal, candidates[line], width, line == state.selected);
if (line < candidates.len) drawCandidate(terminal, candidates[line], tokens, width, line == state.selected, smart_case, plain);
}
terminal.sgr(.RESET);
terminal.cursorUp(terminal.height);

// draw the prompt
const prompt_width = state.prompt.len;
terminal.clearLine();
terminal.print("{s}{s}", .{ state.prompt, query.items[0..std.math.min(width - prompt_width, query.items.len)] });
terminal.print("{s}{s}", .{ state.prompt, query[0..std.math.min(width - prompt_width, query.len)] });

// draw info if there is room
const separator_width = 1;
const spacing = @intCast(i32, width) - @intCast(i32, prompt_width + query.items.len + numDigits(candidates.len) + numDigits(total_candidates) + separator_width);
const spacing = @intCast(i32, width) - @intCast(i32, prompt_width + query.len + numDigits(candidates.len) + numDigits(total_candidates) + separator_width);
if (spacing >= 1) {
terminal.cursorRight(@intCast(usize, spacing));
terminal.print("{}/{}", .{ candidates.len, total_candidates });
Expand Down Expand Up @@ -407,6 +442,25 @@ fn actionDeleteWord(query: *ArrayList(u8), cursor: *usize) void {
}
}

/// split the query on spaces and return a slice of query tokens
pub fn splitQuery(query_tokens: [][]const u8, query: []const u8) [][]const u8 {
var index: u8 = 0;
var it = std.mem.tokenize(u8, query, " ");
while (it.next()) |token| : (index += 1) {
if (index == query_tokens.len) break;
query_tokens[index] = token;
}

return query_tokens[0..index];
}

pub fn hasUpper(query: []const u8) bool {
for (query) |c| {
if (std.ascii.isUpper(c)) return true;
}
return false;
}

pub fn run(
allocator: std.mem.Allocator,
terminal: *Terminal,
Expand Down Expand Up @@ -436,6 +490,10 @@ pub fn run(
var old_state = state;
var old_query = try allocator.alloc(u8, query.items.len);

var tokens_buf = try allocator.alloc([]const u8, 16);
var tokens: [][]const u8 = splitQuery(tokens_buf, query.items);
var smart_case: bool = !hasUpper(query.items);

var redraw = true;
while (true) {
// did the query change?
Expand All @@ -444,15 +502,18 @@ pub fn run(
old_query = try allocator.alloc(u8, query.items.len);
std.mem.copy(u8, old_query, query.items);

filtered = try filter.rankCandidates(allocator, candidates, query.items, keep_order, plain);
tokens = splitQuery(tokens_buf, query.items);
smart_case = !hasUpper(query.items);

filtered = try filter.rankCandidates(allocator, candidates, tokens, keep_order, plain, smart_case);
redraw = true;
state.selected = 0;
}

// did the selection move?
if (redraw or state.cursor != old_state.cursor or state.selected != old_state.selected) {
old_state = state;
try draw(terminal, &state, query, filtered, candidates.len);
try draw(terminal, &state, query.items, tokens, filtered, candidates.len, smart_case, plain);
redraw = false;
}

Expand Down

0 comments on commit 426f4d2

Please sign in to comment.