-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extend benchmark script suite to CPU profiling with `perf` and `samply`
This is useful to dig into why exactly C Zopfli has different performance than Rust Zopfli on a case-by-case basis.
- Loading branch information
1 parent
05e2ddd
commit cff69f5
Showing
8 changed files
with
74 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Make a best effort to produce builds with frame pointers, which | ||
# are useful for low-overhead, cross-platform, and accurate enough | ||
# stack unwinding by profilers such as perf. | ||
# | ||
# This does not affect downstream Zopfli users. | ||
# | ||
# Related reads: | ||
# - <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html> | ||
# - <https://fedoraproject.org/wiki/Changes/fno-omit-frame-pointer> | ||
# - <https://pagure.io/fedora-rust/rust2rpm/pull-request/237> | ||
[build] | ||
rustflags = ["-C", "force-frame-pointers=true"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,5 @@ data/*.gz | |
data_google | ||
data_rust | ||
zopfli | ||
benchmark*.json | ||
benchmark*.json | ||
profiles |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/bin/sh

# Helper script to record profiles of Zopfli's CPU usage on a Linux computer with `perf`,
# and display them with `samply`. The profiles are not recorded directly with `samply`
# because doing so does not simplify the script much anyway, `perf`'s output format is
# more interoperable with other visualization frontends, and `perf record` exposes
# many more native features and knobs that are useful.
#
# Usage: <this script> rust|c [ZOPFLI_ARGS...]
#
# The first argument selects which Zopfli flavor to build and profile. Every remaining
# argument is passed verbatim to the profiled Zopfli binary.
#
# All paths used below are relative to the current working directory.

# Abort on command failures and on expansion of unset variables. This is set here
# instead of on the shebang line so that it also applies when the script is run
# through an explicit interpreter invocation (`sh <this script>`), in which case the
# shebang line is only a comment.
set -eu

mkdir -p test/perf/profiles

if [ -d /sys/devices/cpu_core ]; then
    # For hybrid Intel CPUs (Alder Lake+, with performance and efficiency cores),
    # only record cycles for the performance cores Zopfli will be scheduled to,
    # as Samply does not have proper support for combining both efficiency and
    # performance events
    readonly CPU_CYCLES_EVENT='cpu_core/cycles/'
else
    readonly CPU_CYCLES_EVENT='cpu-cycles'
fi

# "${1:-}" expands to an empty string when no argument was given, so that `set -u`
# does not abort the script with a cryptic "parameter not set" error before the
# invalid flavor message below can be shown.
case "${1:-}" in
    'rust')
        readonly BINARY_PATH='target/release/zopfli'
        readonly PROFILE_FILE_PREFIX='rust'
        cargo build --release;;
    'c')
        # Run the prepare.sh script first to get the Zopfli sources
        readonly BINARY_PATH='zopfli/zopfli'
        readonly PROFILE_FILE_PREFIX='c'
        make -C zopfli;;
    *)
        echo 'Invalid Zopfli flavor specified, expected either "rust" or "c"' >&2
        echo "Usage: $0 rust|c [ZOPFLI_ARGS...]" >&2
        exit 1;;
esac
# Drop the flavor argument, leaving only the arguments meant for Zopfli in "$@"
shift

# The recorded profile is kept around, named after the flavor and the current Unix time
PERF_DATA_FILE="test/perf/profiles/${PROFILE_FILE_PREFIX}_zopfli_$(date +%s).perf.data"
readonly PERF_DATA_FILE

# Sample the selected CPU cycles event at 5000 Hz, unwinding call stacks via frame
# pointers. `set -x` runs in a subshell so that only this command is echoed.
(set -x; perf record \
    --call-graph=fp -F 5000 \
    --event="$CPU_CYCLES_EVENT" \
    -o "$PERF_DATA_FILE" -- "$BINARY_PATH" "$@")

# File `samply` is told to write its converted profile to. It is removed when the
# script exits or is interrupted, as the `perf` data file above is the one to keep.
PROCESSED_PROFILE="${TMPDIR:-/tmp}/profile-$(date +%s).bin"
readonly PROCESSED_PROFILE
trap 'rm -f "$PROCESSED_PROFILE" || true' EXIT INT TERM

(set -x; samply import -o "$PROCESSED_PROFILE" "$PERF_DATA_FILE")