From c72c379a8b8340f51aed1cc05b1399ac1f8e1f80 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Wed, 27 Apr 2022 13:44:25 +0900 Subject: [PATCH] Add an example of the pstats macro to README (closes #21) (#25) * make comments a bit shorter * add example of pstats macro to README * add example of specifying events --- README.md | 37 +++++++++++++++++++++++++++++++++++++ src/LinuxPerf.jl | 14 +++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 0156f32..bcd6c91 100644 --- a/README.md +++ b/README.md @@ -52,5 +52,42 @@ julia> data = zeros(10000); @measure g(data) └───────────────────────┴────────────┴─────────────┘ ``` +The `@pstats` macro provides another (perhaps more concise) tool to measure +performance events, which can be used in the same way as `@timed` of the +standard library. The following example measures default events and reports their +summary: +``` +julia> using LinuxPerf, Random + +julia> mt = MersenneTwister(1234); + +julia> @pstats rand(mt, 1_000_000); # compile + +julia> @pstats rand(mt, 1_000_000) # default events +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +┌ cpu-cycles 2.88e+06 58.1% # 1.2 cycles per ns +│ stalled-cycles-frontend 9.50e+03 58.1% # 0.3% of cycles +└ stalled-cycles-backend 1.76e+06 58.1% # 61.2% of cycles +┌ instructions 1.11e+07 41.9% # 3.9 insns per cycle +│ branch-instructions 5.32e+05 41.9% # 4.8% of insns +└ branch-misses 2.07e+03 41.9% # 0.4% of branch insns +┌ task-clock 2.38e+06 100.0% # 2.4 ms +│ context-switches 0.00e+00 100.0% +│ cpu-migrations 0.00e+00 100.0% +└ page-faults 1.95e+03 100.0% +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +julia> insn = "(instructions,branch-instructions,branch-misses)" + +julia> @pstats insn rand(mt, 1_000_000) # specific events +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +┌ instructions 1.05e+07 100.0% +│ branch-instructions 5.03e+05 100.0% # 4.8% of insns +└ branch-misses 2.01e+03 100.0% # 0.4% of branch insns 
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +See the documentation of `@pstats` for more details and available options. + For more fine tuned performance profile examples, please check out the `test` directory. diff --git a/src/LinuxPerf.jl b/src/LinuxPerf.jl index b85e8ff..933831e 100644 --- a/src/LinuxPerf.jl +++ b/src/LinuxPerf.jl @@ -898,9 +898,9 @@ function printcounters(io::IO, groups::Vector{Vector{Counter}}) for (num, den, label) in [ ("stalled-cycles-frontend", "cpu-cycles", "cycles"), ("stalled-cycles-backend", "cpu-cycles", "cycles"), - ("branch-instructions", "instructions", "instructions"), - ("branch-misses", "branch-instructions", "branch instructions"), - ("cache-misses", "cache-references", "cache references"), + ("branch-instructions", "instructions", "insns"), + ("branch-misses", "branch-instructions", "branch insns"), + ("cache-misses", "cache-references", "cache refs"), ("L1-dcache-load-misses", "L1-dcache-loads", "dcache loads"), ("L1-icache-load-misses", "L1-icache-loads", "icache loads"), ("dTLB-load-misses", "dTLB-loads", "dTLB loads"), @@ -1017,8 +1017,8 @@ julia> @pstats sort(xs) │ stalled-cycles-frontend 1.09e+07 49.7% # 4.2% of cycles └ stalled-cycles-backend 7.07e+06 49.7% # 2.7% of cycles ┌ instructions 1.96e+08 50.3% # 0.8 insns per cycle -│ branch-instructions 4.02e+07 50.3% # 20.5% of instructions -└ branch-misses 8.15e+06 50.3% # 20.3% of branch instructions +│ branch-instructions 4.02e+07 50.3% # 20.5% of insns +└ branch-misses 8.15e+06 50.3% # 20.3% of branch insns ┌ task-clock 7.61e+07 100.0% # 76.1 ms │ context-switches 7.00e+00 100.0% │ cpu-migrations 0.00e+00 100.0% @@ -1029,8 +1029,8 @@ julia> @pstats "(cpu-cycles,instructions,branch-instructions,branch-misses),page ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ┌ cpu-cycles 2.64e+08 100.0% # 3.5 cycles per ns │ instructions 1.86e+08 100.0% # 0.7 insns per cycle -│ branch-instructions 3.74e+07 100.0% # 20.1% of instructions -└ branch-misses 8.21e+06 100.0% # 21.9% of 
branch instructions +│ branch-instructions 3.74e+07 100.0% # 20.1% of insns +└ branch-misses 8.21e+06 100.0% # 21.9% of branch insns ╶ page-faults 1.95e+03 100.0% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ ```