Skip to content

Commit 320a2f8

Browse files
add excessive scheduling time to time macro etc.
1 parent 636a35d commit 320a2f8

File tree

3 files changed

+61
-30
lines changed

3 files changed

+61
-30
lines changed

base/Base.jl

+3
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,9 @@ include("weakkeydict.jl")
424424
# ScopedValues
425425
include("scopedvalues.jl")
426426

427+
# used by task.jl
428+
const Workqueue_sched_times = ScopedValues.ScopedValue{Vector{UInt}}()
429+
427430
# metaprogramming
428431
include("meta.jl")
429432

base/task.jl

+13
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,20 @@ end
10981098

10991099
function try_yieldto(undo)
11001100
try
1101+
# when timing time spent in the scheduler we time the time spent in the task
1102+
# then subtract the elapsed time in `@timed` from the total time to get the scheduler time
1103+
t, ts = isassigned(Workqueue_sched_times) ? (time_ns(), Workqueue_sched_times[]) : (nothing, nothing)
11011104
ccall(:jl_switch, Cvoid, ())
1105+
if ts !== nothing
1106+
t = time_ns() - t
1107+
tid = Threads.threadid()
1108+
if tid <= length(ts)
1109+
ts[tid] += t
1110+
else
1111+
# this might be redundant? doesn't seem that try_yieldto is ever called from a thread other than 1?
1112+
println("WARNING: Workqueue_sched_times is too short. tid: $tid, length: $(length(ts))")
1113+
end
1114+
end
11021115
catch
11031116
undo(ccall(:jl_get_next_task, Ref{Task}, ()))
11041117
rethrow()

base/timing.jl

+45-30
Original file line numberDiff line numberDiff line change
@@ -196,17 +196,18 @@ function format_bytes(bytes; binary=true) # also used by InteractiveUtils
196196
end
197197
end
198198

199-
function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, newline=false;
200-
msg::Union{String,Nothing}=nothing)
199+
function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, wall_time_sched=0,
200+
newline=false; msg::Union{String,Nothing}=nothing)
201201
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
202+
wall_time_sched_perc = wall_time_sched / (elapsedtime / 1e9)
202203
str = sprint() do io
203204
if msg isa String
204205
print(io, msg, ": ")
205206
else
206207
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
207208
end
208209
print(io, timestr, " seconds")
209-
parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
210+
parens = bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > 0.1 || lock_conflicts > 0 || compile_time > 0
210211
parens && print(io, " (")
211212
if bytes != 0 || allocs != 0
212213
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
@@ -223,15 +224,21 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
223224
end
224225
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
225226
end
226-
if lock_conflicts > 0
227+
if wall_time_sched_perc > 0.1
227228
if bytes != 0 || allocs != 0 || gctime > 0
228229
print(io, ", ")
229230
end
231+
print(io, Ryu.writefixed(Float64(100*wall_time_sched_perc), 2), "% wall time scheduling")
232+
end
233+
if lock_conflicts > 0
234+
if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > 0.1
235+
print(io, ", ")
236+
end
230237
plural = lock_conflicts == 1 ? "" : "s"
231238
print(io, lock_conflicts, " lock conflict$plural")
232239
end
233240
if compile_time > 0
234-
if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
241+
if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > 0.1 || lock_conflicts > 0
235242
print(io, ", ")
236243
end
237244
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
@@ -346,7 +353,7 @@ macro time(msg, ex)
346353
local ret = @timed $(esc(ex))
347354
local _msg = $(esc(msg))
348355
local _msg_str = _msg === nothing ? _msg : string(_msg)
349-
time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str)
356+
time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, ret.wall_time_sched, true; msg=_msg_str)
350357
ret.value
351358
end
352359
end
@@ -600,29 +607,37 @@ julia> stats.recompile_time
600607
macro timed(ex)
601608
quote
602609
Experimental.@force_compile
603-
Threads.lock_profiling(true)
604-
local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
605-
local stats = gc_num()
606-
local elapsedtime = time_ns()
607-
cumulative_compile_timing(true)
608-
local compile_elapsedtimes = cumulative_compile_time_ns()
609-
local val = @__tryfinally($(esc(ex)),
610-
(elapsedtime = time_ns() - elapsedtime;
611-
cumulative_compile_timing(false);
612-
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
613-
lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
614-
Threads.lock_profiling(false))
615-
)
616-
local diff = GC_Diff(gc_num(), stats)
617-
(
618-
value=val,
619-
time=elapsedtime/1e9,
620-
bytes=diff.allocd,
621-
gctime=diff.total_time/1e9,
622-
gcstats=diff,
623-
lock_conflicts=lock_conflicts,
624-
compile_time=compile_elapsedtimes[1]/1e9,
625-
recompile_time=compile_elapsedtimes[2]/1e9
626-
)
610+
ScopedValues.@with Workqueue_sched_times => zeros(UInt, Threads.maxthreadid()) begin
611+
Experimental.@force_compile
612+
Threads.lock_profiling(true)
613+
local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
614+
local stats = gc_num()
615+
local elapsedtime = time_ns()
616+
cumulative_compile_timing(true)
617+
local compile_elapsedtimes = cumulative_compile_time_ns()
618+
local val = @__tryfinally($(esc(ex)),
619+
(elapsedtime = time_ns() - elapsedtime;
620+
cumulative_compile_timing(false);
621+
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
622+
lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
623+
Threads.lock_profiling(false))
624+
)
625+
local diff = GC_Diff(gc_num(), stats)
626+
# filter out zeros which can only happen if nothing was scheduled
627+
local sched_times = Int.(filter(>(0), Workqueue_sched_times[]))
628+
local wall_time_sched = isempty(sched_times) ? 0 : sum(Int(elapsedtime) .- sched_times) / length(sched_times)
629+
630+
(
631+
value=val,
632+
time=elapsedtime/1e9,
633+
bytes=diff.allocd,
634+
gctime=diff.total_time/1e9,
635+
gcstats=diff,
636+
lock_conflicts=lock_conflicts,
637+
compile_time=compile_elapsedtimes[1]/1e9,
638+
recompile_time=compile_elapsedtimes[2]/1e9,
639+
wall_time_sched=wall_time_sched/1e9
640+
)
641+
end
627642
end
628643
end

0 commit comments

Comments
 (0)