Skip to content

Commit 86a6aa1

Browse files
add excessive scheduling time to time macro etc.
1 parent ab6df86 commit 86a6aa1

File tree

3 files changed

+60
-30
lines changed

3 files changed

+60
-30
lines changed

base/Base.jl

+3
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,9 @@ include("weakkeydict.jl")
424424
# ScopedValues
425425
include("scopedvalues.jl")
426426

427+
# used by task.jl
428+
const Workqueue_sched_times = ScopedValues.ScopedValue{Vector{UInt}}()
429+
427430
# metaprogramming
428431
include("meta.jl")
429432

base/task.jl

+11
Original file line numberDiff line numberDiff line change
@@ -1098,7 +1098,18 @@ end
10981098

10991099
function try_yieldto(undo)
11001100
try
1101+
# when timing time spent in the scheduler we time the time spent in the task
1102+
# then subtract the elapsed time in `@timed` from the total time to get the scheduler time
1103+
t, ts = isassigned(Workqueue_sched_times) ? (time_ns(), Workqueue_sched_times[]) : (nothing, nothing)
11011104
ccall(:jl_switch, Cvoid, ())
1105+
if ts !== nothing
1106+
t = time_ns() - t
1107+
tid = Threads.threadid()
1108+
if tid <= length(ts)
1109+
ts[tid] += t
1110+
end
1111+
# TODO: grow the array if needed i.e. if a thread is added within the `@timed` block
1112+
end
11021113
catch
11031114
undo(ccall(:jl_get_next_task, Ref{Task}, ()))
11041115
rethrow()

base/timing.jl

+46-30
Original file line numberDiff line numberDiff line change
@@ -196,17 +196,19 @@ function format_bytes(bytes; binary=true) # also used by InteractiveUtils
196196
end
197197
end
198198

199-
function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, newline=false;
200-
msg::Union{String,Nothing}=nothing)
199+
function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, wall_time_sched=0,
200+
newline=false; msg::Union{String,Nothing}=nothing)
201201
timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
202+
wall_time_sched_perc = wall_time_sched / (elapsedtime / 1e9)
203+
sched_thresh = 0.1
202204
str = sprint() do io
203205
if msg isa String
204206
print(io, msg, ": ")
205207
else
206208
print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
207209
end
208210
print(io, timestr, " seconds")
209-
parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
211+
parens = bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh || lock_conflicts > 0 || compile_time > 0
210212
parens && print(io, " (")
211213
if bytes != 0 || allocs != 0
212214
allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
@@ -223,15 +225,21 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
223225
end
224226
print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
225227
end
226-
if lock_conflicts > 0
228+
if wall_time_sched_perc > sched_thresh
227229
if bytes != 0 || allocs != 0 || gctime > 0
228230
print(io, ", ")
229231
end
232+
print(io, Ryu.writefixed(Float64(100*wall_time_sched_perc), 2), "% scheduling time")
233+
end
234+
if lock_conflicts > 0
235+
if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh
236+
print(io, ", ")
237+
end
230238
plural = lock_conflicts == 1 ? "" : "s"
231239
print(io, lock_conflicts, " lock conflict$plural")
232240
end
233241
if compile_time > 0
234-
if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
242+
if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh || lock_conflicts > 0
235243
print(io, ", ")
236244
end
237245
print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
@@ -346,7 +354,7 @@ macro time(msg, ex)
346354
local ret = @timed $(esc(ex))
347355
local _msg = $(esc(msg))
348356
local _msg_str = _msg === nothing ? _msg : string(_msg)
349-
time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str)
357+
time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, ret.wall_time_sched, true; msg=_msg_str)
350358
ret.value
351359
end
352360
end
@@ -600,29 +608,37 @@ julia> stats.recompile_time
600608
macro timed(ex)
601609
quote
602610
Experimental.@force_compile
603-
Threads.lock_profiling(true)
604-
local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
605-
local stats = gc_num()
606-
local elapsedtime = time_ns()
607-
cumulative_compile_timing(true)
608-
local compile_elapsedtimes = cumulative_compile_time_ns()
609-
local val = @__tryfinally($(esc(ex)),
610-
(elapsedtime = time_ns() - elapsedtime;
611-
cumulative_compile_timing(false);
612-
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
613-
lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
614-
Threads.lock_profiling(false))
615-
)
616-
local diff = GC_Diff(gc_num(), stats)
617-
(
618-
value=val,
619-
time=elapsedtime/1e9,
620-
bytes=diff.allocd,
621-
gctime=diff.total_time/1e9,
622-
gcstats=diff,
623-
lock_conflicts=lock_conflicts,
624-
compile_time=compile_elapsedtimes[1]/1e9,
625-
recompile_time=compile_elapsedtimes[2]/1e9
626-
)
611+
ScopedValues.@with Workqueue_sched_times => zeros(UInt, Threads.maxthreadid()) begin
612+
Experimental.@force_compile
613+
Threads.lock_profiling(true)
614+
local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[]
615+
local stats = gc_num()
616+
local elapsedtime = time_ns()
617+
cumulative_compile_timing(true)
618+
local compile_elapsedtimes = cumulative_compile_time_ns()
619+
local val = @__tryfinally($(esc(ex)),
620+
(elapsedtime = time_ns() - elapsedtime;
621+
cumulative_compile_timing(false);
622+
compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes;
623+
lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] - lock_conflicts;
624+
Threads.lock_profiling(false))
625+
)
626+
local diff = GC_Diff(gc_num(), stats)
627+
# filter out zeros which can only happen if nothing was scheduled
628+
local sched_times = Int.(filter(>(0), Workqueue_sched_times[]))
629+
local wall_time_sched = isempty(sched_times) ? 0 : sum(Int(elapsedtime) .- sched_times) / length(sched_times)
630+
631+
(
632+
value=val,
633+
time=elapsedtime/1e9,
634+
bytes=diff.allocd,
635+
gctime=diff.total_time/1e9,
636+
gcstats=diff,
637+
lock_conflicts=lock_conflicts,
638+
compile_time=compile_elapsedtimes[1]/1e9,
639+
recompile_time=compile_elapsedtimes[2]/1e9,
640+
wall_time_sched=wall_time_sched/1e9
641+
)
642+
end
627643
end
628644
end

0 commit comments

Comments
 (0)