@@ -196,17 +196,19 @@ function format_bytes(bytes; binary=true) # also used by InteractiveUtils
196
196
end
197
197
end
198
198
199
- function time_print (io:: IO , elapsedtime, bytes= 0 , gctime= 0 , allocs= 0 , lock_conflicts= 0 , compile_time= 0 , recompile_time= 0 , newline = false ;
200
- msg:: Union{String,Nothing} = nothing )
199
+ function time_print (io:: IO , elapsedtime, bytes= 0 , gctime= 0 , allocs= 0 , lock_conflicts= 0 , compile_time= 0 , recompile_time= 0 , wall_time_sched = 0 ,
200
+ newline = false ; msg:: Union{String,Nothing} = nothing )
201
201
timestr = Ryu. writefixed (Float64 (elapsedtime/ 1e9 ), 6 )
202
+ wall_time_sched_perc = wall_time_sched / (elapsedtime / 1e9 )
203
+ sched_thresh = 0.1
202
204
str = sprint () do io
203
205
if msg isa String
204
206
print (io, msg, " : " )
205
207
else
206
208
print (io, length (timestr) < 10 ? (" " ^ (10 - length (timestr))) : " " )
207
209
end
208
210
print (io, timestr, " seconds" )
209
- parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
211
+ parens = bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh || lock_conflicts > 0 || compile_time > 0
210
212
parens && print (io, " (" )
211
213
if bytes != 0 || allocs != 0
212
214
allocs, ma = prettyprint_getunits (allocs, length (_cnt_units), Int64 (1000 ))
@@ -223,15 +225,21 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
223
225
end
224
226
print (io, Ryu. writefixed (Float64 (100 * gctime/ elapsedtime), 2 ), " % gc time" )
225
227
end
226
- if lock_conflicts > 0
228
+ if wall_time_sched_perc > sched_thresh
227
229
if bytes != 0 || allocs != 0 || gctime > 0
228
230
print (io, " , " )
229
231
end
232
+ print (io, Ryu. writefixed (Float64 (100 * wall_time_sched_perc), 2 ), " % scheduling time" )
233
+ end
234
+ if lock_conflicts > 0
235
+ if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh
236
+ print (io, " , " )
237
+ end
230
238
plural = lock_conflicts == 1 ? " " : " s"
231
239
print (io, lock_conflicts, " lock conflict$plural " )
232
240
end
233
241
if compile_time > 0
234
- if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
242
+ if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > sched_thresh || lock_conflicts > 0
235
243
print (io, " , " )
236
244
end
237
245
print (io, Ryu. writefixed (Float64 (100 * compile_time/ elapsedtime), 2 ), " % compilation time" )
@@ -346,7 +354,7 @@ macro time(msg, ex)
346
354
local ret = @timed $ (esc (ex))
347
355
local _msg = $ (esc (msg))
348
356
local _msg_str = _msg === nothing ? _msg : string (_msg)
349
- time_print (stdout , ret. time* 1e9 , ret. gcstats. allocd, ret. gcstats. total_time, gc_alloc_count (ret. gcstats), ret. lock_conflicts, ret. compile_time* 1e9 , ret. recompile_time* 1e9 , true ; msg= _msg_str)
357
+ time_print (stdout , ret. time* 1e9 , ret. gcstats. allocd, ret. gcstats. total_time, gc_alloc_count (ret. gcstats), ret. lock_conflicts, ret. compile_time* 1e9 , ret. recompile_time* 1e9 , ret . wall_time_sched, true ; msg= _msg_str)
350
358
ret. value
351
359
end
352
360
end
@@ -600,29 +608,37 @@ julia> stats.recompile_time
600
608
# `@timed ex`: evaluate `ex` and return a named tuple holding its value plus
# timing / allocation statistics gathered around the evaluation.
#
# This span is a diff rendering: lines prefixed with `- ` are the removed
# (old) macro body, lines prefixed with `+ ` are the added (new) body, and the
# bare numbers are old/new line-number residue from the diff view.
#
# What the added version changes relative to the removed one:
#   * the whole measurement is wrapped in
#     `ScopedValues.@with Workqueue_sched_times => zeros(UInt, Threads.maxthreadid())`;
#   * a `wall_time_sched` value is derived from `Workqueue_sched_times[]`
#     after the expression has run;
#   * the returned named tuple gains a `wall_time_sched` field
#     (the computed value divided by 1e9).
#
# Fields of the returned tuple (new version): value, time, bytes, gctime,
# gcstats, lock_conflicts, compile_time, recompile_time, wall_time_sched.
# `time`, `gctime`, `compile_time`, `recompile_time` and `wall_time_sched`
# are each divided by 1e9 before being returned.
macro timed (ex)
601
609
quote
602
610
# NOTE(review): this unchanged context line remains in the patched macro, and
# the added body below invokes `Experimental.@force_compile` again inside the
# `@with` block, so the new version contains it twice — confirm intended.
Experimental. @force_compile
603
- Threads. lock_profiling (true )
604
- local lock_conflicts = Threads. LOCK_CONFLICT_COUNT[]
605
- local stats = gc_num ()
606
- local elapsedtime = time_ns ()
607
- cumulative_compile_timing (true )
608
- local compile_elapsedtimes = cumulative_compile_time_ns ()
609
- local val = @__tryfinally ($ (esc (ex)),
610
- (elapsedtime = time_ns () - elapsedtime;
611
- cumulative_compile_timing (false );
612
- compile_elapsedtimes = cumulative_compile_time_ns () .- compile_elapsedtimes;
613
- lock_conflicts = Threads. LOCK_CONFLICT_COUNT[] - lock_conflicts;
614
- Threads. lock_profiling (false ))
615
- )
616
- local diff = GC_Diff (gc_num (), stats)
617
- (
618
- value= val,
619
- time= elapsedtime/ 1e9 ,
620
- bytes= diff. allocd,
621
- gctime= diff. total_time/ 1e9 ,
622
- gcstats= diff,
623
- lock_conflicts= lock_conflicts,
624
- compile_time= compile_elapsedtimes[1 ]/ 1e9 ,
625
- recompile_time= compile_elapsedtimes[2 ]/ 1e9
626
- )
611
# New body starts here. `Workqueue_sched_times` is bound, for the duration of
# the block, to a fresh zero-filled `UInt` vector with one slot per
# `Threads.maxthreadid()`.
# NOTE(review): who writes into these slots, and what the stored values mean,
# is not visible in this diff — verify against the scheduler-side change.
+ ScopedValues. @with Workqueue_sched_times => zeros (UInt, Threads. maxthreadid ()) begin
612
+ Experimental. @force_compile
613
# Take "before" snapshots: lock-conflict counter, GC counters, wall clock and
# cumulative compile time. Lock profiling and cumulative compile timing are
# switched on here and switched off again in the second argument of
# `@__tryfinally` below.
+ Threads. lock_profiling (true )
614
+ local lock_conflicts = Threads. LOCK_CONFLICT_COUNT[]
615
+ local stats = gc_num ()
616
+ local elapsedtime = time_ns ()
617
+ cumulative_compile_timing (true )
618
+ local compile_elapsedtimes = cumulative_compile_time_ns ()
619
# Evaluate the user's expression (escaped so it resolves in the caller's
# scope). The second argument turns each snapshot into a delta and disables
# the profiling that was enabled above; presumably `@__tryfinally` runs it
# even when `ex` throws — confirm against the macro's definition.
+ local val = @__tryfinally ($ (esc (ex)),
620
+ (elapsedtime = time_ns () - elapsedtime;
621
+ cumulative_compile_timing (false );
622
+ compile_elapsedtimes = cumulative_compile_time_ns () .- compile_elapsedtimes;
623
+ lock_conflicts = Threads. LOCK_CONFLICT_COUNT[] - lock_conflicts;
624
+ Threads. lock_profiling (false ))
625
+ )
626
+ local diff = GC_Diff (gc_num (), stats)
627
+ # filter out zeros which can only happen if nothing was scheduled
628
+ local sched_times = Int .(filter (> (0 ), Workqueue_sched_times[]))
629
# Mean over the non-zero slots of (elapsedtime - slot value); 0 when every
# slot is still zero.
# NOTE(review): if a slot value can exceed `elapsedtime` the mean goes
# negative — confirm the recorded values are bounded by the elapsed time.
+ local wall_time_sched = isempty (sched_times) ? 0 : sum (Int (elapsedtime) .- sched_times) / length (sched_times)
630
+
631
+ (
632
+ value= val,
633
+ time= elapsedtime/ 1e9 ,
634
+ bytes= diff. allocd,
635
+ gctime= diff. total_time/ 1e9 ,
636
+ gcstats= diff,
637
+ lock_conflicts= lock_conflicts,
638
+ compile_time= compile_elapsedtimes[1 ]/ 1e9 ,
639
+ recompile_time= compile_elapsedtimes[2 ]/ 1e9 ,
640
+ wall_time_sched= wall_time_sched/ 1e9
641
+ )
642
+ end
627
643
end
628
644
end
0 commit comments