@@ -196,17 +196,18 @@ function format_bytes(bytes; binary=true) # also used by InteractiveUtils
196
196
end
197
197
end
198
198
199
- function time_print (io:: IO , elapsedtime, bytes= 0 , gctime= 0 , allocs= 0 , lock_conflicts= 0 , compile_time= 0 , recompile_time= 0 , newline = false ;
200
- msg:: Union{String,Nothing} = nothing )
199
+ function time_print (io:: IO , elapsedtime, bytes= 0 , gctime= 0 , allocs= 0 , lock_conflicts= 0 , compile_time= 0 , recompile_time= 0 , wall_time_sched = 0 ,
200
+ newline = false ; msg:: Union{String,Nothing} = nothing )
201
201
timestr = Ryu. writefixed (Float64 (elapsedtime/ 1e9 ), 6 )
202
+ wall_time_sched_perc = wall_time_sched / (elapsedtime / 1e9 )
202
203
str = sprint () do io
203
204
if msg isa String
204
205
print (io, msg, " : " )
205
206
else
206
207
print (io, length (timestr) < 10 ? (" " ^ (10 - length (timestr))) : " " )
207
208
end
208
209
print (io, timestr, " seconds" )
209
- parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
210
+ parens = bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > 0.1 || lock_conflicts > 0 || compile_time > 0
210
211
parens && print (io, " (" )
211
212
if bytes != 0 || allocs != 0
212
213
allocs, ma = prettyprint_getunits (allocs, length (_cnt_units), Int64 (1000 ))
@@ -223,15 +224,21 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
223
224
end
224
225
print (io, Ryu. writefixed (Float64 (100 * gctime/ elapsedtime), 2 ), " % gc time" )
225
226
end
226
- if lock_conflicts > 0
227
+ if wall_time_sched_perc > 0.1
227
228
if bytes != 0 || allocs != 0 || gctime > 0
228
229
print (io, " , " )
229
230
end
231
+ print (io, Ryu. writefixed (Float64 (100 * wall_time_sched_perc), 2 ), " % wall time scheduling" )
232
+ end
233
+ if lock_conflicts > 0
234
+ if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > 0.1
235
+ print (io, " , " )
236
+ end
230
237
plural = lock_conflicts == 1 ? " " : " s"
231
238
print (io, lock_conflicts, " lock conflict$plural " )
232
239
end
233
240
if compile_time > 0
234
- if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
241
+ if bytes != 0 || allocs != 0 || gctime > 0 || wall_time_sched_perc > 0.1 || lock_conflicts > 0
235
242
print (io, " , " )
236
243
end
237
244
print (io, Ryu. writefixed (Float64 (100 * compile_time/ elapsedtime), 2 ), " % compilation time" )
@@ -346,7 +353,7 @@ macro time(msg, ex)
346
353
local ret = @timed $ (esc (ex))
347
354
local _msg = $ (esc (msg))
348
355
local _msg_str = _msg === nothing ? _msg : string (_msg)
349
- time_print (stdout , ret. time* 1e9 , ret. gcstats. allocd, ret. gcstats. total_time, gc_alloc_count (ret. gcstats), ret. lock_conflicts, ret. compile_time* 1e9 , ret. recompile_time* 1e9 , true ; msg= _msg_str)
356
+ time_print (stdout , ret. time* 1e9 , ret. gcstats. allocd, ret. gcstats. total_time, gc_alloc_count (ret. gcstats), ret. lock_conflicts, ret. compile_time* 1e9 , ret. recompile_time* 1e9 , ret . wall_time_sched, true ; msg= _msg_str)
350
357
ret. value
351
358
end
352
359
end
@@ -600,29 +607,37 @@ julia> stats.recompile_time
600
607
macro timed (ex)
601
608
quote
602
609
Experimental. @force_compile
603
- Threads. lock_profiling (true )
604
- local lock_conflicts = Threads. LOCK_CONFLICT_COUNT[]
605
- local stats = gc_num ()
606
- local elapsedtime = time_ns ()
607
- cumulative_compile_timing (true )
608
- local compile_elapsedtimes = cumulative_compile_time_ns ()
609
- local val = @__tryfinally ($ (esc (ex)),
610
- (elapsedtime = time_ns () - elapsedtime;
611
- cumulative_compile_timing (false );
612
- compile_elapsedtimes = cumulative_compile_time_ns () .- compile_elapsedtimes;
613
- lock_conflicts = Threads. LOCK_CONFLICT_COUNT[] - lock_conflicts;
614
- Threads. lock_profiling (false ))
615
- )
616
- local diff = GC_Diff (gc_num (), stats)
617
- (
618
- value= val,
619
- time= elapsedtime/ 1e9 ,
620
- bytes= diff. allocd,
621
- gctime= diff. total_time/ 1e9 ,
622
- gcstats= diff,
623
- lock_conflicts= lock_conflicts,
624
- compile_time= compile_elapsedtimes[1 ]/ 1e9 ,
625
- recompile_time= compile_elapsedtimes[2 ]/ 1e9
626
- )
610
+ ScopedValues. @with Workqueue_sched_times => zeros (UInt, Threads. maxthreadid ()) begin
611
+ Experimental. @force_compile
612
+ Threads. lock_profiling (true )
613
+ local lock_conflicts = Threads. LOCK_CONFLICT_COUNT[]
614
+ local stats = gc_num ()
615
+ local elapsedtime = time_ns ()
616
+ cumulative_compile_timing (true )
617
+ local compile_elapsedtimes = cumulative_compile_time_ns ()
618
+ local val = @__tryfinally ($ (esc (ex)),
619
+ (elapsedtime = time_ns () - elapsedtime;
620
+ cumulative_compile_timing (false );
621
+ compile_elapsedtimes = cumulative_compile_time_ns () .- compile_elapsedtimes;
622
+ lock_conflicts = Threads. LOCK_CONFLICT_COUNT[] - lock_conflicts;
623
+ Threads. lock_profiling (false ))
624
+ )
625
+ local diff = GC_Diff (gc_num (), stats)
626
+ # filter out zeros which can only happen if nothing was scheduled
627
+ local sched_times = Int .(filter (> (0 ), Workqueue_sched_times[]))
628
+ local wall_time_sched = isempty (sched_times) ? 0 : sum (Int (elapsedtime) .- sched_times) / length (sched_times)
629
+
630
+ (
631
+ value= val,
632
+ time= elapsedtime/ 1e9 ,
633
+ bytes= diff. allocd,
634
+ gctime= diff. total_time/ 1e9 ,
635
+ gcstats= diff,
636
+ lock_conflicts= lock_conflicts,
637
+ compile_time= compile_elapsedtimes[1 ]/ 1e9 ,
638
+ recompile_time= compile_elapsedtimes[2 ]/ 1e9 ,
639
+ wall_time_sched= wall_time_sched/ 1e9
640
+ )
641
+ end
627
642
end
628
643
end
0 commit comments