# This file is a part of Julia. License is MIT: https://julialang.org/license
# Tracking of newly-inferred CodeInstances during precompilation
const track_newly_inferred = RefValue{Bool}(false)
const newly_inferred = CodeInstance[]
# build (and start inferring) the inference frame for the top-level MethodInstance
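# (`cache` selects the caching mode forwarded to `InferenceState`; the modes used in this
# file are `:global`, `:local`, and `:no`.)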
function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
frame = InferenceState(result, cache, interp)
frame === nothing && return false
cache === :global && lock_mi_inference(interp, result.linfo)
return typeinf(interp, frame)
end
"""
The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
can be used to measure the exclusive time spent inferring each method instance that is
recursively inferred during type inference.
This is meant to be internal to the compiler, and makes some specific assumptions about
being used for this purpose alone.
"""
module Timings
using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
@inbounds, copy, backtrace
# What we record for any given frame we infer during type inference.
struct InferenceFrameInfo
mi::Core.MethodInstance
world::UInt64
sptypes::Vector{Core.Compiler.VarState}
slottypes::Vector{Any}
nargs::Int
end
function _typeinf_identifier(frame::Core.Compiler.InferenceState)
mi_info = InferenceFrameInfo(
frame.linfo,
frame.world,
copy(frame.sptypes),
copy(frame.slottypes),
length(frame.result.argtypes),
)
return mi_info
end
_typeinf_identifier(frame::InferenceFrameInfo) = frame
"""
Core.Compiler.Timing(mi_info, start_time, ...)
Internal type containing the timing result for running type inference on a single
MethodInstance.
"""
struct Timing
mi_info::InferenceFrameInfo
start_time::UInt64
cur_start_time::UInt64
time::UInt64
children::Core.Array{Timing,1}
bt # backtrace collected upon initial entry to typeinf
end
Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
_time_ns() = ccall(:jl_hrtime, UInt64, ()) # Re-implemented here because Base is not yet available.
# We keep a stack of the Timings for each of the MethodInstances currently being timed.
# Since type inference currently operates via a depth-first search (during abstract
# evaluation), this vector operates like a call stack. The last node in _timings is the
# node currently being inferred, and its parent is directly before it, etc.
# Each Timing also contains its own vector for all of its children, so that the tree
# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
# because we create a new node for duplicates.)
const _timings = Timing[]
# ROOT() is an empty function used as the top-level Timing node to measure all time spent
# *not* in type inference during a given recording trace. It is used as a "dummy" node.
function ROOT() end
const ROOTmi = Core.Compiler.specialize_method(
first(Core.Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec())
"""
Core.Compiler.reset_timings()
Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
"""
function reset_timings()
empty!(_timings)
push!(_timings, Timing(
# The MethodInstance for ROOT(), and default empty values for other fields.
InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
_time_ns()))
return nothing
end
reset_timings()
# (This is split into a function so that it can be called both in this module, at the top
# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
# the operation and called `reset_timings()`.)
# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
# timer manipulations and the tasks we're timing.
@inline function close_current_timer()
stop_time = _time_ns()
parent_timer = _timings[end]
accum_time = stop_time - parent_timer.cur_start_time
# Add in accum_time ("modify" the immutable struct)
@inbounds begin
_timings[end] = Timing(
parent_timer.mi_info,
parent_timer.start_time,
parent_timer.cur_start_time,
parent_timer.time + accum_time,
parent_timer.children,
parent_timer.bt,
)
end
return nothing
end
@inline function enter_new_timer(frame)
# Very first thing, stop the active timer: get the current time and add in the
# time since it was last started to its aggregate exclusive time.
close_current_timer()
mi_info = _typeinf_identifier(frame)
# Start the new timer right before returning
push!(_timings, Timing(mi_info, UInt64(0)))
len = length(_timings)
new_timer = @inbounds _timings[len]
# Set the current time _after_ appending the node, to try to exclude the
# overhead from measurement.
start = _time_ns()
@inbounds begin
_timings[len] = Timing(
new_timer.mi_info,
start,
start,
new_timer.time,
new_timer.children,
)
end
return nothing
end
# _expected_frame_ is not needed for the timing logic itself; it is only used in the `@assert`, to
# assert that indeed we are always returning to a parent after finishing all of its
# children (that is, asserting that inference proceeds via depth-first-search).
@inline function exit_current_timer(_expected_frame_)
# Finish the new timer
stop_time = _time_ns()
expected_mi_info = _typeinf_identifier(_expected_frame_)
# Grab the new timer again because it might have been modified in _timings
# (since it's an immutable struct)
# And remove it from the current timings stack
new_timer = pop!(_timings)
Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
# Prepare to unwind one level of the stack and record in the parent
parent_timer = _timings[end]
accum_time = stop_time - new_timer.cur_start_time
# Add in accum_time ("modify" the immutable struct)
new_timer = Timing(
new_timer.mi_info,
new_timer.start_time,
new_timer.cur_start_time,
new_timer.time + accum_time,
new_timer.children,
parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
)
# Record the final timing with the original parent timer
push!(parent_timer.children, new_timer)
# And finally restart the parent timer:
len = length(_timings)
@inbounds begin
_timings[len] = Timing(
parent_timer.mi_info,
parent_timer.start_time,
_time_ns(),
parent_timer.time,
parent_timer.children,
parent_timer.bt,
)
end
return nothing
end
end # module Timings
"""
Core.Compiler.__set_measure_typeinf(onoff::Bool)
If set to `true`, record per-method-instance timings within type inference in the Compiler.
"""
__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
const __measure_typeinf__ = fill(false)
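# Illustrative usage sketch (comment only, not executed here): from an external session on a
# Julia build where these internals are reachable, per-MethodInstance inference timings could
# be collected roughly as follows. All names below exist in this file; only the driving code
# is hypothetical.
#
#   Core.Compiler.Timings.reset_timings()          # clear `_timings` and start the ROOT() timer
#   Core.Compiler.__set_measure_typeinf(true)
#   # ... run code that triggers fresh type inference ...
#   Core.Compiler.__set_measure_typeinf(false)
#   Core.Compiler.Timings.close_current_timer()    # fold the remaining elapsed time into ROOT()
#   root = Core.Compiler.Timings._timings[1]       # the ROOT() Timing node
#   root.children                                  # one Timing tree per top-level inferred MethodInstance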
# Wrapper around _typeinf that optionally records the exclusive time for each invocation.
function typeinf(interp::AbstractInterpreter, frame::InferenceState)
interp = switch_from_irinterp(interp)
if __measure_typeinf__[]
Timings.enter_new_timer(frame)
v = _typeinf(interp, frame)
Timings.exit_current_timer(frame)
return v
else
return _typeinf(interp, frame)
end
end
function finish!(interp::AbstractInterpreter, caller::InferenceResult)
# If we didn't transform the src for caching, we may have to transform
# it anyway for users like typeinf_ext. Do that here.
opt = caller.src
if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true`
if opt.ir !== nothing
if caller.must_be_codeinf
caller.src = ir_to_codeinf!(opt)
elseif is_inlineable(opt.src)
# TODO: If the CFG is too big, inlining becomes more expensive and if we're going to
# use this IR over and over, it's worth simplifying it. Round trips through
# CodeInstance do this implicitly, since they recompute the CFG, so try to
# match that behavior here.
# ir = cfg_simplify!(opt.ir)
caller.src = opt.ir
else
# Not cached and not inlineable - drop the ir
caller.src = nothing
end
end
end
return caller.src
end
function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
# with no active IPs, the frame is done
frames = frame.callers_in_cycle
isempty(frames) && push!(frames, frame)
valid_worlds = WorldRange()
for caller in frames
@assert !(caller.dont_work_on_me)
caller.dont_work_on_me = true
# we might not have fully intersected these earlier, so do that now
valid_worlds = intersect(caller.valid_worlds, valid_worlds)
end
for caller in frames
caller.valid_worlds = valid_worlds
finish(caller, interp)
end
# collect results for the new expanded frame
results = Tuple{InferenceResult, Vector{Any}, Bool}[
( frames[i].result,
frames[i].stmt_edges[1]::Vector{Any},
frames[i].cached )
for i in 1:length(frames) ]
empty!(frames)
for (caller, _, _) in results
opt = caller.src
if opt isa OptimizationState{typeof(interp)} # implies `may_optimize(interp) === true`
analyzed = optimize(interp, opt, caller)
caller.valid_worlds = (opt.inlining.et::EdgeTracker).valid_worlds[]
end
end
for (caller, edges, cached) in results
valid_worlds = caller.valid_worlds
if last(valid_worlds) >= get_world_counter()
# if we aren't cached, we don't need this edge
# but our caller might, so let's just make it anyway
store_backedges(caller, edges)
end
if cached
cache_result!(interp, caller)
end
finish!(interp, caller)
end
return true
end
function CodeInstance(interp::AbstractInterpreter, result::InferenceResult,
@nospecialize(inferred_result), valid_worlds::WorldRange)
local const_flags::Int32
result_type = result.result
@assert !(result_type === nothing || result_type isa LimitedAccuracy)
if isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val)
# use constant calling convention
rettype_const = result_type.val
const_flags = 0x3
if may_discard_trees(interp)
inferred_result = nothing
end
else
if isa(result_type, Const)
rettype_const = result_type.val
const_flags = 0x2
elseif isa(result_type, PartialOpaque)
rettype_const = result_type
const_flags = 0x2
elseif isconstType(result_type)
rettype_const = result_type.parameters[1]
const_flags = 0x2
elseif isa(result_type, PartialStruct)
rettype_const = result_type.fields
const_flags = 0x2
elseif isa(result_type, InterConditional)
rettype_const = result_type
const_flags = 0x2
elseif isa(result_type, InterMustAlias)
rettype_const = result_type
const_flags = 0x2
else
rettype_const = nothing
const_flags = 0x00
end
end
relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] :
inferred_result === nothing ? UInt8(1) : UInt8(0)
# relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] : UInt8(0)
return CodeInstance(result.linfo,
widenconst(result_type), rettype_const, inferred_result,
const_flags, first(valid_worlds), last(valid_worlds),
# TODO: Actually do something with non-IPO effects
encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes,
relocatability)
end
function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInstance, ci::CodeInfo)
def = linfo.def
toplevel = !isa(def, Method)
if toplevel
return ci
end
if may_discard_trees(interp)
cache_the_tree = ci.inferred && (is_inlineable(ci) || isa_compileable_sig(linfo.specTypes, linfo.sparam_vals, def))
else
cache_the_tree = true
end
if cache_the_tree
if may_compress(interp)
nslots = length(ci.slotflags)
resize!(ci.slottypes::Vector{Any}, nslots)
resize!(ci.slotnames, nslots)
return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci)
else
return ci
end
else
return nothing
end
end
function transform_result_for_cache(interp::AbstractInterpreter,
linfo::MethodInstance, valid_worlds::WorldRange, result::InferenceResult)
inferred_result = result.src
# If we decided not to optimize, drop the OptimizationState now.
# External interpreters can override as necessary to cache additional information
if inferred_result isa OptimizationState{typeof(interp)}
inferred_result = ir_to_codeinf!(inferred_result)
end
if inferred_result isa CodeInfo
inferred_result.min_world = first(valid_worlds)
inferred_result.max_world = last(valid_worlds)
inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result)
end
# The global cache can only handle objects that codegen understands
if !isa(inferred_result, MaybeCompressed)
inferred_result = nothing
end
return inferred_result
end
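# Illustrative sketch (comment only, hypothetical): as noted above, an external
# `AbstractInterpreter` may add its own method for this hook to cache extra information,
# e.g. (qualifiers such as `Core.Compiler.` omitted for brevity; `MyInterp` and
# `remember_result!` are names invented for this example):
#
#   function transform_result_for_cache(interp::MyInterp, linfo::MethodInstance,
#                                       valid_worlds::WorldRange, result::InferenceResult)
#       remember_result!(interp, result)  # stash extra data in a side table owned by `interp`
#       return invoke(transform_result_for_cache,
#                     Tuple{AbstractInterpreter,MethodInstance,WorldRange,InferenceResult},
#                     interp, linfo, valid_worlds, result)
#   end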
function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
valid_worlds = result.valid_worlds
if last(valid_worlds) == get_world_counter()
# if we've successfully recorded all of the backedges in the global reverse-cache,
# we can now widen our applicability in the global cache too
valid_worlds = WorldRange(first(valid_worlds), typemax(UInt))
end
# check if the existing linfo metadata is also sufficient to describe the current inference result
# to decide if it is worth caching this
linfo = result.linfo
already_inferred = already_inferred_quick_test(interp, linfo)
if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
already_inferred = true
end
# TODO: also don't store inferred code if we've previously decided to interpret this function
if !already_inferred
inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result)
code_cache(interp)[linfo] = ci = CodeInstance(interp, result, inferred_result, valid_worlds)
if track_newly_inferred[]
m = linfo.def
if isa(m, Method) && m.module != Core
ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
end
end
end
unlock_mi_inference(interp, linfo)
nothing
end
function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
if typ isa LimitedAccuracy
if sv.parent === nothing
# when part of a cycle, we might have unintentionally introduced a limit marker
@assert !isempty(sv.callers_in_cycle)
return typ.typ
end
causes = copy(typ.causes)
delete!(causes, sv)
for caller in sv.callers_in_cycle
delete!(causes, caller)
end
if isempty(causes)
return typ.typ
end
if length(causes) != length(typ.causes)
return LimitedAccuracy(typ.typ, causes)
end
end
return typ
end
function adjust_effects(sv::InferenceState)
ipo_effects = sv.ipo_effects
# refine :consistent-cy effect using the return type information
# TODO this adjustment is a workaround for imprecise :consistent-cy information,
# which is currently modeled in a flow-insensitive way: ideally we want to model it
# with a proper dataflow analysis instead
rt = sv.bestguess
if ipo_effects.noinbounds && rt === Bottom
# either always throwing an error or never returning counts as consistent
ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
end
if is_inaccessiblemem_or_argmemonly(ipo_effects) && all(1:narguments(sv, #=include_va=#true)) do i::Int
return is_mutation_free_argtype(sv.slottypes[i])
end
ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
end
if is_consistent_if_notreturned(ipo_effects) && is_identity_free_argtype(rt)
# when the :consistent-cy here is only tainted by mutable allocations
# (indicated by `CONSISTENT_IF_NOTRETURNED`), we may be able to refine it if the return
# type guarantees that the allocations are never returned
consistent = ipo_effects.consistent & ~CONSISTENT_IF_NOTRETURNED
ipo_effects = Effects(ipo_effects; consistent)
end
if is_consistent_if_inaccessiblememonly(ipo_effects)
if is_inaccessiblememonly(ipo_effects)
consistent = ipo_effects.consistent & ~CONSISTENT_IF_INACCESSIBLEMEMONLY
ipo_effects = Effects(ipo_effects; consistent)
elseif is_inaccessiblemem_or_argmemonly(ipo_effects)
else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this
ipo_effects = Effects(ipo_effects; consistent=ALWAYS_FALSE)
end
end
if is_effect_free_if_inaccessiblememonly(ipo_effects)
if is_inaccessiblememonly(ipo_effects)
effect_free = ipo_effects.effect_free & ~EFFECT_FREE_IF_INACCESSIBLEMEMONLY
ipo_effects = Effects(ipo_effects; effect_free)
elseif is_inaccessiblemem_or_argmemonly(ipo_effects)
else # `:inaccessiblememonly` is already tainted, there will be no chance to refine this
ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_FALSE)
end
end
# override the analyzed effects using manually annotated effect settings
def = sv.linfo.def
if isa(def, Method)
override = decode_effects_override(def.purity)
if is_effect_overridden(override, :consistent)
ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
end
if is_effect_overridden(override, :effect_free)
ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE)
end
if is_effect_overridden(override, :nothrow)
ipo_effects = Effects(ipo_effects; nothrow=true)
end
if is_effect_overridden(override, :terminates_globally)
ipo_effects = Effects(ipo_effects; terminates=true)
end
if is_effect_overridden(override, :notaskstate)
ipo_effects = Effects(ipo_effects; notaskstate=true)
end
if is_effect_overridden(override, :inaccessiblememonly)
ipo_effects = Effects(ipo_effects; inaccessiblememonly=ALWAYS_TRUE)
end
end
return ipo_effects
end
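# Illustrative note: the override branch above is what makes user annotations such as
#
#   Base.@assume_effects :nothrow :terminates_globally function my_kernel(n)
#       ...
#   end
#
# take effect: the macro records the settings in the method's `purity` field, and
# `decode_effects_override` recovers them here. (`my_kernel` is a name invented for this
# example.)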
# inference completed on `me`
# update the MethodInstance
function finish(me::InferenceState, interp::AbstractInterpreter)
# prepare to run optimization passes on fulltree
s_edges = me.stmt_edges[1]
if s_edges === nothing
s_edges = me.stmt_edges[1] = []
end
for edges in me.stmt_edges
edges === nothing && continue
edges === s_edges && continue
append!(s_edges, edges)
empty!(edges)
end
if me.src.edges !== nothing
append!(s_edges, me.src.edges::Vector)
me.src.edges = nothing
end
# inspect whether our inference had a limited result accuracy,
# else it may be suitable to cache
bestguess = me.bestguess = cycle_fix_limited(me.bestguess, me)
limited_ret = bestguess isa LimitedAccuracy
limited_src = false
if !limited_ret
gt = me.ssavaluetypes
for j = 1:length(gt)
gt[j] = gtj = cycle_fix_limited(gt[j], me)
if gtj isa LimitedAccuracy && me.parent !== nothing
limited_src = true
break
end
end
end
if limited_ret
# a parent may be cached still, but not this intermediate work:
# we can throw everything else away now
me.result.src = nothing
me.cached = false
set_inlineable!(me.src, false)
unlock_mi_inference(interp, me.linfo)
elseif limited_src
# a type result will be cached still, but not this intermediate work:
# we can throw everything else away now
me.result.src = nothing
set_inlineable!(me.src, false)
else
# annotate fulltree with type information,
# either because we are the outermost code, or we might use this later
doopt = (me.cached || me.parent !== nothing)
recompute_cfg = type_annotate!(interp, me, doopt)
if doopt && may_optimize(interp)
me.result.src = OptimizationState(me, interp, recompute_cfg)
else
me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection)
end
end
me.result.valid_worlds = me.valid_worlds
me.result.result = bestguess
me.ipo_effects = me.result.ipo_effects = adjust_effects(me)
validate_code_in_debug_mode(me.linfo, me.src, "inferred")
nothing
end
# record the backedges
function store_backedges(caller::InferenceResult, edges::Vector{Any})
isa(caller.linfo.def, Method) || return nothing # don't add backedges to toplevel method instance
return store_backedges(caller.linfo, edges)
end
function store_backedges(caller::MethodInstance, edges::Vector{Any})
for itr in BackedgeIterator(edges)
callee = itr.caller
if isa(callee, MethodInstance)
ccall(:jl_method_instance_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller)
else
typeassert(callee, MethodTable)
ccall(:jl_method_table_add_backedge, Cvoid, (Any, Any, Any), callee, itr.sig, caller)
end
end
return nothing
end
function record_slot_assign!(sv::InferenceState)
# look at all assignments to slots
# and union the set of types stored there
# to compute a lower bound on the storage required
body = sv.src.code::Vector{Any}
slottypes = sv.slottypes::Vector{Any}
ssavaluetypes = sv.ssavaluetypes
for i = 1:length(body)
expr = body[i]
# find all reachable assignments to locals
if was_reached(sv, i) && isexpr(expr, :(=))
lhs = expr.args[1]
if isa(lhs, SlotNumber)
typ = ssavaluetypes[i]
@assert typ !== NOT_FOUND "active slot in unreached region"
vt = widenconst(typ)
if vt !== Bottom
id = slot_id(lhs)
otherTy = slottypes[id]
if otherTy === Bottom
slottypes[id] = vt
elseif otherTy === Any
slottypes[id] = Any
else
slottypes[id] = tmerge(otherTy, vt)
end
end
end
end
end
sv.src.slottypes = slottypes
return nothing
end
function record_bestguess!(sv::InferenceState)
bestguess = sv.bestguess
@assert !(bestguess isa LimitedAccuracy)
sv.src.rettype = bestguess
return nothing
end
function annotate_slot_load!(interp::AbstractInterpreter, undefs::Vector{Bool}, idx::Int, sv::InferenceState, @nospecialize x)
if isa(x, SlotNumber)
id = slot_id(x)
pc = find_dominating_assignment(id, idx, sv)
if pc === nothing
block = block_for_inst(sv.cfg, idx)
state = sv.bb_vartables[block]::VarTable
vt = state[id]
undefs[id] |= vt.undef
typ = widenslotwrapper(ignorelimited(vt.typ))
else
typ = sv.ssavaluetypes[pc]
@assert typ !== NOT_FOUND "active slot in unreached region"
end
# add type annotations where needed
if !⊑(typeinf_lattice(interp), sv.slottypes[id], typ)
return TypedSlot(id, typ)
end
return x
elseif isa(x, Expr)
head = x.head
i0 = 1
if is_meta_expr_head(head) || head === :const
return x
end
if head === :(=) || head === :method
i0 = 2
end
for i = i0:length(x.args)
x.args[i] = annotate_slot_load!(interp, undefs, idx, sv, x.args[i])
end
return x
elseif isa(x, ReturnNode) && isdefined(x, :val)
return ReturnNode(annotate_slot_load!(interp, undefs, idx, sv, x.val))
elseif isa(x, GotoIfNot)
return GotoIfNot(annotate_slot_load!(interp, undefs, idx, sv, x.cond), x.dest)
end
return x
end
# find the dominating assignment to the slot `id` in the block containing statement `idx`,
# or return `nothing` if there is none
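# For illustration (hypothetical statement numbers and slot names): given a block containing
#   %10  x = 1
#   %11  y = x
#   %12  x = 2
#   %13  z = x
# a query for slot `x` with `idx` at %13 returns the pc of %12, a query at %11 returns %10,
# and a query at %10 itself returns `nothing` (an assignment at `idx` does not count).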
function find_dominating_assignment(id::Int, idx::Int, sv::InferenceState)
block = block_for_inst(sv.cfg, idx)
for pc in reverse(sv.cfg.blocks[block].stmts) # N.B. reverse since the last assignment is dominating this block
pc < idx || continue # N.B. needs pc ≠ idx as `id` can be assigned at `idx`
stmt = sv.src.code[pc]
isexpr(stmt, :(=)) || continue
lhs = stmt.args[1]
isa(lhs, SlotNumber) || continue
slot_id(lhs) == id || continue
return pc
end
return nothing
end
# annotate types of all symbols in AST, preparing for optimization
function type_annotate!(interp::AbstractInterpreter, sv::InferenceState, run_optimizer::Bool)
# widen `Conditional`s from `slottypes`
slottypes = sv.slottypes
for i = 1:length(slottypes)
slottypes[i] = widenconditional(slottypes[i])
end
# compute the required type for each slot
# to hold all of the items assigned into it
record_slot_assign!(sv)
record_bestguess!(sv)
# annotate the types of variable loads,
# run the dead-code-removal optimization,
# and compute which variables may be used undef
stmt_info = sv.stmt_info
src = sv.src
body = src.code
nexpr = length(body)
codelocs = src.codelocs
ssavaluetypes = sv.ssavaluetypes
ssaflags = src.ssaflags
slotflags = src.slotflags
nslots = length(slotflags)
undefs = fill(false, nslots)
any_unreachable = false
# this statement traversal does five things:
# 1. introduce temporary `TypedSlot`s that are supposed to be replaced with π-nodes later
# 2. mark used-undef slots (required by the `slot2reg` conversion)
# 3. mark unreached statements for a bulk code deletion (see issue #7836)
# 4. widen slot wrappers (`Conditional` and `MustAlias`) and remove `NOT_FOUND` from `ssavaluetypes`
# NOTE because of this, `was_reached` will no longer be available after this point
# 5. eliminate GotoIfNot if either branch target is unreachable
changemap = nothing # initialized if there is any dead region
for i = 1:nexpr
expr = body[i]
if was_reached(sv, i)
if run_optimizer
if isa(expr, GotoIfNot) && widenconst(argextype(expr.cond, src, sv.sptypes)) === Bool
# 5: replace this live GotoIfNot with:
# - GotoNode if the fallthrough target is unreachable
# - no-op if the branch target is unreachable
if !was_reached(sv, i+1)
expr = GotoNode(expr.dest)
elseif !was_reached(sv, expr.dest)
expr = nothing
end
end
end
body[i] = annotate_slot_load!(interp, undefs, i, sv, expr) # 1&2
ssavaluetypes[i] = widenslotwrapper(ssavaluetypes[i]) # 4
else # i.e. any runtime execution will never reach this statement
any_unreachable = true
if is_meta_expr(expr) # keep any lexically scoped expressions
ssavaluetypes[i] = Any # 4
else
ssavaluetypes[i] = Bottom # 4
body[i] = Const(expr) # annotate that this statement actually is dead
end
end
end
# finish marking used-undef variables
for j = 1:nslots
if undefs[j]
slotflags[j] |= SLOT_USEDUNDEF | SLOT_STATICUNDEF
end
end
return any_unreachable
end
# at the end, all items in b's cycle
# will now be added to a's cycle
function union_caller_cycle!(a::InferenceState, b::InferenceState)
callers_in_cycle = b.callers_in_cycle
b.parent = a.parent
b.callers_in_cycle = a.callers_in_cycle
contains_is(a.callers_in_cycle, b) || push!(a.callers_in_cycle, b)
if callers_in_cycle !== a.callers_in_cycle
for caller in callers_in_cycle
if caller !== b
caller.parent = a.parent
caller.callers_in_cycle = a.callers_in_cycle
push!(a.callers_in_cycle, caller)
end
end
end
return
end
function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, ancestor::InferenceState, child::InferenceState)
# add backedge of parent <- child
# then add all backedges of parent <- parent.parent
# and merge all of the callers into ancestor.callers_in_cycle
# and ensure that walking the parent list will get the same result (DAG) from everywhere
while true
add_cycle_backedge!(parent, child, parent.currpc)
union_caller_cycle!(ancestor, child)
child = parent
child === ancestor && break
parent = frame_parent(child)
while !isa(parent, InferenceState)
# XXX we may miss some edges here?
parent = frame_parent(parent::IRInterpretationState)
end
parent = parent::InferenceState
end
end
function is_same_frame(interp::AbstractInterpreter, mi::MethodInstance, frame::InferenceState)
return mi === frame_instance(frame)
end
function poison_callstack!(infstate::InferenceState, topmost::InferenceState)
push!(infstate.pclimitations, topmost)
nothing
end
# Walk through `mi`'s upstream call chain, starting at `parent`. If a parent
# frame matching `mi` is encountered, then there is a cycle in the call graph
# (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle,
# we "resolve" it by merging the call chain, which entails unioning each intermediary
# frame's `callers_in_cycle` field and adding the appropriate backedges. Finally,
# we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is
# returned instead.
function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
# TODO (#48913) implement a proper recursion handling for irinterp:
# This works just because currently the `:terminate` condition guarantees that
# irinterp doesn't fail into unresolved cycles, but it's not a good solution.
# We should revisit this once we have a better story for handling cycles in irinterp.
isa(parent, InferenceState) || return false
frame = parent
uncached = false
while isa(frame, InferenceState)
uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle
if is_same_frame(interp, mi, frame)
if uncached
# our attempt to speculate into a constant call led to an undesired self-cycle
# that cannot converge: poison our call-stack (up to the discovered duplicate frame)
# with the limited flag and abort (set return type to Any) now
poison_callstack!(parent, frame)
return true
end
merge_call_chain!(interp, parent, frame, frame)
return frame
end
for caller in callers_in_cycle(frame)
if is_same_frame(interp, mi, caller)
if uncached
poison_callstack!(parent, frame)
return true
end
merge_call_chain!(interp, parent, frame, caller)
return caller
end
end
frame = frame_parent(frame)
end
return false
end
generating_sysimg() = ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0
ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits)
struct EdgeCallResult
rt #::Type
edge::Union{Nothing,MethodInstance}
effects::Effects
function EdgeCallResult(@nospecialize(rt),
edge::Union{Nothing,MethodInstance},
effects::Effects)
return new(rt, edge, effects)
end
end
# compute (and cache) an inferred AST and return the current best estimate of the result type
function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
mi = specialize_method(method, atype, sparams)::MethodInstance
code = get(code_cache(interp), mi, nothing)
if code isa CodeInstance # return existing rettype if the code is already inferred
inferred = @atomic :monotonic code.inferred
if inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller))
# we already inferred this edge before and decided to discard the inferred code,
# nevertheless we re-infer it here and keep it around in the local cache,
# since the inliner will request it later
cache = :local
else
effects = ipo_effects(code)
update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
rettype = code.rettype
if isdefined(code, :rettype_const)
rettype_const = code.rettype_const
# the second subtyping/egal conditions are necessary to distinguish usual cases
# from rare cases in which `Const` wrapped those extended lattice type objects
if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
rettype = PartialStruct(rettype, rettype_const)
elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure
rettype = rettype_const
elseif isa(rettype_const, InterConditional) && rettype !== InterConditional
rettype = rettype_const
elseif isa(rettype_const, InterMustAlias) && rettype !== InterMustAlias
rettype = rettype_const
else
rettype = Const(rettype_const)
end
end
return EdgeCallResult(rettype, mi, effects)
end
else
cache = :global # cache edge targets by default
end
if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg()
add_remark!(interp, caller, "Inference is disabled for the target module")
return EdgeCallResult(Any, nothing, Effects())
end
if !is_cached(caller) && frame_parent(caller) === nothing
# this caller exists to return to the user
# (if we asked resolve_call_cycle!, it might instead detect that there is a cycle that it can't merge)
frame = false
else
frame = resolve_call_cycle!(interp, mi, caller)
end
if frame === false
# completely new
lock_mi_inference(interp, mi)
result = InferenceResult(mi, typeinf_lattice(interp))
frame = InferenceState(result, cache, interp) # always use the cache for edge targets
if frame === nothing
add_remark!(interp, caller, "Failed to retrieve source")
# can't get the source for this, so we know nothing
unlock_mi_inference(interp, mi)
return EdgeCallResult(Any, nothing, Effects())
end
if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution
frame.parent = caller
end
typeinf(interp, frame)
update_valid_age!(caller, frame.valid_worlds)
edge = is_inferred(frame) ? mi : nothing
return EdgeCallResult(frame.bestguess, edge, frame.ipo_effects) # effects are adjusted already within `finish`
elseif frame === true
# unresolvable cycle
return EdgeCallResult(Any, nothing, Effects())
end
# return the current knowledge about this cycle
frame = frame::InferenceState
update_valid_age!(caller, frame.valid_worlds)
return EdgeCallResult(frame.bestguess, nothing, adjust_effects(frame))
end
#### entry points for inferring a MethodInstance given a type signature ####
# compute an inferred AST and return type
function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
frame = typeinf_frame(interp, method, atype, sparams, run_optimizer)
frame === nothing && return nothing, Any
is_inferred(frame) || return nothing, Any
code = frame.src
rt = widenconst(ignorelimited(frame.result.result))
return code, rt
end
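# Illustrative usage sketch (comment only, hypothetical call from an external session; assumes
# the target method has no static parameters, hence the empty `svec`):
#
#   interp = Core.Compiler.NativeInterpreter()
#   m = first(methods(sin, (Float64,)))
#   src, rt = Core.Compiler.typeinf_code(interp, m, Tuple{typeof(sin),Float64}, Core.svec(),
#                                        #=run_optimizer=#true)
#   # `src` is the inferred CodeInfo for that specialization and `rt` its return type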
"""
typeinf_ircode(
interp::AbstractInterpreter,
method::Method,
atype,
sparams::SimpleVector,
optimize_until::Union{Integer,AbstractString,Nothing},
) -> (ir::Union{IRCode,Nothing}, returntype::Type)
Infer a `method` and return an `IRCode` with inferred `returntype` on success.
"""
function typeinf_ircode(
interp::AbstractInterpreter,
method::Method,
@nospecialize(atype),
sparams::SimpleVector,
optimize_until::Union{Integer,AbstractString,Nothing},
)
start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
frame = typeinf_frame(interp, method, atype, sparams, false)
if frame === nothing
ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
return nothing, Any
end
(; result) = frame
opt = OptimizationState(frame, interp)
ir = run_passes(opt.src, opt, result, optimize_until)
rt = widenconst(ignorelimited(result.result))
ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
return ir, rt
end
# compute an inferred frame
function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
mi = specialize_method(method, atype, sparams)::MethodInstance
start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
result = InferenceResult(mi, typeinf_lattice(interp))
frame = InferenceState(result, run_optimizer ? :global : :no, interp)
frame === nothing && return nothing
typeinf(interp, frame)
ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
return frame
end
# compute (and cache) an inferred AST and return type
function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
method = mi.def::Method
start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
code = get(code_cache(interp), mi, nothing)
if code isa CodeInstance
# see if this code already exists in the cache