Skip to content
This repository was archived by the owner on May 17, 2020. It is now read-only.

don't overdub IntrinsicFunctions #70

Merged
merged 1 commit into from
Jun 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions src/context.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ end

const INTERACTIVE = haskey(ENV, "GPUIFYLOOPS_INTERACTIVE") && ENV["GPUIFYLOOPS_INTERACTIVE"] == "1"

"""
    ir_element(x, code::Vector)

Resolve `x` to the IR element it ultimately refers to.

While `x` is a `Core.SSAValue`, it is replaced by the statement stored at
`code[x.id]`; the first value that is not an `SSAValue` is returned. A non-SSA
`x` is returned unchanged.
"""
function ir_element(x, code::Vector)
    resolved = x
    while true
        # Stop as soon as we reach something that is not an SSA reference.
        resolved isa Core.SSAValue || return resolved
        resolved = code[resolved.id]
    end
end


##
# Forces inlining on everything that is not marked `@noinline`.
# Cassette has a #265 issue; let's try to work around that.
Expand All @@ -47,6 +55,34 @@ function transform(ctx, ref)
Expr(:call, Expr(:nooverdub, Core.SlotNumber(1)), (Core.SlotNumber(i) for i in 2:ref.method.nargs)...),
x] : nothing)
end

# overdubbing IntrinsicFunctions removes our ability to profile code
# Statement-rewriting callback passed to `Cassette.insert_statements!` below.
# It always returns the one-element vector `[x]`; the statement may be mutated in
# place first.
#
# If `x` (or its right-hand side when `x` is an assignment) is a `:call` whose
# callee resolves to a `Core.IntrinsicFunction` or `Core.Builtin`, the callee is
# wrapped in `Expr(:nooverdub, ...)`. For calls going through `Core._apply` the
# callee is taken from the second argument slot instead of the first.
newstmt = (x, i) -> begin
    stmt = Base.Meta.isexpr(x, :(=)) ? x.args[2] : x
    if Base.Meta.isexpr(stmt, :call)
        applycall = Cassette.is_ir_element(stmt.args[1], GlobalRef(Core, :_apply), CI.code)
        # Position of the callee within `stmt.args`.
        fidx = applycall ? 2 : 1
        f = ir_element(stmt.args[fidx], CI.code)
        # Only look up bindings that actually exist: `getfield` on an undefined
        # global would throw `UndefVarError` and abort the whole pass.
        if f isa GlobalRef && isdefined(f.mod, f.name)
            ff = getfield(f.mod, f.name)
            if ff isa Core.IntrinsicFunction || ff isa Core.Builtin
                stmt.args[fidx] = Expr(:nooverdub, f)
            end
        end
    end
    return [x]
end

Cassette.insert_statements!(CI.code, CI.codelocs, (x, i) -> 1, newstmt)
CI.ssavaluetypes = length(CI.code)
# Core.Compiler.validate_code(CI)
return CI
Expand Down
24 changes: 24 additions & 0 deletions test/test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,30 @@ end
@test occursin(r"call .* double @__nv_sin", asm)
@test occursin("fadd contract double", asm)

# Checks that intrinsic/builtin calls in the typed IR of a contextualized kernel
# are not attributed to a method named `call` or to a file named `context.jl`.
@testset "don't overdub intrinsics" begin
    global simple_kernel, kernel
    function simple_kernel(A, x)
        A[1] = 1 + x
        return nothing
    end
    function kernel(A, x)
        return GPUifyLoops.contextualize(simple_kernel)(A, x)
    end
    CI, _ = first(CUDAnative.code_typed(kernel, Tuple{CUDAnative.CuDeviceArray{Int64,1, CUDAnative.AS.Global}, Int64}, debuginfo=:source))

    # True for `:call` statements whose callee is a GlobalRef bound to an
    # intrinsic or builtin function.
    isintrinsiccall = stmt -> begin
        Base.Meta.isexpr(stmt, :call) || return false
        callee = stmt.args[1]
        callee isa GlobalRef || return false
        target = getfield(callee.mod, callee.name)
        return target isa Core.IntrinsicFunction || target isa Core.Builtin
    end
    intrinsics = findall(isintrinsiccall, CI.code)

    for idx in intrinsics
        loc = CI.linetable[CI.codelocs[idx]]
        # NOTE(review): `file` is compared against the bare name "context.jl";
        # confirm line info stores a bare file name rather than a full path.
        @test loc.method !== :call && loc.file !== Symbol("context.jl")
    end
end

begin
global kernel2!
data = rand(Float32, 1024)
Expand Down