Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add imaging_mode support (static compilation) #125

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ TimerOutputs = "0.5"
julia = "1.6"

[extras]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
SPIRV_LLVM_Translator_jll = "4a5d46fc-d8cf-5151-a261-86b458210efb"
SPIRV_Tools_jll = "6ac6d60f-d740-5983-97d7-a4482c0689f4"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll"]
test = ["Test", "SPIRV_LLVM_Translator_jll", "SPIRV_Tools_jll", "Distributed"]
37 changes: 37 additions & 0 deletions src/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,12 @@ const __llvm_initialized = Ref(false)
internalize!(pm, exports)

# eliminate all unused internal functions
add!(pm, ModulePass("ExternalizeJuliaGlobals",
externalize_julia_globals!))
global_optimizer!(pm)
global_dce!(pm)
add!(pm, ModulePass("InternalizeJuliaGlobals",
internalize_julia_globals!))
strip_dead_prototypes!(pm)

# merge constants (such as exception messages) from the runtime
Expand Down Expand Up @@ -309,6 +313,39 @@ const __llvm_initialized = Ref(false)
return ir, (; entry, compiled)
end

# Protect null globals from being killed and inlined
#
# Module pass callback: walks every global variable of `mod` and rewrites those that
#   * have internal linkage,
#   * carry a null-pointer initializer, and
#   * have a name starting with "jl_global" or "jl_sym".
# Each matching global is switched to external linkage, loses its initializer, and is
# marked via `extinit!(gbl, true)`.
#
# Returns `true` when at least one global was rewritten, `false` otherwise.
function externalize_julia_globals!(mod::LLVM.Module)
changed = false
for gbl in LLVM.globals(mod)
if LLVM.linkage(gbl) == LLVM.API.LLVMInternalLinkage &&
typeof(LLVM.initializer(gbl)) <: LLVM.PointerNull &&
(startswith(LLVM.name(gbl), "jl_global") ||
startswith(LLVM.name(gbl), "jl_sym"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why also jl_sym, not just jl_global (in a function that externalizes globals)?

# turn the global into an external declaration without an initializer
LLVM.linkage!(gbl, LLVM.API.LLVMExternalLinkage)
LLVM.initializer!(gbl, nothing)
LLVM.extinit!(gbl, true)
changed = true
end
end
changed
end
# And reset them back later
#
# Module pass callback, counterpart of `externalize_julia_globals!`: every global of
# `mod` that has external linkage, no initializer, and a name starting with "jl_global"
# or "jl_sym" has its `extinit` flag cleared, receives a null initializer of its value
# type, and is given internal linkage again.
#
# Returns `true` when at least one global was rewritten, `false` otherwise.
function internalize_julia_globals!(mod::LLVM.Module)
    modified = false
    for gv in LLVM.globals(mod)
        # cheap structural checks first, in the same order as the name checks follow
        LLVM.linkage(gv) == LLVM.API.LLVMExternalLinkage || continue
        LLVM.initializer(gv) === nothing || continue

        gvname = LLVM.name(gv)
        if startswith(gvname, "jl_global") || startswith(gvname, "jl_sym")
            LLVM.extinit!(gv, false)
            LLVM.initializer!(gv, null(eltype(llvmtype(gv))))
            LLVM.linkage!(gv, LLVM.API.LLVMInternalLinkage)
            modified = true
        end
    end
    return modified
end

@locked function emit_asm(@nospecialize(job::CompilerJob), ir::LLVM.Module;
strip::Bool=false, validate::Bool=true, format::LLVM.API.LLVMCodeGenFileType)
finish_module!(job, ir)
Expand Down
3 changes: 3 additions & 0 deletions src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,6 @@ function llvm_debug_info(@nospecialize(job::CompilerJob))
LLVM.API.LLVMDebugEmissionKindFullDebug
end
end

# Whether the given job should be compiled in imaging mode.
# Generic fallback for any `CompilerJob`: imaging mode is off.
function extern_policy(::CompilerJob)
    return false
end
4 changes: 4 additions & 0 deletions src/mcgen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ function prepare_execution!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
global current_job
current_job = job

add!(pm, ModulePass("ExternalizeJuliaGlobals",
externalize_julia_globals!))
global_optimizer!(pm)

add!(pm, ModulePass("ResolveCPUReferences", resolve_cpu_references!))

global_dce!(pm)
add!(pm, ModulePass("InternalizeJuliaGlobals",
internalize_julia_globals!))
strip_dead_prototypes!(pm)

run!(pm, mod)
Expand Down
11 changes: 9 additions & 2 deletions src/native.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ export NativeCompilerTarget
# Keyword-constructible compiler target (subtype of `AbstractCompilerTarget`).
#
# Fields:
#   cpu           - CPU name; defaults to LLVM's host CPU name, or "" when LLVM < 8
#   features      - CPU feature string; defaults to LLVM's host CPU features, or "" when LLVM < 8
#   always_inline - see inline comment below
#   reloc         - LLVM relocation mode; defaults to `LLVMRelocDefault`
#   extern        - boolean flag, off by default
#
# NOTE(review): `always_inline` is listed twice below; this looks like the before/after
# pair of a whitespace-only diff hunk rather than two real declarations -- confirm that
# the actual file declares the field only once.
Base.@kwdef struct NativeCompilerTarget <: AbstractCompilerTarget
cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName())
features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures())
always_inline::Bool=false # will mark the job function as always inline
always_inline::Bool=false # will mark the job function as always inline
reloc::LLVM.API.LLVMRelocMode=LLVM.API.LLVMRelocDefault
extern::Bool=false
end

llvm_triple(::NativeCompilerTarget) = Sys.MACHINE
Expand All @@ -17,7 +19,9 @@ function llvm_machine(target::NativeCompilerTarget)

t = Target(triple=triple)

tm = TargetMachine(t, triple, target.cpu, target.features)
optlevel = LLVM.API.LLVMCodeGenLevelDefault
reloc = target.reloc
tm = TargetMachine(t, triple, target.cpu, target.features, optlevel, reloc)
asm_verbosity!(tm, true)

return tm
Expand All @@ -30,6 +34,9 @@ function process_entry!(job::CompilerJob{NativeCompilerTarget}, mod::LLVM.Module
invoke(process_entry!, Tuple{CompilerJob, LLVM.Module, LLVM.Function}, job, mod, entry)
end

# Jobs targeting `NativeCompilerTarget` take their `extern_policy` from the
# target's `extern` field.
function GPUCompiler.extern_policy(job::CompilerJob{NativeCompilerTarget})
    return job.target.extern
end

## job

runtime_slug(job::CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))"
89 changes: 89 additions & 0 deletions test/native.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using Distributed
@testset "native" begin

include("definitions/native.jl")
Expand Down Expand Up @@ -262,6 +263,7 @@ end
occursin("[2] func", msg)
end
end
end

@testset "LazyCodegen" begin
import .LazyCodegen: call_delayed
Expand Down Expand Up @@ -311,7 +313,94 @@ end
@test call_delayed(complex, 1.0, 2.0) == 1.0+2.0im
end

# Start one additional worker process.
addprocs(1)

# On the worker processes only: load the packages and the native test definitions.
@everywhere workers() begin
using GPUCompiler
using Libdl
include("definitions/native.jl")
end
@everywhere begin
# Compile `f` for the argument types `tt` into an object file (written to a temporary
# file), then link that object into a shared library with `ld -shared`.
#
# `name` is passed to `FunctionSpec`; by default it is derived from `repr(f)`.
# Returns the tuple `(library_path, name)`, where `library_path` is the temporary
# path with the platform's `dlext` extension appended.
function generate_shlib(f, tt, name=GPUCompiler.safe_name(repr(f)))
    mktemp() do path, io
        spec = FunctionSpec(f, Base.to_tuple_type(tt), false, name)
        native_target = NativeCompilerTarget(; reloc=LLVM.API.LLVMRelocPIC, extern=true)
        job = CompilerJob(native_target, spec, TestCompilerParams())

        object_code, _ = GPUCompiler.codegen(
            :obj, job; strip=true, only_entry=false, validate=false
        )

        # the object must be fully on disk before the linker reads it
        write(io, object_code)
        flush(io)

        # FIXME: Be more portable
        libpath = "$path.$dlext"
        run(`ld -shared -o $libpath $path`)
        return libpath, name
    end
end
# Open the shared library at `path`, look up the symbol `julia_<name>` in it, and
# return the symbol's address. Removal of the library file is registered to run when
# the process exits.
function load_fptr(path, name)
    handle = dlopen("$path", Libdl.RTLD_LOCAL)
    address = dlsym(handle, "julia_$name")
    @assert address != C_NULL
    atexit() do
        rm("$path")
    end
    return address
end
# Convenience wrapper: build the shared library for `f`/`tt` with `generate_shlib`
# and immediately load its entry point with `load_fptr`.
function generate_shlib_fptr(f, tt, name=GPUCompiler.safe_name(repr(f)))
    libpath, symname = generate_shlib(f, tt, name)
    return load_fptr(libpath, symname)
end
end

# Shared-library emission tests; only defined on Julia >= 1.7.0-DEV.600 on Unix systems.
# Each test compiles a function with `generate_shlib`/`generate_shlib_fptr` and calls the
# resulting function pointer through `ccall`.
@static if VERSION >= v"1.7.0-DEV.600" && Sys.isunix()
@testset "shared library emission" begin
@testset "primitive types" begin
# compiled, loaded and called in this process
f1(x) = x+1
@test ccall(generate_shlib_fptr(f1, (Int,)), Int, (Int,), 1) == 2
# compiled in this process, but loaded and called on process 2
# NOTE(review): assumes the worker started by `addprocs(1)` has id 2 -- confirm
f2(x,y) = x+y
path, name = generate_shlib(f2, (Int,Int))
@test fetch(@spawnat 2 ccall(load_fptr(path, name), Int, (Int,Int), 1, 2)) == 3
end
@testset "runtime calls" begin
function f3()
# Something reasonably complicated
if isdir(homedir())
true
else
false
end
end
@test ccall(generate_shlib_fptr(f3, ()), Bool, ())
end
@testset "String/Symbol" begin
# String argument and String result
f4(str) = str*"!"
@test ccall(generate_shlib_fptr(f4, (String,)), String, (String,), "Hello") == "Hello!"

# Symbol constant as result
f5() = :asymbol
@test ccall(generate_shlib_fptr(f5, ()), Symbol, ()) == :asymbol

# Symbol argument compared against a Symbol constant
f6(x) = x == :asymbol ? true : false
@test ccall(generate_shlib_fptr(f6, (Symbol,)), Bool, (Symbol,), :asymbol)
@test !ccall(generate_shlib_fptr(f6, (Symbol,)), Bool, (Symbol,), :bsymbol)
end
@testset "closures" begin
y = [42.0]
function cf1(x)
x + y[1]
end
# the closure object itself is passed as the leading `Any` argument
@test ccall(generate_shlib_fptr(cf1, (Float64,)), Float64, (Any, Float64,), cf1, 1.0) == 43.0
end
@testset "mutation" begin
# stores `true` into `A` unless `sym` is `:asymbol`, in which case it stores `false`
function cf2(A, sym)
if sym != :asymbol
A[] = true
else
A[] = false
end
return nothing
end
A = Ref(false)
fptr = generate_shlib_fptr(cf2, (Base.RefValue{Bool}, Symbol))
ccall(fptr, Nothing, (Any, Symbol), A, :asymbol); @test !A[]
ccall(fptr, Nothing, (Any, Symbol), A, :bsymbol); @test A[]
end
end
end
# Remove worker process 2 again.
# NOTE(review): assumes the worker started by `addprocs(1)` received id 2 -- confirm.
rmprocs(2)

############################################################################################

Expand Down