73 lines
2.0 KiB
Plaintext
73 lines
2.0 KiB
Plaintext
load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory")
|
|
load("@zml//bazel:cc_import.bzl", "cc_import")
|
|
load("@zml//bazel:patchelf.bzl", "patchelf")
|
|
|
|
# Links @zml//runtimes/cuda:zmlxcuda_lib into a shared object that is emitted
# as lib/libzmlxcuda.so.0.
cc_shared_library(
    name = "zmlxcuda_so",
    deps = ["@zml//runtimes/cuda:zmlxcuda_lib"],
    shared_lib_name = "lib/libzmlxcuda.so.0",
)
|
|
|
|
# Post-processes libpjrt_cuda.so with the patchelf rule from
# @zml//bazel:patchelf.bzl:
#   - adds libzmlxcuda.so.0 to the needed-library list,
#   - renames the dynamic symbol `dlopen` to `zmlxcuda_dlopen`,
#   - sets the rpath to $ORIGIN.
# NOTE(review): presumably this routes the plugin's dlopen calls through a
# wrapper exported by libzmlxcuda.so.0 -- confirm against the zmlxcuda sources.
patchelf(
    name = "libpjrt_cuda.patchelf",
    src = "libpjrt_cuda.so",
    set_rpath = "$ORIGIN",
    add_needed = ["libzmlxcuda.so.0"],
    rename_dynamic_symbols = {"dlopen": "zmlxcuda_dlopen"},
)
|
|
|
|
# Gathers the patched PJRT plugin, the zmlxcuda shared library and the listed
# CUDA / cuDNN / NCCL / zlib targets into a single output directory.
copy_to_directory(
    name = "sandbox",
    # The sources all come from external repositories, so every external
    # repository is allowed through the filter.
    include_external_repositories = ["**"],
    add_directory_to_runfiles = False,
    # Path prefixes rewritten so the matching files end up under "lib".
    replace_prefixes = {
        "nvidia/nccl/lib": "lib",
        "nvvm/lib64": "lib",
        "libpjrt_cuda.patchelf": "lib",
        "lib/x86_64-linux-gnu": "lib",
    },
    srcs = [
        # Libraries produced by this package.
        ":zmlxcuda_so",
        ":libpjrt_cuda.patchelf",
        # Targets from the @cuda_nvcc repository.
        "@cuda_nvcc//:libdevice",
        "@cuda_nvcc//:ptxas",
        "@cuda_nvcc//:nvlink",
        # Shared objects exported by the remaining external repositories.
        "@cuda_cupti//:so_files",
        "@cuda_nvtx//:so_files",
        "@cuda_nvcc//:so_files",
        "@cuda_nvrtc//:so_files",
        "@cuda_cudart//:so_files",
        "@cudnn//:so_files",
        "@libcublas//:so_files",
        "@libcufft//:so_files",
        "@libcusolver//:so_files",
        "@libcusparse//:so_files",
        "@libnvjitlink//:so_files",
        "@nccl//:so_files",
        "@zlib1g",
    ],
)
|
|
|
|
# Library target that carries the ":sandbox" directory as runtime data and
# depends on @cuda_cudart//:cuda. Visible only to @zml//runtimes/cuda and its
# subpackages.
cc_library(
    name = "libpjrt_cuda",
    visibility = ["@zml//runtimes/cuda:__subpackages__"],
    deps = ["@cuda_cudart//:cuda"],
    data = [":sandbox"],
    linkopts = [
        # Use lazy binding: a function call is resolved the first time the
        # function is called instead of when the library is loaded.
        #
        # This is needed so that downstream code can use weak CUDA symbols.
        #
        # It is forced explicitly because -z,now (which resolves all symbols
        # at load time) is the default in most Bazel CC toolchains as well as
        # in certain linkers.
        "-Wl,-z,lazy",
    ],
)
|