Update CUDA runtime sandboxing and dynamic symbol renaming, switch to the pre‑built jax‑cuda‑pjrt plugin, and bump CUDA to 12.6.3 and cuDNN to 9.5.1.

This commit is contained in:
Tarry Singh 2023-09-14 13:28:25 +00:00
parent 4abdd32f0d
commit 0d5389ceda
9 changed files with 1308 additions and 121 deletions

View File

@ -29,14 +29,14 @@ RUNTIMES = {
]
write_file(
name = "zml.txt",
name = "zml_txt",
out = "zml.txt",
content = ["ZML loves you <3"],
)
tar(
name = "empty",
srcs = [":zml.txt"],
srcs = [":zml_txt"],
mtree = "auto",
)

View File

@ -1,5 +1,11 @@
load("@rules_zig//zig:defs.bzl", "zig_library")
cc_library(
name = "zmlxcuda_lib",
srcs = ["zmlxcuda.c"],
visibility = ["@libpjrt_cuda//:__subpackages__"],
)
cc_library(
name = "empty",
)

View File

@ -4,124 +4,133 @@ load("//bazel:http_deb_archive.bzl", "http_deb_archive")
ARCH = "linux-x86_64"
CUDA_VERSION = "12.6.2"
CUDNN_VERSION = "9.4.0"
CUDA_VERSION = "12.6.3"
CUDNN_VERSION = "9.5.1"
_CC_IMPORT_TPL = """\
cc_import(
name = "{name}",
shared_library = "lib/{shared_library}",
def _filegroup(name, srcs):
return """\
filegroup(
name = {name},
srcs = {srcs},
visibility = ["@libpjrt_cuda//:__subpackages__"],
)
"""
""".format(name = repr(name), srcs = repr(srcs))
def _cc_import(name, shared_library, deps = []):
return """\
cc_import(
name = {name},
shared_library = {shared_library},
deps = {deps},
visibility = ["@libpjrt_cuda//:__subpackages__"],
)
""".format(name = repr(name), shared_library = repr(shared_library), deps = repr(deps))
CUDA_PACKAGES = {
"cuda_cudart": _CC_IMPORT_TPL.format(name = "cudart", shared_library = "libcudart.so.12"),
"cuda_cupti": _CC_IMPORT_TPL.format(name = "cupti", shared_library = "libcupti.so.12"),
"libcufft": _CC_IMPORT_TPL.format(name = "cufft", shared_library = "libcufft.so.11"),
"libcusolver": _CC_IMPORT_TPL.format(name = "cusolver", shared_library = "libcusolver.so.11"),
"libcusparse": _CC_IMPORT_TPL.format(name = "cusparse", shared_library = "libcusparse.so.12"),
"libnvjitlink": _CC_IMPORT_TPL.format(name = "nvjitlink", shared_library = "libnvJitLink.so.12"),
"cuda_nvcc": """\
filegroup(
name = "ptxas",
srcs = ["bin/ptxas"],
visibility = ["@libpjrt_cuda//:__subpackages__"],
)
filegroup(
name = "libdevice",
srcs = ["nvvm/libdevice/libdevice.10.bc"],
visibility = ["@libpjrt_cuda//:__subpackages__"],
)
cc_import(
name = "nvvm",
shared_library = "nvvm/lib64/libnvvm.so.4",
visibility = ["@libpjrt_cuda//:__subpackages__"],
)
""",
"cuda_nvrtc": """\
cc_import(
name = "nvrtc",
shared_library = "lib/libnvrtc.so.12",
visibility = ["@libpjrt_cuda//:__subpackages__"],
deps = [":nvrtc_builtins"],
)
cc_import(
name = "nvrtc_builtins",
shared_library = "lib/libnvrtc-builtins.so.12.6",
)
""",
"libcublas": """\
cc_import(
name = "cublasLt",
shared_library = "lib/libcublasLt.so.12",
)
cc_import(
name = "cublas",
shared_library = "lib/libcublas.so.12",
visibility = ["@libpjrt_cuda//:__subpackages__"],
deps = [":cublasLt"],
)
""",
"cuda_cudart": _cc_import(
name = "cudart",
shared_library = "lib/libcudart.so.12",
),
"cuda_cupti": _cc_import(
name = "cupti",
shared_library = "lib/libcupti.so.12",
),
"libcufft": _cc_import(
name = "cufft",
shared_library = "lib/libcufft.so.11",
),
"libcusolver": _cc_import(
name = "cusolver",
shared_library = "lib/libcusolver.so.11",
),
"libcusparse": _cc_import(
name = "cusparse",
shared_library = "lib/libcusparse.so.12",
),
"libnvjitlink": _cc_import(
name = "nvjitlink",
shared_library = "lib/libnvJitLink.so.12",
),
"cuda_nvcc": "\n".join([
_filegroup(
name = "ptxas",
srcs = ["bin/ptxas"],
),
_filegroup(
name = "libdevice",
srcs = ["nvvm/libdevice/libdevice.10.bc"],
),
_cc_import(name = "nvvm", shared_library = "nvvm/lib64/libnvvm.so.4"),
]),
"cuda_nvrtc": "\n".join([
_cc_import(
name = "nvrtc",
shared_library = "lib/libnvrtc.so.12",
deps = [":nvrtc_builtins"],
),
_cc_import(
name = "nvrtc_builtins",
shared_library = "lib/libnvrtc-builtins.so.12.6",
),
]),
"libcublas": "\n".join([
_cc_import(
name = "cublasLt",
shared_library = "lib/libcublasLt.so.12",
),
_cc_import(
name = "cublas",
shared_library = "lib/libcublas.so.12",
deps = [":cublasLt"],
),
]),
}
CUDNN_PACKAGES = {
"cudnn": """\
cc_import(
name = "cudnn",
shared_library = "lib/libcudnn.so.9",
visibility = ["@libpjrt_cuda//:__subpackages__"],
deps = [
":cudnn_adv",
":cudnn_ops",
":cudnn_cnn",
":cudnn_graph",
":cudnn_engines_precompiled",
":cudnn_engines_runtime_compiled",
":cudnn_heuristic",
],
)
cc_import(
name = "cudnn_adv",
shared_library = "lib/libcudnn_adv.so.9",
)
cc_import(
name = "cudnn_ops",
shared_library = "lib/libcudnn_ops.so.9",
)
cc_import(
name = "cudnn_cnn",
shared_library = "lib/libcudnn_cnn.so.9",
deps = [":cudnn_ops"],
)
cc_import(
name = "cudnn_graph",
shared_library = "lib/libcudnn_graph.so.9",
)
cc_import(
name = "cudnn_engines_precompiled",
shared_library = "lib/libcudnn_engines_precompiled.so.9",
)
cc_import(
name = "cudnn_engines_runtime_compiled",
shared_library = "lib/libcudnn_engines_runtime_compiled.so.9",
)
cc_import(
name = "cudnn_heuristic",
shared_library = "lib/libcudnn_heuristic.so.9",
)
""",
"cudnn": "\n".join([
_cc_import(
name = "cudnn",
shared_library = "lib/libcudnn.so.9",
deps = [
":cudnn_adv",
":cudnn_ops",
":cudnn_cnn",
":cudnn_graph",
":cudnn_engines_precompiled",
":cudnn_engines_runtime_compiled",
":cudnn_heuristic",
],
),
_cc_import(
name = "cudnn_adv",
shared_library = "lib/libcudnn_adv.so.9",
),
_cc_import(
name = "cudnn_ops",
shared_library = "lib/libcudnn_ops.so.9",
),
_cc_import(
name = "cudnn_cnn",
shared_library = "lib/libcudnn_cnn.so.9",
deps = [":cudnn_ops"],
),
_cc_import(
name = "cudnn_graph",
shared_library = "lib/libcudnn_graph.so.9",
),
_cc_import(
name = "cudnn_engines_precompiled",
shared_library = "lib/libcudnn_engines_precompiled.so.9",
),
_cc_import(
name = "cudnn_engines_runtime_compiled",
shared_library = "lib/libcudnn_engines_runtime_compiled.so.9",
),
_cc_import(
name = "cudnn_heuristic",
shared_library = "lib/libcudnn_heuristic.so.9",
),
]),
}
def _cuda_impl(mctx):
@ -157,8 +166,8 @@ def _cuda_impl(mctx):
http_deb_archive(
name = "libnccl",
urls = ["https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libnccl2_2.22.3-1+cuda12.6_amd64.deb"],
sha256 = "2f64685bcd503150ab45d00503236a56da58a15eac5fd36508045a74f4e10678",
urls = ["https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/libnccl2_2.23.4-1+cuda12.6_amd64.deb"],
sha256 = "161e6da03d5faf8f5661a46d63ad524802464b24eadf182cfb4460a8675b2376",
build_file_content = """\
cc_import(
name = "nccl",
@ -169,8 +178,8 @@ cc_import(
)
http_deb_archive(
name = "zlib",
urls = ["http://archive.ubuntu.com/ubuntu/pool/main/z/zlib/zlib1g_1.3.dfsg-3.1ubuntu2.1_amd64.deb"],
sha256 = "7074b6a2f6367a10d280c00a1cb02e74277709180bab4f2491a2f355ab2d6c20",
urls = ["https://snapshot-cloudflare.debian.org/archive/debian/20241127T143620Z/pool/main/z/zlib/zlib1g_1.3.dfsg%2Breally1.3.1-1%2Bb1_amd64.deb"],
sha256 = "015be740d6236ad114582dea500c1d907f29e16d6db00566ca32fb68d71ac90d",
build_file_content = """\
cc_import(
name = "zlib",
@ -183,8 +192,9 @@ cc_import(
http_archive(
name = "libpjrt_cuda",
build_file = "libpjrt_cuda.BUILD.bazel",
url = "https://github.com/zml/pjrt-artifacts/releases/download/v3.0.0/pjrt-cuda_linux-amd64.tar.gz",
sha256 = "1af968c5357b0b78e43416e2b583512d203aa67a770c6b7e616006e7dd63aecc",
url = "https://files.pythonhosted.org/packages/d7/aa/f15ea857ad9bcff7a0c942dc570ca718b026cc0cc5c513525bb08cacf3c0/jax_cuda12_pjrt-0.4.35-py3-none-manylinux2014_x86_64.whl",
type = "zip",
sha256 = "0ffe7e1ba65659bd5738c2cc5addaf0a56205d2188eec5da194b63c068e1fdd2",
)
return mctx.extension_metadata(

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,7 @@ pub fn isEnabled() bool {
}
fn hasNvidiaDevice() bool {
asynk.File.access("/dev/nvidia0", .{ .mode = .read_only }) catch return false;
asynk.File.access("/dev/nvidiactl", .{ .mode = .read_only }) catch return false;
return true;
}

View File

@ -0,0 +1,77 @@
{
"release_date": "2024-10-25",
"release_label": "9.5.1",
"release_product": "cudnn",
"cudnn": {
"name": "NVIDIA CUDA Deep Neural Network library",
"license": "cudnn",
"license_path": "cudnn/LICENSE.txt",
"version": "9.5.1.17",
"linux-x86_64": {
"cuda11": {
"relative_path": "cudnn/linux-x86_64/cudnn-linux-x86_64-9.5.1.17_cuda11-archive.tar.xz",
"sha256": "b1f5050cd2bfd7fa9d3d0dd00d417cc2124692d8421295e12f841be6c8e3a426",
"md5": "5da3b0533fcd3d6a9020d08f3b78ddba",
"size": "736935276"
},
"cuda12": {
"relative_path": "cudnn/linux-x86_64/cudnn-linux-x86_64-9.5.1.17_cuda12-archive.tar.xz",
"sha256": "35dd20b9c68324ae1288ac36f66ab1f318d2bfecfafb703a82617aa283272be4",
"md5": "a8604f6b80f42ec60e98ba9c8f681572",
"size": "744697316"
}
},
"cuda_variant": [
"11",
"12"
],
"linux-sbsa": {
"cuda11": {
"relative_path": "cudnn/linux-sbsa/cudnn-linux-sbsa-9.5.1.17_cuda11-archive.tar.xz",
"sha256": "ad68d12ee351b5f3478078fc8188eefb8712721c3e501c9345ec5ffb0b85fae8",
"md5": "a9438457a47b2bca7951a19736e8d4e8",
"size": "735387008"
},
"cuda12": {
"relative_path": "cudnn/linux-sbsa/cudnn-linux-sbsa-9.5.1.17_cuda12-archive.tar.xz",
"sha256": "340c49b32c133b0321c5c5b00d14fb64887dcac83ee8fd24195d9191061f1ad7",
"md5": "83c9f3f9eddadd0c1941d7f3e763174c",
"size": "743147752"
}
},
"windows-x86_64": {
"cuda11": {
"relative_path": "cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda11-archive.zip",
"sha256": "8318e93ab017af2356d3b6cf35aab2238e2a51c426450842eb4ade12e4619bbb",
"md5": "b7c456ddab820ec335a724be7a969091",
"size": "554195447"
},
"cuda12": {
"relative_path": "cudnn/windows-x86_64/cudnn-windows-x86_64-9.5.1.17_cuda12-archive.zip",
"sha256": "3a4cecc8b6d6aa7f6777620e6f2c129b76be635357c4506f2c4ccdbe0e2a1641",
"md5": "fda9196a60fb8e2b4c78e8a19ff056a3",
"size": "557597538"
}
},
"linux-aarch64": {
"cuda12": {
"relative_path": "cudnn/linux-aarch64/cudnn-linux-aarch64-9.5.1.17_cuda12-archive.tar.xz",
"sha256": "0099b8e4081ac146f802e769cdd30d9e01a289ea0fd056e64e44297a13e1aa0c",
"md5": "9d20deeb313a05c442fbff036ca29581",
"size": "780854928"
}
}
},
"cudnn_samples": {
"name": "NVIDIA cuDNN samples",
"license": "cudnn",
"license_path": "cudnn_samples/LICENSE.txt",
"version": "9.5.1.17",
"source": {
"relative_path": "cudnn_samples/source/cudnn_samples-source-9.5.1.17-archive.tar.xz",
"sha256": "bb79dc528c6a3b2a019a60d4af13cb4cb3d56146b692b3f3badec3fd8bfc98e7",
"md5": "76fe86423261f1ae984b00b1de2e40f3",
"size": "1664836"
}
}
}

View File

@ -1,6 +1,17 @@
load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory")
load("@zml//bazel:cc_import.bzl", "cc_import")
cc_shared_library(
name = "zmlxcuda_so",
shared_lib_name = "libzmlxcuda.so.0",
deps = ["@zml//runtimes/cuda:zmlxcuda_lib"],
)
cc_import(
name = "zmlxcuda",
shared_library = ":zmlxcuda_so",
)
copy_to_directory(
name = "sandbox",
srcs = [
@ -13,9 +24,15 @@ copy_to_directory(
cc_import(
name = "libpjrt_cuda",
data = [":sandbox"],
shared_library = "libpjrt_cuda.so",
shared_library = "jax_plugins/xla_cuda12/xla_cuda_plugin.so",
soname = "libpjrt_cuda.so",
add_needed = ["libzmlxcuda.so.0"],
rename_dynamic_symbols = {
"dlopen": "zmlxcuda_dlopen",
},
visibility = ["@zml//runtimes/cuda:__subpackages__"],
deps = [
":zmlxcuda",
"@cuda_cudart//:cudart",
"@cuda_cupti//:cupti",
"@cuda_nvcc//:nvvm",

40
runtimes/cuda/zmlxcuda.c Normal file
View File

@ -0,0 +1,40 @@
#include <dlfcn.h>
#include <string.h>
/*
 * dlopen() wrapper that redirects unversioned CUDA library names to a fixed,
 * versioned SONAME before loading.
 *
 * filename: library name as it would be passed to dlopen(). Only an exact,
 *           whole-string match against the table below is rewritten; any
 *           other value (including names containing a directory component)
 *           is forwarded untouched. NULL is forwarded as-is.
 * flags:    passed through to dlopen() unchanged.
 *
 * Returns whatever dlopen() returns for the (possibly rewritten) name; on
 * failure the error is available through dlerror() as usual.
 */
void *zmlxcuda_dlopen(const char *filename, int flags)
{
    /*
     * Unversioned name -> versioned SONAME to load instead.
     * Kept static and const: the entries are string literals that must never
     * be written to, and the table does not need rebuilding on every call.
     */
    static const struct
    {
        const char *from;
        const char *to;
    } replacements[] = {
        {"libcublas.so", "libcublas.so.12"},
        {"libcublasLt.so", "libcublasLt.so.12"},
        {"libcudart.so", "libcudart.so.12"},
        {"libcudnn.so", "libcudnn.so.9"},
        {"libcufft.so", "libcufft.so.11"},
        {"libcupti.so", "libcupti.so.12"},
        {"libcusolver.so", "libcusolver.so.11"},
        {"libcusparse.so", "libcusparse.so.12"},
        {"libnccl.so", "libnccl.so.2"},
    };
    const size_t count = sizeof(replacements) / sizeof(replacements[0]);

    if (filename != NULL)
    {
        for (size_t i = 0; i < count; ++i)
        {
            if (strcmp(filename, replacements[i].from) == 0)
            {
                /* First match wins; the table holds no duplicate keys. */
                filename = replacements[i].to;
                break;
            }
        }
    }

    return dlopen(filename, flags);
}

View File

@ -214,7 +214,7 @@ const cuda = struct {
const StreamSynchronize = *const fn (stream: *Stream) callconv(.C) c_int;
pub fn init() !Runtime {
var cudart = try std.DynLib.open("libcudart.so");
var cudart = try std.DynLib.open("libcudart.so.12");
defer cudart.close();
return .{