# Radix/examples/llama/BUILD.bazel
load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")
load("@aspect_bazel_lib//lib:transitions.bzl", "platform_transition_filegroup")
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load", "oci_push")
load("@zml//bazel:zig.bzl", "zig_cc_binary")
# Core Llama example binary, built from Zig sources via ZML's zig_cc_binary macro.
# NOTE(review): the ":llama_lib" target consumed by the per-model cc_binary
# wrappers below is presumably generated by this macro — confirm in
# @zml//bazel:zig.bzl.
zig_cc_binary(
    name = "llama",
    srcs = ["llama.zig"],
    main = "main.zig",
    deps = [
        "//third_party/tigerbeetle:flags",
        "@zml//async",
        "@zml//stdx",
        "@zml//zml",
    ],
)
# Runs the llama binary against the Meta-Llama-3.1-8B-Instruct weights
# (sharded safetensors resolved through the index JSON) and tokenizer.
cc_binary(
    name = "Llama-3.1-8B-Instruct",
    args = [
        "--model=$(location @Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.1-8B-Instruct//:tokenizer)",
        "--num-heads=32",
        "--num-kv-heads=8",
        "--rope-freq-base=500000",
    ],
    data = [
        "@Meta-Llama-3.1-8B-Instruct//:model",
        "@Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.1-8B-Instruct//:tokenizer",
    ],
    deps = [":llama_lib"],
)
# Same runner, configured for the 70B variant (64 attention heads, 8 KV heads).
cc_binary(
    name = "Llama-3.1-70B-Instruct",
    args = [
        "--model=$(location @Meta-Llama-3.1-70B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.1-70B-Instruct//:tokenizer)",
        "--num-heads=64",
        "--num-kv-heads=8",
        "--rope-freq-base=500000",
    ],
    data = [
        "@Meta-Llama-3.1-70B-Instruct//:model",
        "@Meta-Llama-3.1-70B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.1-70B-Instruct//:tokenizer",
    ],
    deps = [":llama_lib"],
)
# OpenLLaMA-3B runner. Note: full multi-head attention here
# (num-kv-heads == num-heads == 32) and the classic 10000 RoPE base.
cc_binary(
    name = "OpenLLaMA-3B",
    args = [
        "--model=$(location @OpenLM-Research-OpenLLaMA-3B//:model)",
        "--tokenizer=$(location @OpenLM-Research-OpenLLaMA-3B//:tokenizer)",
        "--num-heads=32",
        "--num-kv-heads=32",
        "--rope-freq-base=10000",
    ],
    data = [
        "@OpenLM-Research-OpenLLaMA-3B//:model",
        "@OpenLM-Research-OpenLLaMA-3B//:tokenizer",
    ],
    deps = [":llama_lib"],
)
# TinyLlama 1.1B chat model runner (single safetensors file, no index).
cc_binary(
    name = "TinyLlama-1.1B-Chat",
    args = [
        "--model=$(location @TinyLlama-1.1B-Chat-v1.0//:model.safetensors)",
        "--tokenizer=$(location @TinyLlama-1.1B-Chat-v1.0//:tokenizer)",
        "--num-heads=32",
        "--num-kv-heads=4",
        "--rope-freq-base=10000",
    ],
    data = [
        "@TinyLlama-1.1B-Chat-v1.0//:model.safetensors",
        "@TinyLlama-1.1B-Chat-v1.0//:tokenizer",
    ],
    deps = [":llama_lib"],
)
# Karpathy "stories" 110M checkpoint. No head/RoPE flags are passed here;
# presumably the binary derives them from the checkpoint or uses defaults —
# TODO(review): confirm against the flag definitions in the Zig sources.
cc_binary(
    name = "TinyLlama-Stories-110M",
    args = [
        "--model=$(location @Karpathy-TinyLlama-Stories//:stories110M)",
        "--tokenizer=$(location @Karpathy-TinyLlama-Tokenizer//file)",
    ],
    data = [
        "@Karpathy-TinyLlama-Stories//:stories110M",
        "@Karpathy-TinyLlama-Tokenizer//file",
    ],
    deps = [":llama_lib"],
)
# Karpathy "stories" 15M checkpoint — smallest model, useful as a smoke test.
cc_binary(
    name = "TinyLlama-Stories-15M",
    args = [
        "--model=$(location @Karpathy-TinyLlama-Stories//:stories15M)",
        "--tokenizer=$(location @Karpathy-TinyLlama-Tokenizer//file)",
    ],
    data = [
        "@Karpathy-TinyLlama-Stories//:stories15M",
        "@Karpathy-TinyLlama-Tokenizer//file",
    ],
    deps = [":llama_lib"],
)
# Implementation test harness: compiles the same llama.zig as the ":llama"
# target with a test entry point, exercised against the 8B reference weights.
# Fix: added "@zml//stdx", which ":llama" declares for the identical
# llama.zig source but was missing here — without it this target cannot
# resolve the same imports the main binary needs.
# NOTE(review): "@zml//metax" is kept because test.zig may use it, but no
# sibling target depends on it — confirm it is not a typo for "@zml//stdx".
# NOTE(review): no --tokenizer flag or tokenizer data here, unlike the
# model runners above — presumably the test path skips tokenization; verify.
zig_cc_binary(
    name = "test-implementation",
    srcs = ["llama.zig"],
    args = [
        "--model=$(location @Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json)",
        "--num-heads=32",
        "--num-kv-heads=8",
        "--rope-freq-base=500000",
    ],
    data = [
        "@Meta-Llama-3.1-8B-Instruct//:model",
        "@Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json",
    ],
    main = "test.zig",
    deps = [
        "//third_party/tigerbeetle:flags",
        "@zml//async",
        "@zml//metax",
        "@zml//stdx",
        "@zml//zml",
    ],
)
# mtree manifest of the ":llama" binary (input to the tar rule below).
mtree_spec(
    name = "mtree",
    srcs = [":llama"],
)
# zstd-compressed tar layer of the llama binary, laid out per the mtree
# manifest; compression level pinned to 9 via libarchive options.
tar(
    name = "archive",
    srcs = [":llama"],
    args = [
        "--options",
        "zstd:compression-level=9",
    ],
    compress = "zstd",
    mtree = ":mtree",
)
# OCI image: distroless cc base + ZML runtime layers + the llama archive.
# The entrypoint path mirrors this package's location inside the tar layer.
oci_image(
    name = "image_",
    base = "@distroless_cc_debian12_debug",
    entrypoint = ["./{}/llama".format(package_name())],
    tars = [
        "@zml//runtimes:layers",
        ":archive",
    ],
)
# Force the image build through a linux/amd64 platform transition so the
# published artifact is independent of the host platform.
platform_transition_filegroup(
    name = "image",
    srcs = [":image_"],
    target_platform = "@zml//platforms:linux_amd64",
)
# Loads the image into a local container runtime as distroless/llama:latest.
oci_load(
    name = "load",
    image = ":image",
    repo_tags = ["distroless/llama:latest"],
)
# Pushes the image to Docker Hub (steeve/llama) tagged "latest".
oci_push(
    name = "push",
    image = ":image",
    remote_tags = ["latest"],
    repository = "index.docker.io/steeve/llama",
)