load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template")
load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")
load("@aspect_bazel_lib//lib:transitions.bzl", "platform_transition_filegroup")
load("@bazel_skylib//rules:native_binary.bzl", "native_test")
load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load", "oci_push")
load("@zml//bazel:zig.bzl", "zig_cc_binary")

# Llama example program, built from llama.zig with main.zig as entry point.
# NOTE(review): the launchers below depend on ":llama_lib", which is not
# declared in this file; presumably zig_cc_binary emits it as "<name>_lib".
# Confirm in @zml//bazel:zig.bzl.
zig_cc_binary(
    name = "llama",
    srcs = [
        "llama.zig",
    ],
    main = "main.zig",
    deps = [
        "@com_github_hejsil_clap//:clap",
        "@zml//async",
        "@zml//stdx",
        "@zml//zml",
    ],
)

# -----------------------------------------------------------------------------
# Per-model launchers.
#
# Each target links ":llama_lib" and presets --config / --weights / --tokenizer
# to files from the matching model repository. Every label used in a
# $(location ...) expression is also listed in `data`.
# NOTE(review): "no_ci" presumably keeps these targets (which need the model
# repositories) out of CI; confirm against the CI configuration.
# -----------------------------------------------------------------------------

# Llama 3.1 8B Instruct; weights are referenced via the safetensors index.
cc_binary(
    name = "Llama-3.1-8B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.1-8B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.1-8B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.1-8B-Instruct",
        "@Meta-Llama-3.1-8B-Instruct//:config.json",
        "@Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.1-8B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",
    ],
    deps = [":llama_lib"],
)

# Llama 3.1 70B Instruct; weights are referenced via the safetensors index.
cc_binary(
    name = "Llama-3.1-70B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.1-70B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.1-70B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.1-70B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.1-70B-Instruct",
        "@Meta-Llama-3.1-70B-Instruct//:config.json",
        "@Meta-Llama-3.1-70B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.1-70B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",
    ],
    deps = [":llama_lib"],
)

# Llama 3.2 1B Instruct; unlike the other Instruct launchers, --weights points
# at model.safetensors directly rather than at an index file.
cc_binary(
    name = "Llama-3.2-1B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.2-1B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.2-1B-Instruct//:model.safetensors)",
        "--tokenizer=$(location @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.2-1B-Instruct",
        "@Meta-Llama-3.2-1B-Instruct//:config.json",
        "@Meta-Llama-3.2-1B-Instruct//:model.safetensors",
        "@Meta-Llama-3.2-1B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",
    ],
    deps = [":llama_lib"],
)

# Llama 3.2 3B Instruct; weights are referenced via the safetensors index.
cc_binary(
    name = "Llama-3.2-3B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.2-3B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.2-3B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.2-3B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.2-3B-Instruct",
        "@Meta-Llama-3.2-3B-Instruct//:config.json",
        "@Meta-Llama-3.2-3B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.2-3B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",
    ],
    deps = [":llama_lib"],
)

# TinyLlama Stories 15M launcher. Its config comes from the generated
# ":tinyllama_stories15M_json" file; weights and tokenizer come from the
# Karpathy file repositories. This is the only launcher without "no_ci".
cc_binary(
    name = "TinyLlama-Stories-15M",
    args = [
        "--config=$(location :tinyllama_stories15M_json)",
        "--weights=$(location @Karpathy-TinyLlama-Stories15M//file)",
        "--tokenizer=$(location @Karpathy-TinyLlama-Tokenizer//file)",
        "--prompt='Once upon a time, there was a little girl named Lily.'",
        # Don't do template prompt encoding, I'm a simple model.
        "--no-llama3=1",
        # Don't shard me, I'm so small.
        "--sharding=false",
    ],
    data = [
        ":tinyllama_stories15M_json",
        "@Karpathy-TinyLlama-Stories15M//file",
        "@Karpathy-TinyLlama-Tokenizer//file",
    ],
    deps = [":llama_lib"],
)

# Generates the config.json consumed by ":TinyLlama-Stories-15M".
write_file(
    name = "tinyllama_stories15M_json",
    out = "config.json",
    content = ['{"bos_token_id":1,"eos_token_id":2,"hidden_act":"silu","hidden_size":288,"intermediate_size":768,"max_position_embeddings":256,"model_type":"llama","num_attention_heads":6,"num_hidden_layers":6,"num_key_value_heads":6,"rms_norm_eps":1e-05,"hf_rope_impl":false,"rope_scaling":null,"rope_theta":10000.0}'],
)

# Binary built from llama.zig with test.zig as entry point, preset to the
# Llama 3.1 8B Instruct weights and config.
zig_cc_binary(
    name = "test-implementation",
    srcs = ["llama.zig"],
    args = [
        "--weights=$(location @Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json)",
        "--config=$(location @Meta-Llama-3.1-8B-Instruct//:config.json)",
    ],
    data = [
        "@Meta-Llama-3.1-8B-Instruct//:config.json",
        "@Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json",
    ],
    main = "test.zig",
    tags = [
        "no_ci",
    ],
    deps = [
        "@zml//async",
        "@zml//stdx",
        "@zml//zml",
    ],
)

# Runs @zml//zml/tokenizer:main on a fixed prompt with the token ids it is
# expected to produce.
native_test(
    name = "test_tokenizer",
    src = "@zml//zml/tokenizer:main",
    # Note: all Llama-3.x tokenizers are the same,
    # but using the 3.2-1B version because downloading the tokenizer triggers
    # downloading the model.
    args = [
        "--tokenizer=$(location @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
        # The prompt is deliberately multi-line; its lines start at column 0 so
        # that no indentation leaks into the string.
        # NOTE(review): the id 198 repeated in --expected looks like the
        # newline token, which is why the line breaks are kept; confirm.
        """--prompt='Examples of titles:
📉 Stock Market Trends
🍪 Perfect Chocolate Chip Recipe
Evolution of Music Streaming
Remote Work Productivity Tips
Artificial Intelligence in Healthcare
🎮 Video Game Development Insights
'""",
        # This corresponds to encoding with HF tokenizers, with bos=False.
        "--expected=41481,315,15671,512,9468,241,231,12937,8152,50730,198,9468,235,103,24118,39520,32013,26371,198,35212,3294,315,10948,45910,198,25732,5664,5761,1968,26788,198,9470,16895,22107,304,39435,198,9468,236,106,8519,4140,11050,73137,198",
    ],
    data = ["@Meta-Llama-3.2-1B-Instruct//:tokenizer.json"],
    tags = [
        "no_ci",
    ],
)

# -----------------------------------------------------------------------------
# Container image for the Llama 3.2 1B Instruct launcher.
# -----------------------------------------------------------------------------

# mtree manifest describing the files of the 1B launcher; used by ":archive".
mtree_spec(
    name = "mtree",
    srcs = [":Llama-3.2-1B-Instruct"],
    tags = [
        "no_ci",
    ],
)

# zstd-compressed (level 9) tarball of the 1B launcher, laid out per ":mtree".
tar(
    name = "archive",
    srcs = [":Llama-3.2-1B-Instruct"],
    args = [
        "--options",
        "zstd:compression-level=9",
    ],
    compress = "zstd",
    mtree = ":mtree",
    tags = [
        "no_ci",
    ],
)

# Generates the entrypoint file for the image: one argument per template line.
# The ":config", ":weights" and ":tokenizer" placeholders are replaced by the
# rlocationpath of the corresponding model file, giving paths under the
# launcher's ".runfiles" directory.
expand_template(
    name = "entrypoint",
    data = [
        ":Llama-3.2-1B-Instruct",
        "@Meta-Llama-3.2-1B-Instruct",
        "@Meta-Llama-3.2-1B-Instruct//:config.json",
        "@Meta-Llama-3.2-1B-Instruct//:model.safetensors",
        "@Meta-Llama-3.2-1B-Instruct//:tokenizer.json",
    ],
    substitutions = {
        ":config": "$(rlocationpath @Meta-Llama-3.2-1B-Instruct//:config.json)",
        ":weights": "$(rlocationpath @Meta-Llama-3.2-1B-Instruct//:model.safetensors)",
        ":tokenizer": "$(rlocationpath @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
    },
    tags = [
        "no_ci",
    ],
    template = [
        "./{}/Llama-3.2-1B-Instruct".format(package_name()),
        "--config=./{}/Llama-3.2-1B-Instruct.runfiles/:config".format(package_name()),
        "--weights=./{}/Llama-3.2-1B-Instruct.runfiles/:weights".format(package_name()),
        "--tokenizer=./{}/Llama-3.2-1B-Instruct.runfiles/:tokenizer".format(package_name()),
    ],
)

# Image layering the zml runtimes and the launcher archive on top of the
# distroless cc (debian12, debug) base.
oci_image(
    name = "image_",
    base = "@distroless_cc_debian12_debug",
    # The entrypoint is read from the generated ":entrypoint" file so that it
    # carries the --config/--weights/--tokenizer flags in addition to the
    # binary path.
    entrypoint = ":entrypoint",
    tags = [
        "no_ci",
    ],
    tars = [
        "@zml//runtimes:layers",
        ":archive",
    ],
)

# Builds ":image_" for the linux_amd64 platform regardless of the host.
platform_transition_filegroup(
    name = "image",
    srcs = [":image_"],
    tags = [
        "no_ci",
    ],
    target_platform = "@zml//platforms:linux_amd64",
)

# Loads ":image" into the local container runtime under the given tag.
oci_load(
    name = "load",
    image = ":image",
    repo_tags = [
        "distroless/llama-3.2-1b-instruct:latest",
    ],
    tags = [
        "no_ci",
    ],
)

# Pushes ":image" to the Docker Hub repository below, tagged "latest".
oci_push(
    name = "push",
    image = ":image",
    remote_tags = ["latest"],
    repository = "index.docker.io/steeve/llama-3.2-1b-instruct",
    tags = [
        "no_ci",
    ],
)