load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template")
load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")
load("@aspect_bazel_lib//lib:transitions.bzl", "platform_transition_filegroup")
load("@bazel_skylib//rules:native_binary.bzl", "native_test")
load("@bazel_skylib//rules:write_file.bzl", "write_file")
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load", "oci_push")
load("@zml//bazel:zig.bzl", "zig_cc_binary")

# Base Llama binary: builds main.zig together with the llama.zig model code.
# The per-checkpoint cc_binary targets below reuse this via ":llama_lib"
# (the *_lib target is presumably generated by zig_cc_binary — confirm in @zml//bazel:zig.bzl).
zig_cc_binary(
    name = "llama",
    srcs = [
        "llama.zig",
    ],
    main = "main.zig",
    deps = [
        "@com_github_hejsil_clap//:clap",
        "@zml//async",
        "@zml//stdx",
        "@zml//zml",
    ],
)

# Runs the llama binary against the Meta-Llama-3.1-8B-Instruct checkpoint.
# Weights are sharded, so the safetensors index file is passed as --weights.
cc_binary(
    name = "Llama-3.1-8B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.1-8B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.1-8B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.1-8B-Instruct",
        "@Meta-Llama-3.1-8B-Instruct//:config.json",
        "@Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.1-8B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",  # requires downloading multi-GB model weights
    ],
    deps = [":llama_lib"],
)

# Runs the llama binary against the Meta-Llama-3.1-70B-Instruct checkpoint.
# Weights are sharded, so the safetensors index file is passed as --weights.
cc_binary(
    name = "Llama-3.1-70B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.1-70B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.1-70B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.1-70B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.1-70B-Instruct",
        "@Meta-Llama-3.1-70B-Instruct//:config.json",
        "@Meta-Llama-3.1-70B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.1-70B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",  # requires downloading multi-GB model weights
    ],
    deps = [":llama_lib"],
)

# Runs the llama binary against the Meta-Llama-3.2-1B-Instruct checkpoint.
# This model ships as a single safetensors file (no index), hence the
# --weights path differs from the sharded 8B/70B targets above.
cc_binary(
    name = "Llama-3.2-1B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.2-1B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.2-1B-Instruct//:model.safetensors)",
        "--tokenizer=$(location @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.2-1B-Instruct",
        "@Meta-Llama-3.2-1B-Instruct//:config.json",
        "@Meta-Llama-3.2-1B-Instruct//:model.safetensors",
        "@Meta-Llama-3.2-1B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",  # requires downloading model weights
    ],
    deps = [":llama_lib"],
)

# Runs the llama binary against the Meta-Llama-3.2-3B-Instruct checkpoint.
# Weights are sharded, so the safetensors index file is passed as --weights.
cc_binary(
    name = "Llama-3.2-3B-Instruct",
    args = [
        "--config=$(location @Meta-Llama-3.2-3B-Instruct//:config.json)",
        "--weights=$(location @Meta-Llama-3.2-3B-Instruct//:model.safetensors.index.json)",
        "--tokenizer=$(location @Meta-Llama-3.2-3B-Instruct//:tokenizer.json)",
    ],
    data = [
        "@Meta-Llama-3.2-3B-Instruct",
        "@Meta-Llama-3.2-3B-Instruct//:config.json",
        "@Meta-Llama-3.2-3B-Instruct//:model.safetensors.index.json",
        "@Meta-Llama-3.2-3B-Instruct//:tokenizer.json",
    ],
    tags = [
        "no_ci",  # requires downloading model weights
    ],
    deps = [":llama_lib"],
)

# Tiny smoke-test model (Karpathy's 15M-parameter stories model).
# The config.json is generated locally by the write_file target below
# because the upstream download provides only the raw weights file.
cc_binary(
    name = "TinyLlama-Stories-15M",
    args = [
        "--config=$(location :tinyllama_stories15M_json)",
        "--weights=$(location @Karpathy-TinyLlama-Stories15M//file)",
        "--tokenizer=$(location @Karpathy-TinyLlama-Tokenizer//file)",
        "--prompt='Once upon a time, there was a little girl named Lily.'",
        "--no-llama3=1",  # don't do template prompt encoding, I'm a simple model
        "--sharding=false",  # don't shard me, I'm so small
    ],
    data = [
        ":tinyllama_stories15M_json",
        "@Karpathy-TinyLlama-Stories15M//file",
        "@Karpathy-TinyLlama-Tokenizer//file",
    ],
    deps = [":llama_lib"],
)

# Generates the config.json for TinyLlama-Stories-15M, since the upstream
# checkpoint does not ship a HuggingFace-style config file.
write_file(
    name = "tinyllama_stories15M_json",
    out = "config.json",
    content = ['{"bos_token_id":1,"eos_token_id":2,"hidden_act":"silu","hidden_size":288,"intermediate_size":768,"max_position_embeddings":256,"model_type":"llama","num_attention_heads":6,"num_hidden_layers":6,"num_key_value_heads":6,"rms_norm_eps":1e-05,"hf_rope_impl":false,"rope_scaling":null,"rope_theta":10000.0}'],
)

# Implementation test harness: builds llama.zig with test.zig as the entry
# point, exercised against the Llama-3.1-8B-Instruct config and weights.
zig_cc_binary(
    name = "test-implementation",
    srcs = ["llama.zig"],
    args = [
        "--weights=$(location @Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json)",
        "--config=$(location @Meta-Llama-3.1-8B-Instruct//:config.json)",
    ],
    data = [
        "@Meta-Llama-3.1-8B-Instruct//:config.json",
        "@Meta-Llama-3.1-8B-Instruct//:model.safetensors.index.json",
    ],
    main = "test.zig",
    tags = [
        "no_ci",  # requires downloading multi-GB model weights
    ],
    deps = [
        "@zml//async",
        "@zml//stdx",
        "@zml//zml",
    ],
)

# Checks the zml tokenizer CLI against a reference encoding of a fixed prompt.
native_test(
    name = "test_tokenizer",
    src = "@zml//zml/tokenizer:main",
    # Note: all Llama-3.x tokenizers are the same,
    # but using the 3.2-1B version because downloading the tokenizer triggers downloading the model.
    args = [
        "--tokenizer=$(location @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
        """--prompt='Examples of titles:
📉 Stock Market Trends
🍪 Perfect Chocolate Chip Recipe
Evolution of Music Streaming
Remote Work Productivity Tips
Artificial Intelligence in Healthcare
🎮 Video Game Development Insights
'""",
        # this correspond to encoding with HF tokenizers, with bos=False
        "--expected=41481,315,15671,512,9468,241,231,12937,8152,50730,198,9468,235,103,24118,39520,32013,26371,198,35212,3294,315,10948,45910,198,25732,5664,5761,1968,26788,198,9470,16895,22107,304,39435,198,9468,236,106,8519,4140,11050,73137,198",
    ],
    data = ["@Meta-Llama-3.2-1B-Instruct//:tokenizer.json"],
    tags = [
        "no_ci",  # requires downloading the tokenizer (and hence the model)
    ],
)

# Manifest (mtree) of the Llama-3.2-1B-Instruct binary and its runfiles,
# consumed by the ":archive" tar target below.
mtree_spec(
    name = "mtree",
    srcs = [":Llama-3.2-1B-Instruct"],
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
)

# zstd-compressed tar layer containing the Llama-3.2-1B-Instruct binary,
# used as an OCI image layer in ":image_".
tar(
    name = "archive",
    srcs = [":Llama-3.2-1B-Instruct"],
    args = [
        "--options",
        "zstd:compression-level=9",
    ],
    compress = "zstd",
    mtree = ":mtree",
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
)

# Builds the container entrypoint command line, resolving the :config,
# :weights and :tokenizer placeholders to runfiles paths inside the image.
expand_template(
    name = "entrypoint",
    data = [
        ":Llama-3.2-1B-Instruct",
        "@Meta-Llama-3.2-1B-Instruct",
        "@Meta-Llama-3.2-1B-Instruct//:config.json",
        "@Meta-Llama-3.2-1B-Instruct//:model.safetensors",
        "@Meta-Llama-3.2-1B-Instruct//:tokenizer.json",
    ],
    substitutions = {
        ":config": "$(rlocationpath @Meta-Llama-3.2-1B-Instruct//:config.json)",
        ":weights": "$(rlocationpath @Meta-Llama-3.2-1B-Instruct//:model.safetensors)",
        ":tokenizer": "$(rlocationpath @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
    },
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
    template = [
        "./{}/Llama-3.2-1B-Instruct".format(package_name()),
        "--config=./{}/Llama-3.2-1B-Instruct.runfiles/:config".format(package_name()),
        "--weights=./{}/Llama-3.2-1B-Instruct.runfiles/:weights".format(package_name()),
        "--tokenizer=./{}/Llama-3.2-1B-Instruct.runfiles/:tokenizer".format(package_name()),
    ],
)

# OCI image wrapping the Llama-3.2-1B-Instruct binary on a distroless base.
# Built for the host platform; ":image" below transitions it to linux/amd64.
oci_image(
    name = "image_",
    base = "@distroless_cc_debian12_debug",
    # entrypoint = ["./{}/Llama-3.2-1B-Instruct".format(package_name())],
    entrypoint = ":entrypoint",
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
    tars = [
        "@zml//runtimes:layers",
        ":archive",
    ],
)

# Re-builds ":image_" for linux/amd64 regardless of the host platform,
# so the pushed/loaded image always targets the deployment architecture.
platform_transition_filegroup(
    name = "image",
    srcs = [":image_"],
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
    target_platform = "@zml//platforms:linux_amd64",
)

# Loads the image into the local container runtime (e.g. `bazel run :load`).
oci_load(
    name = "load",
    image = ":image",
    repo_tags = [
        "distroless/llama-3.2-1b-instruct:latest",
    ],
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
)

# Pushes the image to Docker Hub (e.g. `bazel run :push`).
oci_push(
    name = "push",
    image = ":image",
    remote_tags = ["latest"],
    repository = "index.docker.io/steeve/llama-3.2-1b-instruct",
    tags = [
        "no_ci",  # transitively requires the model weights
    ],
)