Add example Bazel build files and tokenizer test for tinyllama, including tigerbeetle integration and flags.
This commit is contained in:
parent
567210d1d7
commit
b67685b941
@ -126,25 +126,19 @@ filegroup(
|
|||||||
)
|
)
|
||||||
use_repo(huggingface, "Meta-Llama-3.1-70B-Instruct")
|
use_repo(huggingface, "Meta-Llama-3.1-70B-Instruct")
|
||||||
|
|
||||||
|
http_file(
|
||||||
huggingface.model(
|
name = "Karpathy-TinyLlama-Stories15M",
|
||||||
name = "TinyLlama-120M-scratch",
|
downloaded_file_path = "stories15M.tinyllama",
|
||||||
build_file_content = """\
|
sha256 = "cd590644d963867a2b6e5a1107f51fad663c41d79c149fbecbbb1f95fa81f49a",
|
||||||
package(default_visibility = ["//visibility:public"])
|
url = "https://huggingface.co/karpathy/tinyllamas/resolve/0bd21da7698eaf29a0d7de3992de8a46ef624add/stories15M.bin?download=true",
|
||||||
filegroup(
|
|
||||||
name = "TinyLlama-120M-scratch",
|
|
||||||
srcs = glob(["*.json", "*.safetensors"]),
|
|
||||||
)
|
)
|
||||||
""",
|
|
||||||
commit = "89c1bb4ea00861ddaa26c55f102ccb25e161feee",
|
|
||||||
includes = [
|
|
||||||
"*.safetensors",
|
|
||||||
"*.json",
|
|
||||||
],
|
|
||||||
model = "Hoyeon/TinyLlama-120M-scratch",
|
|
||||||
)
|
|
||||||
use_repo(huggingface, "TinyLlama-120M-scratch")
|
|
||||||
|
|
||||||
|
http_file(
|
||||||
|
name = "Karpathy-TinyLlama-Tokenizer",
|
||||||
|
downloaded_file_path = "stories260K.tinyllama",
|
||||||
|
sha256 = "50a52ef822ee9e83de5ce9d0be0a025a773d019437f58b5ff9dcafb063ece361",
|
||||||
|
url = "https://github.com/karpathy/llama2.c/raw/c02865df300f3bd9e567ce061000dc23bf785a17/tokenizer.bin",
|
||||||
|
)
|
||||||
|
|
||||||
bazel_dep(name = "rules_rust", version = "0.57.0")
|
bazel_dep(name = "rules_rust", version = "0.57.0")
|
||||||
rust = use_extension("@rules_rust//rust:extensions.bzl", "rust")
|
rust = use_extension("@rules_rust//rust:extensions.bzl", "rust")
|
||||||
|
|||||||
@ -1,7 +1,8 @@
|
|||||||
load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template")
|
load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template")
|
||||||
load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")
|
load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")
|
||||||
load("@aspect_bazel_lib//lib:transitions.bzl", "platform_transition_filegroup")
|
load("@aspect_bazel_lib//lib:transitions.bzl", "platform_transition_filegroup")
|
||||||
load("@bazel_skylib//rules:native_binary.bzl", "native_binary")
|
load("@bazel_skylib//rules:native_binary.bzl", "native_test")
|
||||||
|
load("@bazel_skylib//rules:write_file.bzl", "write_file")
|
||||||
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load", "oci_push")
|
load("@rules_oci//oci:defs.bzl", "oci_image", "oci_load", "oci_push")
|
||||||
load("@zml//bazel:zig.bzl", "zig_cc_binary")
|
load("@zml//bazel:zig.bzl", "zig_cc_binary")
|
||||||
|
|
||||||
@ -20,24 +21,6 @@ zig_cc_binary(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
cc_binary(
|
|
||||||
name = "TinyLlama-120M-scratch",
|
|
||||||
args = [
|
|
||||||
"--config=$(location @TinyLlama-120M-scratch//:config.json)",
|
|
||||||
"--weights=$(location @TinyLlama-120M-scratch//:model.safetensors)",
|
|
||||||
"--tokenizer=$(location @TinyLlama-120M-scratch//:tokenizer.json)",
|
|
||||||
"--no-llama3=true", # don't do llama3 template prompt encoding
|
|
||||||
"--sharding=false", # don't shard this
|
|
||||||
],
|
|
||||||
data = [
|
|
||||||
"@TinyLlama-120M-scratch",
|
|
||||||
"@TinyLlama-120M-scratch//:config.json",
|
|
||||||
"@TinyLlama-120M-scratch//:model.safetensors",
|
|
||||||
"@TinyLlama-120M-scratch//:tokenizer.json",
|
|
||||||
],
|
|
||||||
deps = [":llama_lib"],
|
|
||||||
)
|
|
||||||
|
|
||||||
cc_binary(
|
cc_binary(
|
||||||
name = "Llama-3.1-8B-Instruct",
|
name = "Llama-3.1-8B-Instruct",
|
||||||
args = [
|
args = [
|
||||||
@ -70,7 +53,6 @@ cc_binary(
|
|||||||
deps = [":llama_lib"],
|
deps = [":llama_lib"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
cc_binary(
|
cc_binary(
|
||||||
name = "Llama-3.2-1B-Instruct",
|
name = "Llama-3.2-1B-Instruct",
|
||||||
args = [
|
args = [
|
||||||
@ -102,7 +84,31 @@ cc_binary(
|
|||||||
],
|
],
|
||||||
deps = [":llama_lib"],
|
deps = [":llama_lib"],
|
||||||
)
|
)
|
||||||
#
|
|
||||||
|
cc_binary(
|
||||||
|
name = "TinyLlama-Stories-15M",
|
||||||
|
args = [
|
||||||
|
"--config=$(location :tinyllama_stories15M_json)",
|
||||||
|
"--weights=$(location @Karpathy-TinyLlama-Stories15M//file)",
|
||||||
|
"--tokenizer=$(location @Karpathy-TinyLlama-Tokenizer//file)",
|
||||||
|
"--prompt='Once upon a time, there was a little girl named Lily.'",
|
||||||
|
"--no-llama3=1", # don't do template prompt encoding, I'm a simple model
|
||||||
|
"--sharding=false", # don't shard me, I'm so small
|
||||||
|
],
|
||||||
|
data = [
|
||||||
|
":tinyllama_stories15M_json",
|
||||||
|
"@Karpathy-TinyLlama-Stories15M//file",
|
||||||
|
"@Karpathy-TinyLlama-Tokenizer//file",
|
||||||
|
],
|
||||||
|
deps = [":llama_lib"],
|
||||||
|
)
|
||||||
|
|
||||||
|
write_file(
|
||||||
|
name = "tinyllama_stories15M_json",
|
||||||
|
out = "config.json",
|
||||||
|
content = ['{"bos_token_id":1,"eos_token_id":2,"hidden_act":"silu","hidden_size":288,"intermediate_size":768,"max_position_embeddings":256,"model_type":"llama","num_attention_heads":6,"num_hidden_layers":6,"num_key_value_heads":6,"rms_norm_eps":1e-05,"hf_rope_impl":false,"rope_scaling":null,"rope_theta":10000.0}'],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
zig_cc_binary(
|
zig_cc_binary(
|
||||||
name = "test-implementation",
|
name = "test-implementation",
|
||||||
@ -117,27 +123,31 @@ zig_cc_binary(
|
|||||||
],
|
],
|
||||||
main = "test.zig",
|
main = "test.zig",
|
||||||
deps = [
|
deps = [
|
||||||
"//third_party/tigerbeetle:flags",
|
|
||||||
"@zml//async",
|
"@zml//async",
|
||||||
"@zml//metax",
|
"@zml//stdx",
|
||||||
"@zml//zml",
|
"@zml//zml",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
zig_cc_binary(
|
native_test(
|
||||||
name = "test_tokenizer",
|
name = "test_tokenizer",
|
||||||
main = "test_tokenizer.zig",
|
src = "@zml//zml/tokenizer:main",
|
||||||
deps = [
|
|
||||||
"//third_party/tigerbeetle:flags",
|
|
||||||
"@zml//stdx",
|
|
||||||
"@zml//zml",
|
|
||||||
],
|
|
||||||
# Note: all Llama-3.x tokenizers are the same,
|
# Note: all Llama-3.x tokenizers are the same,
|
||||||
# but using the 3.2-1B version because downloading the tokenizer triggers downloading the model.
|
# but using the 3.2-1B version because downloading the tokenizer triggers downloading the model.
|
||||||
args = [
|
args = [
|
||||||
"--tokenizer=$(location @Meta-Llama-3.2-1B-Instruct//:tokenizer)",
|
"--tokenizer=$(location @Meta-Llama-3.2-1B-Instruct//:tokenizer.json)",
|
||||||
|
"""--prompt='Examples of titles:
|
||||||
|
📉 Stock Market Trends
|
||||||
|
🍪 Perfect Chocolate Chip Recipe
|
||||||
|
Evolution of Music Streaming
|
||||||
|
Remote Work Productivity Tips
|
||||||
|
Artificial Intelligence in Healthcare
|
||||||
|
🎮 Video Game Development Insights
|
||||||
|
'""",
|
||||||
|
# this correspond to encoding with HF tokenizers, with bos=False
|
||||||
|
"--expected=41481,315,15671,512,9468,241,231,12937,8152,50730,198,9468,235,103,24118,39520,32013,26371,198,35212,3294,315,10948,45910,198,25732,5664,5761,1968,26788,198,9470,16895,22107,304,39435,198,9468,236,106,8519,4140,11050,73137,198",
|
||||||
],
|
],
|
||||||
data = ["@Meta-Llama-3.2-1B-Instruct//:tokenizer"],
|
data = ["@Meta-Llama-3.2-1B-Instruct//:tokenizer.json"],
|
||||||
)
|
)
|
||||||
|
|
||||||
mtree_spec(
|
mtree_spec(
|
||||||
|
|||||||
@ -26,6 +26,7 @@ pub const LlamaLM = struct {
|
|||||||
rope_theta: f32,
|
rope_theta: f32,
|
||||||
max_position_embeddings: usize,
|
max_position_embeddings: usize,
|
||||||
rms_norm_eps: f32,
|
rms_norm_eps: f32,
|
||||||
|
hf_rope_impl: bool = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const Options = struct {
|
pub const Options = struct {
|
||||||
@ -47,7 +48,7 @@ pub const LlamaLM = struct {
|
|||||||
self.model.num_heads = @intCast(config.num_attention_heads);
|
self.model.num_heads = @intCast(config.num_attention_heads);
|
||||||
self.model.num_kv_heads = @intCast(config.num_key_value_heads);
|
self.model.num_kv_heads = @intCast(config.num_key_value_heads);
|
||||||
self.model.rope_opts = .{
|
self.model.rope_opts = .{
|
||||||
.impl = .sequential,
|
.impl = if (config.hf_rope_impl) .sequential else .interleaved,
|
||||||
.freq_base = config.rope_theta,
|
.freq_base = config.rope_theta,
|
||||||
};
|
};
|
||||||
for (self.model.layers) |*layer| {
|
for (self.model.layers) |*layer| {
|
||||||
|
|||||||
@ -27,9 +27,9 @@ pub fn tokenizePromptLlama3(allocator: std.mem.Allocator, tokenizer: zml.tokeniz
|
|||||||
var encoder = try tokenizer.encoder();
|
var encoder = try tokenizer.encoder();
|
||||||
defer encoder.deinit();
|
defer encoder.deinit();
|
||||||
|
|
||||||
const start_header_id = tokenizer.token_to_id("<|start_header_id|>") orelse return error.NoSuchToken;
|
const start_header_id = tokenizer.tokenToId("<|start_header_id|>") orelse return error.NoSuchToken;
|
||||||
const end_header_id = tokenizer.token_to_id("<|end_header_id|>") orelse return error.NoSuchToken;
|
const end_header_id = tokenizer.tokenToId("<|end_header_id|>") orelse return error.NoSuchToken;
|
||||||
const eot_id = tokenizer.token_to_id("<|eot_id|>") orelse return error.NoSuchToken;
|
const eot_id = tokenizer.tokenToId("<|eot_id|>") orelse return error.NoSuchToken;
|
||||||
const newline_id = (try encoder.encode("\n"))[0];
|
const newline_id = (try encoder.encode("\n"))[0];
|
||||||
|
|
||||||
try tokens.append(config.bos_token_id);
|
try tokens.append(config.bos_token_id);
|
||||||
@ -312,7 +312,7 @@ pub fn asyncMain() !void {
|
|||||||
var timer = try stdx.time.Timer.start();
|
var timer = try stdx.time.Timer.start();
|
||||||
defer log.info("Loaded tokenizer from {s} [{}]", .{ tok, timer.read() });
|
defer log.info("Loaded tokenizer from {s} [{}]", .{ tok, timer.read() });
|
||||||
|
|
||||||
break :blk try zml.tokenizer.Tokenizer.from_file(model_arena.allocator(), tok);
|
break :blk try zml.tokenizer.Tokenizer.fromFile(model_arena.allocator(), tok);
|
||||||
} else {
|
} else {
|
||||||
log.err("Missing --tokenizer", .{});
|
log.err("Missing --tokenizer", .{});
|
||||||
return;
|
return;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user