diff --git a/docs/tutorials/write_first_model.md b/docs/tutorials/write_first_model.md
index 7cea735..ebaf6c2 100644
--- a/docs/tutorials/write_first_model.md
+++ b/docs/tutorials/write_first_model.md
@@ -99,10 +99,7 @@ Let's start by writing some Zig code, importing ZML and often-used modules:
 ```zig
 const std = @import("std");
 const zml = @import("zml");
 const asynk = @import("async");
-
-// shortcut to the asyncc function in the asynk module
-const asyncc = asynk.asyncc;
 ```
 
 You will use above lines probably in all ZML projects. Also, note that **ZML is
@@ -251,7 +248,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
 
 // Start compiling. This uses the inferred shapes from the BufferStore.
 // The shape of the input tensor, we have to pass in manually.
-var compilation = try asyncc(
+var compilation = try asynk.asyncc(
     zml.compileModel,
     .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
 );
@@ -263,11 +260,11 @@
 var model_weights = try zml.aio.loadBuffers(Layer, .{}, bs, arena, platform);
 defer zml.aio.unloadBuffers(&model_weights); // for good practice
 
 // Wait for compilation to finish
 const compiled = try compilation.awaitt();
 ```
 
-Compiling is happening in the background via the `asyncc` function. We call
-`asyncc` with the `zml.compileModel` function and its arguments
+Compiling is happening in the background via the `asynk.asyncc` function. We
+call `asynk.asyncc` with the `zml.compileModel` function and its arguments
 separately. The arguments themselves are basically the shapes of the weights
@@ -418,9 +415,7 @@ You can access the complete source code of this walkthrough here:
 ```zig
 const std = @import("std");
 const zml = @import("zml");
 const asynk = @import("async");
-
-const asyncc = asynk.asyncc;
 
 /// Model definition
 const Layer = struct {
@@ -482,7 +477,7 @@ pub fn asyncMain() !void {
 
     // Start compiling. This uses the inferred shapes from the BufferStore.
     // The shape of the input tensor, we have to pass in manually.
-    var compilation = try asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
+    var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
 
     // Produce a bufferized weights struct from the fake BufferStore.
     // This is like the inferred shapes, but with actual values.