Update docs and example scripts (including test files) to remove the async helpers `asynk`, `asyncc`, `awaitt`, and `await_`

This commit is contained in:
Foke Singh 2025-09-02 10:27:40 +00:00
parent 6e15123fb3
commit 090d7748d5
11 changed files with 51 additions and 56 deletions

View File

@ -118,13 +118,13 @@ model. Put the following in `my_project/torch2zml.zig`.
const std = @import("std"); const std = @import("std");
const log = std.log; const log = std.log;
const asynk = @import("async"); const async = @import("async");
const zml = @import("zml"); const zml = @import("zml");
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit(); defer _ = gpa.deinit();
try asynk.AsyncThread.main(gpa.allocator(), asyncMain, .{}); try async.AsyncThread.main(gpa.allocator(), asyncMain, .{});
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {

View File

@ -99,10 +99,7 @@ Let's start by writing some Zig code, importing ZML and often-used modules:
```zig ```zig
const std = @import("std"); const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const async = @import("async");
// shortcut to the asyncc function in the asynk module
const asyncc = asynk.asyncc;
``` ```
You will use above lines probably in all ZML projects. Also, note that **ZML is You will use above lines probably in all ZML projects. Also, note that **ZML is
@ -154,7 +151,7 @@ like this:
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit(); defer _ = gpa.deinit();
try asynk.AsyncThread.main(gpa.allocator(), asyncMain); try async.AsyncThread.main(gpa.allocator(), asyncMain);
} }
@ -251,7 +248,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
// Start compiling. This uses the inferred shapes from the BufferStore. // Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
var compilation = try asyncc( var compilation = try async.async(
zml.compileModel, zml.compileModel,
.{ allocator, Layer.forward, model_shapes, .{input_shape}, platform }, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
); );
@ -263,11 +260,11 @@ var model_weights = try zml.aio.loadBuffers(Layer, .{}, bs, arena, platform);
defer zml.aio.unloadBuffers(&model_weights); // for good practice defer zml.aio.unloadBuffers(&model_weights); // for good practice
// Wait for compilation to finish // Wait for compilation to finish
const compiled = try compilation.awaitt(); const compiled = try compilation.await();
``` ```
Compiling is happening in the background via the `asyncc` function. We call Compiling is happening in the background via the `async` function. We call
`asyncc` with the `zml.compileModel` function and its arguments `async` with the `zml.compileModel` function and its arguments
separately. The arguments themselves are basically the shapes of the weights in separately. The arguments themselves are basically the shapes of the weights in
the BufferStore, the `.forward` function name in order to compile the BufferStore, the `.forward` function name in order to compile
`Layer.forward`, the shape of the input tensor(s), and the platform for which to `Layer.forward`, the shape of the input tensor(s), and the platform for which to
@ -371,7 +368,7 @@ top of the Zig file:
```zig ```zig
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const async = @import("async");
``` ```
@ -418,9 +415,7 @@ You can access the complete source code of this walkthrough here:
```zig ```zig
const std = @import("std"); const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const async = @import("async");
const asyncc = asynk.asyncc;
/// Model definition /// Model definition
const Layer = struct { const Layer = struct {
@ -439,7 +434,7 @@ const Layer = struct {
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit(); defer _ = gpa.deinit();
try asynk.AsyncThread.main(gpa.allocator(), asyncMain); try async.AsyncThread.main(gpa.allocator(), asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -482,7 +477,7 @@ pub fn asyncMain() !void {
// Start compiling. This uses the inferred shapes from the BufferStore. // Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
var compilation = try asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform }); var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
// Produce a bufferized weights struct from the fake BufferStore. // Produce a bufferized weights struct from the fake BufferStore.
// This is like the inferred shapes, but with actual values. // This is like the inferred shapes, but with actual values.
@ -491,7 +486,7 @@ pub fn asyncMain() !void {
defer zml.aio.unloadBuffers(&model_weights); // for good practice defer zml.aio.unloadBuffers(&model_weights); // for good practice
// Wait for compilation to finish // Wait for compilation to finish
const compiled = try compilation.awaitt(); const compiled = try compilation.await();
// pass the model weights to the compiled module to create an executable // pass the model weights to the compiled module to create an executable
// module // module

View File

@ -1,13 +1,13 @@
const std = @import("std"); const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const stdx = @import("stdx"); const stdx = @import("stdx");
const asynk = @import("async"); const async = @import("async");
const flags = stdx.flags; const flags = stdx.flags;
// set log level to debug to print the generated IR // set log level to debug to print the generated IR
pub const std_options: std.Options = .{ pub const std_options: std.Options = .{
.log_level = .warn, .log_level = .warn,
.logFn = asynk.logFn(std.log.defaultLog), .logFn = async.logFn(std.log.defaultLog),
}; };
pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor { pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
@ -15,7 +15,7 @@ pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
} }
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -53,10 +53,10 @@ pub fn asyncMain() !void {
// Start compiling. // Start compiling.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
timer.reset(); timer.reset();
var compilation = try asynk.asyncc(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform }); var compilation = try async.async(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
// Wait for compilation to finish // Wait for compilation to finish
const executable = try compilation.awaitt(); const executable = try compilation.await();
defer executable.deinit(); defer executable.deinit();
const compilation_elapsed = timer.lap() / std.time.ns_per_ms; const compilation_elapsed = timer.lap() / std.time.ns_per_ms;
std.debug.print("-" ** 160 ++ "\n\n", .{}); std.debug.print("-" ** 160 ++ "\n\n", .{});

View File

@ -1,13 +1,13 @@
const std = @import("std"); const std = @import("std");
const asynk = @import("async"); const async = @import("async");
const runtimes = @import("runtimes"); const runtimes = @import("runtimes");
const zml = @import("zml"); const zml = @import("zml");
const cu = zml.platform_specific; const cu = zml.platform_specific;
pub const std_options: std.Options = .{ pub const std_options: std.Options = .{
.log_level = .info, .log_level = .info,
.logFn = asynk.logFn(std.log.defaultLog), .logFn = async.logFn(std.log.defaultLog),
}; };
const log = std.log.scoped(.@"examples/custom_call"); const log = std.log.scoped(.@"examples/custom_call");
@ -126,7 +126,7 @@ pub fn grayscale(rgb: zml.Tensor) zml.Tensor {
} }
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.smp_allocator, asyncMain); try async.AsyncThread.main(std.heap.smp_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {

View File

@ -1,6 +1,6 @@
const std = @import("std"); const std = @import("std");
const asynk = @import("async"); const async = @import("async");
const clap = @import("clap"); const clap = @import("clap");
const stdx = @import("stdx"); const stdx = @import("stdx");
const zml = @import("zml"); const zml = @import("zml");
@ -19,7 +19,7 @@ const log = std.log.scoped(.llama);
pub const std_options: std.Options = .{ pub const std_options: std.Options = .{
.log_level = .info, .log_level = .info,
.logFn = asynk.logFn(std.log.defaultLog), .logFn = async.logFn(std.log.defaultLog),
}; };
const params = clap.parseParamsComptime( const params = clap.parseParamsComptime(
@ -152,7 +152,7 @@ pub fn generateText(
} }
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -198,7 +198,7 @@ pub fn asyncMain() !void {
const model_weights_path = b: { const model_weights_path = b: {
const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" }); const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" });
if (asynk.File.access(simple_path, .{})) { if (async.File.access(simple_path, .{})) {
break :b simple_path; break :b simple_path;
} else |_| { } else |_| {
allocator.free(simple_path); allocator.free(simple_path);
@ -213,7 +213,7 @@ pub fn asyncMain() !void {
defer allocator.free(model_tokenizer_path); defer allocator.free(model_tokenizer_path);
const config = blk: { const config = blk: {
var config_json_file = try asynk.File.open(model_config_path, .{ .mode = .read_only }); var config_json_file = try async.File.open(model_config_path, .{ .mode = .read_only });
defer config_json_file.close() catch unreachable; defer config_json_file.close() catch unreachable;
var config_json_buffer: [256]u8 = undefined; var config_json_buffer: [256]u8 = undefined;
var config_reader = config_json_file.reader(&config_json_buffer); var config_reader = config_json_file.reader(&config_json_buffer);
@ -276,7 +276,7 @@ pub fn asyncMain() !void {
// Compile the model twice, one for prefill, one for generation. // Compile the model twice, one for prefill, one for generation.
var start = try std.time.Timer.start(); var start = try std.time.Timer.start();
var fut_mod_prefill = try asynk.asyncc(zml.compileModel, .{ var fut_mod_prefill = try async.async(zml.compileModel, .{
allocator, llama.LlamaLM.forward, llama_tensors, allocator, llama.LlamaLM.forward, llama_tensors,
.{ .{
prefill_tokens_shape, prefill_tokens_shape,
@ -287,7 +287,7 @@ pub fn asyncMain() !void {
platform, platform,
}); });
var fut_mod = try asynk.asyncc(zml.compileModel, .{ var fut_mod = try async.async(zml.compileModel, .{
allocator, llama.LlamaLM.forward, llama_tensors, allocator, llama.LlamaLM.forward, llama_tensors,
.{ .{
gen_tokens_shape, gen_tokens_shape,
@ -304,9 +304,9 @@ pub fn asyncMain() !void {
defer zml.aio.unloadBuffers(&llama_buffers); defer zml.aio.unloadBuffers(&llama_buffers);
log.info("\tLoaded weights in {D}", .{start.read()}); log.info("\tLoaded weights in {D}", .{start.read()});
var llama_module_prefill = (try fut_mod_prefill.awaitt()).prepare(llama_buffers); var llama_module_prefill = (try fut_mod_prefill.await()).prepare(llama_buffers);
defer llama_module_prefill.deinit(); defer llama_module_prefill.deinit();
var llama_module = (try fut_mod.awaitt()).prepare(llama_buffers); var llama_module = (try fut_mod.await()).prepare(llama_buffers);
defer llama_module.deinit(); defer llama_module.deinit();
log.info("\tCompiled model in {D}", .{start.read()}); log.info("\tCompiled model in {D}", .{start.read()});
log.info("Creating KvCache", .{}); log.info("Creating KvCache", .{});

View File

@ -1,4 +1,4 @@
const asynk = @import("async"); const async = @import("async");
const std = @import("std"); const std = @import("std");
const stdx = @import("stdx"); const stdx = @import("stdx");
const zml = @import("zml"); const zml = @import("zml");
@ -10,7 +10,7 @@ const LlamaLM = llama_mod.LlamaLM;
const Tensor = zml.Tensor; const Tensor = zml.Tensor;
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -58,7 +58,7 @@ pub fn asyncMain() !void {
const num_kv_heads: i64 = cli_args.num_kv_heads orelse buffer_store.metadata("num_kv_heads", .int) orelse num_heads; const num_kv_heads: i64 = cli_args.num_kv_heads orelse buffer_store.metadata("num_kv_heads", .int) orelse num_heads;
const config = blk: { const config = blk: {
var config_json_file = try asynk.File.open(cli_args.config, .{ .mode = .read_only }); var config_json_file = try async.File.open(cli_args.config, .{ .mode = .read_only });
defer config_json_file.close() catch unreachable; defer config_json_file.close() catch unreachable;
var reader = std.json.reader(allocator, config_json_file.reader()); var reader = std.json.reader(allocator, config_json_file.reader());
defer reader.deinit(); defer reader.deinit();

View File

@ -1,13 +1,13 @@
const std = @import("std"); const std = @import("std");
const asynk = @import("async"); const async = @import("async");
const zml = @import("zml"); const zml = @import("zml");
const log = std.log.scoped(.mnist); const log = std.log.scoped(.mnist);
pub const std_options: std.Options = .{ pub const std_options: std.Options = .{
.log_level = .info, .log_level = .info,
.logFn = asynk.logFn(std.log.defaultLog), .logFn = async.logFn(std.log.defaultLog),
}; };
/// Model definition /// Model definition
@ -37,7 +37,7 @@ const Mnist = struct {
}; };
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -75,14 +75,14 @@ pub fn asyncMain() !void {
// Start compiling // Start compiling
log.info("Compiling model to MLIR....", .{}); log.info("Compiling model to MLIR....", .{});
var start_time = try std.time.Timer.start(); var start_time = try std.time.Timer.start();
var compilation = try asynk.asyncc(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform }); var compilation = try async.async(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
// While compiling, start loading weights on the platform // While compiling, start loading weights on the platform
var model_weights = try zml.aio.loadModelBuffers(Mnist, mnist_model, buffer_store, arena, platform); var model_weights = try zml.aio.loadModelBuffers(Mnist, mnist_model, buffer_store, arena, platform);
defer zml.aio.unloadBuffers(&model_weights); defer zml.aio.unloadBuffers(&model_weights);
// Wait for end of compilation and end of weights loading. // Wait for end of compilation and end of weights loading.
const compiled_mnist = try compilation.awaitt(); const compiled_mnist = try compilation.await();
log.info("✅ Compiled model in {d}ms", .{start_time.read() / std.time.ns_per_ms}); log.info("✅ Compiled model in {d}ms", .{start_time.read() / std.time.ns_per_ms});
const mnist = compiled_mnist.prepare(model_weights); const mnist = compiled_mnist.prepare(model_weights);
@ -92,7 +92,7 @@ pub fn asyncMain() !void {
log.info("Starting inference...", .{}); log.info("Starting inference...", .{});
// Load a random digit image from the dataset. // Load a random digit image from the dataset.
const dataset = try asynk.File.open(t10kfilename, .{ .mode = .read_only }); const dataset = try async.File.open(t10kfilename, .{ .mode = .read_only });
defer dataset.close() catch unreachable; defer dataset.close() catch unreachable;
var rng = std.Random.Xoshiro256.init(@intCast(std.time.timestamp())); var rng = std.Random.Xoshiro256.init(@intCast(std.time.timestamp()));

View File

@ -1,6 +1,6 @@
const std = @import("std"); const std = @import("std");
const asynk = @import("async"); const async = @import("async");
const clap = @import("clap"); const clap = @import("clap");
const stdx = @import("stdx"); const stdx = @import("stdx");
const zml = @import("zml"); const zml = @import("zml");
@ -15,7 +15,7 @@ pub const std_options: std.Options = .{
.log_scope_levels = &[_]std.log.ScopeLevel{ .log_scope_levels = &[_]std.log.ScopeLevel{
.{ .scope = .modernbert, .level = .info }, .{ .scope = .modernbert, .level = .info },
}, },
.logFn = asynk.logFn(std.log.defaultLog), .logFn = async.logFn(std.log.defaultLog),
}; };
const params = clap.parseParamsComptime( const params = clap.parseParamsComptime(
@ -38,7 +38,7 @@ const clap_parsers = .{
}; };
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -136,7 +136,7 @@ pub fn asyncMain() !void {
// Compile the model // Compile the model
log.info("\tCompiling ModernBERT model...", .{}); log.info("\tCompiling ModernBERT model...", .{});
var fut_mod = try asynk.asyncc(zml.compile, .{ var fut_mod = try async.async(zml.compile, .{
allocator, allocator,
modernbert.ModernBertForMaskedLM.forward, modernbert.ModernBertForMaskedLM.forward,
.{modernbert_options}, .{modernbert_options},
@ -144,7 +144,7 @@ pub fn asyncMain() !void {
tensor_store, tensor_store,
platform, platform,
}); });
var bert_module = (try fut_mod.awaitt()).prepare(bert_weights); var bert_module = (try fut_mod.await()).prepare(bert_weights);
defer bert_module.deinit(); defer bert_module.deinit();
log.info("\tLoaded weights and compiled model in {D}", .{start.read()}); log.info("\tLoaded weights and compiled model in {D}", .{start.read()});

View File

@ -1,6 +1,6 @@
const std = @import("std"); const std = @import("std");
const asynk = @import("async"); const async = @import("async");
const stdx = @import("stdx"); const stdx = @import("stdx");
const zml = @import("zml"); const zml = @import("zml");
const Tensor = zml.Tensor; const Tensor = zml.Tensor;

View File

@ -1,7 +1,7 @@
const clap = @import("clap"); const clap = @import("clap");
const std = @import("std"); const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const async = @import("async");
const log = std.log; const log = std.log;
const Tensor = zml.Tensor; const Tensor = zml.Tensor;
const modernbert_module = @import("modernbert.zig"); const modernbert_module = @import("modernbert.zig");
@ -20,7 +20,7 @@ fn printUsageAndExit(stderr: anytype) noreturn {
std.process.exit(0); std.process.exit(0);
} }
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {

View File

@ -1,6 +1,6 @@
const std = @import("std"); const std = @import("std");
const asynk = @import("async"); const async = @import("async");
const zml = @import("zml"); const zml = @import("zml");
/// Model definition /// Model definition
@ -18,7 +18,7 @@ const Layer = struct {
}; };
pub fn main() !void { pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain); try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
} }
pub fn asyncMain() !void { pub fn asyncMain() !void {
@ -58,7 +58,7 @@ pub fn asyncMain() !void {
// Start compiling. This uses the inferred shapes from the BufferStore. // Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform }); var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
// Produce a bufferized weights struct from the fake BufferStore. // Produce a bufferized weights struct from the fake BufferStore.
// This is like the inferred shapes, but with actual values. // This is like the inferred shapes, but with actual values.
@ -67,7 +67,7 @@ pub fn asyncMain() !void {
defer zml.aio.unloadBuffers(&model_weights); // for good practice defer zml.aio.unloadBuffers(&model_weights); // for good practice
// Wait for compilation to finish // Wait for compilation to finish
const compiled = try compilation.awaitt(); const compiled = try compilation.await();
// pass the model weights to the compiled module to create an executable module // pass the model weights to the compiled module to create an executable module
var executable = compiled.prepare(model_weights); var executable = compiled.prepare(model_weights);