Update docs and example scripts (including test files) to remove async helpers asynk, asyncc, awaitt, and await_

This commit is contained in:
Foke Singh 2025-09-02 10:27:40 +00:00
parent 6e15123fb3
commit 090d7748d5
11 changed files with 51 additions and 56 deletions

View File

@@ -118,13 +118,13 @@ model. Put the following in `my_project/torch2zml.zig`.
const std = @import("std");
const log = std.log;
const asynk = @import("async");
const async = @import("async");
const zml = @import("zml");
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
try asynk.AsyncThread.main(gpa.allocator(), asyncMain, .{});
try async.AsyncThread.main(gpa.allocator(), asyncMain, .{});
}
pub fn asyncMain() !void {

View File

@@ -99,10 +99,7 @@ Let's start by writing some Zig code, importing ZML and often-used modules:
```zig
const std = @import("std");
const zml = @import("zml");
const asynk = @import("async");
// shortcut to the asyncc function in the asynk module
const asyncc = asynk.asyncc;
const async = @import("async");
```
You will use above lines probably in all ZML projects. Also, note that **ZML is
@@ -154,7 +151,7 @@ like this:
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
try asynk.AsyncThread.main(gpa.allocator(), asyncMain);
try async.AsyncThread.main(gpa.allocator(), asyncMain);
}
@@ -251,7 +248,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
// Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually.
var compilation = try asyncc(
var compilation = try async.async(
zml.compileModel,
.{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
);
@@ -263,11 +260,11 @@ var model_weights = try zml.aio.loadBuffers(Layer, .{}, bs, arena, platform);
defer zml.aio.unloadBuffers(&model_weights); // for good practice
// Wait for compilation to finish
const compiled = try compilation.awaitt();
const compiled = try compilation.await();
```
Compiling is happening in the background via the `asyncc` function. We call
`asyncc` with the `zml.compileModel` function and its arguments
Compiling is happening in the background via the `async` function. We call
`async` with the `zml.compileModel` function and its arguments
separately. The arguments themselves are basically the shapes of the weights in
the BufferStore, the `.forward` function name in order to compile
`Layer.forward`, the shape of the input tensor(s), and the platform for which to
@@ -371,7 +368,7 @@ top of the Zig file:
```zig
const zml = @import("zml");
const asynk = @import("async");
const async = @import("async");
```
@@ -418,9 +415,7 @@ You can access the complete source code of this walkthrough here:
```zig
const std = @import("std");
const zml = @import("zml");
const asynk = @import("async");
const asyncc = asynk.asyncc;
const async = @import("async");
/// Model definition
const Layer = struct {
@@ -439,7 +434,7 @@ const Layer = struct {
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
try asynk.AsyncThread.main(gpa.allocator(), asyncMain);
try async.AsyncThread.main(gpa.allocator(), asyncMain);
}
pub fn asyncMain() !void {
@@ -482,7 +477,7 @@ pub fn asyncMain() !void {
// Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually.
var compilation = try asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
// Produce a bufferized weights struct from the fake BufferStore.
// This is like the inferred shapes, but with actual values.
@@ -491,7 +486,7 @@ pub fn asyncMain() !void {
defer zml.aio.unloadBuffers(&model_weights); // for good practice
// Wait for compilation to finish
const compiled = try compilation.awaitt();
const compiled = try compilation.await();
// pass the model weights to the compiled module to create an executable
// module

View File

@@ -1,13 +1,13 @@
const std = @import("std");
const zml = @import("zml");
const stdx = @import("stdx");
const asynk = @import("async");
const async = @import("async");
const flags = stdx.flags;
// set log level to debug to print the generated IR
pub const std_options: std.Options = .{
.log_level = .warn,
.logFn = asynk.logFn(std.log.defaultLog),
.logFn = async.logFn(std.log.defaultLog),
};
pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
@@ -15,7 +15,7 @@ pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
}
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {
@@ -53,10 +53,10 @@ pub fn asyncMain() !void {
// Start compiling.
// The shape of the input tensor, we have to pass in manually.
timer.reset();
var compilation = try asynk.asyncc(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
var compilation = try async.async(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
// Wait for compilation to finish
const executable = try compilation.awaitt();
const executable = try compilation.await();
defer executable.deinit();
const compilation_elapsed = timer.lap() / std.time.ns_per_ms;
std.debug.print("-" ** 160 ++ "\n\n", .{});

View File

@@ -1,13 +1,13 @@
const std = @import("std");
const asynk = @import("async");
const async = @import("async");
const runtimes = @import("runtimes");
const zml = @import("zml");
const cu = zml.platform_specific;
pub const std_options: std.Options = .{
.log_level = .info,
.logFn = asynk.logFn(std.log.defaultLog),
.logFn = async.logFn(std.log.defaultLog),
};
const log = std.log.scoped(.@"examples/custom_call");
@@ -126,7 +126,7 @@ pub fn grayscale(rgb: zml.Tensor) zml.Tensor {
}
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.smp_allocator, asyncMain);
try async.AsyncThread.main(std.heap.smp_allocator, asyncMain);
}
pub fn asyncMain() !void {

View File

@@ -1,6 +1,6 @@
const std = @import("std");
const asynk = @import("async");
const async = @import("async");
const clap = @import("clap");
const stdx = @import("stdx");
const zml = @import("zml");
@@ -19,7 +19,7 @@ const log = std.log.scoped(.llama);
pub const std_options: std.Options = .{
.log_level = .info,
.logFn = asynk.logFn(std.log.defaultLog),
.logFn = async.logFn(std.log.defaultLog),
};
const params = clap.parseParamsComptime(
@@ -152,7 +152,7 @@ pub fn generateText(
}
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {
@@ -198,7 +198,7 @@ pub fn asyncMain() !void {
const model_weights_path = b: {
const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" });
if (asynk.File.access(simple_path, .{})) {
if (async.File.access(simple_path, .{})) {
break :b simple_path;
} else |_| {
allocator.free(simple_path);
@@ -213,7 +213,7 @@ pub fn asyncMain() !void {
defer allocator.free(model_tokenizer_path);
const config = blk: {
var config_json_file = try asynk.File.open(model_config_path, .{ .mode = .read_only });
var config_json_file = try async.File.open(model_config_path, .{ .mode = .read_only });
defer config_json_file.close() catch unreachable;
var config_json_buffer: [256]u8 = undefined;
var config_reader = config_json_file.reader(&config_json_buffer);
@@ -276,7 +276,7 @@ pub fn asyncMain() !void {
// Compile the model twice, one for prefill, one for generation.
var start = try std.time.Timer.start();
var fut_mod_prefill = try asynk.asyncc(zml.compileModel, .{
var fut_mod_prefill = try async.async(zml.compileModel, .{
allocator, llama.LlamaLM.forward, llama_tensors,
.{
prefill_tokens_shape,
@@ -287,7 +287,7 @@ pub fn asyncMain() !void {
platform,
});
var fut_mod = try asynk.asyncc(zml.compileModel, .{
var fut_mod = try async.async(zml.compileModel, .{
allocator, llama.LlamaLM.forward, llama_tensors,
.{
gen_tokens_shape,
@@ -304,9 +304,9 @@ pub fn asyncMain() !void {
defer zml.aio.unloadBuffers(&llama_buffers);
log.info("\tLoaded weights in {D}", .{start.read()});
var llama_module_prefill = (try fut_mod_prefill.awaitt()).prepare(llama_buffers);
var llama_module_prefill = (try fut_mod_prefill.await()).prepare(llama_buffers);
defer llama_module_prefill.deinit();
var llama_module = (try fut_mod.awaitt()).prepare(llama_buffers);
var llama_module = (try fut_mod.await()).prepare(llama_buffers);
defer llama_module.deinit();
log.info("\tCompiled model in {D}", .{start.read()});
log.info("Creating KvCache", .{});

View File

@@ -1,4 +1,4 @@
const asynk = @import("async");
const async = @import("async");
const std = @import("std");
const stdx = @import("stdx");
const zml = @import("zml");
@@ -10,7 +10,7 @@ const LlamaLM = llama_mod.LlamaLM;
const Tensor = zml.Tensor;
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {
@@ -58,7 +58,7 @@ pub fn asyncMain() !void {
const num_kv_heads: i64 = cli_args.num_kv_heads orelse buffer_store.metadata("num_kv_heads", .int) orelse num_heads;
const config = blk: {
var config_json_file = try asynk.File.open(cli_args.config, .{ .mode = .read_only });
var config_json_file = try async.File.open(cli_args.config, .{ .mode = .read_only });
defer config_json_file.close() catch unreachable;
var reader = std.json.reader(allocator, config_json_file.reader());
defer reader.deinit();

View File

@@ -1,13 +1,13 @@
const std = @import("std");
const asynk = @import("async");
const async = @import("async");
const zml = @import("zml");
const log = std.log.scoped(.mnist);
pub const std_options: std.Options = .{
.log_level = .info,
.logFn = asynk.logFn(std.log.defaultLog),
.logFn = async.logFn(std.log.defaultLog),
};
/// Model definition
@@ -37,7 +37,7 @@ const Mnist = struct {
};
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {
@@ -75,14 +75,14 @@ pub fn asyncMain() !void {
// Start compiling
log.info("Compiling model to MLIR....", .{});
var start_time = try std.time.Timer.start();
var compilation = try asynk.asyncc(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
var compilation = try async.async(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
// While compiling, start loading weights on the platform
var model_weights = try zml.aio.loadModelBuffers(Mnist, mnist_model, buffer_store, arena, platform);
defer zml.aio.unloadBuffers(&model_weights);
// Wait for end of compilation and end of weights loading.
const compiled_mnist = try compilation.awaitt();
const compiled_mnist = try compilation.await();
log.info("✅ Compiled model in {d}ms", .{start_time.read() / std.time.ns_per_ms});
const mnist = compiled_mnist.prepare(model_weights);
@@ -92,7 +92,7 @@ pub fn asyncMain() !void {
log.info("Starting inference...", .{});
// Load a random digit image from the dataset.
const dataset = try asynk.File.open(t10kfilename, .{ .mode = .read_only });
const dataset = try async.File.open(t10kfilename, .{ .mode = .read_only });
defer dataset.close() catch unreachable;
var rng = std.Random.Xoshiro256.init(@intCast(std.time.timestamp()));

View File

@@ -1,6 +1,6 @@
const std = @import("std");
const asynk = @import("async");
const async = @import("async");
const clap = @import("clap");
const stdx = @import("stdx");
const zml = @import("zml");
@@ -15,7 +15,7 @@ pub const std_options: std.Options = .{
.log_scope_levels = &[_]std.log.ScopeLevel{
.{ .scope = .modernbert, .level = .info },
},
.logFn = asynk.logFn(std.log.defaultLog),
.logFn = async.logFn(std.log.defaultLog),
};
const params = clap.parseParamsComptime(
@@ -38,7 +38,7 @@ const clap_parsers = .{
};
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {
@@ -136,7 +136,7 @@ pub fn asyncMain() !void {
// Compile the model
log.info("\tCompiling ModernBERT model...", .{});
var fut_mod = try asynk.asyncc(zml.compile, .{
var fut_mod = try async.async(zml.compile, .{
allocator,
modernbert.ModernBertForMaskedLM.forward,
.{modernbert_options},
@@ -144,7 +144,7 @@ pub fn asyncMain() !void {
tensor_store,
platform,
});
var bert_module = (try fut_mod.awaitt()).prepare(bert_weights);
var bert_module = (try fut_mod.await()).prepare(bert_weights);
defer bert_module.deinit();
log.info("\tLoaded weights and compiled model in {D}", .{start.read()});

View File

@@ -1,6 +1,6 @@
const std = @import("std");
const asynk = @import("async");
const async = @import("async");
const stdx = @import("stdx");
const zml = @import("zml");
const Tensor = zml.Tensor;

View File

@@ -1,7 +1,7 @@
const clap = @import("clap");
const std = @import("std");
const zml = @import("zml");
const asynk = @import("async");
const async = @import("async");
const log = std.log;
const Tensor = zml.Tensor;
const modernbert_module = @import("modernbert.zig");
@@ -20,7 +20,7 @@ fn printUsageAndExit(stderr: anytype) noreturn {
std.process.exit(0);
}
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {

View File

@@ -1,6 +1,6 @@
const std = @import("std");
const asynk = @import("async");
const async = @import("async");
const zml = @import("zml");
/// Model definition
@@ -18,7 +18,7 @@ const Layer = struct {
};
pub fn main() !void {
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
pub fn asyncMain() !void {
@@ -58,7 +58,7 @@ pub fn asyncMain() !void {
// Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually.
var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
// Produce a bufferized weights struct from the fake BufferStore.
// This is like the inferred shapes, but with actual values.
@@ -67,7 +67,7 @@ pub fn asyncMain() !void {
defer zml.aio.unloadBuffers(&model_weights); // for good practice
// Wait for compilation to finish
const compiled = try compilation.awaitt();
const compiled = try compilation.await();
// pass the model weights to the compiled module to create an executable module
var executable = compiled.prepare(model_weights);