Update docs and example scripts (including test files) to remove async helpers asynk, asyncc, awaitt, and await_
This commit is contained in:
parent
6e15123fb3
commit
090d7748d5
@ -118,13 +118,13 @@ model. Put the following in `my_project/torch2zml.zig`.
|
||||
const std = @import("std");
|
||||
const log = std.log;
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const zml = @import("zml");
|
||||
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
try asynk.AsyncThread.main(gpa.allocator(), asyncMain, .{});
|
||||
try async.AsyncThread.main(gpa.allocator(), asyncMain, .{});
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
|
||||
@ -99,10 +99,7 @@ Let's start by writing some Zig code, importing ZML and often-used modules:
|
||||
```zig
|
||||
const std = @import("std");
|
||||
const zml = @import("zml");
|
||||
const asynk = @import("async");
|
||||
|
||||
// shortcut to the asyncc function in the asynk module
|
||||
const asyncc = asynk.asyncc;
|
||||
const async = @import("async");
|
||||
```
|
||||
|
||||
You will probably use the lines above in all ZML projects. Also, note that **ZML is
|
||||
@ -154,7 +151,7 @@ like this:
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
try asynk.AsyncThread.main(gpa.allocator(), asyncMain);
|
||||
try async.AsyncThread.main(gpa.allocator(), asyncMain);
|
||||
}
|
||||
|
||||
|
||||
@ -251,7 +248,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
|
||||
|
||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||
// The shape of the input tensor has to be passed in manually.
|
||||
var compilation = try asyncc(
|
||||
var compilation = try async.async(
|
||||
zml.compileModel,
|
||||
.{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
|
||||
);
|
||||
@ -263,11 +260,11 @@ var model_weights = try zml.aio.loadBuffers(Layer, .{}, bs, arena, platform);
|
||||
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
||||
|
||||
// Wait for compilation to finish
|
||||
const compiled = try compilation.awaitt();
|
||||
const compiled = try compilation.await();
|
||||
```
|
||||
|
||||
Compiling is happening in the background via the `asyncc` function. We call
|
||||
`asyncc` with the `zml.compileModel` function and its arguments
|
||||
Compilation happens in the background via the `async` function. We call
|
||||
`async` with the `zml.compileModel` function and its arguments
|
||||
separately. The arguments themselves are basically the shapes of the weights in
|
||||
the BufferStore, the `.forward` function name in order to compile
|
||||
`Layer.forward`, the shape of the input tensor(s), and the platform for which to
|
||||
@ -371,7 +368,7 @@ top of the Zig file:
|
||||
|
||||
```zig
|
||||
const zml = @import("zml");
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
```
|
||||
|
||||
|
||||
@ -418,9 +415,7 @@ You can access the complete source code of this walkthrough here:
|
||||
```zig
|
||||
const std = @import("std");
|
||||
const zml = @import("zml");
|
||||
const asynk = @import("async");
|
||||
|
||||
const asyncc = asynk.asyncc;
|
||||
const async = @import("async");
|
||||
|
||||
/// Model definition
|
||||
const Layer = struct {
|
||||
@ -439,7 +434,7 @@ const Layer = struct {
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
defer _ = gpa.deinit();
|
||||
try asynk.AsyncThread.main(gpa.allocator(), asyncMain);
|
||||
try async.AsyncThread.main(gpa.allocator(), asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -482,7 +477,7 @@ pub fn asyncMain() !void {
|
||||
|
||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||
// The shape of the input tensor has to be passed in manually.
|
||||
var compilation = try asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
||||
var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
||||
|
||||
// Produce a bufferized weights struct from the fake BufferStore.
|
||||
// This is like the inferred shapes, but with actual values.
|
||||
@ -491,7 +486,7 @@ pub fn asyncMain() !void {
|
||||
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
||||
|
||||
// Wait for compilation to finish
|
||||
const compiled = try compilation.awaitt();
|
||||
const compiled = try compilation.await();
|
||||
|
||||
// pass the model weights to the compiled module to create an executable
|
||||
// module
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
const std = @import("std");
|
||||
const zml = @import("zml");
|
||||
const stdx = @import("stdx");
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const flags = stdx.flags;
|
||||
|
||||
// set log level to debug to print the generated IR
|
||||
pub const std_options: std.Options = .{
|
||||
.log_level = .warn,
|
||||
.logFn = asynk.logFn(std.log.defaultLog),
|
||||
.logFn = async.logFn(std.log.defaultLog),
|
||||
};
|
||||
|
||||
pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
|
||||
@ -15,7 +15,7 @@ pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
|
||||
}
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -53,10 +53,10 @@ pub fn asyncMain() !void {
|
||||
// Start compiling.
|
||||
// The shape of the input tensor has to be passed in manually.
|
||||
timer.reset();
|
||||
var compilation = try asynk.asyncc(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
|
||||
var compilation = try async.async(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
|
||||
|
||||
// Wait for compilation to finish
|
||||
const executable = try compilation.awaitt();
|
||||
const executable = try compilation.await();
|
||||
defer executable.deinit();
|
||||
const compilation_elapsed = timer.lap() / std.time.ns_per_ms;
|
||||
std.debug.print("-" ** 160 ++ "\n\n", .{});
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
const std = @import("std");
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const runtimes = @import("runtimes");
|
||||
const zml = @import("zml");
|
||||
const cu = zml.platform_specific;
|
||||
|
||||
pub const std_options: std.Options = .{
|
||||
.log_level = .info,
|
||||
.logFn = asynk.logFn(std.log.defaultLog),
|
||||
.logFn = async.logFn(std.log.defaultLog),
|
||||
};
|
||||
|
||||
const log = std.log.scoped(.@"examples/custom_call");
|
||||
@ -126,7 +126,7 @@ pub fn grayscale(rgb: zml.Tensor) zml.Tensor {
|
||||
}
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.smp_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.smp_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
const std = @import("std");
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const clap = @import("clap");
|
||||
const stdx = @import("stdx");
|
||||
const zml = @import("zml");
|
||||
@ -19,7 +19,7 @@ const log = std.log.scoped(.llama);
|
||||
|
||||
pub const std_options: std.Options = .{
|
||||
.log_level = .info,
|
||||
.logFn = asynk.logFn(std.log.defaultLog),
|
||||
.logFn = async.logFn(std.log.defaultLog),
|
||||
};
|
||||
|
||||
const params = clap.parseParamsComptime(
|
||||
@ -152,7 +152,7 @@ pub fn generateText(
|
||||
}
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -198,7 +198,7 @@ pub fn asyncMain() !void {
|
||||
|
||||
const model_weights_path = b: {
|
||||
const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" });
|
||||
if (asynk.File.access(simple_path, .{})) {
|
||||
if (async.File.access(simple_path, .{})) {
|
||||
break :b simple_path;
|
||||
} else |_| {
|
||||
allocator.free(simple_path);
|
||||
@ -213,7 +213,7 @@ pub fn asyncMain() !void {
|
||||
defer allocator.free(model_tokenizer_path);
|
||||
|
||||
const config = blk: {
|
||||
var config_json_file = try asynk.File.open(model_config_path, .{ .mode = .read_only });
|
||||
var config_json_file = try async.File.open(model_config_path, .{ .mode = .read_only });
|
||||
defer config_json_file.close() catch unreachable;
|
||||
var config_json_buffer: [256]u8 = undefined;
|
||||
var config_reader = config_json_file.reader(&config_json_buffer);
|
||||
@ -276,7 +276,7 @@ pub fn asyncMain() !void {
|
||||
|
||||
// Compile the model twice, one for prefill, one for generation.
|
||||
var start = try std.time.Timer.start();
|
||||
var fut_mod_prefill = try asynk.asyncc(zml.compileModel, .{
|
||||
var fut_mod_prefill = try async.async(zml.compileModel, .{
|
||||
allocator, llama.LlamaLM.forward, llama_tensors,
|
||||
.{
|
||||
prefill_tokens_shape,
|
||||
@ -287,7 +287,7 @@ pub fn asyncMain() !void {
|
||||
platform,
|
||||
});
|
||||
|
||||
var fut_mod = try asynk.asyncc(zml.compileModel, .{
|
||||
var fut_mod = try async.async(zml.compileModel, .{
|
||||
allocator, llama.LlamaLM.forward, llama_tensors,
|
||||
.{
|
||||
gen_tokens_shape,
|
||||
@ -304,9 +304,9 @@ pub fn asyncMain() !void {
|
||||
defer zml.aio.unloadBuffers(&llama_buffers);
|
||||
log.info("✅\tLoaded weights in {D}", .{start.read()});
|
||||
|
||||
var llama_module_prefill = (try fut_mod_prefill.awaitt()).prepare(llama_buffers);
|
||||
var llama_module_prefill = (try fut_mod_prefill.await()).prepare(llama_buffers);
|
||||
defer llama_module_prefill.deinit();
|
||||
var llama_module = (try fut_mod.awaitt()).prepare(llama_buffers);
|
||||
var llama_module = (try fut_mod.await()).prepare(llama_buffers);
|
||||
defer llama_module.deinit();
|
||||
log.info("✅\tCompiled model in {D}", .{start.read()});
|
||||
log.info("Creating KvCache", .{});
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const std = @import("std");
|
||||
const stdx = @import("stdx");
|
||||
const zml = @import("zml");
|
||||
@ -10,7 +10,7 @@ const LlamaLM = llama_mod.LlamaLM;
|
||||
const Tensor = zml.Tensor;
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -58,7 +58,7 @@ pub fn asyncMain() !void {
|
||||
const num_kv_heads: i64 = cli_args.num_kv_heads orelse buffer_store.metadata("num_kv_heads", .int) orelse num_heads;
|
||||
|
||||
const config = blk: {
|
||||
var config_json_file = try asynk.File.open(cli_args.config, .{ .mode = .read_only });
|
||||
var config_json_file = try async.File.open(cli_args.config, .{ .mode = .read_only });
|
||||
defer config_json_file.close() catch unreachable;
|
||||
var reader = std.json.reader(allocator, config_json_file.reader());
|
||||
defer reader.deinit();
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
const std = @import("std");
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const zml = @import("zml");
|
||||
|
||||
const log = std.log.scoped(.mnist);
|
||||
|
||||
pub const std_options: std.Options = .{
|
||||
.log_level = .info,
|
||||
.logFn = asynk.logFn(std.log.defaultLog),
|
||||
.logFn = async.logFn(std.log.defaultLog),
|
||||
};
|
||||
|
||||
/// Model definition
|
||||
@ -37,7 +37,7 @@ const Mnist = struct {
|
||||
};
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -75,14 +75,14 @@ pub fn asyncMain() !void {
|
||||
// Start compiling
|
||||
log.info("Compiling model to MLIR....", .{});
|
||||
var start_time = try std.time.Timer.start();
|
||||
var compilation = try asynk.asyncc(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
|
||||
var compilation = try async.async(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
|
||||
|
||||
// While compiling, start loading weights on the platform
|
||||
var model_weights = try zml.aio.loadModelBuffers(Mnist, mnist_model, buffer_store, arena, platform);
|
||||
defer zml.aio.unloadBuffers(&model_weights);
|
||||
|
||||
// Wait for end of compilation and end of weights loading.
|
||||
const compiled_mnist = try compilation.awaitt();
|
||||
const compiled_mnist = try compilation.await();
|
||||
log.info("✅ Compiled model in {d}ms", .{start_time.read() / std.time.ns_per_ms});
|
||||
|
||||
const mnist = compiled_mnist.prepare(model_weights);
|
||||
@ -92,7 +92,7 @@ pub fn asyncMain() !void {
|
||||
log.info("Starting inference...", .{});
|
||||
|
||||
// Load a random digit image from the dataset.
|
||||
const dataset = try asynk.File.open(t10kfilename, .{ .mode = .read_only });
|
||||
const dataset = try async.File.open(t10kfilename, .{ .mode = .read_only });
|
||||
defer dataset.close() catch unreachable;
|
||||
var rng = std.Random.Xoshiro256.init(@intCast(std.time.timestamp()));
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
const std = @import("std");
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const clap = @import("clap");
|
||||
const stdx = @import("stdx");
|
||||
const zml = @import("zml");
|
||||
@ -15,7 +15,7 @@ pub const std_options: std.Options = .{
|
||||
.log_scope_levels = &[_]std.log.ScopeLevel{
|
||||
.{ .scope = .modernbert, .level = .info },
|
||||
},
|
||||
.logFn = asynk.logFn(std.log.defaultLog),
|
||||
.logFn = async.logFn(std.log.defaultLog),
|
||||
};
|
||||
|
||||
const params = clap.parseParamsComptime(
|
||||
@ -38,7 +38,7 @@ const clap_parsers = .{
|
||||
};
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -136,7 +136,7 @@ pub fn asyncMain() !void {
|
||||
|
||||
// Compile the model
|
||||
log.info("\tCompiling ModernBERT model...", .{});
|
||||
var fut_mod = try asynk.asyncc(zml.compile, .{
|
||||
var fut_mod = try async.async(zml.compile, .{
|
||||
allocator,
|
||||
modernbert.ModernBertForMaskedLM.forward,
|
||||
.{modernbert_options},
|
||||
@ -144,7 +144,7 @@ pub fn asyncMain() !void {
|
||||
tensor_store,
|
||||
platform,
|
||||
});
|
||||
var bert_module = (try fut_mod.awaitt()).prepare(bert_weights);
|
||||
var bert_module = (try fut_mod.await()).prepare(bert_weights);
|
||||
defer bert_module.deinit();
|
||||
log.info("✅\tLoaded weights and compiled model in {D}", .{start.read()});
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
const std = @import("std");
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const stdx = @import("stdx");
|
||||
const zml = @import("zml");
|
||||
const Tensor = zml.Tensor;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
const clap = @import("clap");
|
||||
const std = @import("std");
|
||||
const zml = @import("zml");
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const log = std.log;
|
||||
const Tensor = zml.Tensor;
|
||||
const modernbert_module = @import("modernbert.zig");
|
||||
@ -20,7 +20,7 @@ fn printUsageAndExit(stderr: anytype) noreturn {
|
||||
std.process.exit(0);
|
||||
}
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
const std = @import("std");
|
||||
|
||||
const asynk = @import("async");
|
||||
const async = @import("async");
|
||||
const zml = @import("zml");
|
||||
|
||||
/// Model definition
|
||||
@ -18,7 +18,7 @@ const Layer = struct {
|
||||
};
|
||||
|
||||
pub fn main() !void {
|
||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||
}
|
||||
|
||||
pub fn asyncMain() !void {
|
||||
@ -58,7 +58,7 @@ pub fn asyncMain() !void {
|
||||
|
||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||
// The shape of the input tensor has to be passed in manually.
|
||||
var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
||||
var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
||||
|
||||
// Produce a bufferized weights struct from the fake BufferStore.
|
||||
// This is like the inferred shapes, but with actual values.
|
||||
@ -67,7 +67,7 @@ pub fn asyncMain() !void {
|
||||
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
||||
|
||||
// Wait for compilation to finish
|
||||
const compiled = try compilation.awaitt();
|
||||
const compiled = try compilation.await();
|
||||
|
||||
// pass the model weights to the compiled module to create an executable module
|
||||
var executable = compiled.prepare(model_weights);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user