Update docs and example scripts (including test files) to remove async helpers asynk, asyncc, awaitt, and await_
This commit is contained in:
parent
6e15123fb3
commit
090d7748d5
@ -118,13 +118,13 @@ model. Put the following in `my_project/torch2zml.zig`.
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const log = std.log;
|
const log = std.log;
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||||
defer _ = gpa.deinit();
|
defer _ = gpa.deinit();
|
||||||
try asynk.AsyncThread.main(gpa.allocator(), asyncMain, .{});
|
try async.AsyncThread.main(gpa.allocator(), asyncMain, .{});
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
|
|||||||
@ -99,10 +99,7 @@ Let's start by writing some Zig code, importing ZML and often-used modules:
|
|||||||
```zig
|
```zig
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
|
|
||||||
// shortcut to the asyncc function in the asynk module
|
|
||||||
const asyncc = asynk.asyncc;
|
|
||||||
```
|
```
|
||||||
|
|
||||||
You will probably use the lines above in all ZML projects. Also, note that **ZML is
|
You will probably use the lines above in all ZML projects. Also, note that **ZML is
|
||||||
@ -154,7 +151,7 @@ like this:
|
|||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||||
defer _ = gpa.deinit();
|
defer _ = gpa.deinit();
|
||||||
try asynk.AsyncThread.main(gpa.allocator(), asyncMain);
|
try async.AsyncThread.main(gpa.allocator(), asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -251,7 +248,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
|
|||||||
|
|
||||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
var compilation = try asyncc(
|
var compilation = try async.async(
|
||||||
zml.compileModel,
|
zml.compileModel,
|
||||||
.{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
|
.{ allocator, Layer.forward, model_shapes, .{input_shape}, platform },
|
||||||
);
|
);
|
||||||
@ -263,11 +260,11 @@ var model_weights = try zml.aio.loadBuffers(Layer, .{}, bs, arena, platform);
|
|||||||
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
||||||
|
|
||||||
// Wait for compilation to finish
|
// Wait for compilation to finish
|
||||||
const compiled = try compilation.awaitt();
|
const compiled = try compilation.await();
|
||||||
```
|
```
|
||||||
|
|
||||||
Compilation happens in the background via the `asyncc` function. We call
|
Compilation happens in the background via the `async` function. We call
|
||||||
`asyncc` with the `zml.compileModel` function and its arguments
|
`async` with the `zml.compileModel` function and its arguments
|
||||||
separately. The arguments themselves are basically the shapes of the weights in
|
separately. The arguments themselves are basically the shapes of the weights in
|
||||||
the BufferStore, the `.forward` function name in order to compile
|
the BufferStore, the `.forward` function name in order to compile
|
||||||
`Layer.forward`, the shape of the input tensor(s), and the platform for which to
|
`Layer.forward`, the shape of the input tensor(s), and the platform for which to
|
||||||
@ -371,7 +368,7 @@ top of the Zig file:
|
|||||||
|
|
||||||
```zig
|
```zig
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
@ -418,9 +415,7 @@ You can access the complete source code of this walkthrough here:
|
|||||||
```zig
|
```zig
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
|
|
||||||
const asyncc = asynk.asyncc;
|
|
||||||
|
|
||||||
/// Model definition
|
/// Model definition
|
||||||
const Layer = struct {
|
const Layer = struct {
|
||||||
@ -439,7 +434,7 @@ const Layer = struct {
|
|||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||||
defer _ = gpa.deinit();
|
defer _ = gpa.deinit();
|
||||||
try asynk.AsyncThread.main(gpa.allocator(), asyncMain);
|
try async.AsyncThread.main(gpa.allocator(), asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -482,7 +477,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
var compilation = try asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
||||||
|
|
||||||
// Produce a bufferized weights struct from the fake BufferStore.
|
// Produce a bufferized weights struct from the fake BufferStore.
|
||||||
// This is like the inferred shapes, but with actual values.
|
// This is like the inferred shapes, but with actual values.
|
||||||
@ -491,7 +486,7 @@ pub fn asyncMain() !void {
|
|||||||
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
||||||
|
|
||||||
// Wait for compilation to finish
|
// Wait for compilation to finish
|
||||||
const compiled = try compilation.awaitt();
|
const compiled = try compilation.await();
|
||||||
|
|
||||||
// pass the model weights to the compiled module to create an executable
|
// pass the model weights to the compiled module to create an executable
|
||||||
// module
|
// module
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const stdx = @import("stdx");
|
const stdx = @import("stdx");
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const flags = stdx.flags;
|
const flags = stdx.flags;
|
||||||
|
|
||||||
// set log level to debug to print the generated IR
|
// set log level to debug to print the generated IR
|
||||||
pub const std_options: std.Options = .{
|
pub const std_options: std.Options = .{
|
||||||
.log_level = .warn,
|
.log_level = .warn,
|
||||||
.logFn = asynk.logFn(std.log.defaultLog),
|
.logFn = async.logFn(std.log.defaultLog),
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
|
pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
|
||||||
@ -15,7 +15,7 @@ pub fn benchmark(a: zml.Tensor, b: zml.Tensor) zml.Tensor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -53,10 +53,10 @@ pub fn asyncMain() !void {
|
|||||||
// Start compiling.
|
// Start compiling.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
timer.reset();
|
timer.reset();
|
||||||
var compilation = try asynk.asyncc(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
|
var compilation = try async.async(zml.compileFn, .{ allocator, benchmark, .{ a_shape, b_shape }, platform });
|
||||||
|
|
||||||
// Wait for compilation to finish
|
// Wait for compilation to finish
|
||||||
const executable = try compilation.awaitt();
|
const executable = try compilation.await();
|
||||||
defer executable.deinit();
|
defer executable.deinit();
|
||||||
const compilation_elapsed = timer.lap() / std.time.ns_per_ms;
|
const compilation_elapsed = timer.lap() / std.time.ns_per_ms;
|
||||||
std.debug.print("-" ** 160 ++ "\n\n", .{});
|
std.debug.print("-" ** 160 ++ "\n\n", .{});
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const runtimes = @import("runtimes");
|
const runtimes = @import("runtimes");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const cu = zml.platform_specific;
|
const cu = zml.platform_specific;
|
||||||
|
|
||||||
pub const std_options: std.Options = .{
|
pub const std_options: std.Options = .{
|
||||||
.log_level = .info,
|
.log_level = .info,
|
||||||
.logFn = asynk.logFn(std.log.defaultLog),
|
.logFn = async.logFn(std.log.defaultLog),
|
||||||
};
|
};
|
||||||
|
|
||||||
const log = std.log.scoped(.@"examples/custom_call");
|
const log = std.log.scoped(.@"examples/custom_call");
|
||||||
@ -126,7 +126,7 @@ pub fn grayscale(rgb: zml.Tensor) zml.Tensor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.smp_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.smp_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const clap = @import("clap");
|
const clap = @import("clap");
|
||||||
const stdx = @import("stdx");
|
const stdx = @import("stdx");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
@ -19,7 +19,7 @@ const log = std.log.scoped(.llama);
|
|||||||
|
|
||||||
pub const std_options: std.Options = .{
|
pub const std_options: std.Options = .{
|
||||||
.log_level = .info,
|
.log_level = .info,
|
||||||
.logFn = asynk.logFn(std.log.defaultLog),
|
.logFn = async.logFn(std.log.defaultLog),
|
||||||
};
|
};
|
||||||
|
|
||||||
const params = clap.parseParamsComptime(
|
const params = clap.parseParamsComptime(
|
||||||
@ -152,7 +152,7 @@ pub fn generateText(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -198,7 +198,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
const model_weights_path = b: {
|
const model_weights_path = b: {
|
||||||
const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" });
|
const simple_path = try std.fs.path.join(allocator, &.{ hf_model_path, "model.safetensors" });
|
||||||
if (asynk.File.access(simple_path, .{})) {
|
if (async.File.access(simple_path, .{})) {
|
||||||
break :b simple_path;
|
break :b simple_path;
|
||||||
} else |_| {
|
} else |_| {
|
||||||
allocator.free(simple_path);
|
allocator.free(simple_path);
|
||||||
@ -213,7 +213,7 @@ pub fn asyncMain() !void {
|
|||||||
defer allocator.free(model_tokenizer_path);
|
defer allocator.free(model_tokenizer_path);
|
||||||
|
|
||||||
const config = blk: {
|
const config = blk: {
|
||||||
var config_json_file = try asynk.File.open(model_config_path, .{ .mode = .read_only });
|
var config_json_file = try async.File.open(model_config_path, .{ .mode = .read_only });
|
||||||
defer config_json_file.close() catch unreachable;
|
defer config_json_file.close() catch unreachable;
|
||||||
var config_json_buffer: [256]u8 = undefined;
|
var config_json_buffer: [256]u8 = undefined;
|
||||||
var config_reader = config_json_file.reader(&config_json_buffer);
|
var config_reader = config_json_file.reader(&config_json_buffer);
|
||||||
@ -276,7 +276,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
// Compile the model twice, one for prefill, one for generation.
|
// Compile the model twice, one for prefill, one for generation.
|
||||||
var start = try std.time.Timer.start();
|
var start = try std.time.Timer.start();
|
||||||
var fut_mod_prefill = try asynk.asyncc(zml.compileModel, .{
|
var fut_mod_prefill = try async.async(zml.compileModel, .{
|
||||||
allocator, llama.LlamaLM.forward, llama_tensors,
|
allocator, llama.LlamaLM.forward, llama_tensors,
|
||||||
.{
|
.{
|
||||||
prefill_tokens_shape,
|
prefill_tokens_shape,
|
||||||
@ -287,7 +287,7 @@ pub fn asyncMain() !void {
|
|||||||
platform,
|
platform,
|
||||||
});
|
});
|
||||||
|
|
||||||
var fut_mod = try asynk.asyncc(zml.compileModel, .{
|
var fut_mod = try async.async(zml.compileModel, .{
|
||||||
allocator, llama.LlamaLM.forward, llama_tensors,
|
allocator, llama.LlamaLM.forward, llama_tensors,
|
||||||
.{
|
.{
|
||||||
gen_tokens_shape,
|
gen_tokens_shape,
|
||||||
@ -304,9 +304,9 @@ pub fn asyncMain() !void {
|
|||||||
defer zml.aio.unloadBuffers(&llama_buffers);
|
defer zml.aio.unloadBuffers(&llama_buffers);
|
||||||
log.info("✅\tLoaded weights in {D}", .{start.read()});
|
log.info("✅\tLoaded weights in {D}", .{start.read()});
|
||||||
|
|
||||||
var llama_module_prefill = (try fut_mod_prefill.awaitt()).prepare(llama_buffers);
|
var llama_module_prefill = (try fut_mod_prefill.await()).prepare(llama_buffers);
|
||||||
defer llama_module_prefill.deinit();
|
defer llama_module_prefill.deinit();
|
||||||
var llama_module = (try fut_mod.awaitt()).prepare(llama_buffers);
|
var llama_module = (try fut_mod.await()).prepare(llama_buffers);
|
||||||
defer llama_module.deinit();
|
defer llama_module.deinit();
|
||||||
log.info("✅\tCompiled model in {D}", .{start.read()});
|
log.info("✅\tCompiled model in {D}", .{start.read()});
|
||||||
log.info("Creating KvCache", .{});
|
log.info("Creating KvCache", .{});
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const stdx = @import("stdx");
|
const stdx = @import("stdx");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
@ -10,7 +10,7 @@ const LlamaLM = llama_mod.LlamaLM;
|
|||||||
const Tensor = zml.Tensor;
|
const Tensor = zml.Tensor;
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -58,7 +58,7 @@ pub fn asyncMain() !void {
|
|||||||
const num_kv_heads: i64 = cli_args.num_kv_heads orelse buffer_store.metadata("num_kv_heads", .int) orelse num_heads;
|
const num_kv_heads: i64 = cli_args.num_kv_heads orelse buffer_store.metadata("num_kv_heads", .int) orelse num_heads;
|
||||||
|
|
||||||
const config = blk: {
|
const config = blk: {
|
||||||
var config_json_file = try asynk.File.open(cli_args.config, .{ .mode = .read_only });
|
var config_json_file = try async.File.open(cli_args.config, .{ .mode = .read_only });
|
||||||
defer config_json_file.close() catch unreachable;
|
defer config_json_file.close() catch unreachable;
|
||||||
var reader = std.json.reader(allocator, config_json_file.reader());
|
var reader = std.json.reader(allocator, config_json_file.reader());
|
||||||
defer reader.deinit();
|
defer reader.deinit();
|
||||||
|
|||||||
@ -1,13 +1,13 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
|
|
||||||
const log = std.log.scoped(.mnist);
|
const log = std.log.scoped(.mnist);
|
||||||
|
|
||||||
pub const std_options: std.Options = .{
|
pub const std_options: std.Options = .{
|
||||||
.log_level = .info,
|
.log_level = .info,
|
||||||
.logFn = asynk.logFn(std.log.defaultLog),
|
.logFn = async.logFn(std.log.defaultLog),
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Model definition
|
/// Model definition
|
||||||
@ -37,7 +37,7 @@ const Mnist = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -75,14 +75,14 @@ pub fn asyncMain() !void {
|
|||||||
// Start compiling
|
// Start compiling
|
||||||
log.info("Compiling model to MLIR....", .{});
|
log.info("Compiling model to MLIR....", .{});
|
||||||
var start_time = try std.time.Timer.start();
|
var start_time = try std.time.Timer.start();
|
||||||
var compilation = try asynk.asyncc(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
|
var compilation = try async.async(zml.compile, .{ allocator, Mnist.forward, .{}, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
|
||||||
|
|
||||||
// While compiling, start loading weights on the platform
|
// While compiling, start loading weights on the platform
|
||||||
var model_weights = try zml.aio.loadModelBuffers(Mnist, mnist_model, buffer_store, arena, platform);
|
var model_weights = try zml.aio.loadModelBuffers(Mnist, mnist_model, buffer_store, arena, platform);
|
||||||
defer zml.aio.unloadBuffers(&model_weights);
|
defer zml.aio.unloadBuffers(&model_weights);
|
||||||
|
|
||||||
// Wait for end of compilation and end of weights loading.
|
// Wait for end of compilation and end of weights loading.
|
||||||
const compiled_mnist = try compilation.awaitt();
|
const compiled_mnist = try compilation.await();
|
||||||
log.info("✅ Compiled model in {d}ms", .{start_time.read() / std.time.ns_per_ms});
|
log.info("✅ Compiled model in {d}ms", .{start_time.read() / std.time.ns_per_ms});
|
||||||
|
|
||||||
const mnist = compiled_mnist.prepare(model_weights);
|
const mnist = compiled_mnist.prepare(model_weights);
|
||||||
@ -92,7 +92,7 @@ pub fn asyncMain() !void {
|
|||||||
log.info("Starting inference...", .{});
|
log.info("Starting inference...", .{});
|
||||||
|
|
||||||
// Load a random digit image from the dataset.
|
// Load a random digit image from the dataset.
|
||||||
const dataset = try asynk.File.open(t10kfilename, .{ .mode = .read_only });
|
const dataset = try async.File.open(t10kfilename, .{ .mode = .read_only });
|
||||||
defer dataset.close() catch unreachable;
|
defer dataset.close() catch unreachable;
|
||||||
var rng = std.Random.Xoshiro256.init(@intCast(std.time.timestamp()));
|
var rng = std.Random.Xoshiro256.init(@intCast(std.time.timestamp()));
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const clap = @import("clap");
|
const clap = @import("clap");
|
||||||
const stdx = @import("stdx");
|
const stdx = @import("stdx");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
@ -15,7 +15,7 @@ pub const std_options: std.Options = .{
|
|||||||
.log_scope_levels = &[_]std.log.ScopeLevel{
|
.log_scope_levels = &[_]std.log.ScopeLevel{
|
||||||
.{ .scope = .modernbert, .level = .info },
|
.{ .scope = .modernbert, .level = .info },
|
||||||
},
|
},
|
||||||
.logFn = asynk.logFn(std.log.defaultLog),
|
.logFn = async.logFn(std.log.defaultLog),
|
||||||
};
|
};
|
||||||
|
|
||||||
const params = clap.parseParamsComptime(
|
const params = clap.parseParamsComptime(
|
||||||
@ -38,7 +38,7 @@ const clap_parsers = .{
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -136,7 +136,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
// Compile the model
|
// Compile the model
|
||||||
log.info("\tCompiling ModernBERT model...", .{});
|
log.info("\tCompiling ModernBERT model...", .{});
|
||||||
var fut_mod = try asynk.asyncc(zml.compile, .{
|
var fut_mod = try async.async(zml.compile, .{
|
||||||
allocator,
|
allocator,
|
||||||
modernbert.ModernBertForMaskedLM.forward,
|
modernbert.ModernBertForMaskedLM.forward,
|
||||||
.{modernbert_options},
|
.{modernbert_options},
|
||||||
@ -144,7 +144,7 @@ pub fn asyncMain() !void {
|
|||||||
tensor_store,
|
tensor_store,
|
||||||
platform,
|
platform,
|
||||||
});
|
});
|
||||||
var bert_module = (try fut_mod.awaitt()).prepare(bert_weights);
|
var bert_module = (try fut_mod.await()).prepare(bert_weights);
|
||||||
defer bert_module.deinit();
|
defer bert_module.deinit();
|
||||||
log.info("✅\tLoaded weights and compiled model in {D}", .{start.read()});
|
log.info("✅\tLoaded weights and compiled model in {D}", .{start.read()});
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const stdx = @import("stdx");
|
const stdx = @import("stdx");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const Tensor = zml.Tensor;
|
const Tensor = zml.Tensor;
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
const clap = @import("clap");
|
const clap = @import("clap");
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const log = std.log;
|
const log = std.log;
|
||||||
const Tensor = zml.Tensor;
|
const Tensor = zml.Tensor;
|
||||||
const modernbert_module = @import("modernbert.zig");
|
const modernbert_module = @import("modernbert.zig");
|
||||||
@ -20,7 +20,7 @@ fn printUsageAndExit(stderr: anytype) noreturn {
|
|||||||
std.process.exit(0);
|
std.process.exit(0);
|
||||||
}
|
}
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
|
|
||||||
const asynk = @import("async");
|
const async = @import("async");
|
||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
|
|
||||||
/// Model definition
|
/// Model definition
|
||||||
@ -18,7 +18,7 @@ const Layer = struct {
|
|||||||
};
|
};
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
try async.AsyncThread.main(std.heap.c_allocator, asyncMain);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn asyncMain() !void {
|
pub fn asyncMain() !void {
|
||||||
@ -58,7 +58,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
var compilation = try async.async(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });
|
||||||
|
|
||||||
// Produce a bufferized weights struct from the fake BufferStore.
|
// Produce a bufferized weights struct from the fake BufferStore.
|
||||||
// This is like the inferred shapes, but with actual values.
|
// This is like the inferred shapes, but with actual values.
|
||||||
@ -67,7 +67,7 @@ pub fn asyncMain() !void {
|
|||||||
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
defer zml.aio.unloadBuffers(&model_weights); // for good practice
|
||||||
|
|
||||||
// Wait for compilation to finish
|
// Wait for compilation to finish
|
||||||
const compiled = try compilation.awaitt();
|
const compiled = try compilation.await();
|
||||||
|
|
||||||
// pass the model weights to the compiled module to create an executable module
|
// pass the model weights to the compiled module to create an executable module
|
||||||
var executable = compiled.prepare(model_weights);
|
var executable = compiled.prepare(model_weights);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user