Update tutorial and example code to use the new `asyncc` name and the `asyncGeneric` variant.

This commit is contained in:
Foke Singh 2023-05-08 16:58:45 +00:00
parent 5543c8192f
commit 672df8fa2f
6 changed files with 21 additions and 25 deletions

View File

@ -53,12 +53,12 @@ By the way, you can access the complete source code of this walkthrough here:
Before firing up our editor, let's quickly talk about a few basic ZML Before firing up our editor, let's quickly talk about a few basic ZML
fundamentals. fundamentals.
In ZML, we describe a _Module_, which represents our AI model, as a Zig In ZML, we describe a _Module_, which represents our AI model, as a Zig
`struct`. That struct can contain Tensor fields that are used for computation, `struct`. That struct can contain Tensor fields that are used for computation,
e.g. weights and biases. In the _forward_ function of a Module, we describe the e.g. weights and biases. In the _forward_ function of a Module, we describe the
computation by calling tensor operations like _mul_, _add_, _dotGeneral_, computation by calling tensor operations like _mul_, _add_, _dotGeneral_,
_conv2D_, etc., or even nested Modules. _conv2D_, etc., or even nested Modules.
ZML creates an MLIR representation of the computation when we compile the ZML creates an MLIR representation of the computation when we compile the
Module. For compilation, only the _Shapes_ of all tensors must be known. No Module. For compilation, only the _Shapes_ of all tensors must be known. No
@ -102,7 +102,7 @@ const zml = @import("zml");
const asynk = @import("async"); const asynk = @import("async");
// shortcut to the async_ function in the asynk module // shortcut to the async_ function in the asynk module
const async_ = asynk.async_; const asyncc = asynk.asyncc;
``` ```
You will probably use the lines above in all ZML projects. Also, note that **ZML is You will probably use the lines above in all ZML projects. Also, note that **ZML is
@ -198,7 +198,7 @@ We also initialize the ZML context `context` and get our CPU `platform`
automatically. automatically.
### The BufferStore ### The BufferStore
Next, we need to set up the concrete weight and bias tensors for our model. Next, we need to set up the concrete weight and bias tensors for our model.
Typically, we would load them from disk. But since our example works without Typically, we would load them from disk. But since our example works without
@ -251,7 +251,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
// Start compiling. This uses the inferred shapes from the BufferStore. // Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
var compilation = try async_( var compilation = try asyncc(
zml.compileModel, zml.compileModel,
.{ allocator, model_shapes, .forward, .{input_shape}, platform }, .{ allocator, model_shapes, .forward, .{input_shape}, platform },
); );
@ -278,7 +278,7 @@ compile (we used auto platform).
### Creating the Executable Model ### Creating the Executable Model
Now that we have compiled the module utilizing the shapes, we turn it into an Now that we have compiled the module utilizing the shapes, we turn it into an
executable. executable.
```zig ```zig
// pass the model weights to the compiled module to create an executable module // pass the model weights to the compiled module to create an executable module
@ -308,9 +308,9 @@ in device memory.
```zig ```zig
// prepare an input buffer // prepare an input buffer
// Here, we use zml.HostBuffer.fromSlice to show how you would create a // Here, we use zml.HostBuffer.fromSlice to show how you would create a
// HostBuffer with a specific shape from an array. // HostBuffer with a specific shape from an array.
// For situations where e.g. you have a [4]f16 array but need a .{2, 2} input // For situations where e.g. you have a [4]f16 array but need a .{2, 2} input
// shape. // shape.
var input = [3]f16{ 5.0, 5.0, 5.0 }; var input = [3]f16{ 5.0, 5.0, 5.0 };
var input_buffer = try zml.Buffer.from( var input_buffer = try zml.Buffer.from(
@ -420,7 +420,7 @@ const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const asynk = @import("async");
const async_ = asynk.async_; const asyncc = asynk.asyncc;
/// Model definition /// Model definition
const Layer = struct { const Layer = struct {
@ -482,7 +482,7 @@ pub fn asyncMain() !void {
// Start compiling. This uses the inferred shapes from the BufferStore. // Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
var compilation = try async_( var compilation = try asyncc(
zml.compileModel, zml.compileModel,
.{ allocator, model_shapes, .forward, .{input_shape}, platform }, .{ allocator, model_shapes, .forward, .{input_shape}, platform },
); );
@ -496,15 +496,15 @@ pub fn asyncMain() !void {
// Wait for compilation to finish // Wait for compilation to finish
const compiled = try compilation.await_(); const compiled = try compilation.await_();
// pass the model weights to the compiled module to create an executable // pass the model weights to the compiled module to create an executable
// module // module
var executable = try compiled.prepare(arena, model_weights); var executable = try compiled.prepare(arena, model_weights);
defer executable.deinit(); defer executable.deinit();
// prepare an input buffer // prepare an input buffer
// Here, we use zml.HostBuffer.fromSlice to show how you would create a // Here, we use zml.HostBuffer.fromSlice to show how you would create a
// HostBuffer with a specific shape from an array. // HostBuffer with a specific shape from an array.
// For situations where e.g. you have a [4]f16 array but need a .{2, 2} // For situations where e.g. you have a [4]f16 array but need a .{2, 2}
// input shape. // input shape.
var input = [3]f16{ 5.0, 5.0, 5.0 }; var input = [3]f16{ 5.0, 5.0, 5.0 };
var input_buffer = try zml.Buffer.from( var input_buffer = try zml.Buffer.from(

View File

@ -3,7 +3,7 @@ const zml = @import("zml");
const asynk = @import("async"); const asynk = @import("async");
const flags = @import("tigerbeetle/flags"); const flags = @import("tigerbeetle/flags");
const async_ = asynk.async_; const asyncc = asynk.asyncc;
// set log level to debug to print the generated IR // set log level to debug to print the generated IR
pub const std_options = .{ pub const std_options = .{
@ -92,7 +92,7 @@ pub fn asyncMain() !void {
// Start compiling. // Start compiling.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
timer.reset(); timer.reset();
var compilation = try async_(zml.module.compileModel, .{ allocator, Benchmark{}, .forward, .{ a_shape, b_shape }, platform }); var compilation = try asyncc(zml.module.compileModel, .{ allocator, Benchmark{}, .forward, .{ a_shape, b_shape }, platform });
// Wait for compilation to finish // Wait for compilation to finish
const compiled = try compilation.await_(); const compiled = try compilation.await_();

View File

@ -6,8 +6,6 @@ const asynk = @import("async");
const flags = @import("tigerbeetle/flags"); const flags = @import("tigerbeetle/flags");
const llama_mod = @import("llama.zig"); const llama_mod = @import("llama.zig");
const async_ = asynk.async_;
const LlamaLM = llama_mod.LlamaLM; const LlamaLM = llama_mod.LlamaLM;
const Llama = llama_mod.Llama; const Llama = llama_mod.Llama;
const KvCache = llama_mod.KvCache; const KvCache = llama_mod.KvCache;
@ -241,8 +239,8 @@ pub fn asyncMain() !void {
const rng_shape = Tensor.Rng.shape(); const rng_shape = Tensor.Rng.shape();
const compile_start = std.time.milliTimestamp(); const compile_start = std.time.milliTimestamp();
var fut_mod_prefill = try async_(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, null, rng_shape }, ts, platform }); var fut_mod_prefill = try asynk.asyncGeneric(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, null, rng_shape }, ts, platform });
var fut_mod = try async_(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, kv_cache_shape, rng_shape }, ts, platform }); var fut_mod = try asynk.asyncGeneric(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, kv_cache_shape, rng_shape }, ts, platform });
log.info("Starting loading weights", .{}); log.info("Starting loading weights", .{});
var llama_weights = try zml.aio.loadBuffers(LlamaLM, .{llama_options}, ts, model_arena, platform); var llama_weights = try zml.aio.loadBuffers(LlamaLM, .{llama_options}, ts, model_arena, platform);

View File

@ -2,7 +2,7 @@ const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const asynk = @import("async");
const async_ = asynk.async_; const asyncc = asynk.asyncc;
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var gpa = std.heap.GeneralPurposeAllocator(.{}){};

View File

@ -2,8 +2,6 @@ const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const asynk = @import("async");
const async_ = asynk.async_;
const show_mlir = true; const show_mlir = true;
/// Model definition /// Model definition
@ -115,7 +113,7 @@ pub fn asyncMain() !void {
} else { } else {
std.debug.print("Compiling model to MLIR....\r", .{}); std.debug.print("Compiling model to MLIR....\r", .{});
} }
var compilation = try async_(zml.compile, .{ allocator, Mnist, .{}, .forward, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform }); var compilation = try asynk.asyncGeneric(zml.compile, .{ allocator, Mnist, .{}, .forward, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
// Wait for end of compilation and end of weights loading. // Wait for end of compilation and end of weights loading.
const compiled_mnist = try compilation.await_(); const compiled_mnist = try compilation.await_();

View File

@ -2,7 +2,7 @@ const std = @import("std");
const zml = @import("zml"); const zml = @import("zml");
const asynk = @import("async"); const asynk = @import("async");
const async_ = asynk.async_; const asyncc = asynk.asyncc;
/// Model definition /// Model definition
const Layer = struct { const Layer = struct {
@ -65,7 +65,7 @@ pub fn asyncMain() !void {
// Start compiling. This uses the inferred shapes from the BufferStore. // Start compiling. This uses the inferred shapes from the BufferStore.
// The shape of the input tensor, we have to pass in manually. // The shape of the input tensor, we have to pass in manually.
var compilation = try async_(zml.compileModel, .{ allocator, model_shapes, .forward, .{input_shape}, platform }); var compilation = try asyncc(zml.compileModel, .{ allocator, model_shapes, .forward, .{input_shape}, platform });
// Produce a bufferized weights struct from the fake BufferStore. // Produce a bufferized weights struct from the fake BufferStore.
// This is like the inferred shapes, but with actual values. // This is like the inferred shapes, but with actual values.