Update tutorial and example code to use the new asyncc name and Generic slugs.
This commit is contained in:
parent
5543c8192f
commit
672df8fa2f
@ -53,12 +53,12 @@ By the way, you can access the complete source code of this walkthrough here:
|
|||||||
|
|
||||||
Before firing up our editor, let's quickly talk about a few basic ZML
|
Before firing up our editor, let's quickly talk about a few basic ZML
|
||||||
fundamentals.
|
fundamentals.
|
||||||
|
|
||||||
In ZML, we describe a _Module_, which represents our AI model, as a Zig
|
In ZML, we describe a _Module_, which represents our AI model, as a Zig
|
||||||
`struct`. That struct can contain Tensor fields that are used for computation,
|
`struct`. That struct can contain Tensor fields that are used for computation,
|
||||||
e.g. weights and biases. In the _forward_ function of a Module, we describe the
|
e.g. weights and biases. In the _forward_ function of a Module, we describe the
|
||||||
computation by calling tensor operations like _mul_, _add_, _dotGeneral_,
|
computation by calling tensor operations like _mul_, _add_, _dotGeneral_,
|
||||||
_conv2D_, etc., or even nested Modules.
|
_conv2D_, etc., or even nested Modules.
|
||||||
|
|
||||||
ZML creates an MLIR representation of the computation when we compile the
|
ZML creates an MLIR representation of the computation when we compile the
|
||||||
Module. For compilation, only the _Shapes_ of all tensors must be known. No
|
Module. For compilation, only the _Shapes_ of all tensors must be known. No
|
||||||
@ -102,7 +102,7 @@ const zml = @import("zml");
|
|||||||
const asynk = @import("async");
|
const asynk = @import("async");
|
||||||
|
|
||||||
// shortcut to the async_ function in the asynk module
|
// shortcut to the asyncc function in the asynk module
|
||||||
const async_ = asynk.async_;
|
const asyncc = asynk.asyncc;
|
||||||
```
|
```
|
||||||
|
|
||||||
You will probably use the above lines in all ZML projects. Also, note that **ZML is
|
You will probably use the above lines in all ZML projects. Also, note that **ZML is
|
||||||
@ -198,7 +198,7 @@ We also initialize the ZML context `context` and get our CPU `platform`
|
|||||||
automatically.
|
automatically.
|
||||||
|
|
||||||
|
|
||||||
### The BufferStore
|
### The BufferStore
|
||||||
|
|
||||||
Next, we need to set up the concrete weight and bias tensors for our model.
|
Next, we need to set up the concrete weight and bias tensors for our model.
|
||||||
Typically, we would load them from disk. But since our example works without
|
Typically, we would load them from disk. But since our example works without
|
||||||
@ -251,7 +251,7 @@ const model_shapes = try zml.aio.populateModel(Layer, allocator, bs);
|
|||||||
|
|
||||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
var compilation = try async_(
|
var compilation = try asyncc(
|
||||||
zml.compileModel,
|
zml.compileModel,
|
||||||
.{ allocator, model_shapes, .forward, .{input_shape}, platform },
|
.{ allocator, model_shapes, .forward, .{input_shape}, platform },
|
||||||
);
|
);
|
||||||
@ -278,7 +278,7 @@ compile (we used auto platform).
|
|||||||
### Creating the Executable Model
|
### Creating the Executable Model
|
||||||
|
|
||||||
Now that we have compiled the module utilizing the shapes, we turn it into an
|
Now that we have compiled the module utilizing the shapes, we turn it into an
|
||||||
executable.
|
executable.
|
||||||
|
|
||||||
```zig
|
```zig
|
||||||
// pass the model weights to the compiled module to create an executable module
|
// pass the model weights to the compiled module to create an executable module
|
||||||
@ -308,9 +308,9 @@ in device memory.
|
|||||||
|
|
||||||
```zig
|
```zig
|
||||||
// prepare an input buffer
|
// prepare an input buffer
|
||||||
// Here, we use zml.HostBuffer.fromSlice to show how you would create a
|
// Here, we use zml.HostBuffer.fromSlice to show how you would create a
|
||||||
// HostBuffer with a specific shape from an array.
|
// HostBuffer with a specific shape from an array.
|
||||||
// For situations where e.g. you have a [4]f16 array but need a .{2, 2} input
|
// For situations where e.g. you have a [4]f16 array but need a .{2, 2} input
|
||||||
// shape.
|
// shape.
|
||||||
var input = [3]f16{ 5.0, 5.0, 5.0 };
|
var input = [3]f16{ 5.0, 5.0, 5.0 };
|
||||||
var input_buffer = try zml.Buffer.from(
|
var input_buffer = try zml.Buffer.from(
|
||||||
@ -420,7 +420,7 @@ const std = @import("std");
|
|||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const asynk = @import("async");
|
||||||
|
|
||||||
const async_ = asynk.async_;
|
const asyncc = asynk.asyncc;
|
||||||
|
|
||||||
/// Model definition
|
/// Model definition
|
||||||
const Layer = struct {
|
const Layer = struct {
|
||||||
@ -482,7 +482,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
var compilation = try async_(
|
var compilation = try asyncc(
|
||||||
zml.compileModel,
|
zml.compileModel,
|
||||||
.{ allocator, model_shapes, .forward, .{input_shape}, platform },
|
.{ allocator, model_shapes, .forward, .{input_shape}, platform },
|
||||||
);
|
);
|
||||||
@ -496,15 +496,15 @@ pub fn asyncMain() !void {
|
|||||||
// Wait for compilation to finish
|
// Wait for compilation to finish
|
||||||
const compiled = try compilation.await_();
|
const compiled = try compilation.await_();
|
||||||
|
|
||||||
// pass the model weights to the compiled module to create an executable
|
// pass the model weights to the compiled module to create an executable
|
||||||
// module
|
// module
|
||||||
var executable = try compiled.prepare(arena, model_weights);
|
var executable = try compiled.prepare(arena, model_weights);
|
||||||
defer executable.deinit();
|
defer executable.deinit();
|
||||||
|
|
||||||
// prepare an input buffer
|
// prepare an input buffer
|
||||||
// Here, we use zml.HostBuffer.fromSlice to show how you would create a
|
// Here, we use zml.HostBuffer.fromSlice to show how you would create a
|
||||||
// HostBuffer with a specific shape from an array.
|
// HostBuffer with a specific shape from an array.
|
||||||
// For situations where e.g. you have a [4]f16 array but need a .{2, 2}
|
// For situations where e.g. you have a [4]f16 array but need a .{2, 2}
|
||||||
// input shape.
|
// input shape.
|
||||||
var input = [3]f16{ 5.0, 5.0, 5.0 };
|
var input = [3]f16{ 5.0, 5.0, 5.0 };
|
||||||
var input_buffer = try zml.Buffer.from(
|
var input_buffer = try zml.Buffer.from(
|
||||||
|
|||||||
@ -3,7 +3,7 @@ const zml = @import("zml");
|
|||||||
const asynk = @import("async");
|
const asynk = @import("async");
|
||||||
const flags = @import("tigerbeetle/flags");
|
const flags = @import("tigerbeetle/flags");
|
||||||
|
|
||||||
const async_ = asynk.async_;
|
const asyncc = asynk.asyncc;
|
||||||
|
|
||||||
// set log level to debug to print the generated IR
|
// set log level to debug to print the generated IR
|
||||||
pub const std_options = .{
|
pub const std_options = .{
|
||||||
@ -92,7 +92,7 @@ pub fn asyncMain() !void {
|
|||||||
// Start compiling.
|
// Start compiling.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
timer.reset();
|
timer.reset();
|
||||||
var compilation = try async_(zml.module.compileModel, .{ allocator, Benchmark{}, .forward, .{ a_shape, b_shape }, platform });
|
var compilation = try asyncc(zml.module.compileModel, .{ allocator, Benchmark{}, .forward, .{ a_shape, b_shape }, platform });
|
||||||
|
|
||||||
// Wait for compilation to finish
|
// Wait for compilation to finish
|
||||||
const compiled = try compilation.await_();
|
const compiled = try compilation.await_();
|
||||||
|
|||||||
@ -6,8 +6,6 @@ const asynk = @import("async");
|
|||||||
const flags = @import("tigerbeetle/flags");
|
const flags = @import("tigerbeetle/flags");
|
||||||
const llama_mod = @import("llama.zig");
|
const llama_mod = @import("llama.zig");
|
||||||
|
|
||||||
const async_ = asynk.async_;
|
|
||||||
|
|
||||||
const LlamaLM = llama_mod.LlamaLM;
|
const LlamaLM = llama_mod.LlamaLM;
|
||||||
const Llama = llama_mod.Llama;
|
const Llama = llama_mod.Llama;
|
||||||
const KvCache = llama_mod.KvCache;
|
const KvCache = llama_mod.KvCache;
|
||||||
@ -241,8 +239,8 @@ pub fn asyncMain() !void {
|
|||||||
const rng_shape = Tensor.Rng.shape();
|
const rng_shape = Tensor.Rng.shape();
|
||||||
|
|
||||||
const compile_start = std.time.milliTimestamp();
|
const compile_start = std.time.milliTimestamp();
|
||||||
var fut_mod_prefill = try async_(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, null, rng_shape }, ts, platform });
|
var fut_mod_prefill = try asynk.asyncGeneric(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, null, rng_shape }, ts, platform });
|
||||||
var fut_mod = try async_(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, kv_cache_shape, rng_shape }, ts, platform });
|
var fut_mod = try asynk.asyncGeneric(zml.compile, .{ allocator, LlamaLM, .{llama_options}, .forward, .{ tokens_shape, token_idx_shape, kv_cache_shape, rng_shape }, ts, platform });
|
||||||
|
|
||||||
log.info("Starting loading weights", .{});
|
log.info("Starting loading weights", .{});
|
||||||
var llama_weights = try zml.aio.loadBuffers(LlamaLM, .{llama_options}, ts, model_arena, platform);
|
var llama_weights = try zml.aio.loadBuffers(LlamaLM, .{llama_options}, ts, model_arena, platform);
|
||||||
|
|||||||
@ -2,7 +2,7 @@ const std = @import("std");
|
|||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const asynk = @import("async");
|
||||||
|
|
||||||
const async_ = asynk.async_;
|
const asyncc = asynk.asyncc;
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||||
|
|||||||
@ -2,8 +2,6 @@ const std = @import("std");
|
|||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const asynk = @import("async");
|
||||||
|
|
||||||
const async_ = asynk.async_;
|
|
||||||
|
|
||||||
const show_mlir = true;
|
const show_mlir = true;
|
||||||
|
|
||||||
/// Model definition
|
/// Model definition
|
||||||
@ -115,7 +113,7 @@ pub fn asyncMain() !void {
|
|||||||
} else {
|
} else {
|
||||||
std.debug.print("Compiling model to MLIR....\r", .{});
|
std.debug.print("Compiling model to MLIR....\r", .{});
|
||||||
}
|
}
|
||||||
var compilation = try async_(zml.compile, .{ allocator, Mnist, .{}, .forward, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
|
var compilation = try asynk.asyncGeneric(zml.compile, .{ allocator, Mnist, .{}, .forward, .{zml.Shape.init(.{ 28, 28 }, .u8)}, buffer_store, platform });
|
||||||
|
|
||||||
// Wait for end of compilation and end of weights loading.
|
// Wait for end of compilation and end of weights loading.
|
||||||
const compiled_mnist = try compilation.await_();
|
const compiled_mnist = try compilation.await_();
|
||||||
|
|||||||
@ -2,7 +2,7 @@ const std = @import("std");
|
|||||||
const zml = @import("zml");
|
const zml = @import("zml");
|
||||||
const asynk = @import("async");
|
const asynk = @import("async");
|
||||||
|
|
||||||
const async_ = asynk.async_;
|
const asyncc = asynk.asyncc;
|
||||||
|
|
||||||
/// Model definition
|
/// Model definition
|
||||||
const Layer = struct {
|
const Layer = struct {
|
||||||
@ -65,7 +65,7 @@ pub fn asyncMain() !void {
|
|||||||
|
|
||||||
// Start compiling. This uses the inferred shapes from the BufferStore.
|
// Start compiling. This uses the inferred shapes from the BufferStore.
|
||||||
// The shape of the input tensor, we have to pass in manually.
|
// The shape of the input tensor, we have to pass in manually.
|
||||||
var compilation = try async_(zml.compileModel, .{ allocator, model_shapes, .forward, .{input_shape}, platform });
|
var compilation = try asyncc(zml.compileModel, .{ allocator, model_shapes, .forward, .{input_shape}, platform });
|
||||||
|
|
||||||
// Produce a bufferized weights struct from the fake BufferStore.
|
// Produce a bufferized weights struct from the fake BufferStore.
|
||||||
// This is like the inferred shapes, but with actual values.
|
// This is like the inferred shapes, but with actual values.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user