2023-01-03 10:21:07 +00:00
|
|
|
const std = @import("std");

// NOTE: `async` is a reserved keyword in Zig and cannot be used as an
// identifier, so the async runtime module is bound under a legal name
// (this matches the upstream convention of calling it `asynk`).
const asynk = @import("async");

const zml = @import("zml");
|
2023-01-03 10:21:07 +00:00
|
|
|
|
|
|
|
|
/// Model definition: a single linear layer computing `weight * x (+ bias)`.
/// The bias term is optional; when absent, only the product is returned.
const Layer = struct {
    bias: ?zml.Tensor = null,

    weight: zml.Tensor,

    /// Multiplies the input by `weight` (element-wise per the demo's 4-element
    /// vectors) and adds `bias` when one is present.
    pub fn forward(self: Layer, x: zml.Tensor) zml.Tensor {
        const scaled = self.weight.mul(x);
        return if (self.bias) |b| scaled.add(b) else scaled;
    }
};
|
|
|
|
|
|
|
|
|
|
/// Process entry point: hands control to `asyncMain` on the async runtime's
/// main thread. Uses the C allocator because the runtime outlives any local
/// allocator scope set up inside `asyncMain`.
pub fn main() !void {
    // `async` is a reserved Zig keyword, so the runtime module is imported
    // locally under a legal identifier rather than through a top-level
    // binding named `async` (which would not compile).
    const async_runtime = @import("async");
    try async_runtime.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
|
|
|
|
|
|
|
|
|
|
/// Demo body, run on the async runtime: builds a tiny `Layer` model by hand,
/// compiles it, uploads weights and an input, executes it, and prints the
/// result fetched back to host memory.
pub fn asyncMain() !void {
    // `async` is a reserved Zig keyword; import the runtime locally under a
    // legal identifier. Its API spells the helpers `asyncc`/`awaitt` for the
    // same reason — the plain spellings are keywords and would not parse.
    const async_runtime = @import("async");

    // Short lived allocations
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Arena allocator for BufferStore etc. — freed all at once at scope exit.
    var arena_state = std.heap.ArenaAllocator.init(allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var context = try zml.Context.init();
    defer context.deinit();

    const platform = context.autoPlatform(.{});
    context.printAvailablePlatforms(platform);

    // Our weights and bias to use
    var weights = [4]f32{ 2.0, 2.0, 2.0, 2.0 };
    var bias = [4]f32{ 1.0, 2.0, 3.0, 4.0 };
    const input_shape = zml.Shape.init(.{4}, .f32);

    // We manually produce a BufferStore. You would not normally do that.
    // A BufferStore is usually created by loading model data from a file.
    var store: zml.aio.BufferStore = .init(allocator);
    defer store.deinit();
    try store.buffers.put(store.arena.allocator(), "weight", zml.HostBuffer.fromArrayPtr(&weights));
    try store.buffers.put(store.arena.allocator(), "bias", zml.HostBuffer.fromArrayPtr(&bias));

    // A clone of our model, consisting of shapes. We only need shapes for compiling.
    // We use the BufferStore to infer the shapes.
    var model_shapes = try zml.aio.populateModel(Layer, allocator, store);
    model_shapes.weight = model_shapes.weight.withSharding(.{-1});
    model_shapes.bias = model_shapes.bias.?.withSharding(.{-1});

    // Start compiling. This uses the inferred shapes from the BufferStore.
    // The shape of the input tensor, we have to pass in manually.
    // NOTE(review): `asyncc` is the runtime's spawn helper (plain `async` is a
    // keyword) — confirm against the async module's API.
    var compilation = try async_runtime.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });

    // Produce a bufferized weights struct from the fake BufferStore.
    // This is like the inferred shapes, but with actual values.
    // We will need to send those to the computation device later.
    var model_weights = try zml.aio.loadModelBuffers(Layer, model_shapes, store, arena, platform);
    defer zml.aio.unloadBuffers(&model_weights); // for good practice

    // Wait for compilation to finish (`awaitt`: plain `await` is a keyword).
    const compiled = try compilation.awaitt();

    // pass the model weights to the compiled module to create an executable module
    var executable = compiled.prepare(model_weights);
    defer executable.deinit();

    // prepare an input buffer
    // Here, we use zml.HostBuffer.fromSlice to show how you would create a HostBuffer
    // with a specific shape from an array.
    // For situations where e.g. you have an [4]f32 array but need a .{2, 2} input shape.
    var input = [4]f32{ 5.0, 5.0, 5.0, 5.0 };
    var input_buffer = try zml.Buffer.from(platform, zml.HostBuffer.fromSlice(input_shape, &input), .{});
    defer input_buffer.deinit();

    // call our executable module
    var result: zml.Buffer = executable.call(.{input_buffer});
    defer result.deinit();

    // fetch the result to CPU memory (arena-owned; freed with the arena)
    const cpu_result = try result.toHostAlloc(arena);
    std.debug.print(
        "\nThe result of {any} * {any} + {any} = {any}\n",
        .{ &weights, &input, &bias, cpu_result.items(f32) },
    );
}
|