2023-01-03 10:21:07 +00:00
|
|
|
const std = @import("std");
|
|
|
|
|
const zml = @import("zml");
|
|
|
|
|
const asynk = @import("async");
|
|
|
|
|
|
|
|
|
|
/// Model definition
///
/// A single linear layer: forward computes `weight * x`, plus `bias`
/// when one was loaded (bias is optional and defaults to absent).
const Layer = struct {
    bias: ?zml.Tensor = null,
    weight: zml.Tensor,

    /// Element-wise multiply the input by `weight`; add `bias` if present.
    pub fn forward(self: Layer, x: zml.Tensor) zml.Tensor {
        const scaled = self.weight.mul(x);
        return if (self.bias) |b| scaled.add(b) else scaled;
    }
};
|
|
|
|
|
|
|
|
|
|
pub fn main() !void {
    // Hand control to the async runtime; all real work happens in asyncMain.
    return asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
}
|
|
|
|
|
|
|
|
|
|
/// Async entry point: builds a tiny linear model from hand-made host buffers,
/// compiles it for the auto-selected platform (concurrently with weight
/// loading), runs it once, and prints the result.
pub fn asyncMain() !void {
    // Short lived allocations
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    // Arena allocator for BufferStore etc.
    var arena_state = std.heap.ArenaAllocator.init(allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    var context = try zml.Context.init();
    defer context.deinit();

    const platform = context.autoPlatform();
    context.printAvailablePlatforms(platform);

    // Our weights and bias to use.
    // const, not var: these host arrays are never mutated below, and
    // HostBuffer only needs read access to them.
    const weights = [4]f16{ 2.0, 2.0, 2.0, 2.0 };
    const bias = [4]f16{ 1.0, 2.0, 3.0, 4.0 };
    const input_shape = zml.Shape.init(.{4}, .f16);

    // We manually produce a BufferStore. You would not normally do that.
    // A BufferStore is usually created by loading model data from a file.
    var buffers: zml.aio.BufferStore.Buffers = .{};
    try buffers.put(arena, "weight", zml.HostBuffer.fromArray(&weights));
    try buffers.put(arena, "bias", zml.HostBuffer.fromArray(&bias));

    // the actual BufferStore
    const buffer_store: zml.aio.BufferStore = .{
        .arena = arena_state,
        .buffers = buffers,
    };

    // A clone of our model, consisting of shapes. We only need shapes for compiling.
    // We use the BufferStore to infer the shapes.
    var model_shapes = try zml.aio.populateModel(Layer, allocator, buffer_store);
    model_shapes.weight = model_shapes.weight.withSharding(.{-1});
    // `bias` is optional in Layer: only shard it when the BufferStore actually
    // provided one. The previous `.?` force-unwrap would panic on a bias-less
    // checkpoint even though the model is designed to work without a bias.
    if (model_shapes.bias) |bias_shape| {
        model_shapes.bias = bias_shape.withSharding(.{-1});
    }

    // Start compiling. This uses the inferred shapes from the BufferStore.
    // The shape of the input tensor, we have to pass in manually.
    // Compilation runs concurrently with the buffer upload below.
    var compilation = try asynk.asyncc(zml.compileModel, .{ allocator, Layer.forward, model_shapes, .{input_shape}, platform });

    // Produce a bufferized weights struct from the fake BufferStore.
    // This is like the inferred shapes, but with actual values.
    // We will need to send those to the computation device later.
    var model_weights = try zml.aio.loadModelBuffers(Layer, model_shapes, buffer_store, arena, platform);
    defer zml.aio.unloadBuffers(&model_weights); // for good practice

    // Wait for compilation to finish
    const compiled = try compilation.awaitt();

    // Pass the model weights to the compiled module to create an executable module.
    var executable = compiled.prepare(model_weights);
    defer executable.deinit();

    // Prepare an input buffer.
    // Here, we use zml.HostBuffer.fromSlice to show how you would create a HostBuffer
    // with a specific shape from an array.
    // For situations where e.g. you have an [4]f16 array but need a .{2, 2} input shape.
    const input = [4]f16{ 5.0, 5.0, 5.0, 5.0 };
    var input_buffer = try zml.Buffer.from(platform, zml.HostBuffer.fromSlice(input_shape, &input));
    defer input_buffer.deinit();

    // call our executable module
    var result: zml.Buffer = executable.call(.{input_buffer});
    defer result.deinit();

    // fetch the result to CPU memory
    const cpu_result = try result.toHostAlloc(arena);
    std.debug.print(
        "\nThe result of {d} * {d} + {d} = {d}\n",
        .{ &weights, &input, &bias, cpu_result.items(f16) },
    );
}
|