From cad1a688da3d0bb5114af3dd489295708ff4a9bf Mon Sep 17 00:00:00 2001 From: Foke Singh Date: Thu, 23 Feb 2023 11:18:27 +0000 Subject: [PATCH] Add sharding usage to the benchmark and simple_layer example programs. --- examples/benchmark/main.zig | 14 ++++++++------ examples/simple_layer/main.zig | 14 ++++++++------ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/examples/benchmark/main.zig b/examples/benchmark/main.zig index b26d646..8d7604e 100644 --- a/examples/benchmark/main.zig +++ b/examples/benchmark/main.zig @@ -42,7 +42,9 @@ pub fn asyncMain() !void { defer context.deinit(); // Auto-select platform - const platform = context.autoPlatform(); + const platform = context.autoPlatform().withCompilationOptions(.{ + .sharding_enabled = true, + }); { // List available targets std.debug.print("Available Platforms:\n", .{}); @@ -76,8 +78,8 @@ pub fn asyncMain() !void { var args = std.process.args(); const cli_args = flags.parse(&args, CliArgs); - const input_shape = zml.Shape.init(.{ cli_args.size, cli_args.size }, cli_args.dtype); - + const a_shape = zml.Shape.init(.{ cli_args.size, cli_args.size }, cli_args.dtype).withTags(.{ .m, .k }).withSharding(.{.k}); + const b_shape = a_shape.withTags(.{ .k, .n }).withSharding(.{.k}); var timer = try std.time.Timer.start(); std.debug.print("\nCompiling model to MLIR....\n", .{}); @@ -85,7 +87,7 @@ pub fn asyncMain() !void { // Start compiling. // The shape of the input tensor, we have to pass in manually. timer.reset(); - var compilation = try async_(zml.module.compileModel, .{ allocator, Benchmark{}, .forward, .{ input_shape.withTags(.{ .m, .k }), input_shape.withTags(.{ .k, .n }) }, platform }); + var compilation = try async_(zml.module.compileModel, .{ allocator, Benchmark{}, .forward, .{ a_shape, b_shape }, platform }); // Wait for compilation to finish const compiled = try compilation.await_(); @@ -100,9 +102,9 @@ pub fn asyncMain() !void { var rng = std.Random.DefaultPrng.init(0); const random = rng.random(); - var a_buffer = try createRandomBuffer(allocator, platform, input_shape, random); + var a_buffer = try createRandomBuffer(allocator, platform, a_shape, random); defer a_buffer.deinit(); - var b_buffer = try createRandomBuffer(allocator, platform, input_shape, random); + var b_buffer = try createRandomBuffer(allocator, platform, b_shape, random); defer b_buffer.deinit(); std.debug.print("\nRunning benchmark....\n", .{}); diff --git a/examples/simple_layer/main.zig b/examples/simple_layer/main.zig index d756aa6..dbe3242 100644 --- a/examples/simple_layer/main.zig +++ b/examples/simple_layer/main.zig @@ -41,9 +41,9 @@ pub fn asyncMain() !void { const platform = context.autoPlatform(); // Our weights and bias to use - var weights = [3]f16{ 2.0, 2.0, 2.0 }; - var bias = [3]f16{ 1.0, 2.0, 3.0 }; - const input_shape = zml.Shape.init(.{3}, .f16); + var weights = [4]f16{ 2.0, 2.0, 2.0, 2.0 }; + var bias = [4]f16{ 1.0, 2.0, 3.0, 4.0 }; + const input_shape = zml.Shape.init(.{4}, .f16); // We manually produce a BufferStore. You would not normally do that. // A BufferStore is usually created by loading model data from a file. @@ -59,7 +59,9 @@ pub fn asyncMain() !void { // A clone of our model, consisting of shapes. We only need shapes for compiling. // We use the BufferStore to infer the shapes. - const model_shapes = try zml.aio.populateModel(Layer, allocator, buffer_store); + var model_shapes = try zml.aio.populateModel(Layer, allocator, buffer_store); + model_shapes.weight = model_shapes.weight.withSharding(.{-1}); + model_shapes.bias = model_shapes.bias.?.withSharding(.{-1}); // Start compiling. This uses the inferred shapes from the BufferStore. // The shape of the input tensor, we have to pass in manually. @@ -68,7 +70,7 @@ pub fn asyncMain() !void { // Produce a bufferized weights struct from the fake BufferStore. // This is like the inferred shapes, but with actual values. // We will need to send those to the computation device later. - var model_weights = try zml.aio.loadBuffers(Layer, .{}, buffer_store, arena, platform); + var model_weights = try zml.aio.loadModelBuffers(Layer, model_shapes, buffer_store, arena, platform); defer zml.aio.unloadBuffers(&model_weights); // for good practice // Wait for compilation to finish @@ -82,7 +84,7 @@ pub fn asyncMain() !void { // Here, we use zml.HostBuffer.fromSlice to show how you would create a HostBuffer // with a specific shape from an array. // For situations where e.g. you have an [4]f16 array but need a .{2, 2} input shape. - var input = [3]f16{ 5.0, 5.0, 5.0 }; + var input = [4]f16{ 5.0, 5.0, 5.0, 5.0 }; var input_buffer = try zml.Buffer.from(platform, zml.HostBuffer.fromSlice(input_shape, &input)); defer input_buffer.deinit();