// Radix/examples/modernbert/test.zig

const clap = @import("clap");
const std = @import("std");
const zml = @import("zml");
const asynk = @import("async");
const log = std.log;
const Tensor = zml.Tensor;
const modernbert_module = @import("modernbert.zig");
const ModernBertOptions = modernbert_module.ModernBertOptions;

// Command-line parameter specification, parsed at comptime by clap.
// The same spec is used in this file for argument parsing (`clap.parse`),
// the one-line usage summary (`clap.usage`) and the full help text (`clap.help`).
// `<PATH>` names the value parser looked up in the `parsers` table passed to `clap.parse`.
const params = clap.parseParamsComptime(
    \\--help                    print this help
    \\--model           <PATH>  model weights path
    \\--activations     <PATH>  model activations path
);

/// Writes a one-line usage summary for `params` to `stderr`, then terminates the process.
///
/// Every caller in this file reaches this function after a command-line error
/// (argument parsing failed or a required option is missing), so the process
/// exits with a non-zero status to let shells and CI detect that the test did not run.
///
/// `stderr` is any writer accepted by `clap.usage` that also provides `print`.
/// Write failures are deliberately ignored: the process is about to exit and
/// there is no better channel left to report them on.
fn printUsageAndExit(stderr: anytype) noreturn {
    stderr.print("usage: ", .{}) catch {};
    clap.usage(stderr, clap.Help, &params) catch {};
    stderr.print("\n", .{}) catch {};
    // Non-zero: this path is only taken when the invocation was invalid.
    std.process.exit(1);
}
/// Process entry point: hands `asyncMain` to `asynk.AsyncThread.main`, using the
/// C allocator, and forwards whatever error that call returns.
pub fn main() !void {
    return asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);
}

/// Parses the command line, loads the model weights and the recorded activations,
/// builds the ModernBERT model on the auto-selected platform and runs
/// `testImplementation` against it.
///
/// Requires both `--model` and `--activations`; when either is missing, or when
/// parsing fails, the usage text is printed and the process exits through
/// `printUsageAndExit`. `--help` prints the full help text and returns.
pub fn asyncMain() !void {
    const ModernBertForMaskedLM = modernbert_module.ModernBertForMaskedLM;

    // Allocator for short-lived allocations; the result of `deinit` is discarded.
    var general_purpose = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = general_purpose.deinit();
    const allocator = general_purpose.allocator();

    const stderr = std.io.getStdErr().writer();

    // --- Command line -------------------------------------------------------
    const value_parsers = comptime .{
        .PATH = clap.parsers.string,
    };
    var diagnostic: clap.Diagnostic = .{};
    var cli = clap.parse(clap.Help, &params, value_parsers, .{
        .diagnostic = &diagnostic,
        .allocator = allocator,
    }) catch |err| {
        try diagnostic.report(stderr, err);
        // `noreturn`: the process ends here, so the block needs no value.
        printUsageAndExit(stderr);
    };
    defer cli.deinit();

    if (cli.args.help != 0) {
        return clap.help(stderr, clap.Help, &params, .{});
    }

    const model_path = cli.args.model orelse {
        stderr.print("Error: missing --model=...\n\n", .{}) catch {};
        printUsageAndExit(stderr);
    };
    const activations_path = cli.args.activations orelse {
        stderr.print("Error: missing --activations=...\n\n", .{}) catch {};
        printUsageAndExit(stderr);
    };

    // --- ZML context and platform ------------------------------------------
    var context = try zml.Context.init();
    defer context.deinit();

    const platform = context.autoPlatform(.{});
    log.info("Selected platform: {s}", .{@tagName(platform.target)});

    // Arena dedicated to model-related allocations (model shapes and weights);
    // everything allocated from it is released in one go when this function exits.
    var model_arena_state = std.heap.ArenaAllocator.init(allocator);
    defer model_arena_state.deinit();
    const model_arena = model_arena_state.allocator();

    // --- Input files ---------------------------------------------------------
    // Weights file: format is detected from the file itself.
    var weights_store = try zml.aio.detectFormatAndOpen(allocator, model_path);
    defer weights_store.deinit();
    log.info("Model contains {d} layers. Loaded from: {s}", .{ weights_store.buffers.count(), model_path });

    // Activations file: opened through the torch loader.
    const activations = try zml.aio.torch.open(model_arena, activations_path);
    defer activations.deinit();
    log.info("Found {} activations in {s}", .{ activations.buffers.count(), activations_path });

    // --- Model ---------------------------------------------------------------
    var model = try zml.aio.populateModel(ModernBertForMaskedLM, model_arena, weights_store);

    // Hard-coded options (named after the "base" model in the original code).
    const base_options: modernbert_module.ModernBertOptions = .{
        .num_attention_heads = 12,
        .tie_word_embeddings = true,
        .pad_token = 50283,
        .local_attention = 128,
    };
    model.init(base_options);

    const model_weights = try zml.aio.loadModelBuffers(
        ModernBertForMaskedLM,
        model,
        weights_store,
        model_arena,
        platform,
    );

    // --- Checks --------------------------------------------------------------
    try testImplementation(platform, model, model_weights, activations);
}

/// Runs `zml.testing.testLayer` on a fixed list of ModernBERT sub-modules, going
/// from the token embeddings up to the full encoder, the decoder and the head.
///
/// Each call receives the platform, the activation store, the activation name,
/// the layer, the matching weights and a numeric tolerance. Calls run in order
/// and the first error returned by `testLayer` is propagated to the caller.
/// (Presumably `testLayer` compares the layer output with the recorded
/// activations of the same name -- confirm in `zml.testing`.)
fn testImplementation(
    compute_platform: zml.Platform,
    model: modernbert_module.ModernBertForMaskedLM,
    model_weights: zml.Bufferized(modernbert_module.ModernBertForMaskedLM),
    activations: zml.aio.BufferStore,
) !void {
    const testLayer = zml.testing.testLayer;

    // Shorthands for the inner model (`model.model`) and its weights.
    const encoder = model.model;
    const encoder_weights = model_weights.model;

    // Stand-in layer for the decoder check: contracts the `.d` axis of the head
    // outputs (tagged b, s, d) with the weight (tagged voc, d), then adds the
    // bias broadcast to the result shape.
    const TiedDecoder = struct {
        weight: Tensor,
        bias: Tensor,

        pub fn forward(self: @This(), head_outputs: Tensor) Tensor {
            const tagged_inputs = head_outputs.withTags(.{ .b, .s, .d });
            const tagged_weight = self.weight.withTags(.{ .voc, .d });
            const logits = tagged_inputs.dot(tagged_weight, .{.d});
            const tagged_bias = self.bias.withTags(.{.voc});
            return logits.add(tagged_bias.broad(logits.shape()));
        }
    };

    // Embeddings.
    try testLayer(compute_platform, activations, "model.model.embeddings.tok_embeddings", encoder.embeddings.tok_embeddings, encoder_weights.embeddings.tok_embeddings, 1e-6);
    try testLayer(compute_platform, activations, "model.model.embeddings.norm", encoder.embeddings.norm, encoder_weights.embeddings.norm, 1e-3);
    try testLayer(compute_platform, activations, "model.model.embeddings", encoder.embeddings, encoder_weights.embeddings, 1e-3);

    // Final normalization.
    try testLayer(compute_platform, activations, "model.model.final_norm", encoder.final_norm, encoder_weights.final_norm, 1e-5);

    // Layer 2: its parts first, then the whole layer.
    try testLayer(compute_platform, activations, "model.model.layers.2.mlp", encoder.layers[2].mlp, encoder_weights.layers[2].mlp, 2e-3);
    try testLayer(compute_platform, activations, "model.model.layers.2.mlp_norm", encoder.layers[2].mlp_norm, encoder_weights.layers[2].mlp_norm, 1e-4);
    try testLayer(compute_platform, activations, "model.model.layers.2.attn", encoder.layers[2].attn, encoder_weights.layers[2].attn, 1e-6);
    try testLayer(compute_platform, activations, "model.model.layers.2", encoder.layers[2], encoder_weights.layers[2], 2e-3);

    // Layer 3 attention.
    try testLayer(compute_platform, activations, "model.model.layers.3.attn", encoder.layers[3].attn, encoder_weights.layers[3].attn, 1e-5);

    // Whole inner model.
    try testLayer(compute_platform, activations, "model.model", encoder, encoder_weights, 1e-2);

    // Decoder, exercised through `TiedDecoder`.
    // NOTE(review): the layer falls back to the embedding weight only when
    // `model.decoder.weight` is null, while the buffers always use the embedding
    // weight -- confirm this is intended for checkpoints with untied weights.
    const decoder_layer: TiedDecoder = .{
        .weight = model.decoder.weight orelse encoder.embeddings.tok_embeddings.weight,
        .bias = model.decoder.bias,
    };
    try testLayer(
        compute_platform,
        activations,
        "model.decoder",
        decoder_layer,
        .{ .weight = encoder_weights.embeddings.tok_embeddings.weight, .bias = model_weights.decoder.bias },
        1e-3,
    );

    // Head.
    try testLayer(
        compute_platform,
        activations,
        "model.head",
        model.head,
        model_weights.head,
        0.1, // TODO: too high tolerance
    );
}
Add tests for the ModernBERT example, covering activation utilities, build setup, and example Zig code. (2024-06-11 17:33:22 +00:00)
			`const clap = @import("clap");`
			`const std = @import("std");`
			`const zml = @import("zml");`
			`const asynk = @import("async");`
			`const log = std.log;`
			`const Tensor = zml.Tensor;`
			`const modernbert_module = @import("modernbert.zig");`
			`const ModernBertOptions = modernbert_module.ModernBertOptions;`

			`const params = clap.parseParamsComptime(`
			`\\--help print this help`
			`\\--model <PATH> model weights path`
			`\\--activations <PATH> model activations path`
			`);`

			`fn printUsageAndExit(stderr: anytype) noreturn {`
			`stderr.print("usage: ", .{}) catch {};`
			`clap.usage(stderr, clap.Help, &params) catch {};`
			`stderr.print("\n", .{}) catch {};`
			`std.process.exit(0);`
			`}`
			`pub fn main() !void {`
			`try asynk.AsyncThread.main(std.heap.c_allocator, asyncMain);`
			`}`

			`pub fn asyncMain() !void {`
			`// Short lived allocations`
			`var gpa = std.heap.GeneralPurposeAllocator(.{}){};`
			`defer _ = gpa.deinit();`

			`const allocator = gpa.allocator();`
			`const stderr = std.io.getStdErr().writer();`

			`// Read CLI arguments`
			`const parsers = comptime .{`
			`.PATH = clap.parsers.string,`
			`};`
			`var diag: clap.Diagnostic = .{};`
			`var res = clap.parse(clap.Help, &params, parsers, .{`
			`.diagnostic = &diag,`
			`.allocator = allocator,`
			`}) catch \|err\| {`
			`try diag.report(stderr, err);`
			`try printUsageAndExit(stderr);`
			`};`
			`defer res.deinit();`

			`if (res.args.help != 0) {`
			`try clap.help(stderr, clap.Help, &params, .{});`
			`return;`
			`}`

			`const model_file = res.args.model orelse {`
			`stderr.print("Error: missing --model=...\n\n", .{}) catch {};`
			`printUsageAndExit(stderr);`
			`unreachable;`
			`};`
			`const activations_file = res.args.activations orelse {`
			`stderr.print("Error: missing --activations=...\n\n", .{}) catch {};`
			`printUsageAndExit(stderr);`
			`unreachable;`
			`};`

			`// Initialize the ZML context`
			`var context = try zml.Context.init();`
			`defer context.deinit();`

			`// Auto-select platform`
			`const compute_platform = context.autoPlatform(.{});`
			`log.info("Selected platform: {s}", .{@tagName(compute_platform.target)});`

			`// Create a dedicated memory arena for model-related allocations (dedicated to model shapes and weights)`
			`var arena_state = std.heap.ArenaAllocator.init(allocator);`
			`defer arena_state.deinit();`
			`const model_arena = arena_state.allocator();`

			`// Load the model weights file and parse its structure (shape)`
			`var weights_file = try zml.aio.detectFormatAndOpen(allocator, model_file);`
			`defer weights_file.deinit();`
			`log.info("Model contains {d} layers. Loaded from: {s}", .{ weights_file.buffers.count(), model_file });`

			`// Load the activation data file`
			`const activations = try zml.aio.torch.open(model_arena, activations_file);`
			`defer activations.deinit();`
			`log.info("Found {} activations in {s}", .{ activations.buffers.count(), activations_file });`

			`// Initialize model`
			`var model = try zml.aio.populateModel(`
			`modernbert_module.ModernBertForMaskedLM,`
			`model_arena,`
			`weights_file,`
			`);`

			`const modernbert_base_options: modernbert_module.ModernBertOptions = .{`
			`.num_attention_heads = 12,`
			`.tie_word_embeddings = true,`
			`.pad_token = 50283,`
			`.local_attention = 128,`
			`};`
			`model.init(modernbert_base_options);`

			`// Load model weights`
			`const model_weights = try zml.aio.loadModelBuffers(`
			`modernbert_module.ModernBertForMaskedLM,`
			`model,`
			`weights_file,`
			`model_arena,`
			`compute_platform,`
			`);`

			`// Test implementation`
			`try testImplementation(compute_platform, model, model_weights, activations);`
			`}`

			`fn testImplementation(`
			`compute_platform: zml.Platform,`
			`model: modernbert_module.ModernBertForMaskedLM,`
			`model_weights: zml.Bufferized(modernbert_module.ModernBertForMaskedLM),`
			`activations: zml.aio.BufferStore,`
			`) !void {`
			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.embeddings.tok_embeddings",`
			`model.model.embeddings.tok_embeddings,`
			`model_weights.model.embeddings.tok_embeddings,`
			`1e-6,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.embeddings.norm",`
			`model.model.embeddings.norm,`
			`model_weights.model.embeddings.norm,`
			`1e-3,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.embeddings",`
			`model.model.embeddings,`
			`model_weights.model.embeddings,`
			`1e-3,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.final_norm",`
			`model.model.final_norm,`
			`model_weights.model.final_norm,`
			`1e-5,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.layers.2.mlp",`
			`model.model.layers[2].mlp,`
			`model_weights.model.layers[2].mlp,`
			`2e-3,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.layers.2.mlp_norm",`
			`model.model.layers[2].mlp_norm,`
			`model_weights.model.layers[2].mlp_norm,`
			`1e-4,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.layers.2.attn",`
			`model.model.layers[2].attn,`
			`model_weights.model.layers[2].attn,`
			`1e-6,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.layers.2",`
			`model.model.layers[2],`
			`model_weights.model.layers[2],`
			`2e-3,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model.layers.3.attn",`
			`model.model.layers[3].attn,`
			`model_weights.model.layers[3].attn,`
			`1e-5,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.model",`
			`model.model,`
			`model_weights.model,`
			`1e-2,`
			`);`

			`const TiedDecoder = struct {`
			`weight: Tensor,`
			`bias: Tensor,`

			`pub fn forward(self: @This(), head_outputs: Tensor) Tensor {`
			`const results = head_outputs.withTags(.{ .b, .s, .d }).dot(self.weight.withTags(.{ .voc, .d }), .{.d});`
			`return results.add(self.bias.withTags(.{.voc}).broad(results.shape()));`
			`}`
			`};`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.decoder",`
			`TiedDecoder{ .weight = model.decoder.weight orelse model.model.embeddings.tok_embeddings.weight, .bias = model.decoder.bias },`
			`.{ .weight = model_weights.model.embeddings.tok_embeddings.weight, .bias = model_weights.decoder.bias },`
			`1e-3,`
			`);`

			`try zml.testing.testLayer(`
			`compute_platform,`
			`activations,`
			`"model.head",`
			`model.head,`
			`model_weights.head,`
			`0.1, // TODO: too high tolerance`
			`);`
			`}`