Radix/zml/aio/gguf/core.zig

const asynk = @import("async");
const std = @import("std");
const utils = @import("../utils.zig");
const zml = @import("../../zml.zig");

const assert = std.debug.assert;
const log = std.log.scoped(.zml_io);

/// Errors produced while parsing a GGUF file.
pub const GgufErrors = error{
    // A requested Zig type does not match the stored value type
    // (see `GgufValueType.arrayTypeCheck`).
    ValueTypeMismatch,
    // The file is malformed: wrong magic, truncated data or short read.
    InvalidGguf,
    // The file uses a tensor type or value type id this parser does not handle.
    UnsupportedGgufType,
    // No more metadata / tensor entries are left to read
    // (see `GgufFile.readMetadata` and `GgufFile.readTensorInfo`).
    EndOfMetadata,
    // Presumably propagated from the allocator used while reading strings and arrays.
    OutOfMemory,
};

// Enums and structures
/// Storage encodings a GGUF tensor can use.
/// The integer values are the raw type ids decoded by `GgufFile.readTensorType`,
/// so they must not be renumbered.
pub const TensorType = enum(u32) {
    f32 = 0,
    f16 = 1,
    q4_0 = 2,
    q4_1 = 3,
    deprecated_q4_2 = 4,
    deprecated_q4_3 = 5,
    q5_0 = 6,
    q5_1 = 7,
    q8_0 = 8,
    q8_1 = 9,
    // k-quantizations
    q2_k = 10,
    q3_k = 11,
    q4_k = 12,
    q5_k = 13,
    q6_k = 14,
    q8_k = 15,
    i8 = 16,
    i16 = 17,
    i32 = 18,

    /// Highest raw id that has a named tag above.
    /// Used to reject unknown ids before converting them with `@enumFromInt`.
    const MAX_KNOWN_ENUM = 18;

    /// Returns true for q4_0, q4_1, q8_0, q2_k, q4_k and q6_k, false for every other type.
    /// NOTE(review): presumably the set of quantizations the loader can convert;
    /// confirm against callers.
    pub fn canConvertQuant(self: TensorType) bool {
        switch (self) {
            .q4_0, .q4_1, .q8_0, .q2_k, .q4_k, .q6_k => return true,
            else => return false,
        }
    }

    /// Maps the non-quantized types (f32, f16, i8, i16, i32) to the matching
    /// `zml.DataType`. Returns null for every other type.
    pub fn toDtype(self: TensorType) ?zml.DataType {
        switch (self) {
            .f32 => return .f32,
            .f16 => return .f16,
            .i8 => return .i8,
            .i16 => return .i16,
            .i32 => return .i32,
            else => return null,
        }
    }

    /// Size of the corresponding `zml.DataType`, as reported by its `sizeOf`.
    /// Only valid for types where `toDtype` is non-null: the optional is
    /// unwrapped with `.?`, so calling this on any other type is illegal.
    pub fn sizeOf(self: TensorType) usize {
        const dtype = self.toDtype().?;
        return dtype.sizeOf();
    }

    /// Looks up the block layout of this type in `TENSOR_TYPE_FEATURES`.
    pub fn getFeatures(t: TensorType) TensorTypeFeatures {
        switch (t) {
            // The table has one field per tag, named after the tag.
            inline else => |tag| return @field(TENSOR_TYPE_FEATURES, @tagName(tag)),
        }
    }
};

/// Block layout of a GGUF tensor type: how many items one block holds
/// and how many bytes that block occupies.
pub const TensorTypeFeatures = struct {
    items_per_block: u29,
    bytes_per_block: u29,

    /// Returns floor(log2(bytes_per_block)).
    /// `bytes_per_block` must be non-zero, as required by `std.math.log2_int`.
    pub fn alignment(features: TensorTypeFeatures) u8 {
        const log2_bytes = std.math.log2_int(u29, features.bytes_per_block);
        return log2_bytes;
    }
};

/// Lookup table from tensor type to block layout: one field per `TensorType` tag,
/// named after the tag (this is what `TensorType.getFeatures` relies on).
/// Non-quantized types store one item per block; quantized types pack 32 or 256 items.
/// The deprecated types carry zeroes, so dividing by their `items_per_block` is invalid.
pub const TENSOR_TYPE_FEATURES: std.enums.EnumFieldStruct(TensorType, TensorTypeFeatures, null) = .{
    .f32 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(f32) },
    .f16 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(f16) },
    .q4_0 = .{ .items_per_block = 32, .bytes_per_block = 18 },
    .q4_1 = .{ .items_per_block = 32, .bytes_per_block = 20 },
    .deprecated_q4_2 = .{ .items_per_block = 0, .bytes_per_block = 0 },
    .deprecated_q4_3 = .{ .items_per_block = 0, .bytes_per_block = 0 },
    .q5_0 = .{ .items_per_block = 32, .bytes_per_block = 22 },
    .q5_1 = .{ .items_per_block = 32, .bytes_per_block = 24 },
    .q8_0 = .{ .items_per_block = 32, .bytes_per_block = 34 },
    .q8_1 = .{ .items_per_block = 32, .bytes_per_block = 40 },
    .q2_k = .{ .items_per_block = 256, .bytes_per_block = 82 },
    .q3_k = .{ .items_per_block = 256, .bytes_per_block = 110 },
    .q4_k = .{ .items_per_block = 256, .bytes_per_block = 144 },
    .q5_k = .{ .items_per_block = 256, .bytes_per_block = 176 },
    .q6_k = .{ .items_per_block = 256, .bytes_per_block = 210 },
    .q8_k = .{ .items_per_block = 256, .bytes_per_block = 292 },
    .i8 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i8) },
    .i16 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i16) },
    .i32 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i32) },
};

/// Type id of a metadata value, as stored in the file.
/// The enum is non-exhaustive: ids without a name can be represented,
/// so callers must validate a decoded id before relying on its tag.
pub const GgufValueType = enum(u32) {
    /// 8-bit unsigned integer.
    uint8 = 0,
    /// 8-bit signed integer.
    int8 = 1,
    /// 16-bit unsigned little-endian integer.
    uint16 = 2,
    /// 16-bit signed little-endian integer.
    int16 = 3,
    /// 32-bit unsigned little-endian integer.
    uint32 = 4,
    /// 32-bit signed little-endian integer.
    int32 = 5,
    /// 32-bit IEEE754 floating point number.
    float32 = 6,
    /// Boolean, stored as a 1-byte value where 0 is false and 1 is true.
    /// Anything else is invalid, and should be treated as either the model
    /// being invalid or the reader being buggy.
    bool = 7,
    /// UTF-8 non-null-terminated string, with length prepended.
    string = 8,
    /// Array of other values, with the length and type prepended.
    /// Arrays can be nested, and the length of the array is the number of
    /// elements in the array, not the number of bytes.
    array = 9,
    /// 64-bit unsigned little-endian integer.
    uint64 = 10,
    /// 64-bit signed little-endian integer.
    int64 = 11,
    /// 64-bit IEEE754 floating point number.
    float64 = 12,
    /// Special values used by the callbacks of gguf_do_with_value().
    array_start = 100,
    array_end = 101,

    // Allow other values in case GGUF add more types without us noticing
    _,

    /// Size in bytes of one element of this type.
    /// For `.string` this is the size of a slice (`[]u8`), not of the characters.
    /// Every other tag (`.array`, `.array_start`, `.array_end`, unnamed ids)
    /// is `unreachable`: the caller must rule them out first.
    pub fn sizeOf(self: GgufValueType) usize {
        return switch (self) {
            .uint8, .int8 => @sizeOf(u8),
            .bool => @sizeOf(bool),
            .uint16, .int16 => @sizeOf(u16),
            .uint32, .int32, .float32 => @sizeOf(u32),
            .uint64, .int64, .float64 => @sizeOf(u64),
            .string => @sizeOf([]u8),
            else => unreachable,
        };
    }

    /// Checks that `T` is the Zig type matching this value type.
    /// `.string` accepts both `[]u8` and `[]const u8`.
    /// Tags without a scalar or string mapping are accepted for any `T`.
    /// Returns error.ValueTypeMismatch otherwise.
    pub fn arrayTypeCheck(self: GgufValueType, comptime T: type) !void {
        const compatible = switch (self) {
            .string => T == []u8 or T == []const u8,
            .uint8 => T == u8,
            .int8 => T == i8,
            .uint16 => T == u16,
            .int16 => T == i16,
            .uint32 => T == u32,
            .int32 => T == i32,
            .float32 => T == f32,
            .bool => T == bool,
            .uint64 => T == u64,
            .int64 => T == i64,
            .float64 => T == f64,
            else => true,
        };
        if (!compatible) return error.ValueTypeMismatch;
    }
};

/// Tag type of the `GgufValue` union.
/// The declaration order differs from `GgufValueType` and the boolean tag is named
/// `boolval` rather than `bool`, so the two enums cannot be converted into each
/// other by integer value.
pub const ValueType = enum {
    uint8,
    int8,
    uint16,
    int16,
    uint32,
    int32,
    float32,
    uint64,
    int64,
    float64,
    boolval,
    string,
    array,
};

// Union of possible values.
/// A decoded metadata value, tagged by `ValueType`.
pub const GgufValue = union(ValueType) {
    uint8: u8,
    int8: i8,
    uint16: u16,
    int16: i16,
    uint32: u32,
    int32: i32,
    float32: f32,
    uint64: u64,
    int64: i64,
    float64: f64,
    boolval: bool,
    string: []const u8,
    array: Array,

    /// A homogeneous array of values, kept as raw bytes.
    pub const Array = struct {
        // Any value type is valid, including arrays.
        child: GgufValueType,
        // Number of elements, not bytes
        len: usize,
        // Raw storage for the elements. `GgufFile.readArrayHeader` fills it with the
        // element bytes as read from the file, except for string arrays where it is
        // the byte view of a slice of `[]u8`.
        data: []u8,
    };

    /// Converts this value to the loader-level `zml.aio.Value`.
    /// Arrays are translated by mapping `child` to the loader's item type and
    /// forwarding `data` as is; `len` is not forwarded.
    /// An array whose child is not a scalar or a string (for instance a nested
    /// array) hits `unreachable`.
    pub fn asLoaderValue(self: GgufValue) zml.aio.Value {
        return switch (self) {
            .array => |v| .{
                .array = .{
                    .item_type = switch (v.child) {
                        .bool => .boolval,
                        .uint8 => .uint8,
                        .int8 => .int8,
                        .uint16 => .uint16,
                        .int16 => .int16,
                        .uint32 => .uint32,
                        .int32 => .int32,
                        .float32 => .float32,
                        .uint64 => .uint64,
                        .int64 => .int64,
                        .float64 => .float64,
                        .string => .string,
                        // TODO: .array => .array,
                        else => unreachable,
                    },
                    .data = v.data,
                },
            },
            // Every other tag is copied into the `zml.aio.Value` field of the same name.
            inline else => |v, tag| @unionInit(zml.aio.Value, @tagName(tag), v),
        };
    }
};

// Header
/// Fixed-size record at the very start of a GGUF file.
/// `GgufFile.open` reads it in one go with `readStruct`.
const GgufHeader = extern struct {
    // Magic number to announce that this is a GGUF file. Must be `GGUF`.
    magic: [4]u8,
    // The version of the format implemented.
    // Must be `3` for version described in this spec.
    // Note: `validate` does not check this field.
    version: u32,
    // The number of tensors in the file.
    // This is explicit, instead of being included in the metadata, to ensure
    // it is always present for loading the tensors.
    tensor_count: usize,
    // The number of metadata key-value pairs.
    metadata_kv_count: usize,

    /// Checks the magic number.
    /// Logs and returns error.InvalidGguf when it is not `GGUF`.
    pub fn validate(self: GgufHeader) !void {
        const expected_magic = "GGUF";
        if (std.mem.eql(u8, &self.magic, expected_magic)) return;

        log.err("Invalid GGUF file: wrong header {s}", .{self.magic});
        return error.InvalidGguf;
    }
};

// Key representation in this library API.
/// One metadata key-value pair, as returned by `GgufFile.readMetadata`.
pub const GgufMetadataKv = struct {
    // Key of the entry, allocated by `GgufFile.readMetadata` with the caller's allocator.
    name: []const u8,
    // Value type id as stored in the file.
    type_: GgufValueType,
    // Decoded value.
    val: GgufValue,
};

// Tensor representation in this library API.
const GGUF_TENSOR_MAX_DIM: usize = 8; // Future-proof: actual limit is 4.
/// Description of one tensor, as returned by `GgufFile.readTensorInfo`.
pub const GgufTensorInfo = struct {
    name: []const u8,
    t: TensorType, // Tensor type (enum TensorType).
    rank: usize, // Number of dimensions of the tensor.
    // Dimensions (Eg. [512, 1024, 1, 1]).
    // `GgufFile.readTensorInfo` only fills the first `rank` entries, in reverse
    // file order; the remaining entries are left undefined. Use `shape()`.
    dims: [GGUF_TENSOR_MAX_DIM]i64,
    start: usize, // Offset from start of data section.
    byte_len: usize, // Total size in bytes.
    num_weights: usize, // Total number of parameters.

    /// Returns the first `rank` entries of `dims`.
    /// NOTE(review): `info` is taken by value and the returned slice points into it;
    /// this presumably relies on `inline` so that the slice refers to the caller's
    /// value. Confirm before removing `inline`.
    pub inline fn shape(info: GgufTensorInfo) []const i64 {
        return info.dims[0..info.rank];
    }
};

/// Returns the display name of the value type with raw id `t`,
/// or "unknown" when the id is outside of `GGUF_VALUE_NAME`.
fn getValueTypeName(t: u32) []const u8 {
    const idx: usize = t;
    return if (idx < GGUF_VALUE_NAME.len) GGUF_VALUE_NAME[idx] else "unknown";
}

/// Value type names, indexed by raw type id (0 through 12).
const GGUF_VALUE_NAME = [_][]const u8{
    "uint8",
    "int8",
    "uint16",
    "int16",
    "uint32",
    "int32",
    "float32",
    "bool",
    "string",
    "array",
    "uint64",
    "int64",
    "float64",
};

/// GGUF file API
/// A memory-mapped view of a .gguf file.
/// Format used by GGML models: https://github.com/ggerganov/ggml/
///
/// The header, the metadata key-values and the tensor infos are parsed
/// sequentially through the file reader; `off` mirrors the reader position.
/// Everything read from the file is untrusted: lengths and counts are validated
/// against the file size, and malformed input is reported with an error.
pub const GgufFile = struct {
    header: GgufHeader, // GGUF file header info.
    size: usize, // Total file size.
    file: zml.aio.MemoryMappedFile,
    left_kv: usize, // Number of key-value pairs yet to read.
    left_tensors: usize, // Number of tensors yet to read.
    off: usize, // Offset of the next item to parse.
    alignment: usize = 32, // File data alignment. Default: 32 bytes.

    /// Open and memmap the given file.
    /// Reads and validates the header; the parser is left positioned on the
    /// first metadata entry.
    /// NOTE(review): the file handle is not released if reading the header,
    /// `stat` or the mapping fails; confirm how `asynk.File` should be closed.
    pub fn open(path: []const u8) !GgufFile {
        const file = try asynk.File.open(path, .{});
        const header = try file.reader().readStruct(GgufHeader);
        try header.validate();
        return .{
            .header = header,
            .size = (try file.stat()).size,
            .file = try zml.aio.MemoryMappedFile.init(file),
            .off = @sizeOf(GgufHeader),
            .left_kv = header.metadata_kv_count,
            .left_tensors = header.tensor_count,
        };
    }

    ///  Unmap the file memory and close the file handle.
    pub fn close(self: *GgufFile) void {
        self.file.deinit();
    }

    /// Set the context to read the first key-value entry in the GGUF
    /// file and then all the rest. Is used when creating a new context
    /// and also when you want to restart scanning the key-value
    /// items in the file.
    /// NOTE(review): only the bookkeeping is reset, the underlying file reader
    /// is not repositioned; confirm that callers seek the file themselves.
    fn rewind(ctx: *GgufFile) void {
        ctx.off = @sizeOf(GgufHeader);
        ctx.left_kv = ctx.header.metadata_kv_count;
        ctx.left_tensors = ctx.header.tensor_count;
    }

    /// Sets the parse offset. `pos` must be inside the file.
    /// NOTE(review): like `rewind`, this does not move the underlying file reader.
    pub fn seek(self: *GgufFile, pos: usize) void {
        assert(pos < self.size);
        self.off = pos;
    }

    /// Number of bytes between the parse offset and the end of the file.
    fn remaining(self: *const GgufFile) usize {
        return self.size -| self.off;
    }

    /// Reads a little-endian integer of type `T` and advances the offset.
    /// Returns error.InvalidGguf if fewer than `@sizeOf(T)` bytes are left.
    fn readInt(self: *GgufFile, comptime T: type) !T {
        // A value ending exactly at the end of the file is valid.
        if (@sizeOf(T) > self.remaining()) return error.InvalidGguf;
        // `try` before touching `off`, so that a failed read does not advance it.
        const res = try self.file.file.reader().readInt(T, .little);
        self.off += @sizeOf(T);
        return res;
    }

    /// Reads a tensor type id.
    /// Returns error.UnsupportedGgufType for ids above `TensorType.MAX_KNOWN_ENUM`.
    fn readTensorType(self: *GgufFile) !TensorType {
        const raw = try self.readInt(u32);
        if (raw > TensorType.MAX_KNOWN_ENUM) {
            log.err("Unsupported GGUF tensor type: {d}", .{raw});
            return error.UnsupportedGgufType;
        }
        return @enumFromInt(raw);
    }

    /// Reads a value type id.
    /// Returns error.UnsupportedGgufType for ids that have no named tag.
    fn readValueType(self: *GgufFile) !GgufValueType {
        const raw = try self.readInt(u32);
        const t: GgufValueType = @enumFromInt(raw);
        switch (t) {
            .uint8, .int8, .uint16, .int16, .uint32, .int32, .float32, .bool, .string, .array, .uint64, .int64, .float64, .array_start, .array_end => {},
            else => {
                // `t` has no name here, so `@tagName` would be illegal: log the raw id.
                log.err("Unsupported GGUF value type: {d}", .{raw});
                return error.UnsupportedGgufType;
            },
        }
        return t;
    }

    /// Reads `len` bytes into a newly allocated buffer and advances the offset.
    /// Caller owns the returned slice.
    /// Returns error.InvalidGguf if fewer than `len` bytes are left in the file.
    pub fn readAlloc(self: *GgufFile, allocator: std.mem.Allocator, len: usize) ![]u8 {
        // `len` comes from the file: reject it before allocating.
        if (len > self.remaining()) return error.InvalidGguf;
        const data = try allocator.alloc(u8, len);
        errdefer allocator.free(data);
        const read = try self.file.file.reader().readAll(data);
        if (read != data.len) return error.InvalidGguf;
        self.off += len;
        return data;
    }

    /// Moves the reader and the offset forward by `len` bytes.
    /// Returns error.InvalidGguf if that would go past the end of the file.
    pub fn skipBytes(self: *GgufFile, len: usize) !void {
        // The second check guarantees that the `@intCast` below cannot overflow.
        if (len > self.remaining() or len > std.math.maxInt(i64)) return error.InvalidGguf;
        try self.file.file.seekBy(@intCast(len));
        self.off += len;
    }

    /// Read the len then the actual bytes.
    /// Caller owns the returned slice.
    pub fn readString(self: *GgufFile, allocator: std.mem.Allocator) ![]u8 {
        const len: usize = try self.readInt(u64);
        return self.readAlloc(allocator, len);
    }

    /// Skips a length-prefixed string without reading its bytes.
    pub fn skipString(self: *GgufFile) !void {
        const len: usize = try self.readInt(u64);
        return self.skipBytes(len);
    }

    /// Reads a whole array value: child type, element count, then the elements.
    /// For string arrays, `data` is the byte view of an allocated slice of `[]u8`,
    /// each string being allocated separately. For other types, `data` holds the
    /// element bytes as stored in the file.
    /// Returns error.UnsupportedGgufType for children without a fixed element
    /// size (nested arrays and the `array_start` / `array_end` markers).
    fn readArrayHeader(self: *GgufFile, allocator: std.mem.Allocator) !GgufValue.Array {
        const child = try self.readValueType();
        const len: usize = try self.readInt(u64);

        // `GgufValueType.sizeOf` is `unreachable` for these tags.
        switch (child) {
            .array, .array_start, .array_end => {
                log.err("Unsupported GGUF array item type: {d}", .{@intFromEnum(child)});
                return error.UnsupportedGgufType;
            },
            else => {},
        }

        const data: []u8 = switch (child) {
            // Since strings have variable lengths, we need to read them one by one
            .string => str: {
                // Each string takes at least its 8 bytes length prefix in the file.
                if (len > self.remaining() / @sizeOf(u64)) return error.InvalidGguf;
                const strings = try allocator.alloc([]u8, len);
                var loaded: usize = 0;
                // On failure, release the strings read so far and the slice itself.
                errdefer {
                    for (strings[0..loaded]) |s| allocator.free(s);
                    allocator.free(strings);
                }
                while (loaded < len) : (loaded += 1) {
                    strings[loaded] = try self.readString(allocator);
                }
                break :str std.mem.sliceAsBytes(strings);
            },
            else => raw: {
                const byte_len = std.math.mul(usize, len, child.sizeOf()) catch return error.InvalidGguf;
                break :raw try self.readAlloc(allocator, byte_len);
            },
        };
        return .{
            .child = child,
            .len = len,
            .data = data,
        };
    }

    /// Reads one value of type `t`.
    /// Strings and arrays are allocated with `allocator`.
    /// Returns error.UnsupportedGgufType for types that are not values.
    fn readTypedValue(self: *GgufFile, allocator: std.mem.Allocator, t: GgufValueType) !GgufValue {
        return switch (t) {
            .uint8 => .{ .uint8 = try self.readInt(u8) },
            .int8 => .{ .int8 = try self.readInt(i8) },
            .uint16 => .{ .uint16 = try self.readInt(u16) },
            .int16 => .{ .int16 = try self.readInt(i16) },
            .uint32 => .{ .uint32 = try self.readInt(u32) },
            .int32 => .{ .int32 = try self.readInt(i32) },
            .float32 => .{ .float32 = @bitCast(try self.readInt(u32)) },
            .bool => .{ .boolval = try self.readInt(u8) != 0 },
            .string => .{ .string = try self.readString(allocator) },
            .array => .{ .array = try self.readArrayHeader(allocator) },
            .uint64 => .{ .uint64 = try self.readInt(u64) },
            .int64 => .{ .int64 = try self.readInt(i64) },
            .float64 => .{ .float64 = @bitCast(try self.readInt(u64)) },
            else => error.UnsupportedGgufType,
        };
    }

    /// Parses the next metadata entry.
    /// Returns error.EndOfMetadata if there are no longer metadata to process in this GGUF file.
    /// The name and any string / array value are allocated with `allocator`.
    pub fn readMetadata(self: *GgufFile, allocator: std.mem.Allocator) !GgufMetadataKv {
        if (self.left_kv == 0) return error.EndOfMetadata;
        self.left_kv -= 1;
        const name = try self.readString(allocator);
        errdefer allocator.free(name);
        const type_ = try self.readValueType();
        const val: GgufValue = try self.readTypedValue(allocator, type_);
        return .{ .name = name, .type_ = type_, .val = val };
    }

    // Set the data section offset. This function must be called exactly when
    // all the key-values are consumed, in the context of the first call of
    // ctx.getTensor(): this way we will be able to return tensor offsets
    // as absolute positions and pointers to the mmapped file.
    // It scans over all the tensor infos to find where they end, then restores
    // the reader position and the offset.
    fn setDataOffset(self: *GgufFile) !void {
        const base_off = self.off;

        assert(self.left_kv == 0 and self.left_tensors == self.header.tensor_count);

        for (0..self.left_tensors) |_| try self.skipTensor();
        // The data section starts at the next multiple of `alignment`.
        const padding: usize = getAlignmentPadding(self.alignment, self.off);
        self.file.data_offset = self.off + padding;

        try self.file.file.seekTo(base_off);
        self.off = base_off;
    }

    /// Skips one tensor info entry: name, rank, dimensions, type and offset.
    pub fn skipTensor(self: *GgufFile) !void {
        try self.skipString(); // Skip name
        const num_dim: u32 = try self.readInt(u32);
        // dimensions, type, and offset.
        // Computed in `usize`: `8 * num_dim` can overflow a `u32` on corrupt input.
        try self.skipBytes(@as(usize, num_dim) * @sizeOf(u64) + @sizeOf(u32) + @sizeOf(u64));
    }

    /// Parses the next tensor entry.
    /// Returns error.EndOfMetadata if there are no longer tensor metadata to process in this GGUF file,
    /// or if metadata key-values are still pending.
    /// Returns error.InvalidGguf for an out-of-range rank or dimensions, and
    /// error.UnsupportedGgufType for unknown or deprecated tensor types.
    /// The name is allocated with `allocator`.
    pub fn readTensorInfo(self: *GgufFile, allocator: std.mem.Allocator) !GgufTensorInfo {
        if (self.left_tensors == 0 or self.left_kv != 0) {
            return error.EndOfMetadata;
        }

        // We want to return tensor data with offsets relative to the start
        // of the file, so that the user of the API is able to access tensors
        // as it iterates over them. To do so, we need to perform a full
        // scan if this is the first tensor info we are reading.
        // TODO: explicitly set the data offset instead of using 0 as "not computed yet".
        if (self.file.data_offset == 0) try self.setDataOffset();
        self.left_tensors -= 1;
        const name = try self.readString(allocator);
        errdefer allocator.free(name);
        const num_dim = try self.readInt(u32);
        // The rank comes from the file: report it instead of asserting.
        if (num_dim > GGUF_TENSOR_MAX_DIM) {
            log.err("Invalid GGUF tensor '{s}': {d} dimensions (max {d})", .{ name, num_dim, GGUF_TENSOR_MAX_DIM });
            return error.InvalidGguf;
        }
        // Read the dimensions; unused dimensions are left `undefined`.
        // Note: we reverse the order of the dimensions to match zml convention.
        var dims: [GGUF_TENSOR_MAX_DIM]i64 = undefined;
        var num_weights: usize = 1;
        for (0..num_dim) |j| {
            const d = try self.readInt(u64);
            dims[num_dim - 1 - j] = std.math.cast(i64, d) orelse return error.InvalidGguf;
            num_weights = std.math.mul(usize, num_weights, d) catch return error.InvalidGguf;
        }
        const t: TensorType = try self.readTensorType();
        const start = try self.readInt(u64);
        // To accurately calculate the bytes used by this tensor on the GGUF
        // file, we need to take into account that quantization methods store
        // tensors as block of N weights. So first of all we need to understand
        // the number of padding weights (since the last block may have just
        // fewer weights stored inside, but still requires to be stored to its full
        // length). Then we can do the math to see how many blocks we need, and
        // multiply by the block size to obtain the final total size.
        const tf = t.getFeatures();
        // Deprecated types have no block layout (zero items per block).
        if (tf.items_per_block == 0) {
            log.err("Unsupported GGUF tensor type: {d}", .{@intFromEnum(t)});
            return error.UnsupportedGgufType;
        }
        // Cannot fail: unsigned operands and a non-zero divisor.
        const num_blocks = std.math.divCeil(usize, num_weights, tf.items_per_block) catch unreachable;
        const byte_len = std.math.mul(usize, num_blocks, tf.bytes_per_block) catch return error.InvalidGguf;
        return .{
            .name = name,
            .t = t,
            .rank = num_dim,
            .dims = dims,
            .start = start,
            .byte_len = byte_len,
            .num_weights = num_weights,
        };
    }
};

/// Returns how many bytes must be added to `offset` (an offset or a length)
/// to reach the next multiple of `alignment`; 0 if it is already aligned.
/// `alignment` must be non-zero.
fn getAlignmentPadding(alignment: usize, offset: usize) usize {
    const misalignment = offset % alignment;
    if (misalignment == 0) return 0;
    return alignment - misalignment;
}
Add initial Bazel build configuration, async runtime implementation, and core MLIR dialect definitions for ZML. 2023-01-02 14:28:25 +00:00			`const asynk = @import("async");`
			`const std = @import("std");`
			`const utils = @import("../utils.zig");`
			`const zml = @import("../../zml.zig");`

			`const assert = std.debug.assert;`
			`const log = std.log.scoped(.zml_io);`

			`pub const GgufErrors = error{`
			`ValueTypeMismatch,`
			`InvalidGguf,`
			`UnsupportedGgufType,`
			`EndOfMetadata,`
			`OutOfMemory,`
			`};`

			`// Enums and structures`
			`pub const TensorType = enum(u32) {`
			`f32 = 0,`
			`f16 = 1,`
			`q4_0 = 2,`
			`q4_1 = 3,`
			`deprecated_q4_2 = 4,`
			`deprecated_q4_3 = 5,`
			`q5_0 = 6,`
			`q5_1 = 7,`
			`q8_0 = 8,`
			`q8_1 = 9,`
			`// k-quantizations`
			`q2_k = 10,`
			`q3_k = 11,`
			`q4_k = 12,`
			`q5_k = 13,`
			`q6_k = 14,`
			`q8_k = 15,`
			`i8 = 16,`
			`i16 = 17,`
			`i32 = 18,`

			`const MAX_KNOWN_ENUM = 18;`

			`pub fn canConvertQuant(self: TensorType) bool {`
			`return switch (self) {`
			`.q8_0, .q4_k, .q6_k, .q2_k, .q4_0, .q4_1 => true,`
			`else => false,`
			`};`
			`}`

			`pub fn toDtype(self: TensorType) ?zml.DataType {`
			`return switch (self) {`
			`.f32 => .f32,`
			`.f16 => .f16,`
			`.i8 => .i8,`
			`.i16 => .i16,`
			`.i32 => .i32,`
			`else => null,`
			`};`
			`}`

			`pub fn sizeOf(self: TensorType) usize {`
			`return self.toDtype().?.sizeOf();`
			`}`

			`/// Return the tensor type features`
			`pub fn getFeatures(t: TensorType) TensorTypeFeatures {`
			`return switch (t) {`
			`inline else => \|val\| @field(TENSOR_TYPE_FEATURES, @tagName(val)),`
			`};`
			`}`
			`};`

			`/// GGUF tensor type to features lookup table.`
			`pub const TensorTypeFeatures = struct {`
			`items_per_block: u29,`
			`bytes_per_block: u29,`

			`pub fn alignment(features: TensorTypeFeatures) u8 {`
			`return std.math.log2_int(u29, features.bytes_per_block);`
			`}`
			`};`

			`pub const TENSOR_TYPE_FEATURES: std.enums.EnumFieldStruct(TensorType, TensorTypeFeatures, null) = .{`
			`.f32 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(f32) },`
			`.f16 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(f16) },`
			`.q4_0 = .{ .items_per_block = 32, .bytes_per_block = 18 },`
			`.q4_1 = .{ .items_per_block = 32, .bytes_per_block = 20 },`
			`.deprecated_q4_2 = .{ .items_per_block = 0, .bytes_per_block = 0 },`
			`.deprecated_q4_3 = .{ .items_per_block = 0, .bytes_per_block = 0 },`
			`.q5_0 = .{ .items_per_block = 32, .bytes_per_block = 22 },`
			`.q5_1 = .{ .items_per_block = 32, .bytes_per_block = 24 },`
			`.q8_0 = .{ .items_per_block = 32, .bytes_per_block = 34 },`
			`.q8_1 = .{ .items_per_block = 32, .bytes_per_block = 40 },`
			`.q2_k = .{ .items_per_block = 256, .bytes_per_block = 82 },`
			`.q3_k = .{ .items_per_block = 256, .bytes_per_block = 110 },`
			`.q4_k = .{ .items_per_block = 256, .bytes_per_block = 144 },`
			`.q5_k = .{ .items_per_block = 256, .bytes_per_block = 176 },`
			`.q6_k = .{ .items_per_block = 256, .bytes_per_block = 210 },`
			`.q8_k = .{ .items_per_block = 256, .bytes_per_block = 292 },`
			`.i8 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i8) },`
			`.i16 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i16) },`
			`.i32 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i32) },`
			`};`

			`pub const GgufValueType = enum(u32) {`
			`// The value is a 8-bit unsigned integer.`
			`uint8 = 0,`
			`// The value is a 8-bit signed integer.`
			`int8 = 1,`
			`// The value is a 16-bit unsigned little-endian integer.`
			`uint16 = 2,`
			`// The value is a 16-bit signed little-endian integer.`
			`int16 = 3,`
			`// The value is a 32-bit unsigned little-endian integer.`
			`uint32 = 4,`
			`// The value is a 32-bit signed little-endian integer.`
			`int32 = 5,`
			`// The value is a 32-bit IEEE754 floating point number.`
			`float32 = 6,`
			`// The value is a boolean.`
			`// 1-byte value where 0 is false and 1 is true.`
			`// Anything else is invalid, and should be treated as either the model`
			`// being invalid or the reader being buggy.`
			`bool = 7,`
			`// The value is a UTF-8 non-null-terminated string, with length prepended.`
			`string = 8,`
			`// The value is an array of other values, with the length and type`
			`// prepended. Arrays can be nested, and the length of the array is the`
			`// number of elements in the array, not the number of bytes.`
			`array = 9,`
			`// The value is a 64-bit unsigned little-endian integer.`
			`uint64 = 10,`
			`// The value is a 64-bit signed little-endian integer.`
			`int64 = 11,`
			`// The value is a 64-bit IEEE754 floating point number.`
			`float64 = 12,`
			`// Special values used by the callbacks of gguf_do_with_value().`
			`array_start = 100,`
			`array_end = 101,`

			`// Allow other values in case GGUF add more types without us noticing`
			`_,`

			`pub fn sizeOf(self: GgufValueType) usize {`
			`return switch (self) {`
			`.uint8 => @sizeOf(u8),`
			`.int8 => @sizeOf(i8),`
			`.uint16 => @sizeOf(u16),`
			`.int16 => @sizeOf(i16),`
			`.uint32 => @sizeOf(u32),`
			`.int32 => @sizeOf(i32),`
			`.float32 => @sizeOf(f32),`
			`.bool => @sizeOf(bool),`
			`.uint64 => @sizeOf(u64),`
			`.int64 => @sizeOf(i64),`
			`.float64 => @sizeOf(f64),`
			`.string => @sizeOf([]u8),`
			`else => unreachable,`
			`};`
			`}`

			`pub fn arrayTypeCheck(self: GgufValueType, comptime T: type) !void {`
			`switch (self) {`
			`.string => if (T != []u8 and T != []const u8) return error.ValueTypeMismatch,`
			`.uint8 => if (T != u8) return error.ValueTypeMismatch,`
			`.int8 => if (T != i8) return error.ValueTypeMismatch,`
			`.uint16 => if (T != u16) return error.ValueTypeMismatch,`
			`.int16 => if (T != i16) return error.ValueTypeMismatch,`
			`.uint32 => if (T != u32) return error.ValueTypeMismatch,`
			`.int32 => if (T != i32) return error.ValueTypeMismatch,`
			`.float32 => if (T != f32) return error.ValueTypeMismatch,`
			`.bool => if (T != bool) return error.ValueTypeMismatch,`
			`.uint64 => if (T != u64) return error.ValueTypeMismatch,`
			`.int64 => if (T != i64) return error.ValueTypeMismatch,`
			`.float64 => if (T != f64) return error.ValueTypeMismatch,`
			`else => {},`
			`}`
			`}`
			`};`

			`pub const ValueType = enum {`
			`uint8,`
			`int8,`
			`uint16,`
			`int16,`
			`uint32,`
			`int32,`
			`float32,`
			`uint64,`
			`int64,`
			`float64,`
			`boolval,`
			`string,`
			`array,`
			`};`

			`// Union of possible values.`
			`pub const GgufValue = union(ValueType) {`
			`uint8: u8,`
			`int8: i8,`
			`uint16: u16,`
			`int16: i16,`
			`uint32: u32,`
			`int32: i32,`
			`float32: f32,`
			`uint64: u64,`
			`int64: i64,`
			`float64: f64,`
			`boolval: bool,`
			`string: []const u8,`
			`array: Array,`

			`pub const Array = struct {`
			`// Any value type is valid, including arrays.`
			`child: GgufValueType,`
			`// Number of elements, not bytes`
			`len: usize,`
			`data: []u8,`
			`};`

			`pub fn asLoaderValue(self: GgufValue) zml.aio.Value {`
			`return switch (self) {`
			`.array => \|v\| .{`
			`.array = .{`
			`.item_type = switch (v.child) {`
			`.bool => .boolval,`
			`.uint8 => .uint8,`
			`.int8 => .int8,`
			`.uint16 => .uint16,`
			`.int16 => .int16,`
			`.uint32 => .uint32,`
			`.int32 => .int32,`
			`.float32 => .float32,`
			`.uint64 => .uint64,`
			`.int64 => .int64,`
			`.float64 => .float64,`
			`.string => .string,`
			`// TODO: .array => .array,`
			`else => unreachable,`
			`},`
			`.data = v.data,`
			`},`
			`},`
			`inline else => \|v, tag\| @unionInit(zml.aio.Value, @tagName(tag), v),`
			`};`
			`}`
			`};`

			`// Header`
			`const GgufHeader = extern struct {`
			// Magic number to announce that this is a GGUF file. Must be `GUFF`.
			`magic: [4]u8,`
			`// The version of the format implemented.`
			// Must be `3` for version described in this spec.
			`version: u32,`
			`// The number of tensors in the file.`
			`// This is explicit, instead of being included in the metadata, to ensure`
			`// it is always present for loading the tensors.`
			`tensor_count: usize,`
			`// The number of metadata key-value pairs.`
			`metadata_kv_count: usize,`

			`pub fn validate(self: GgufHeader) !void {`
			`if (!std.mem.eql(u8, &self.magic, "GGUF")) {`
			`log.err("Invalid GGUF file: wrong header {s}", .{self.magic});`
			`return error.InvalidGguf;`
			`}`
			`}`
			`};`

			`// Key representation in this library API.`
			`pub const GgufMetadataKv = struct {`
			`name: []const u8,`
			`type_: GgufValueType,`
			`val: GgufValue,`
			`};`

			`// Tensor representation in this library API.`
			`const GGUF_TENSOR_MAX_DIM: usize = 8; // Future-proof: actual limit is 4.`
			`pub const GgufTensorInfo = struct {`
			`name: []const u8,`
			`t: TensorType, // Tensor type (enum TensorType).`
			`rank: usize, // Number of dimensions of the tensor.`
			`dims: [GGUF_TENSOR_MAX_DIM]i64, // Dimensions (Eg. [512, 1024, 1, 1]).`
			`start: usize, // Offset from start of data section.`
			`byte_len: usize, // Total size in bytes.`
			`num_weights: usize, // Total number of parameters.`

			`pub inline fn shape(info: GgufTensorInfo) []const i64 {`
			`return info.dims[0..info.rank];`
			`}`
			`};`

			`// Return the value type name given the type ID.`
			`fn getValueTypeName(t: u32) []const u8 {`
			`if (@as(usize, @intCast(t)) >= GGUF_VALUE_NAME.len) return "unknown";`
			`return GGUF_VALUE_NAME[@intCast(t)];`
			`}`

			`const GGUF_VALUE_NAME = [_][]const u8{`
			`"uint8", "int8", "uint16", "int16", "uint32", "int32",`
			`"float32", "bool", "string", "array", "uint64", "int64",`
			`"float64",`
			`};`

			`/// GGUF file API`
			`/// A memory-mapped view of a .gguf file.`
			`/// Format used by GGML models: https://github.com/ggerganov/ggml/`
			`pub const GgufFile = struct {`
			`header: GgufHeader, // GUFF file header info.`
			`size: usize, // Total file size.`
			`file: zml.aio.MemoryMappedFile,`
			`left_kv: usize, // Number of key-value pairs yet to read.`
			`left_tensors: usize, // Number of tensors yet to read.`
			`off: usize, // Offset of the next item to parse.`
			`alignment: usize = 32, // File data alignment. Default: 32 bytes.`

			`/// Open and memmap the given file.`
			`pub fn open(path: []const u8) !GgufFile {`
			`const file = try asynk.File.open(path, .{});`
			`const header = try file.reader().readStruct(GgufHeader);`
			`try header.validate();`
			`return .{`
			`.header = header,`
			`.size = (try file.stat()).size,`
			`.file = try zml.aio.MemoryMappedFile.init(file),`
			`.off = @sizeOf(GgufHeader),`
			`.left_kv = header.metadata_kv_count,`
			`.left_tensors = header.tensor_count,`
			`};`
			`}`

			`/// Unmap the file memory and close the file handle.`
			`pub fn close(self: *GgufFile) void {`
			`self.file.deinit();`
			`}`

			`/// Set the context to read the first key-value entry in the GGUF`
			`/// file and then all the rest. Is used when creating a new context`
			`/// and also when you want to restart scanning the key-value`
			`/// items in the file.`
			`fn rewind(ctx: *GgufFile) void {`
			`ctx.off = @sizeOf(GgufHeader);`
			`ctx.left_kv = ctx.header.metadata_kv_count;`
			`ctx.left_tensors = ctx.header.tensor_count;`
			`}`

			`pub fn seek(self: *GgufFile, pos: usize) void {`
			`assert(pos < self.size);`
			`self.off = pos;`
			`}`

			`fn readInt(self: *GgufFile, comptime T: type) !T {`
			`if (self.off + @sizeOf(T) >= self.size) return error.InvalidGguf;`
			`const res = self.file.file.reader().readInt(T, .little);`
			`self.off += @sizeOf(T);`
			`return res;`
			`}`

			`fn readTensorType(self: *GgufFile) !TensorType {`
			`const raw = try self.readInt(u32);`
			`if (raw > TensorType.MAX_KNOWN_ENUM) {`
			`log.err("Unsupported GGUF tensor type: {d}", .{raw});`
			`return error.UnsupportedGgufType;`
			`}`
			`return @enumFromInt(raw);`
			`}`

			`fn readValueType(self: *GgufFile) !GgufValueType {`
			`const raw = try self.readInt(u32);`
			`const t: GgufValueType = @enumFromInt(raw);`
			`switch (t) {`
			`.uint8, .int8, .uint16, .int16, .uint32, .int32, .float32, .bool, .string, .array, .uint64, .int64, .float64, .array_start, .array_end => {},`
			`else => {`
			`log.err("Unsupported GGUF value type: {s}", .{@tagName(t)});`
			`return error.UnsupportedGgufType;`
			`},`
			`}`
			`return t;`
			`}`

			/// Allocate `len` bytes with `allocator` and fill them from the file,
			/// advancing the logical offset by `len`.
			/// The caller owns the returned slice and must free it with `allocator`.
			/// Returns error.InvalidGguf when `len` exceeds the bytes left before
			/// `self.size` or when fewer than `len` bytes could be read; the buffer is
			/// released on every error path.
			pub fn readAlloc(self: *GgufFile, allocator: std.mem.Allocator, len: usize) ![]u8 {
			    // `len` usually comes from the file itself: reject impossible lengths
			    // before attempting a potentially huge allocation.
			    if (self.off > self.size or len > self.size - self.off) return error.InvalidGguf;

			    const data = try allocator.alloc(u8, len);
			    errdefer allocator.free(data);

			    const read = try self.file.file.reader().readAll(data);
			    if (read != data.len) return error.InvalidGguf;
			    self.off += len;
			    return data;
			}

			/// Move the file position forward by `len` bytes without reading them and
			/// advance the logical offset accordingly.
			/// Returns error.InvalidGguf when `len` cannot be represented as a signed
			/// seek distance, and forwards any seek error.
			pub fn skipBytes(self: *GgufFile, len: usize) !void {
			    // `len` may originate from untrusted file content; a checked cast
			    // avoids illegal behavior on values that do not fit in an i64.
			    const delta = std.math.cast(i64, len) orelse return error.InvalidGguf;
			    try self.file.file.seekBy(delta);
			    self.off += len;
			}

			/// Read a length-prefixed string: a `u64` byte count followed by that many
			/// bytes. The returned slice is allocated with `allocator`.
			pub fn readString(self: *GgufFile, allocator: std.mem.Allocator) ![]u8 {
			    const byte_count: usize = try self.readInt(u64);
			    const bytes = try self.readAlloc(allocator, byte_count);
			    return bytes;
			}

			/// Skip a length-prefixed string: read its `u64` byte count, then skip
			/// that many bytes without reading them.
			pub fn skipString(self: *GgufFile) !void {
			    const byte_count: usize = try self.readInt(u64);
			    try self.skipBytes(byte_count);
			}

			/// Read an array value: the element type, the `u64` element count, then
			/// the elements themselves.
			/// For string elements, `data` holds the bytes of a `[]u8` table whose
			/// entries are individually allocated strings; for every other element
			/// type, `data` holds `len * child.sizeOf()` raw bytes read from the file.
			/// All memory is allocated with `allocator` and released again if an error
			/// occurs part-way through.
			fn readArrayHeader(self: *GgufFile, allocator: std.mem.Allocator) !GgufValue.Array {
			    const child = try self.readValueType();
			    const len: usize = try self.readInt(u64);
			    const data = switch (child) {
			        // Since strings have variable lengths, we need to read them one by one.
			        .string => str: {
			            const strings = try allocator.alloc([]u8, len);
			            var filled: usize = 0;
			            // On failure, free the strings read so far and the table itself.
			            errdefer {
			                for (strings[0..filled]) |s| allocator.free(s);
			                allocator.free(strings);
			            }
			            while (filled < len) : (filled += 1) {
			                strings[filled] = try self.readString(allocator);
			            }
			            break :str std.mem.sliceAsBytes(strings);
			        },
			        else => fixed: {
			            // `len` is untrusted: a wrapping product must be reported as a
			            // malformed file instead of trapping.
			            const byte_len = std.math.mul(usize, len, child.sizeOf()) catch return error.InvalidGguf;
			            break :fixed try self.readAlloc(allocator, byte_len);
			        },
			    };
			    return .{
			        .child = child,
			        .len = len,
			        .data = data,
			    };
			}

			/// Read one value of type `t` from the file and wrap it in a `GgufValue`.
			/// Integers are read directly, floats are read as same-width unsigned
			/// integers and bit-cast, booleans are a single byte compared against zero,
			/// strings and arrays are allocated with `allocator`.
			/// Any other type yields error.UnsupportedGgufType.
			fn readTypedValue(self: *GgufFile, allocator: std.mem.Allocator, t: GgufValueType) !GgufValue {
			    switch (t) {
			        // 8-bit
			        .uint8 => return .{ .uint8 = try self.readInt(u8) },
			        .int8 => return .{ .int8 = try self.readInt(i8) },
			        .bool => return .{ .boolval = try self.readInt(u8) != 0 },
			        // 16-bit
			        .uint16 => return .{ .uint16 = try self.readInt(u16) },
			        .int16 => return .{ .int16 = try self.readInt(i16) },
			        // 32-bit
			        .uint32 => return .{ .uint32 = try self.readInt(u32) },
			        .int32 => return .{ .int32 = try self.readInt(i32) },
			        .float32 => return .{ .float32 = @bitCast(try self.readInt(u32)) },
			        // 64-bit
			        .uint64 => return .{ .uint64 = try self.readInt(u64) },
			        .int64 => return .{ .int64 = try self.readInt(i64) },
			        .float64 => return .{ .float64 = @bitCast(try self.readInt(u64)) },
			        // Variable-size values
			        .string => return .{ .string = try self.readString(allocator) },
			        .array => return .{ .array = try self.readArrayHeader(allocator) },
			        else => return error.UnsupportedGgufType,
			    }
			}

			/// Parses the next metadata entry: its name, its value type, then its value.
			/// Returns error.EndOfMetadata if there are no longer metadata to process in this GGUF file.
			/// The name (and any string or array value) is allocated with `allocator`
			/// and owned by the caller on success.
			pub fn readMetadata(self: *GgufFile, allocator: std.mem.Allocator) !GgufMetadataKv {
			    if (self.left_kv == 0) return error.EndOfMetadata;
			    self.left_kv -= 1;

			    const name = try self.readString(allocator);
			    // The name is already allocated when the type or the value fails to
			    // parse: release it so the error path does not leak.
			    errdefer allocator.free(name);

			    const type_ = try self.readValueType();
			    const val: GgufValue = try self.readTypedValue(allocator, type_);
			    return .{ .name = name, .type_ = type_, .val = val };
			}

			/// Compute and store the data section offset in `self.file.data_offset`.
			/// Must be called once every key-value entry has been consumed and before
			/// any tensor info has been read (both conditions are asserted), i.e. in
			/// the context of the first tensor read. It skips over all tensor infos,
			/// aligns the resulting offset to `self.alignment`, then moves the file
			/// position and the logical offset back to where the scan started.
			fn setDataOffset(self: *GgufFile) !void {
			    assert(self.left_kv == 0 and self.left_tensors == self.header.tensor_count);

			    // Remember where the tensor infos begin so we can come back after the scan.
			    const resume_at = self.off;

			    var remaining = self.left_tensors;
			    while (remaining > 0) : (remaining -= 1) try self.skipTensor();

			    // The data section starts at the first aligned offset after the infos.
			    const end_of_infos = self.off;
			    self.file.data_offset = end_of_infos + getAlignmentPadding(self.alignment, end_of_infos);

			    try self.file.file.seekTo(resume_at);
			    self.off = resume_at;
			}

			/// Skip one tensor info entry without parsing it: the name, the `u32`
			/// dimension count, then the dimensions, the type and the offset.
			pub fn skipTensor(self: *GgufFile) !void {
			    try self.skipString(); // Skip name
			    // Widen before multiplying: the count is untrusted and `8 * num_dim`
			    // evaluated in u32 could overflow.
			    const num_dim: usize = try self.readInt(u32);
			    // dimensions (u64 each), type (u32), and offset (u64).
			    try self.skipBytes(8 * num_dim + 4 + 8);
			}

			/// Parses the next tensor entry.
			/// Returns error.EndOfMetadata if there are no longer tensor metadata to process in this GGUF file
			/// (or if key-value entries are still pending).
			/// Returns error.InvalidGguf when the entry declares more than
			/// `GGUF_TENSOR_MAX_DIM` dimensions, or when a dimension, the weight count
			/// or the byte length does not fit in the integer types used here.
			/// The tensor name is allocated with `allocator` and owned by the caller on
			/// success; it is released on every error path.
			pub fn readTensorInfo(self: *GgufFile, allocator: std.mem.Allocator) !GgufTensorInfo {
			    if (self.left_tensors == 0 or self.left_kv != 0) {
			        return error.EndOfMetadata;
			    }

			    // We want to return tensor data with offsets relative to the start
			    // of the file, so that the user of the API is able to access tensors
			    // as it iterates over them. To do so, we need to perform a full
			    // scan if this is the first tensor info we are reading.
			    // TODO: explicitly set the data offset in
			    if (self.file.data_offset == 0) try self.setDataOffset();
			    self.left_tensors -= 1;

			    const name = try self.readString(allocator);
			    errdefer allocator.free(name);

			    const num_dim = try self.readInt(u32);
			    const rank: usize = num_dim;
			    // The rank comes from the file: validate it with a real error instead
			    // of an assertion, which would not protect `dims` in release builds.
			    if (rank > GGUF_TENSOR_MAX_DIM) return error.InvalidGguf;

			    // Read the dimensions; unused dimensions are left `undefined`.
			    // Note: we reverse the order of the dimensions to match zml convention.
			    var dims: [GGUF_TENSOR_MAX_DIM]i64 = undefined;
			    var num_weights: usize = 1;
			    for (0..rank) |j| {
			        const d = try self.readInt(u64);
			        dims[rank - 1 - j] = std.math.cast(i64, d) orelse return error.InvalidGguf;
			        num_weights = std.math.mul(usize, num_weights, d) catch return error.InvalidGguf;
			    }
			    const t: TensorType = try self.readTensorType();
			    const start = try self.readInt(u64);

			    // To accurately calculate the bytes used by this tensor on the GGUF
			    // file, we need to take into account that quantization methods store
			    // tensors as block of N weights. The last block may hold fewer weights
			    // but is still stored at its full length, so we round the number of
			    // blocks up and multiply by the block size to obtain the total size.
			    const tf = t.getFeatures();
			    const num_blocks = std.math.divCeil(usize, num_weights, tf.items_per_block) catch unreachable;
			    const byte_len: usize = std.math.mul(usize, num_blocks, tf.bytes_per_block) catch return error.InvalidGguf;

			    return .{
			        .name = name,
			        .t = t,
			        .rank = num_dim,
			        .dims = dims,
			        .start = start,
			        .byte_len = byte_len,
			        .num_weights = num_weights,
			    };
			}
			`};`

			/// Return how many bytes must be added to `offset` (an offset or a length)
			/// to reach the next multiple of `alignment`; 0 when it is already aligned.
			/// `alignment` must be non-zero.
			fn getAlignmentPadding(alignment: usize, offset: usize) usize {
			    const misalignment = offset % alignment;
			    return if (misalignment == 0) 0 else alignment - misalignment;
			}