531 lines
19 KiB
Zig
531 lines
19 KiB
Zig
|
|
const asynk = @import("async");
|
||
|
|
const std = @import("std");
|
||
|
|
const utils = @import("../utils.zig");
|
||
|
|
const zml = @import("../../zml.zig");
|
||
|
|
|
||
|
|
const assert = std.debug.assert;
|
||
|
|
const log = std.log.scoped(.zml_io);
|
||
|
|
|
||
|
|
/// Errors that GGUF parsing in this file can report.
pub const GgufErrors = error{
    /// The file content is not a well-formed GGUF file.
    InvalidGguf,
    /// The file uses a tensor or value type this reader does not handle.
    UnsupportedGgufType,
    /// A typed access was attempted with a type that does not match the stored value.
    ValueTypeMismatch,
    /// There is no more metadata entry (or tensor info) left to read.
    EndOfMetadata,
    /// An allocation failed.
    OutOfMemory,
};
|
||
|
|
|
||
|
|
// Enums and structures
|
||
|
|
/// Tensor storage types as encoded in a GGUF file (the integer is the on-disk id).
pub const TensorType = enum(u32) {
    f32 = 0,
    f16 = 1,
    q4_0 = 2,
    q4_1 = 3,
    deprecated_q4_2 = 4,
    deprecated_q4_3 = 5,
    q5_0 = 6,
    q5_1 = 7,
    q8_0 = 8,
    q8_1 = 9,
    // k-quantizations
    q2_k = 10,
    q3_k = 11,
    q4_k = 12,
    q5_k = 13,
    q6_k = 14,
    q8_k = 15,
    i8 = 16,
    i16 = 17,
    i32 = 18,

    /// Largest raw id declared above; raw ids greater than this are not valid tags.
    const MAX_KNOWN_ENUM = 18;

    /// Tells whether this type is one of the quantized formats listed as convertible.
    pub fn canConvertQuant(self: TensorType) bool {
        switch (self) {
            .q4_0, .q4_1, .q8_0, .q2_k, .q4_k, .q6_k => return true,
            else => return false,
        }
    }

    /// Maps the plain (non-quantized) types to the matching `zml.DataType`.
    /// Returns null for every other type.
    pub fn toDtype(self: TensorType) ?zml.DataType {
        switch (self) {
            .f32 => return .f32,
            .f16 => return .f16,
            .i8 => return .i8,
            .i16 => return .i16,
            .i32 => return .i32,
            else => return null,
        }
    }

    /// Size of the matching `zml.DataType`.
    /// Only valid for types where `toDtype` is non-null: the optional is
    /// unwrapped unconditionally, so calling this on any other type is a bug.
    pub fn sizeOf(self: TensorType) usize {
        const dtype = self.toDtype().?;
        return dtype.sizeOf();
    }

    /// Return the tensor type features
    /// (the entry of `TENSOR_TYPE_FEATURES` named after this tag).
    pub fn getFeatures(t: TensorType) TensorTypeFeatures {
        switch (t) {
            inline else => |tag| return @field(TENSOR_TYPE_FEATURES, @tagName(tag)),
        }
    }
};
|
||
|
|
|
||
|
|
/// Block layout of a GGUF tensor type: tensors are stored as consecutive
/// blocks of `items_per_block` values, each block taking `bytes_per_block` bytes.
pub const TensorTypeFeatures = struct {
    /// Number of values stored in one block.
    items_per_block: u29,
    /// Number of bytes used by one block.
    bytes_per_block: u29,

    /// Returns floor(log2(bytes_per_block)).
    /// Returns 0 when `bytes_per_block` is 0 (as it is for layouts declared
    /// with empty blocks): log2 of zero is undefined and
    /// `std.math.log2_int` asserts on it.
    pub fn alignment(features: TensorTypeFeatures) u8 {
        if (features.bytes_per_block == 0) return 0;
        return std.math.log2_int(u29, features.bytes_per_block);
    }
};
|
||
|
|
|
||
|
|
/// GGUF tensor type to features lookup table.
/// There is one field per `TensorType` tag, named after the tag.
pub const TENSOR_TYPE_FEATURES: std.enums.EnumFieldStruct(TensorType, TensorTypeFeatures, null) = .{
    // Plain scalar types: one value per block.
    .f32 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(f32) },
    .f16 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(f16) },
    .i8 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i8) },
    .i16 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i16) },
    .i32 = .{ .items_per_block = 1, .bytes_per_block = @sizeOf(i32) },

    // Quantized types with 32 values per block.
    .q4_0 = .{ .items_per_block = 32, .bytes_per_block = 18 },
    .q4_1 = .{ .items_per_block = 32, .bytes_per_block = 20 },
    .q5_0 = .{ .items_per_block = 32, .bytes_per_block = 22 },
    .q5_1 = .{ .items_per_block = 32, .bytes_per_block = 24 },
    .q8_0 = .{ .items_per_block = 32, .bytes_per_block = 34 },
    .q8_1 = .{ .items_per_block = 32, .bytes_per_block = 40 },

    // Deprecated types: declared with empty blocks.
    .deprecated_q4_2 = .{ .items_per_block = 0, .bytes_per_block = 0 },
    .deprecated_q4_3 = .{ .items_per_block = 0, .bytes_per_block = 0 },

    // k-quantizations: 256 values per block.
    .q2_k = .{ .items_per_block = 256, .bytes_per_block = 82 },
    .q3_k = .{ .items_per_block = 256, .bytes_per_block = 110 },
    .q4_k = .{ .items_per_block = 256, .bytes_per_block = 144 },
    .q5_k = .{ .items_per_block = 256, .bytes_per_block = 176 },
    .q6_k = .{ .items_per_block = 256, .bytes_per_block = 210 },
    .q8_k = .{ .items_per_block = 256, .bytes_per_block = 292 },
};
|
||
|
|
|
||
|
|
/// Type tag of a metadata value, as stored in a GGUF file.
pub const GgufValueType = enum(u32) {
    /// 8-bit unsigned integer.
    uint8 = 0,
    /// 8-bit signed integer.
    int8 = 1,
    /// 16-bit unsigned little-endian integer.
    uint16 = 2,
    /// 16-bit signed little-endian integer.
    int16 = 3,
    /// 32-bit unsigned little-endian integer.
    uint32 = 4,
    /// 32-bit signed little-endian integer.
    int32 = 5,
    /// 32-bit IEEE754 floating point number.
    float32 = 6,
    /// Boolean stored on one byte: 0 is false, 1 is true.
    /// Anything else is invalid, and should be treated as either the model
    /// being invalid or the reader being buggy.
    bool = 7,
    /// UTF-8 string, not null-terminated, with its length prepended.
    string = 8,
    /// Array of other values, with the length and type prepended.
    /// Arrays can be nested, and the length is the number of elements,
    /// not the number of bytes.
    array = 9,
    /// 64-bit unsigned little-endian integer.
    uint64 = 10,
    /// 64-bit signed little-endian integer.
    int64 = 11,
    /// 64-bit IEEE754 floating point number.
    float64 = 12,
    // Special values used by the callbacks of gguf_do_with_value().
    array_start = 100,
    array_end = 101,

    // Allow other values in case GGUF add more types without us noticing
    _,

    /// Size in bytes of one in-memory element of this type; for `.string`
    /// this is the size of a `[]u8` slice, not of the characters.
    /// Must only be called on scalar types and `.string`: any other tag
    /// reaches `unreachable`.
    pub fn sizeOf(self: GgufValueType) usize {
        switch (self) {
            .uint8, .int8, .bool => return 1,
            .uint16, .int16 => return 2,
            .uint32, .int32, .float32 => return 4,
            .uint64, .int64, .float64 => return 8,
            .string => return @sizeOf([]u8),
            else => unreachable,
        }
    }

    /// Checks that `T` is the Zig type used to hold elements tagged `self`.
    /// Returns error.ValueTypeMismatch when it is not. Tags without a scalar
    /// or string representation (arrays, special and unknown values) accept
    /// any `T`.
    pub fn arrayTypeCheck(self: GgufValueType, comptime T: type) !void {
        const matches = switch (self) {
            .uint8 => T == u8,
            .int8 => T == i8,
            .uint16 => T == u16,
            .int16 => T == i16,
            .uint32 => T == u32,
            .int32 => T == i32,
            .uint64 => T == u64,
            .int64 => T == i64,
            .float32 => T == f32,
            .float64 => T == f64,
            .bool => T == bool,
            .string => T == []u8 or T == []const u8,
            else => true,
        };
        if (!matches) return error.ValueTypeMismatch;
    }
};
|
||
|
|
|
||
|
|
/// Tag of the `GgufValue` union.
/// The declaration order fixes the integer value of each tag; keep it stable.
pub const ValueType = enum {
    // Integers and floats up to 32 bits.
    uint8,
    int8,
    uint16,
    int16,
    uint32,
    int32,
    float32,
    // 64-bit integers and floats.
    uint64,
    int64,
    float64,
    // Non-numeric values.
    boolval,
    string,
    array,
};
|
||
|
|
|
||
|
|
/// A decoded metadata value: one variant per `ValueType` tag.
pub const GgufValue = union(ValueType) {
    uint8: u8,
    int8: i8,
    uint16: u16,
    int16: i16,
    uint32: u32,
    int32: i32,
    float32: f32,
    uint64: u64,
    int64: i64,
    float64: f64,
    boolval: bool,
    string: []const u8,
    array: Array,

    /// Payload of the `.array` variant.
    pub const Array = struct {
        /// Type of the elements. Any value type is valid, including arrays.
        child: GgufValueType,
        /// Number of elements, not bytes.
        len: usize,
        /// Raw bytes backing the elements.
        data: []u8,
    };

    /// Converts this value to the equivalent `zml.aio.Value`.
    /// Scalar and string variants map to the variant of the same name.
    /// Arrays forward their bytes together with the translated element type;
    /// the element type must be a scalar, bool or string (nested arrays are
    /// not handled yet and reach `unreachable`).
    pub fn asLoaderValue(self: GgufValue) zml.aio.Value {
        switch (self) {
            .array => |arr| return .{
                .array = .{
                    .item_type = switch (arr.child) {
                        .uint8 => .uint8,
                        .int8 => .int8,
                        .uint16 => .uint16,
                        .int16 => .int16,
                        .uint32 => .uint32,
                        .int32 => .int32,
                        .uint64 => .uint64,
                        .int64 => .int64,
                        .float32 => .float32,
                        .float64 => .float64,
                        .bool => .boolval,
                        .string => .string,
                        // TODO: .array => .array,
                        else => unreachable,
                    },
                    .data = arr.data,
                },
            },
            inline else => |payload, tag| return @unionInit(zml.aio.Value, @tagName(tag), payload),
        }
    }
};
|
||
|
|
|
||
|
|
/// Fixed-size header found at the very beginning of a GGUF file.
/// Declared `extern` so that the field order below is the in-memory layout.
const GgufHeader = extern struct {
    /// Magic number to announce that this is a GGUF file. Must be `GGUF`.
    magic: [4]u8,
    /// The version of the format implemented.
    /// Must be `3` for version described in this spec.
    /// Note: `validate` does not check this field.
    version: u32,
    /// The number of tensors in the file.
    /// This is explicit, instead of being included in the metadata, to ensure
    /// it is always present for loading the tensors.
    tensor_count: usize,
    /// The number of metadata key-value pairs.
    metadata_kv_count: usize,

    /// Checks the magic bytes.
    /// Logs and returns error.InvalidGguf when they are not `GGUF`.
    pub fn validate(self: GgufHeader) !void {
        const magic_ok = std.mem.eql(u8, &self.magic, "GGUF");
        if (magic_ok) return;

        log.err("Invalid GGUF file: wrong header {s}", .{self.magic});
        return error.InvalidGguf;
    }
};
|
||
|
|
|
||
|
|
/// One metadata key/value entry, as exposed by this library API.
pub const GgufMetadataKv = struct {
    /// Key of the entry.
    name: []const u8,
    /// Value type tag read from the file for this entry.
    type_: GgufValueType,
    /// Decoded value.
    val: GgufValue,
};
|
||
|
|
|
||
|
|
/// Capacity of the `dims` array of a tensor info.
/// Future-proof: actual limit is 4.
const GGUF_TENSOR_MAX_DIM: usize = 8;

/// Tensor representation in this library API.
pub const GgufTensorInfo = struct {
    /// Name of the tensor.
    name: []const u8,
    /// Tensor type (enum TensorType).
    t: TensorType,
    /// Number of dimensions of the tensor.
    rank: usize,
    /// Dimensions (Eg. [512, 1024, 1, 1]). Only the first `rank` entries are meaningful.
    dims: [GGUF_TENSOR_MAX_DIM]i64,
    /// Offset from start of data section.
    start: usize,
    /// Total size in bytes.
    byte_len: usize,
    /// Total number of parameters.
    num_weights: usize,

    /// Returns the first `rank` entries of `dims`.
    /// NOTE(review): the returned slice points into `info`, which is taken by
    /// value; confirm callers only use it while the tensor info they called
    /// this on is still alive.
    pub inline fn shape(info: GgufTensorInfo) []const i64 {
        const rank = info.rank;
        return info.dims[0..rank];
    }
};
|
||
|
|
|
||
|
|
/// Returns the name of the value type with raw id `t`,
/// or "unknown" when `t` has no entry in `GGUF_VALUE_NAME`.
fn getValueTypeName(t: u32) []const u8 {
    const index: usize = @intCast(t);
    return if (index < GGUF_VALUE_NAME.len) GGUF_VALUE_NAME[index] else "unknown";
}

/// Value type names, indexed by raw value type id (0 to 12).
const GGUF_VALUE_NAME = [_][]const u8{
    "uint8",
    "int8",
    "uint16",
    "int16",
    "uint32",
    "int32",
    "float32",
    "bool",
    "string",
    "array",
    "uint64",
    "int64",
    "float64",
};
|
||
|
|
|
||
|
|
/// GGUF file API
/// A memory-mapped view of a .gguf file.
/// Format used by GGML models: https://github.com/ggerganov/ggml/
///
/// Parsing is sequential: call `readMetadata` until it returns
/// error.EndOfMetadata, then `readTensorInfo` until it returns
/// error.EndOfMetadata. Lengths, counts and types read from the file are
/// treated as untrusted: malformed values are reported as errors.
pub const GgufFile = struct {
    header: GgufHeader, // GGUF file header info.
    size: usize, // Total file size.
    file: zml.aio.MemoryMappedFile,
    left_kv: usize, // Number of key-value pairs yet to read.
    left_tensors: usize, // Number of tensors yet to read.
    off: usize, // Offset of the next item to parse.
    alignment: usize = 32, // File data alignment. Default: 32 bytes.

    /// Open and memmap the given file.
    /// Reads and validates the header; the returned context is positioned on
    /// the first metadata entry.
    /// NOTE(review): the opened file is not closed here when a later step
    /// fails; confirm whether the caller or `asynk.File` handles that.
    pub fn open(path: []const u8) !GgufFile {
        const file = try asynk.File.open(path, .{});
        const header = try file.reader().readStruct(GgufHeader);
        try header.validate();
        return .{
            .header = header,
            .size = (try file.stat()).size,
            .file = try zml.aio.MemoryMappedFile.init(file),
            .off = @sizeOf(GgufHeader),
            .left_kv = header.metadata_kv_count,
            .left_tensors = header.tensor_count,
        };
    }

    /// Unmap the file memory and close the file handle.
    pub fn close(self: *GgufFile) void {
        self.file.deinit();
    }

    /// Set the context to read the first key-value entry in the GGUF
    /// file and then all the rest. Is used when creating a new context
    /// and also when you want to restart scanning the key-value
    /// items in the file.
    /// NOTE(review): this only resets the bookkeeping fields; the cursor of
    /// the underlying file is not moved here.
    fn rewind(ctx: *GgufFile) void {
        ctx.off = @sizeOf(GgufHeader);
        ctx.left_kv = ctx.header.metadata_kv_count;
        ctx.left_tensors = ctx.header.tensor_count;
    }

    /// Sets the tracked offset to `pos`, which must be inside the file.
    /// NOTE(review): the cursor of the underlying file is not moved here.
    pub fn seek(self: *GgufFile, pos: usize) void {
        assert(pos < self.size);
        self.off = pos;
    }

    /// Reads one little-endian integer of type `T` and advances `off`.
    /// Returns error.InvalidGguf when fewer than `@sizeOf(T)` bytes remain
    /// according to the tracked offset.
    fn readInt(self: *GgufFile, comptime T: type) !T {
        // A read that ends exactly at the end of the file is still in bounds.
        if (self.off > self.size or self.size - self.off < @sizeOf(T)) return error.InvalidGguf;
        // Only advance the tracked offset once the read actually succeeded.
        const res = try self.file.file.reader().readInt(T, .little);
        self.off += @sizeOf(T);
        return res;
    }

    /// Reads a tensor type id and converts it to `TensorType`.
    /// Returns error.UnsupportedGgufType for ids above `TensorType.MAX_KNOWN_ENUM`.
    fn readTensorType(self: *GgufFile) !TensorType {
        const raw = try self.readInt(u32);
        if (raw > TensorType.MAX_KNOWN_ENUM) {
            log.err("Unsupported GGUF tensor type: {d}", .{raw});
            return error.UnsupportedGgufType;
        }
        return @enumFromInt(raw);
    }

    /// Reads a value type id and converts it to `GgufValueType`.
    /// Returns error.UnsupportedGgufType for ids that have no named tag.
    fn readValueType(self: *GgufFile) !GgufValueType {
        const raw = try self.readInt(u32);
        const t: GgufValueType = @enumFromInt(raw);
        switch (t) {
            .uint8, .int8, .uint16, .int16, .uint32, .int32, .float32, .bool, .string, .array, .uint64, .int64, .float64, .array_start, .array_end => {},
            else => {
                // `t` has no name here, so `@tagName` must not be used: log the raw id.
                log.err("Unsupported GGUF value type: {d}", .{raw});
                return error.UnsupportedGgufType;
            },
        }
        return t;
    }

    /// Reads exactly `len` bytes into a new buffer and advances `off`.
    /// Caller owns the returned slice. Returns error.InvalidGguf on a short read.
    pub fn readAlloc(self: *GgufFile, allocator: std.mem.Allocator, len: usize) ![]u8 {
        const data = try allocator.alloc(u8, len);
        errdefer allocator.free(data);
        const read = try self.file.file.reader().readAll(data);
        if (read != data.len) return error.InvalidGguf;
        self.off += len;
        return data;
    }

    /// Moves the file cursor forward by `len` bytes and advances `off`.
    /// Returns error.InvalidGguf when `len` cannot be a valid seek distance.
    pub fn skipBytes(self: *GgufFile, len: usize) !void {
        // `len` can come straight from the file: reject values that do not
        // fit the signed seek offset instead of hitting a failed cast.
        if (len > std.math.maxInt(i64)) return error.InvalidGguf;
        try self.file.file.seekBy(@intCast(len));
        self.off += len;
    }

    /// Read the len then the actual bytes.
    /// Caller owns the returned slice.
    pub fn readString(self: *GgufFile, allocator: std.mem.Allocator) ![]u8 {
        const len: usize = try self.readInt(u64);
        return self.readAlloc(allocator, len);
    }

    /// Reads the length of a string, then skips its bytes.
    pub fn skipString(self: *GgufFile) !void {
        const len: usize = try self.readInt(u64);
        return self.skipBytes(len);
    }

    /// Reads an array value: element type, element count, then the elements.
    /// For strings, `data` holds the bytes of a `[][]u8` (one allocated slice
    /// per string); for scalars it holds the raw element bytes.
    /// Returns error.UnsupportedGgufType for element types without a fixed
    /// in-memory size (nested arrays and the special array markers).
    fn readArrayHeader(self: *GgufFile, allocator: std.mem.Allocator) !GgufValue.Array {
        const child = try self.readValueType();
        const len: usize = try self.readInt(u64);
        const data: []u8 = switch (child) {
            // Since strings have variable lengths, we need to read them one by one
            .string => str: {
                const strings = try allocator.alloc([]u8, len);
                var filled: usize = 0;
                // On failure, release the strings read so far and the table itself.
                errdefer {
                    for (strings[0..filled]) |s| allocator.free(s);
                    allocator.free(strings);
                }
                for (strings) |*slot| {
                    slot.* = try self.readString(allocator);
                    filled += 1;
                }
                break :str std.mem.sliceAsBytes(strings);
            },
            // `GgufValueType.sizeOf` is not defined for these.
            .array, .array_start, .array_end => return error.UnsupportedGgufType,
            else => scalars: {
                const byte_len = std.math.mul(usize, len, child.sizeOf()) catch return error.InvalidGguf;
                break :scalars try self.readAlloc(allocator, byte_len);
            },
        };
        return .{
            .child = child,
            .len = len,
            .data = data,
        };
    }

    /// Reads one value of type `t`.
    /// Returns error.UnsupportedGgufType for types that cannot be stored as a value.
    fn readTypedValue(self: *GgufFile, allocator: std.mem.Allocator, t: GgufValueType) !GgufValue {
        return switch (t) {
            .uint8 => .{ .uint8 = try self.readInt(u8) },
            .int8 => .{ .int8 = try self.readInt(i8) },
            .uint16 => .{ .uint16 = try self.readInt(u16) },
            .int16 => .{ .int16 = try self.readInt(i16) },
            .uint32 => .{ .uint32 = try self.readInt(u32) },
            .int32 => .{ .int32 = try self.readInt(i32) },
            .float32 => .{ .float32 = @bitCast(try self.readInt(u32)) },
            .bool => .{ .boolval = try self.readInt(u8) != 0 },
            .string => .{ .string = try self.readString(allocator) },
            .array => .{ .array = try self.readArrayHeader(allocator) },
            .uint64 => .{ .uint64 = try self.readInt(u64) },
            .int64 => .{ .int64 = try self.readInt(i64) },
            .float64 => .{ .float64 = @bitCast(try self.readInt(u64)) },
            else => error.UnsupportedGgufType,
        };
    }

    /// Parses the next metadata entry.
    /// Returns error.EndOfMetadata if there are no longer metadata to process in this GGUF file.
    /// The name and any string/array payload are allocated with `allocator`.
    pub fn readMetadata(self: *GgufFile, allocator: std.mem.Allocator) !GgufMetadataKv {
        if (self.left_kv == 0) return error.EndOfMetadata;
        self.left_kv -= 1;
        const name = try self.readString(allocator);
        errdefer allocator.free(name);
        const type_ = try self.readValueType();
        const val: GgufValue = try self.readTypedValue(allocator, type_);
        return .{ .name = name, .type_ = type_, .val = val };
    }

    // Set the data section offset. This function must be called exactly when
    // all the key-values are consumed, in the context of the first call of
    // ctx.getTensor(): this way we will be able to return tensor offsets
    // as absolute positions and pointers to the mmapped file.
    fn setDataOffset(self: *GgufFile) !void {
        const base_off = self.off;

        assert(self.left_kv == 0 and self.left_tensors == self.header.tensor_count);

        // Walk over every tensor info: the data section starts right after
        // them, rounded up to `alignment`.
        for (0..self.left_tensors) |_| try self.skipTensor();
        const padding: usize = getAlignmentPadding(self.alignment, self.off);
        self.file.data_offset = self.off + padding;

        // Go back to the first tensor info so that it can be parsed for real.
        try self.file.file.seekTo(base_off);
        self.off = base_off;
    }

    /// Skips one tensor info entry without decoding it.
    pub fn skipTensor(self: *GgufFile) !void {
        try self.skipString(); // Skip name
        const num_dim: usize = try self.readInt(u32);
        // dimensions, type, and offset.
        // Computed in `usize` so that a bogus dimension count cannot overflow.
        try self.skipBytes(8 * num_dim + 4 + 8);
    }

    /// Parses the next tensor entry.
    /// Returns error.EndOfMetadata if there are no longer tensor metadata to process in this GGUF file.
    /// Returns error.InvalidGguf when the rank or the dimensions read from the
    /// file are out of range, and error.UnsupportedGgufType when the tensor
    /// type has no block layout. The name is allocated with `allocator`.
    pub fn readTensorInfo(self: *GgufFile, allocator: std.mem.Allocator) !GgufTensorInfo {
        if (self.left_tensors == 0 or self.left_kv != 0) {
            return error.EndOfMetadata;
        }

        // We want to return tensor data with offsets relative to the start
        // of the file, so that the user of the API is able to access tensors
        // as it iterates over them. To do so, we need to perform a full
        // scan if this is the first tensor info we are reading.
        // TODO: explicitly set the data offset in
        if (self.file.data_offset == 0) try self.setDataOffset();
        self.left_tensors -= 1;

        const name = try self.readString(allocator);
        errdefer allocator.free(name);

        // The rank comes from the file: check it before it indexes `dims`.
        const num_dim: usize = try self.readInt(u32);
        if (num_dim > GGUF_TENSOR_MAX_DIM) {
            log.err("Invalid GGUF tensor: rank {d} is above the maximum of {d}", .{ num_dim, GGUF_TENSOR_MAX_DIM });
            return error.InvalidGguf;
        }

        // Read the dimensions; unused dimensions are left `undefined`.
        // Note: we reverse the order of the dimensions to match zml convention.
        var dims: [GGUF_TENSOR_MAX_DIM]i64 = undefined;
        var num_weights: usize = 1;
        for (0..num_dim) |j| {
            const d = try self.readInt(u64);
            dims[num_dim - 1 - j] = std.math.cast(i64, d) orelse return error.InvalidGguf;
            const d_count = std.math.cast(usize, d) orelse return error.InvalidGguf;
            num_weights = std.math.mul(usize, num_weights, d_count) catch return error.InvalidGguf;
        }
        const t: TensorType = try self.readTensorType();
        const start = try self.readInt(u64);

        // To accurately calculate the bytes used by this tensor on the GGUF
        // file, we need to take into account that quantization methods store
        // tensors as block of N weights. The last block may hold fewer
        // weights but is still stored to its full length, so the number of
        // blocks is rounded up before being multiplied by the block size.
        const tf = t.getFeatures();
        if (tf.items_per_block == 0) {
            // Types declared with empty blocks have no usable layout.
            log.err("Unsupported GGUF tensor type: {s}", .{@tagName(t)});
            return error.UnsupportedGgufType;
        }
        // Cannot fail: the divisor was checked to be non-zero just above.
        const num_blocks = std.math.divCeil(usize, num_weights, tf.items_per_block) catch unreachable;
        const byte_len = std.math.mul(usize, num_blocks, tf.bytes_per_block) catch return error.InvalidGguf;

        return .{
            .name = name,
            .t = t,
            .rank = num_dim,
            .dims = dims,
            .start = start,
            .byte_len = byte_len,
            .num_weights = num_weights,
        };
    }
};
|
||
|
|
|
||
|
|
/// Given an offset or a length, returns the padding needed to align it to alignment.
/// The result is 0 when `offset` is already a multiple of `alignment`.
/// `alignment` must not be 0.
fn getAlignmentPadding(alignment: usize, offset: usize) usize {
    const misalignment = offset % alignment;
    return if (misalignment == 0) 0 else alignment - misalignment;
}
|