Radix/zml/tokenizer/hftokenizers/main.zig

28 lines
1.1 KiB
Zig
Raw Normal View History

const std = @import("std");
const c = @import("c");
const HFTokenizers = @import("hftokenizers").HFTokenizers;
/// Manual smoke test / micro-benchmark for the `HFTokenizers` bindings.
///
/// Encodes a fixed sentence once, then runs 100 rounds. In every round the
/// encoded ids are appended to a growing id list, the whole list is decoded,
/// and the time spent in that single `decode` call is printed with
/// `std.debug.print` (in nanoseconds and in microseconds).
///
/// Errors from `std.time.Timer.start` and from growing the id list are
/// propagated to the caller.
pub fn main() !void {
    // NOTE(review): absolute path into a Bazel output directory; presumably
    // only valid on the machine it was written on. Confirm before reuse.
    const tokenizer_path = "/private/var/tmp/_bazel_steeve/a67b810d44f2a673ebbd5bab86ccd5cc/external/zml~~huggingface~Meta-Llama-3.1-8B-Instruct/tokenizer.json";
    const rounds = 100;

    const tokenizer = HFTokenizers.init(tokenizer_path);
    defer HFTokenizers.deinit(tokenizer);

    var encoded = HFTokenizers.encode(tokenizer, "Hello, world! plane pouet plane");
    defer encoded.deinit();

    var accumulated_ids = std.ArrayList(u32).init(std.heap.c_allocator);
    defer accumulated_ids.deinit();

    var timer = try std.time.Timer.start();
    for (0..rounds) |_| {
        // The decode input grows by one copy of the encoded ids every round.
        try accumulated_ids.appendSlice(encoded.ids);

        // Restart the clock here so that only the decode call is measured,
        // not the append above.
        timer.reset();
        var decoded = HFTokenizers.decode(tokenizer, accumulated_ids.items);
        // Released at the end of each iteration, after the print below.
        defer decoded.deinit();
        const elapsed_ns = timer.lap();

        std.debug.print("{any} {any} {s} {d}ns {d}us\n", .{
            tokenizer,
            encoded,
            decoded.str,
            elapsed_ns,
            elapsed_ns / std.time.ns_per_us,
        });
    }
}