Update modernbert example to reflect removal of old writer interface APIs.
This commit is contained in:
parent
3ed9bca5ad
commit
12efc763d5
@@ -216,27 +216,26 @@ pub fn unmask(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn tokenize(allocator: std.mem.Allocator, tokenizer: zml.tokenizer.Tokenizer, prompt: []const u8) ![]const u32 {
|
pub fn tokenize(allocator: std.mem.Allocator, tokenizer: zml.tokenizer.Tokenizer, prompt: []const u8) ![]const u32 {
|
||||||
var tokens = std.array_list.Managed(u32).init(allocator);
|
|
||||||
var encoder = try tokenizer.encoder();
|
var encoder = try tokenizer.encoder();
|
||||||
defer encoder.deinit();
|
defer encoder.deinit();
|
||||||
|
|
||||||
const bos = tokenizer.tokenToId("[CLS]") orelse return error.NoSuchToken;
|
const bos = tokenizer.tokenToId("[CLS]") orelse return error.NoSuchToken;
|
||||||
const eos = tokenizer.tokenToId("[SEP]") orelse return error.NoSuchToken;
|
const eos = tokenizer.tokenToId("[SEP]") orelse return error.NoSuchToken;
|
||||||
|
|
||||||
try tokens.append(bos);
|
var tokens: std.ArrayList(u32) = try .initCapacity(allocator, prompt.len);
|
||||||
try tokens.appendSlice(try encoder.encode(prompt));
|
try tokens.append(allocator, bos);
|
||||||
try tokens.append(eos);
|
try tokens.appendSlice(allocator, try encoder.encode(prompt));
|
||||||
|
try tokens.append(allocator, eos);
|
||||||
|
|
||||||
return tokens.toOwnedSlice();
|
return tokens.toOwnedSlice(allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn findMaskPositions(allocator: std.mem.Allocator, tokens: []const u32, mask_token: u32) ![]usize {
|
fn findMaskPositions(allocator: std.mem.Allocator, tokens: []const u32, mask_token: u32) ![]usize {
|
||||||
var mask_positions = std.array_list.Managed(usize).init(allocator);
|
var mask_positions: std.ArrayList(usize) = .empty;
|
||||||
defer mask_positions.deinit();
|
|
||||||
|
|
||||||
for (tokens, 0..) |token, i| {
|
for (tokens, 0..) |token, i| {
|
||||||
if (token == mask_token) {
|
if (token == mask_token) {
|
||||||
try mask_positions.append(i);
|
try mask_positions.append(allocator, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -247,7 +246,7 @@ fn findMaskPositions(allocator: std.mem.Allocator, tokens: []const u32, mask_tok
|
|||||||
|
|
||||||
if (mask_positions.items.len > 1) log.warn("Currently only supporting one [MASK] per input", .{});
|
if (mask_positions.items.len > 1) log.warn("Currently only supporting one [MASK] per input", .{});
|
||||||
|
|
||||||
return mask_positions.toOwnedSlice();
|
return mask_positions.toOwnedSlice(allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepareTensorInputs(
|
fn prepareTensorInputs(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user