modernbert: set default epsilon value for embeddings layernorm.
This commit is contained in:
parent
a63d0a4aa3
commit
1d5b79111a
@@ -56,6 +56,7 @@ pub const ModernBertModel = struct {
|
|||||||
pub fn init(self: *ModernBertModel, options: ModernBertOptions) void {
|
pub fn init(self: *ModernBertModel, options: ModernBertOptions) void {
|
||||||
self.options = options;
|
self.options = options;
|
||||||
self.final_norm.eps = 1e-5;
|
self.final_norm.eps = 1e-5;
|
||||||
|
self.embeddings.norm.eps = 1e-5;
|
||||||
for (self.layers, 0..) |*encoder_layer, layer_idx| {
|
for (self.layers, 0..) |*encoder_layer, layer_idx| {
|
||||||
encoder_layer.attn.Wqkv.weight = encoder_layer.attn.Wqkv.weight.withSharding(.{0});
|
encoder_layer.attn.Wqkv.weight = encoder_layer.attn.Wqkv.weight.withSharding(.{0});
|
||||||
encoder_layer.attn.Wo.weight = encoder_layer.attn.Wo.weight.withSharding(.{1});
|
encoder_layer.attn.Wo.weight = encoder_layer.attn.Wo.weight.withSharding(.{1});
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user