Some fixes for Qwen, #1134

EricLBuehler · Feb 13, 2025 · 87a7c23
1 parent c9ac321
commit 87a7c23
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 5 deletions.
diff --git a/mistralrs-core/src/models/quantized_qwen2.rs b/mistralrs-core/src/models/quantized_qwen2.rs
@@ -254,7 +254,7 @@ impl ModelConfig::FromGGUF for ModelWeights {
                     head_dim,
                     max_seq_len,
                     device,
-                    false,
+                    true,
                     dtype,
                 )?),
             );

diff --git a/mistralrs-core/src/utils/gguf_metadata.rs b/mistralrs-core/src/utils/gguf_metadata.rs
@@ -500,10 +500,7 @@ impl DeviceMappedModelLoader for GgufDeviceMapLoaderInner<'_, '_> {
                     + tensor_info_size_in_bytes!(self.model.tensor_info("blk.0.attn_v.bias")?);
                 let attn_output = tensor_info_size_in_bytes!(self
                     .model
-                    .tensor_info("blk.0.attn_output.weight")?)
-                    + tensor_info_size_in_bytes!(self
-                        .model
-                        .tensor_info("blk.0.attn_output.bias")?);
+                    .tensor_info("blk.0.attn_output.weight")?);
 
                 let ffn_gate =
                     tensor_info_size_in_bytes!(self.model.tensor_info("blk.0.ffn_gate.weight")?);
-Original file line number
+Diff line change
@@ -254,7 +254,7 @@ impl ModelConfig::FromGGUF for ModelWeights {
                         head_dim,
                         max_seq_len,
                         device,
-                        false,
+                        true,
                         dtype,
                     )?),
                 );
@@ Expand Down @@