Small cleanups to the llama multi-process example. (#2098)
@@ -20,6 +20,12 @@ pub struct LlamaConfig {
     pub eos_token_id: Option<u32>,
 }
 
+impl LlamaConfig {
+    pub fn num_key_value_heads(&self) -> usize {
+        self.num_key_value_heads.unwrap_or(self.num_attention_heads)
+    }
+}
+
 fn default_rope() -> f32 {
     10_000.0
 }
@@ -32,7 +38,7 @@ impl LlamaConfig {
             vocab_size: self.vocab_size,
             num_hidden_layers: self.num_hidden_layers,
             num_attention_heads: self.num_attention_heads,
-            num_key_value_heads: self.num_key_value_heads.unwrap_or(self.num_attention_heads),
+            num_key_value_heads: self.num_key_value_heads(),
             rms_norm_eps: self.rms_norm_eps,
             rope_theta: self.rope_theta,
             use_flash_attn,