From 32f567bac491aa0f52dfbe1001ea4d6187bb4301 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Fri, 22 Mar 2024 10:28:38 +0100
Subject: [PATCH] Fix loading the gguf files. (#1913)

---
 candle-transformers/src/models/quantized_llama.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/candle-transformers/src/models/quantized_llama.rs b/candle-transformers/src/models/quantized_llama.rs
index ee50c092..717e6771 100644
--- a/candle-transformers/src/models/quantized_llama.rs
+++ b/candle-transformers/src/models/quantized_llama.rs
@@ -362,7 +362,7 @@ impl ModelWeights {
         let embedding_length = md_get("llama.embedding_length")?.to_u32()? as usize;
         let rope_dim = md_get("llama.rope.dimension_count")?.to_u32()? as usize;
         // Strangely this value is generally 1e-6 in GGUF file but used to be 1e-5 by default.
-        let rms_norm_eps = md_get("llama.attention.layer_norm_rms_epsilon")?.to_f64()?;
+        let rms_norm_eps = md_get("llama.attention.layer_norm_rms_epsilon")?.to_f32()? as f64;
         let rope_freq_base = md_get("llama.rope.freq_base")
             .and_then(|m| m.to_f32())