Quantized moondream implementation and BOS token (#1980)

* moondream implementation

* add moondream example

* change config default activation

* Add assets and integrate phi mixformer with example

* Make use of kv cache and fix seq_len bug; Clean up example code

* Add README link to example

* Remove pos_embed scaling; Remove assets; Add to README; Expand VisionConfig

* Delete image

* Use apply instead of forward

* Pass the BOS token at the beginning of the tensor.

* Quantize moondream.

* Forward with the image BOS token.

* Clippy.

* Use q4_0 quantization.

* Add pointers for sequence and tokens; Remove seq_len conditional
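The "Use q4_0 quantization" step refers to 4-bit block quantization of the model weights. As a rough illustration (not candle's actual implementation: the block size of 32 and the symmetric scale rule here are assumptions made for the sketch), each block of weights shares one floating-point scale and each weight is stored as a 4-bit integer:

```rust
// Simplified sketch of 4-bit block quantization in the spirit of q4_0.
// Block size and the exact scale rule are assumptions, not candle's code.
const BLOCK: usize = 32;

fn quantize_block(xs: &[f32]) -> (f32, Vec<u8>) {
    // One scale per block, derived from the largest-magnitude weight.
    let amax = xs.iter().fold(0f32, |m, &x| m.max(x.abs()));
    let d = if amax > 0.0 { amax / 7.0 } else { 1.0 };
    // Map each weight to a 4-bit code in [0, 15] (offset-binary around 8).
    let qs = xs
        .iter()
        .map(|&x| ((x / d).round().clamp(-8.0, 7.0) as i8 + 8) as u8)
        .collect();
    (d, qs)
}

fn dequantize_block(d: f32, qs: &[u8]) -> Vec<f32> {
    qs.iter().map(|&q| (q as i8 - 8) as f32 * d).collect()
}

fn main() {
    let xs: Vec<f32> = (0..BLOCK).map(|i| (i as f32 - 16.0) / 4.0).collect();
    let (d, qs) = quantize_block(&xs);
    let ys = dequantize_block(d, &qs);
    // Round-trip error is bounded by half a quantization step.
    let max_err = xs
        .iter()
        .zip(&ys)
        .map(|(a, b)| (a - b).abs())
        .fold(0f32, f32::max);
    println!("scale = {d:.4}, max round-trip error = {max_err:.4}");
    assert!(max_err <= d / 2.0 + 1e-6);
}
```

The payoff is memory: 32 weights shrink from 128 bytes of f32 to 16 bytes of nibbles plus one scale, at the cost of bounded rounding error per weight.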
Santiago Medina
2024-04-01 10:37:54 -07:00
committed by GitHub
parent 308ea070ed
commit ea0d8d3753
6 changed files with 393 additions and 32 deletions


@@ -25,15 +25,15 @@ fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Te
 #[derive(Debug, Clone, PartialEq, serde::Deserialize)]
 pub struct VisionConfig {
-    image_embedding_dim: usize,
-    model_dim: usize,
-    hidden_dim: usize,
-    hidden_features: usize,
-    embed_len: usize,
-    embed_dim: usize,
-    num_blocks: usize,
-    num_heads: usize,
-    act: candle_nn::Activation,
+    pub(crate) image_embedding_dim: usize,
+    pub(crate) model_dim: usize,
+    pub(crate) hidden_dim: usize,
+    pub(crate) hidden_features: usize,
+    pub(crate) embed_len: usize,
+    pub(crate) embed_dim: usize,
+    pub(crate) num_blocks: usize,
+    pub(crate) num_heads: usize,
+    pub(crate) act: candle_nn::Activation,
 }
 impl VisionConfig {
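The diff above widens the `VisionConfig` fields from private to `pub(crate)`, so other modules in the same crate (for example, the quantized model) can read them while keeping them hidden from downstream users. A minimal sketch of that visibility rule, with illustrative field names and values:

```rust
// Sketch of pub(crate) field visibility; the names and values here are
// illustrative, not the actual moondream configuration.
mod config {
    pub struct VisionConfig {
        pub(crate) embed_dim: usize,
        pub(crate) num_heads: usize,
    }

    impl VisionConfig {
        pub fn new() -> Self {
            Self { embed_dim: 1152, num_heads: 16 }
        }
    }
}

fn main() {
    let cfg = config::VisionConfig::new();
    // Readable here because main lives in the same crate as `config`;
    // a dependent crate would get a privacy error on these field accesses.
    assert_eq!(cfg.embed_dim % cfg.num_heads, 0);
    println!("head_dim = {}", cfg.embed_dim / cfg.num_heads);
}
```

This is a common middle ground between fully public fields (which freeze the struct layout into the public API) and private fields with a getter per field.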