mirror of
https://github.com/huggingface/candle.git
synced 2025-06-19 03:54:56 +00:00
Quantized moondream implementation and BOS token (#1980)
* moondream implementation
* add moondream example
* change config default activation
* Add assets and integrate phi mixformer with example
* Make use of kv cache and fix seq_len bug; clean up example code
* Add README link to example
* Remove pos_embed scaling; remove assets; add to README; expand VisionConfig
* Delete image
* Use apply instead of forward
* Pass BOS token at the beginning of the tensor
* Quantize moondream
* Forward with image BOS token
* Clippy
* Use q4_0 quantization
* Add pointers for sequence and tokens; remove seq_len conditional
This commit is contained in:
@ -25,15 +25,15 @@ fn scaled_dot_product_attention(q: &Tensor, k: &Tensor, v: &Tensor) -> Result<Te
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, serde::Deserialize)]
|
||||
pub struct VisionConfig {
|
||||
image_embedding_dim: usize,
|
||||
model_dim: usize,
|
||||
hidden_dim: usize,
|
||||
hidden_features: usize,
|
||||
embed_len: usize,
|
||||
embed_dim: usize,
|
||||
num_blocks: usize,
|
||||
num_heads: usize,
|
||||
act: candle_nn::Activation,
|
||||
pub(crate) image_embedding_dim: usize,
|
||||
pub(crate) model_dim: usize,
|
||||
pub(crate) hidden_dim: usize,
|
||||
pub(crate) hidden_features: usize,
|
||||
pub(crate) embed_len: usize,
|
||||
pub(crate) embed_dim: usize,
|
||||
pub(crate) num_blocks: usize,
|
||||
pub(crate) num_heads: usize,
|
||||
pub(crate) act: candle_nn::Activation,
|
||||
}
|
||||
|
||||
impl VisionConfig {
|
||||
|
Reference in New Issue
Block a user