Add the SigLIP model. (#2515)

* Add the SigLIP model.

* Add more to the forward pass of the vision model.

* Complete the forward pass.

* Add the siglip example.

* Fix.

* Another fix.

* Get everything in place.

* Add a readme.
This commit is contained in:
Laurent Mazare
2024-09-28 23:48:00 +02:00
committed by GitHub
parent 62525e8352
commit 261ed65f36
8 changed files with 797 additions and 54 deletions

View File

@@ -77,7 +77,7 @@ impl ClipTextEmbeddings {
)?;
let position_ids =
Tensor::arange(0u32, c.max_position_embeddings as u32, vs.device())?.unsqueeze(0)?;
Ok(ClipTextEmbeddings {
Ok(Self {
token_embedding,
position_embedding,
position_ids,
@@ -298,7 +298,7 @@ impl ClipTextTransformer {
})
}
// TODO: rewrrite to newer version
// TODO: rewrite to newer version
fn build_causal_attention_mask(
bsz: usize,
seq_len: usize,