diff --git a/candle-transformers/src/models/based.rs b/candle-transformers/src/models/based.rs
index c54ff966..1dbd6dc2 100644
--- a/candle-transformers/src/models/based.rs
+++ b/candle-transformers/src/models/based.rs
@@ -1,9 +1,9 @@
 //! Based from the Stanford Hazy Research group.
 //!
 //! See "Simple linear attention language models balance the recall-throughput tradeoff", Arora et al. 2024
-//! - [Arxiv](https://arxiv.org/abs/2402.18668)
-//! - [Github](https://github.com/HazyResearch/based)
-//!
+//! - Simple linear attention language models balance the recall-throughput tradeoff. [Arxiv](https://arxiv.org/abs/2402.18668)
+//! - [Github Repo](https://github.com/HazyResearch/based)
+//! - [Blogpost](https://hazyresearch.stanford.edu/blog/2024-03-03-based)
 
 use candle::{DType, Device, IndexOp, Module, Result, Tensor, D};
 use candle_nn::{
diff --git a/candle-transformers/src/models/bert.rs b/candle-transformers/src/models/bert.rs
index a7db075c..808ca415 100644
--- a/candle-transformers/src/models/bert.rs
+++ b/candle-transformers/src/models/bert.rs
@@ -1,8 +1,69 @@
 //! BERT (Bidirectional Encoder Representations from Transformers)
 //!
-//! See "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding", Devlin et al. 2018
-//! - [Arxiv](https://arxiv.org/abs/1810.04805)
-//! - [Github](https://github.com/google-research/bert)
+//! BERT is a general-purpose language model that can be used for various language tasks:
+//! - Compute sentence embeddings for a prompt.
+//! - Compute similarities between a set of sentences.
+//!
+//! - [Arxiv](https://arxiv.org/abs/1810.04805) "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding"
+//! - Upstream [Github repo](https://github.com/google-research/bert).
+//! - See bert in [candle-examples](https://github.com/huggingface/candle/tree/main/candle-examples/) for runnable code.
+//!
+//! ```no_run
+//! // Sentence embeddings for a prompt.
+//! # use candle_core::Tensor;
+//! # use candle_transformers::models::bert::BertModel;
+//! # fn main() -> candle_core::Result<()> {
+//! # let model: BertModel = todo!();
+//! // The prompt "Here is a test sentence" is tokenized beforehand (e.g. with
+//! // the `tokenizers` crate) into token ids and token type ids.
+//! # let (input_ids, token_type_ids): (Tensor, Tensor) = todo!();
+//! let embeddings = model.forward(&input_ids, &token_type_ids, None)?;
+//! // Returns a tensor of shape [1, 7, 384]: one embedding per token.
+//! println!("{embeddings}");
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ```no_run
+//! // Different models can be loaded from their weights and configuration.
+//! # use candle_nn::VarBuilder;
+//! # use candle_transformers::models::bert::{BertModel, Config};
+//! # fn main() -> candle_core::Result<()> {
+//! # let vb: VarBuilder = todo!();
+//! # let config: Config = todo!();
+//! let model = BertModel::load(vb, &config)?;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ```no_run
+//! // Gelu approximation: a speedup can be obtained by configuring the model
+//! // to use an approximation of the gelu activation.
+//! # use candle_transformers::models::bert::{Config, HiddenAct};
+//! # fn main() -> candle_core::Result<()> {
+//! # let mut config: Config = todo!();
+//! config.hidden_act = HiddenAct::GeluApproximate;
+//! # Ok(())
+//! # }
+//! ```
+//!
+//! ```no_run
+//! // Similarities: sentence embeddings can be compared through cosine
+//! // similarity scoring. The sentence embeddings are computed using average
+//! // pooling across all tokens.
+//! # use candle_core::Tensor;
+//! # use candle_transformers::models::bert::BertModel;
+//! # fn main() -> candle_core::Result<()> {
+//! # let model: BertModel = todo!();
+//! let sentence1 = "The new movie is awesome";
+//! let sentence2 = "The new movie is so great";
+//! // Tokenize each sentence before running the encoder.
+//! # let (ids1, tt1, ids2, tt2): (Tensor, Tensor, Tensor, Tensor) = todo!();
+//! let emb1 = model.forward(&ids1, &tt1, None)?;
+//! let emb2 = model.forward(&ids2, &tt2, None)?;
+//! # Ok(())
+//! # }
+//! ```
 //!
 use super::with_tracing::{layer_norm, linear, LayerNorm, Linear};
 use candle::{DType, Device, Result, Tensor};
diff --git a/candle-transformers/src/models/bigcode.rs b/candle-transformers/src/models/bigcode.rs
index 8ed1462b..c5dcb6bc 100644
--- a/candle-transformers/src/models/bigcode.rs
+++ b/candle-transformers/src/models/bigcode.rs
@@ -1,9 +1,25 @@
 //! BigCode implementation in Rust based on the GPT-BigCode model.
 //!
-//! See "StarCoder: A State-of-the-Art LLM for Code", Mukherjee et al. 2023
+//! [StarCoder/BigCode](https://huggingface.co/bigcode/starcoderbase-1b) is an
+//! LLM specialized in code generation. The initial model was trained on 80
+//! programming languages. See "StarCoder: A State-of-the-Art LLM for Code", Mukherjee et al. 2023
 //! - [Arxiv](https://arxiv.org/abs/2305.06161)
 //! - [Github](https://github.com/bigcode-project/starcoder)
 //!
+//! ## Running an example
+//!
+//! ```bash
+//! cargo run --example bigcode --release -- --prompt "fn fact(n: u64) -> u64"
+//!
+//! > fn fact(n: u64) -> u64 {
+//! >     if n == 0 {
+//! >         1
+//! >     } else {
+//! >         n * fact(n - 1)
+//! >     }
+//! > }
+//! ```
+//!
 
 use candle::{DType, Device, IndexOp, Result, Tensor, D};
 use candle_nn::{embedding, linear_b as linear, Embedding, LayerNorm, Linear, Module, VarBuilder};
diff --git a/candle-transformers/src/models/flux/mod.rs b/candle-transformers/src/models/flux/mod.rs
index 8eb928f5..064c5130 100644
--- a/candle-transformers/src/models/flux/mod.rs
+++ b/candle-transformers/src/models/flux/mod.rs
@@ -1,10 +1,26 @@
 //! Flux Model
 //!
-//! Flux is a series of text-to-image generation models based on diffusion transformers.
+//! Flux is a 12B-parameter rectified flow transformer capable of generating images from text descriptions.
 //!
-//! - [GH Link](https://github.com/black-forest-labs/flux)
-//! - Transformers Python [reference implementation](https://github.com/huggingface/transformers/blob/5af7d41e49bbfc8319f462eb45253dcb3863dfb7/src/transformers/models/chinese_clip/modeling_chinese_clip.py)
+//! - [Hugging Face Model](https://huggingface.co/black-forest-labs/FLUX.1-schnell)
+//! - [GitHub Repository](https://github.com/black-forest-labs/flux)
+//! - [Blog Post](https://blackforestlabs.ai/announcing-black-forest-labs/)
 //!
+//! # Usage
+//!
+//! ```bash
+//! cargo run --features cuda \
+//!     --example flux -r -- \
+//!     --height 1024 --width 1024 \
+//!     --prompt "a rusty robot walking on a beach holding a small torch, \
+//!     the robot has the word \"rust\" written on it, high quality, 4k"
+//! ```
+//!
+//!