mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 10:26:33 +00:00
Get the ggml based llama to generate some text. (#464)
* Add more stats to the ggml example. * Build a quantized model from the file content. * Move the tensor retrieval in the main crate. * Start adding the forward pass. * Add more to the forward pass of the quantized llama. * Apply the attention layers. * Add the sampling loop. * Get the sampling loop to work. * Minor tweak. * Add a quantize/dequantize test. * Bugfix. * Add a comment + swap the order. * Bugfixes.
This commit is contained in:
@ -140,10 +140,6 @@ impl Cache {
|
||||
}
|
||||
}
|
||||
|
||||
fn silu(xs: &Tensor) -> Result<Tensor> {
|
||||
xs / (xs.neg()?.exp()? + 1.0)?
|
||||
}
|
||||
|
||||
fn linear(size1: usize, size2: usize, vb: VarBuilder) -> Result<Linear> {
|
||||
let span = tracing::span!(tracing::Level::TRACE, "linear");
|
||||
let inner = candle_nn::linear_no_bias(size1, size2, vb)?;
|
||||
@ -358,7 +354,7 @@ struct Mlp {
|
||||
impl Mlp {
|
||||
fn forward(&self, x: &Tensor) -> Result<Tensor> {
|
||||
let _enter = self.span.enter();
|
||||
let x = (silu(&self.c_fc1.forward(x)?)? * self.c_fc2.forward(x)?)?;
|
||||
let x = (candle_nn::ops::silu(&self.c_fc1.forward(x)?)? * self.c_fc2.forward(x)?)?;
|
||||
self.c_proj.forward(&x)
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user