add quantized rwkv v5 model (#1743)

* add quantized rwkv v5 model

* Integrate the quantized rwkv model into the initial example.

---------

Co-authored-by: laurent <laurent.mazare@gmail.com>
Author: Jack Shih
Date: 2024-02-26 04:43:40 +08:00
Committed by: GitHub
Parent: 1a6043af51
Commit: 918136ba46

4 changed files with 326 additions and 6 deletions
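For context on the example integration mentioned above: a common pattern, and a plausible reading of this commit, is to wrap both variants in a small enum so the generation loop stays agnostic to quantization. A minimal sketch, assuming the candle_transformers module paths rwkv_v5 and quantized_rwkv_v5 and a shared forward(xs, state) signature; the exact names used in the example may differ:

    use candle::{Result, Tensor};
    use candle_transformers::models::quantized_rwkv_v5;
    use candle_transformers::models::rwkv_v5::{self, State};

    // Dispatch wrapper: the rest of the pipeline calls `forward`
    // without caring whether the weights are quantized.
    enum Model {
        M(rwkv_v5::Model),
        Q(quantized_rwkv_v5::Model),
    }

    impl Model {
        fn forward(&self, xs: &Tensor, state: &mut State) -> Result<Tensor> {
            match self {
                Self::M(m) => m.forward(xs, state),
                Self::Q(m) => m.forward(xs, state),
            }
        }
    }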


@@ -124,7 +124,7 @@ impl SelfAttention {
let (b, t, s) = xs.dims3()?;
let s = s / h;
let (receptance, key, value, gate) = {
-// exctract key-value
+// extract key-value
let shifted = state.per_layer[self.layer_id].extract_key_value.clone();
let shifted = if shifted.rank() == 2 {
shifted.unsqueeze(1)?
@@ -164,7 +164,6 @@ impl SelfAttention {
let mut out: Vec<Tensor> = Vec::with_capacity(t);
for t_ in 0..t {
-//
let rt = receptance.i((.., .., t_..t_ + 1))?.contiguous()?;
let kt = key.i((.., .., .., t_..t_ + 1))?.contiguous()?;
let vt = value.i((.., .., t_..t_ + 1))?.contiguous()?;
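// The loop continues with the per-timestep WKV update; a minimal sketch,
// assuming `time_faaaa` (per-head bonus), `time_decay` (per-head decay),
// and `state_` (the running linear-attention state). The actual names and
// body in this file may differ.
let at = kt.matmul(&vt)?;                               // (b, h, s, s) outer product for step t_
let rhs = (time_faaaa.broadcast_mul(&at)? + &state_)?;  // bonus-weighted current step plus history
out.push(rt.matmul(&rhs)?.squeeze(2)?);                 // read out through receptance
state_ = (&at + time_decay.broadcast_mul(&state_)?)?;   // decay history, fold in current step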