diff --git a/candle-wasm-examples/llama2-c/src/lib.rs b/candle-wasm-examples/llama2-c/src/lib.rs index 61154d04..b6b4004f 100644 --- a/candle-wasm-examples/llama2-c/src/lib.rs +++ b/candle-wasm-examples/llama2-c/src/lib.rs @@ -1,28 +1,3 @@ -#![allow(dead_code)] - -pub const WITH_TIMER: bool = true; - -struct Timer { - label: &'static str, -} - -impl Timer { - fn new(label: &'static str) -> Self { - if WITH_TIMER { - web_sys::console::time_with_label(label); - } - Self { label } - } -} - -impl Drop for Timer { - fn drop(&mut self) { - if WITH_TIMER { - web_sys::console::time_end_with_label(self.label) - } - } -} - mod app; mod model; mod worker; diff --git a/candle-wasm-examples/llama2-c/src/model.rs b/candle-wasm-examples/llama2-c/src/model.rs index 8cf53c2a..3231cabf 100644 --- a/candle-wasm-examples/llama2-c/src/model.rs +++ b/candle-wasm-examples/llama2-c/src/model.rs @@ -106,14 +106,15 @@ struct CausalSelfAttention { n_key_value_head: usize, head_dim: usize, cache: Cache, - max_seq_len: usize, } impl CausalSelfAttention { fn apply_rotary_emb(&self, x: &Tensor, index_pos: usize) -> Result { let (b_sz, seq_len, h, n_embd) = x.dims4()?; - let cos = self.cache.cos.narrow(0, index_pos, seq_len)?; - let sin = self.cache.sin.narrow(0, index_pos, seq_len)?; + let cos = self.cache.cos.i(index_pos..index_pos + seq_len)?; + let sin = self.cache.sin.i(index_pos..index_pos + seq_len)?; + let cos = cos.unsqueeze(1)?; + let sin = sin.unsqueeze(1)?; let cos = cos.broadcast_as((b_sz, seq_len, 1, n_embd / 2, 1))?; let sin = sin.broadcast_as((b_sz, seq_len, 1, n_embd / 2, 1))?; let x = x.reshape((b_sz, seq_len, h, n_embd / 2, 2))?; @@ -196,7 +197,6 @@ impl CausalSelfAttention { n_key_value_head: cfg.n_kv_heads, head_dim: cfg.dim / cfg.n_heads, cache: cache.clone(), - max_seq_len: cfg.seq_len, }) } }