Mirror of https://github.com/huggingface/candle.git (synced 2025-06-18 11:37:11 +00:00)
Tracing for the phi model (#936)

* Add some tracing bits to mixformers.
* Add the missing file.
* Add the conv2d layer to with-tracing.
* Improve the tracing usage.
candle-transformers/src/models/t5.rs:

@@ -1,57 +1,12 @@
 // T5 Text Model
 // https://github.com/huggingface/transformers/blob/main/src/transformers/models/t5/modeling_t5.py
 
+use crate::models::with_tracing::{linear_no_bias, Embedding, Linear};
 use candle::{DType, Device, Module, Result, Tensor, D};
 use candle_nn::{Activation, VarBuilder};
 use serde::Deserialize;
 use std::sync::Arc;
 
-#[derive(Debug)]
-struct Embedding {
-    inner: candle_nn::Embedding,
-    span: tracing::Span,
-}
-
-impl Embedding {
-    fn new(d1: usize, d2: usize, vb: VarBuilder) -> Result<Self> {
-        let inner = candle_nn::embedding(d1, d2, vb)?;
-        let span = tracing::span!(tracing::Level::TRACE, "embedding");
-        Ok(Self { inner, span })
-    }
-
-    fn embeddings(&self) -> &Tensor {
-        self.inner.embeddings()
-    }
-}
-
-impl Module for Embedding {
-    fn forward(&self, xs: &Tensor) -> Result<Tensor> {
-        let _enter = self.span.enter();
-        self.inner.forward(xs)
-    }
-}
-
-#[derive(Debug)]
-struct Linear {
-    inner: candle_nn::Linear,
-    span: tracing::Span,
-}
-
-impl Linear {
-    fn new(d1: usize, d2: usize, vb: VarBuilder) -> Result<Self> {
-        let inner = candle_nn::linear_no_bias(d1, d2, vb)?;
-        let span = tracing::span!(tracing::Level::TRACE, "linear");
-        Ok(Self { inner, span })
-    }
-}
-
-impl Module for Linear {
-    fn forward(&self, xs: &Tensor) -> Result<Tensor> {
-        let _enter = self.span.enter();
-        self.inner.forward(xs)
-    }
-}
-
 fn default_relative_attention_max_distance() -> usize {
     128
 }
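The Embedding and Linear wrappers removed above now live in the shared models/with_tracing module that the new import pulls in, so every model can reuse them instead of redefining them per file. A minimal sketch of the Linear side of that module, reconstructed from the code removed here (an assumption: the actual with_tracing.rs may differ in visibility and detail):

use candle::{Module, Result, Tensor};
use candle_nn::VarBuilder;

#[derive(Debug)]
pub struct Linear {
    inner: candle_nn::Linear,
    span: tracing::Span,
}

// Builds a bias-free linear layer wrapped in a TRACE-level span.
pub fn linear_no_bias(d1: usize, d2: usize, vb: VarBuilder) -> Result<Linear> {
    let inner = candle_nn::linear_no_bias(d1, d2, vb)?;
    let span = tracing::span!(tracing::Level::TRACE, "linear");
    Ok(Linear { inner, span })
}

impl Module for Linear {
    fn forward(&self, xs: &Tensor) -> Result<Tensor> {
        // Entering the span on every forward call attributes the time spent
        // in this layer to the "linear" span in the collected trace.
        let _enter = self.span.enter();
        self.inner.forward(xs)
    }
}

Call sites then switch mechanically from Linear::new(...) to the free function linear_no_bias(...), as the remaining hunks below show.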
@@ -205,8 +160,8 @@ struct T5DenseActDense
 
 impl T5DenseActDense {
     fn load(vb: VarBuilder, cfg: &Config) -> Result<Self> {
-        let wi = Linear::new(cfg.d_model, cfg.d_ff, vb.pp("wi"))?;
-        let wo = Linear::new(cfg.d_ff, cfg.d_model, vb.pp("wo"))?;
+        let wi = linear_no_bias(cfg.d_model, cfg.d_ff, vb.pp("wi"))?;
+        let wo = linear_no_bias(cfg.d_ff, cfg.d_model, vb.pp("wo"))?;
         Ok(Self {
             wi,
             wo,
@@ -237,9 +192,9 @@ struct T5DenseGatedActDense
 
 impl T5DenseGatedActDense {
     fn load(vb: VarBuilder, cfg: &Config) -> Result<Self> {
-        let wi_0 = Linear::new(cfg.d_model, cfg.d_ff, vb.pp("wi_0"))?;
-        let wi_1 = Linear::new(cfg.d_model, cfg.d_ff, vb.pp("wi_1"))?;
-        let wo = Linear::new(cfg.d_ff, cfg.d_model, vb.pp("wo"))?;
+        let wi_0 = linear_no_bias(cfg.d_model, cfg.d_ff, vb.pp("wi_0"))?;
+        let wi_1 = linear_no_bias(cfg.d_model, cfg.d_ff, vb.pp("wi_1"))?;
+        let wo = linear_no_bias(cfg.d_ff, cfg.d_model, vb.pp("wo"))?;
         Ok(Self {
             wi_0,
             wi_1,
@@ -334,10 +289,10 @@ impl T5Attention
         cfg: &Config,
     ) -> Result<Self> {
         let inner_dim = cfg.num_heads * cfg.d_kv;
-        let q = Linear::new(cfg.d_model, inner_dim, vb.pp("q"))?;
-        let k = Linear::new(cfg.d_model, inner_dim, vb.pp("k"))?;
-        let v = Linear::new(cfg.d_model, inner_dim, vb.pp("v"))?;
-        let o = Linear::new(inner_dim, cfg.d_model, vb.pp("o"))?;
+        let q = linear_no_bias(cfg.d_model, inner_dim, vb.pp("q"))?;
+        let k = linear_no_bias(cfg.d_model, inner_dim, vb.pp("k"))?;
+        let v = linear_no_bias(cfg.d_model, inner_dim, vb.pp("v"))?;
+        let o = linear_no_bias(inner_dim, cfg.d_model, vb.pp("o"))?;
         let relative_attention_bias = if has_relative_attention_bias {
             let emb = Embedding::new(
                 cfg.relative_attention_num_buckets,
@@ -772,7 +727,11 @@ impl T5ForConditionalGeneration
         let lm_head = if tie_word_embeddings {
             None
         } else {
-            Some(Linear::new(cfg.d_model, cfg.vocab_size, vb.pp("lm_head"))?)
+            Some(linear_no_bias(
+                cfg.d_model,
+                cfg.vocab_size,
+                vb.pp("lm_head"),
+            )?)
         };
 
         Ok(Self {
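These spans are inert until the binary installs a tracing subscriber. A sketch of how a caller might collect them into a chrome-trace profile, assuming the tracing-chrome and tracing-subscriber crates (this mirrors the pattern candle's example binaries use; the setup is illustrative and not part of this commit):

use tracing_chrome::ChromeLayerBuilder;
use tracing_subscriber::prelude::*;

fn main() {
    // The returned guard flushes the trace file when dropped, so it must
    // stay alive for the whole run.
    let (chrome_layer, _guard) = ChromeLayerBuilder::new().build();
    tracing_subscriber::registry().with(chrome_layer).init();

    // ... build and run the model here: every forward pass through the
    // wrapped layers is then recorded as a TRACE-level span in a
    // chrome-trace JSON file viewable in chrome://tracing or Perfetto.
}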