From b73c35cc577953716cc7e619d2f67af31e87751b Mon Sep 17 00:00:00 2001 From: Laurent Mazare Date: Sun, 15 Oct 2023 10:43:10 +0100 Subject: [PATCH] Improve the reshape error messages. (#1096) * Improve the reshape error messages. * Add the verbose-prompt flag to the phi example. --- candle-core/src/shape.rs | 101 +++++++++------------------ candle-examples/examples/phi/main.rs | 23 ++++-- 2 files changed, 49 insertions(+), 75 deletions(-) diff --git a/candle-core/src/shape.rs b/candle-core/src/shape.rs index 4d500e7f..ac00a979 100644 --- a/candle-core/src/shape.rs +++ b/candle-core/src/shape.rs @@ -511,154 +511,119 @@ impl ShapeWithOneHole for ((),) { } } +fn hole_size(el_count: usize, prod_d: usize, s: &dyn std::fmt::Debug) -> Result { + if prod_d == 0 { + crate::bail!("cannot reshape tensor of {el_count} elements to {s:?}") + } + if el_count % prod_d != 0 { + crate::bail!("cannot reshape tensor with {el_count} elements to {s:?}") + } + Ok(el_count / prod_d) +} + impl ShapeWithOneHole for ((), usize) { fn into_shape(self, el_count: usize) -> Result { let ((), d1) = self; - if el_count % d1 != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d1}") - } - Ok((el_count / d1, d1).into()) + Ok((hole_size(el_count, d1, &self)?, d1).into()) } } impl ShapeWithOneHole for (usize, ()) { fn into_shape(self, el_count: usize) -> Result { let (d1, ()) = self; - if el_count % d1 != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d1}") - } - Ok((d1, el_count / d1).into()) + Ok((d1, hole_size(el_count, d1, &self)?).into()) } } impl ShapeWithOneHole for ((), usize, usize) { fn into_shape(self, el_count: usize) -> Result { let ((), d1, d2) = self; - let d = d1 * d2; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((el_count / d, d1, d2).into()) + Ok((hole_size(el_count, d1 * d2, &self)?, d1, d2).into()) } } impl ShapeWithOneHole for (usize, (), usize) { fn into_shape(self, el_count: usize) -> Result { let (d1, (), d2) = self; - let d = d1 * d2; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, el_count / d, d2).into()) + Ok((d1, hole_size(el_count, d1 * d2, &self)?, d2).into()) } } impl ShapeWithOneHole for (usize, usize, ()) { fn into_shape(self, el_count: usize) -> Result { let (d1, d2, ()) = self; - let d = d1 * d2; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, d2, el_count / d).into()) + Ok((d1, d2, hole_size(el_count, d1 * d2, &self)?).into()) } } impl ShapeWithOneHole for ((), usize, usize, usize) { fn into_shape(self, el_count: usize) -> Result { let ((), d1, d2, d3) = self; - let d = d1 * d2 * d3; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((el_count / d, d1, d2, d3).into()) + let d = hole_size(el_count, d1 * d2 * d3, &self)?; + Ok((d, d1, d2, d3).into()) } } impl ShapeWithOneHole for (usize, (), usize, usize) { fn into_shape(self, el_count: usize) -> Result { let (d1, (), d2, d3) = self; - let d = d1 * d2 * d3; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, el_count / d, d2, d3).into()) + let d = hole_size(el_count, d1 * d2 * d3, &self)?; + Ok((d1, d, d2, d3).into()) } } impl ShapeWithOneHole for (usize, usize, (), usize) { fn into_shape(self, el_count: usize) -> Result { let (d1, d2, (), d3) = self; - let d = d1 * d2 * d3; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, d2, el_count / d, d3).into()) + let d = hole_size(el_count, d1 * d2 * d3, &self)?; + Ok((d1, d2, d, d3).into()) } } impl ShapeWithOneHole for (usize, usize, usize, ()) { fn into_shape(self, el_count: usize) -> Result { let (d1, d2, d3, ()) = self; - let d = d1 * d2 * d3; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, d2, d3, el_count / d).into()) + let d = hole_size(el_count, d1 * d2 * d3, &self)?; + Ok((d1, d2, d3, d).into()) } } impl ShapeWithOneHole for ((), usize, usize, usize, usize) { fn into_shape(self, el_count: usize) -> Result { let ((), d1, d2, d3, d4) = self; - let d = d1 * d2 * d3 * d4; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((el_count / d, d1, d2, d3, d4).into()) + let d = hole_size(el_count, d1 * d2 * d3 * d4, &self)?; + Ok((d, d1, d2, d3, d4).into()) } } impl ShapeWithOneHole for (usize, (), usize, usize, usize) { fn into_shape(self, el_count: usize) -> Result { let (d1, (), d2, d3, d4) = self; - let d = d1 * d2 * d3 * d4; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, el_count / d, d2, d3, d4).into()) + let d = hole_size(el_count, d1 * d2 * d3 * d4, &self)?; + Ok((d1, d, d2, d3, d4).into()) } } impl ShapeWithOneHole for (usize, usize, (), usize, usize) { fn into_shape(self, el_count: usize) -> Result { let (d1, d2, (), d3, d4) = self; - let d = d1 * d2 * d3 * d4; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, d2, el_count / d, d3, d4).into()) + let d = hole_size(el_count, d1 * d2 * d3 * d4, &self)?; + Ok((d1, d2, d, d3, d4).into()) } } impl ShapeWithOneHole for (usize, usize, usize, (), usize) { fn into_shape(self, el_count: usize) -> Result { let (d1, d2, d3, (), d4) = self; - let d = d1 * d2 * d3 * d4; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, d2, d3, el_count / d, d4).into()) + let d = hole_size(el_count, d1 * d2 * d3 * d4, &self)?; + Ok((d1, d2, d3, d, d4).into()) } } impl ShapeWithOneHole for (usize, usize, usize, usize, ()) { fn into_shape(self, el_count: usize) -> Result { let (d1, d2, d3, d4, ()) = self; - let d = d1 * d2 * d3 * d4; - if el_count % d != 0 { - crate::bail!("tensor number of elements {el_count} is not divisible by {d}") - } - Ok((d1, d2, d3, d4, el_count / d).into()) + let d = hole_size(el_count, d1 * d2 * d3 * d4, &self)?; + Ok((d1, d2, d3, d4, d).into()) } } diff --git a/candle-examples/examples/phi/main.rs b/candle-examples/examples/phi/main.rs index 3922b3d5..605819ac 100644 --- a/candle-examples/examples/phi/main.rs +++ b/candle-examples/examples/phi/main.rs @@ -28,6 +28,7 @@ struct TextGeneration { logits_processor: LogitsProcessor, repeat_penalty: f32, repeat_last_n: usize, + verbose_prompt: bool, } impl TextGeneration { @@ -40,6 +41,7 @@ impl TextGeneration { top_p: Option, repeat_penalty: f32, repeat_last_n: usize, + verbose_prompt: bool, device: &Device, ) -> Self { let logits_processor = LogitsProcessor::new(seed, temp, top_p); @@ -49,6 +51,7 @@ impl TextGeneration { logits_processor, repeat_penalty, repeat_last_n, + verbose_prompt, device: device.clone(), } } @@ -58,13 +61,14 @@ impl TextGeneration { println!("starting the inference loop"); print!("{prompt}"); std::io::stdout().flush()?; - let mut tokens = self - .tokenizer - .encode(prompt, true) - .map_err(E::msg)? - .get_ids() - .to_vec(); - + let tokens = self.tokenizer.encode(prompt, true).map_err(E::msg)?; + if self.verbose_prompt { + for (token, id) in tokens.get_tokens().iter().zip(tokens.get_ids().iter()) { + let token = token.replace('▁', " ").replace("<0x0A>", "\n"); + println!("{id:7} -> '{token}'"); + } + } + let mut tokens = tokens.get_ids().to_vec(); let mut generated_tokens = 0usize; let eos_token = match self.tokenizer.get_vocab(true).get("<|endoftext|>") { Some(token) => *token, @@ -129,6 +133,10 @@ struct Args { #[arg(long)] tracing: bool, + /// Display the token for the specified prompt. + #[arg(long)] + verbose_prompt: bool, + #[arg(long)] prompt: String, @@ -266,6 +274,7 @@ fn main() -> Result<()> { args.top_p, args.repeat_penalty, args.repeat_last_n, + args.verbose_prompt, &device, ); pipeline.run(&args.prompt, args.sample_len)?;