mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
onnx: fix pad, unsqueeze (#2317)
* onnx: fix pad, unsqueeze

  Both implementations have off-by-one errors:

  - Pad 'reflect': the index cycle for e.g. `dim==3` is `[0,1,2,1]`, which has a length of 4 (i.e. `dim*2 - 2`), not 5 (the current code uses `dim*2 - 1`).
  - Unsqueeze(-1) for a tensor with `dim==3` should resolve to 3 (i.e. `dim+index+1`), not 2 (i.e. the current `dim+index`).

  In addition, Pad incorrectly calculates the starting padding. If we want to pad out 2 elements at the start, and we have this cycle of indices of length 6, then we should skip 4 elements, but currently we skip 2. A more visual representation of what's going on is below:

  ```
  pad_start: 2
  data:      [a,b,c,d]
  indices:   [0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 2, 1, 0, ..] // zigzag between 0..4
  actual:    skip [ c d| c b a b]
  expected:  ~ skip ~ [ c b| a b c d]
  ```

  The values between `[` and `|` are padding, and the values between `|` and `]` in the example should match the original data being padded.

* Fix clippy lints.

---------

Co-authored-by: Laurent <laurent.mazare@gmail.com>
This commit is contained in:
@ -16,7 +16,7 @@ use candle_transformers::models::beit;
|
||||
/// Loads an image from disk using the image crate, this returns a tensor with shape
|
||||
/// (3, 384, 384). Beit special normalization is applied.
|
||||
pub fn load_image384_beit_norm<P: AsRef<std::path::Path>>(p: P) -> Result<Tensor> {
|
||||
let img = image::io::Reader::open(p)?
|
||||
let img = image::ImageReader::open(p)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?
|
||||
.resize_to_fill(384, 384, image::imageops::FilterType::Triangle);
|
||||
|
@ -55,7 +55,7 @@ const SEP_TOKEN_ID: u32 = 102;
|
||||
/// Loads an image from disk using the image crate, this returns a tensor with shape
|
||||
/// (3, 384, 384). OpenAI normalization is applied.
|
||||
pub fn load_image<P: AsRef<std::path::Path>>(p: P) -> Result<Tensor> {
|
||||
let img = image::io::Reader::open(p)?
|
||||
let img = image::ImageReader::open(p)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?
|
||||
.resize_to_fill(384, 384, image::imageops::FilterType::Triangle);
|
||||
|
@ -33,7 +33,7 @@ struct Args {
|
||||
}
|
||||
|
||||
fn load_image<T: AsRef<std::path::Path>>(path: T, image_size: usize) -> anyhow::Result<Tensor> {
|
||||
let img = image::io::Reader::open(path)?.decode()?;
|
||||
let img = image::ImageReader::open(path)?.decode()?;
|
||||
let (height, width) = (image_size, image_size);
|
||||
let img = img.resize_to_fill(
|
||||
width as u32,
|
||||
|
@ -16,7 +16,7 @@ use candle_transformers::models::eva2;
|
||||
/// Loads an image from disk using the image crate, this returns a tensor with shape
|
||||
/// (3, 448, 448). OpenAI normalization is applied.
|
||||
pub fn load_image448_openai_norm<P: AsRef<std::path::Path>>(p: P) -> Result<Tensor> {
|
||||
let img = image::io::Reader::open(p)?
|
||||
let img = image::ImageReader::open(p)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?
|
||||
.resize_to_fill(448, 448, image::imageops::FilterType::Triangle);
|
||||
|
@ -57,7 +57,7 @@ fn load_image<T: AsRef<std::path::Path>>(
|
||||
llava_config: &LLaVAConfig,
|
||||
dtype: DType,
|
||||
) -> Result<((u32, u32), Tensor)> {
|
||||
let img = image::io::Reader::open(path)?.decode()?;
|
||||
let img = image::ImageReader::open(path)?.decode()?;
|
||||
let img_tensor = process_image(&img, processor, llava_config)?;
|
||||
Ok(((img.width(), img.height()), img_tensor.to_dtype(dtype)?))
|
||||
}
|
||||
|
@ -208,7 +208,7 @@ struct Args {
|
||||
/// Loads an image from disk using the image crate, this returns a tensor with shape
|
||||
/// (3, 378, 378).
|
||||
pub fn load_image<P: AsRef<std::path::Path>>(p: P) -> candle::Result<Tensor> {
|
||||
let img = image::io::Reader::open(p)?
|
||||
let img = image::ImageReader::open(p)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?
|
||||
.resize_to_fill(378, 378, image::imageops::FilterType::Triangle); // Adjusted to 378x378
|
||||
|
@ -139,7 +139,7 @@ pub fn main() -> anyhow::Result<()> {
|
||||
let (_one, h, w) = mask.dims3()?;
|
||||
let mask = mask.expand((3, h, w))?;
|
||||
|
||||
let mut img = image::io::Reader::open(&args.image)?
|
||||
let mut img = image::ImageReader::open(&args.image)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?;
|
||||
let mask_pixels = mask.permute((1, 2, 0))?.flatten_all()?.to_vec1::<u8>()?;
|
||||
|
@ -380,7 +380,7 @@ fn text_embeddings(
|
||||
}
|
||||
|
||||
fn image_preprocess<T: AsRef<std::path::Path>>(path: T) -> anyhow::Result<Tensor> {
|
||||
let img = image::io::Reader::open(path)?.decode()?;
|
||||
let img = image::ImageReader::open(path)?.decode()?;
|
||||
let (height, width) = (img.height() as usize, img.width() as usize);
|
||||
let height = height - height % 32;
|
||||
let width = width - width % 32;
|
||||
|
@ -145,7 +145,7 @@ impl ViTImageProcessor {
|
||||
pub fn load_images(&self, image_path: Vec<&str>) -> Result<Vec<image::DynamicImage>> {
|
||||
let mut images: Vec<image::DynamicImage> = Vec::new();
|
||||
for path in image_path {
|
||||
let img = image::io::Reader::open(path)?.decode().unwrap();
|
||||
let img = image::ImageReader::open(path)?.decode().unwrap();
|
||||
images.push(img);
|
||||
}
|
||||
|
||||
|
@ -159,7 +159,7 @@ pub fn main() -> Result<()> {
|
||||
let net_width = darknet.width()?;
|
||||
let net_height = darknet.height()?;
|
||||
|
||||
let original_image = image::io::Reader::open(&image_name)?
|
||||
let original_image = image::ImageReader::open(&image_name)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?;
|
||||
let image = {
|
||||
|
@ -390,7 +390,7 @@ pub fn run<T: Task>(args: Args) -> anyhow::Result<()> {
|
||||
for image_name in args.images.iter() {
|
||||
println!("processing {image_name}");
|
||||
let mut image_name = std::path::PathBuf::from(image_name);
|
||||
let original_image = image::io::Reader::open(&image_name)?
|
||||
let original_image = image::ImageReader::open(&image_name)?
|
||||
.decode()
|
||||
.map_err(candle::Error::wrap)?;
|
||||
let (width, height) = {
|
||||
|
Reference in New Issue
Block a user