Add Resize to onnx ops (#2946)

* added resize to candle-onnx, not currently working
* changed unreachable to bail, and bailed when both scales and sizes are set
* cleanup and added other unused options for this op
* cleanup
* fixed image loading to make output work
* cleanup and removed unused variables
* removed path creation code, and changed unwrap to ?
2025-06-16 02:38:10 +00:00 · 2025-05-09 22:05:03 -07:00
parent 3d05f5cf3d
commit 36508a2c93
2 changed files with 118 additions and 13 deletions
--- a/candle-examples/examples/onnx/main.rs
+++ b/candle-examples/examples/onnx/main.rs
@ -5,12 +5,14 @@ extern crate intel_mkl_src;
 extern crate accelerate_src;
 use candle::{IndexOp, D};
 use candle_examples::save_image;
 use clap::{Parser, ValueEnum};
 /// Model variants this example can run.
 ///
 /// Derives clap's `ValueEnum`; `main` reads the chosen variant from
 /// `args.which` to pick the image loading, the model file and the
 /// post-processing applied to the model output.
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Which {
    /// Classifier whose output goes through a softmax over the last
    /// dimension before the predictions are sorted.
    SqueezeNet,
    /// Classifier fetched from the `onnx/EfficientNet-Lite4` hub repo; its
    /// input image is permuted with `(1, 2, 0)` and its output is used as-is.
    EfficientNet,
    /// Model fetched from the `qualcomm/Real-ESRGAN-x4plus` hub repo; its
    /// output is scaled by 255, converted to `u8` and saved as an image whose
    /// file name is the input file name prefixed with `super_`.
    EsrGan,
 }
 #[derive(Parser)]
@ -28,10 +30,21 @@ struct Args {
 pub fn main() -> anyhow::Result<()> {
    let args = Args::parse();
-    let image = candle_examples::imagenet::load_image224(args.image)?;
+    let image = match args.which {
        Which::SqueezeNet | Which::EfficientNet => {
            candle_examples::imagenet::load_image224(&args.image)?
        }
        Which::EsrGan => candle_examples::imagenet::load_image_with_std_mean(
            &args.image,
            128,
            &[0.0f32, 0.0, 0.0],
            &[1.0f32, 1.0, 1.0],
        )?,
    };
    let image = match args.which {
        Which::SqueezeNet => image,
        Which::EfficientNet => image.permute((1, 2, 0))?,
        Which::EsrGan => image,
    };
    println!("loaded image {image:?}");
@ -45,6 +58,9 @@ pub fn main() -> anyhow::Result<()> {
            Which::EfficientNet => hf_hub::api::sync::Api::new()?
                .model("onnx/EfficientNet-Lite4".into())
                .get("efficientnet-lite4-11.onnx")?,
            Which::EsrGan => hf_hub::api::sync::Api::new()?
                .model("qualcomm/Real-ESRGAN-x4plus".into())
                .get("Real-ESRGAN-x4plus.onnx")?,
        },
    };
@ -57,7 +73,11 @@ pub fn main() -> anyhow::Result<()> {
    let prs = match args.which {
        Which::SqueezeNet => candle_nn::ops::softmax(&output, D::Minus1)?,
        Which::EfficientNet => output,
        Which::EsrGan => output,
    };
    match args.which {
        Which::EfficientNet | Which::SqueezeNet => {
            let prs = prs.i(0)?.to_vec1::<f32>()?;
            // Sort the predictions and take the top 5
@ -73,6 +93,21 @@ pub fn main() -> anyhow::Result<()> {
                    p * 100.0
                );
            }
        }
        Which::EsrGan => {
            let max_pixel_val = candle::Tensor::try_from(255.0f32)?
                .to_device(prs.device())?
                .broadcast_as(prs.shape())?;
            let out = (prs * max_pixel_val)?.i(0)?.to_dtype(candle::DType::U8)?;
            let pb = std::path::PathBuf::from(args.image);
            let input_file_name = pb.file_name().unwrap();
            let mut output_file_name = std::ffi::OsString::from("super_");
            output_file_name.push(input_file_name);
            save_image(&out, output_file_name)?;
        }
    }
    Ok(())
 }
--- a/candle-onnx/src/eval.rs
+++ b/candle-onnx/src/eval.rs
@ -1960,6 +1960,76 @@ fn simple_eval_(
                let output = input.sign()?;
                values.insert(node.output[0].clone(), output);
            }
            "Resize" => {
                // Nearest-neighbour resize of a rank-4 input. Only `mode = "nearest"`,
                // `nearest_mode = "floor"` and
                // `coordinate_transformation_mode = "asymmetric"` are accepted; any other
                // combination is rejected with an error. Input 1 is never read.
                let input = get(&node.input[0])?;
                let rank = input.rank();
                if rank != 4 {
                    bail!("Unsupported rank for nearest resize: {}", rank);
                }

                // Inputs 2 (scales) and 3 (sizes) are optional: a missing or empty
                // input name means the value was not provided.
                let scales = match node.input.get(2) {
                    Some(name) if !name.is_empty() => Some(get(name)?),
                    _ => None,
                };
                let sizes = match node.input.get(3) {
                    Some(name) if !name.is_empty() => Some(get(name)?),
                    _ => None,
                };

                let output_dims = match (scales, sizes) {
                    (Some(_), Some(_)) => {
                        bail!("Scales and sizes cannot both be set for Resize operation")
                    }
                    (Some(scales_tensor), None) => {
                        let scale_values = scales_tensor.to_vec1::<f32>()?;
                        // One scale per input dimension is required; indexing a shorter
                        // tensor used to panic instead of returning an error.
                        if scale_values.len() != rank {
                            bail!(
                                "Resize expects {} scales (one per input dimension), got {}",
                                rank,
                                scale_values.len()
                            );
                        }
                        input
                            .dims()
                            .iter()
                            .zip(scale_values.iter())
                            // The float-to-usize cast truncates towards zero.
                            .map(|(&d, &s)| (d as f32 * s) as usize)
                            .collect::<Vec<_>>()
                    }
                    (None, Some(sizes_tensor)) => {
                        let size_values = sizes_tensor.to_vec1::<i64>()?;
                        if size_values.len() != rank {
                            bail!(
                                "Resize expects {} sizes (one per input dimension), got {}",
                                rank,
                                size_values.len()
                            );
                        }
                        // A negative size would wrap around to a huge value when cast.
                        if let Some(&bad) = size_values.iter().find(|&&d| d < 0) {
                            bail!("Resize sizes must be non-negative, got {}", bad);
                        }
                        size_values
                            .iter()
                            .map(|&d| d as usize)
                            .collect::<Vec<_>>()
                    }
                    (None, None) => bail!("Either scales or sizes should be present"),
                };

                // `upsample_nearest2d` only changes the last two dimensions, so a request
                // that alters the first two cannot be honoured; fail loudly rather than
                // return a tensor with an unexpected shape.
                if output_dims[..2] != input.dims()[..2] {
                    bail!(
                        "Resize only supports resizing the last two dimensions, got {:?} -> {:?}",
                        input.dims(),
                        output_dims
                    );
                }

                let coordinate_transformation_mode =
                    get_attr_opt::<str>(node, "coordinate_transformation_mode")?
                        .unwrap_or("half_pixel");
                // Interpolation mode: nearest, linear, or cubic.
                let mode = get_attr_opt::<str>(node, "mode")?.unwrap_or("nearest");
                // How to determine the "nearest" pixel in nearest interpolation mode.
                let nearest_mode =
                    get_attr_opt::<str>(node, "nearest_mode")?.unwrap_or("round_prefer_floor");

                if mode != "nearest" {
                    bail!("Unsupported resize mode: {}", mode);
                }
                if nearest_mode != "floor" {
                    bail!("Unsupported nearest_mode for resize: {}", nearest_mode);
                }
                if coordinate_transformation_mode != "asymmetric" {
                    bail!(
                        "Unsupported coordinate_transformation_mode for resize: {}",
                        coordinate_transformation_mode
                    );
                }

                // The length checks above guarantee four entries: indices 2 and 3 are
                // the target height and width.
                let h = output_dims[2];
                let w = output_dims[3];
                let output = input.upsample_nearest2d(h, w)?;
                values.insert(node.output[0].clone(), output);
            }
            op_type => bail!("unsupported op_type {op_type} for op {node:?}"),
        }
    }