Improve the aspect ratio handling on yolo-v8. (#549)

* Fix the aspect ratio handling in yolo-v8.

* Typo.
Author: Laurent Mazare (committed by GitHub)
Date: 2023-08-22 14:55:33 +01:00
Parent: bb69d89e28
Commit: 9bc811a247

@@ -424,6 +424,7 @@ impl YoloV8Neck {
     }
     fn forward(&self, p3: &Tensor, p4: &Tensor, p5: &Tensor) -> Result<(Tensor, Tensor, Tensor)> {
+        println!("{p3:?} {p4:?} {p5:?}");
         let x = self
             .n1
             .forward(&Tensor::cat(&[&self.up.forward(p5)?, p4], 1)?)?;
@@ -707,11 +708,11 @@ struct Args {
     images: Vec<String>,
     /// Threshold for the model confidence level.
-    #[arg(long, default_value_t = 0.5)]
+    #[arg(long, default_value_t = 0.25)]
     confidence_threshold: f32,
     /// Threshold for non-maximum suppression.
-    #[arg(long, default_value_t = 0.4)]
+    #[arg(long, default_value_t = 0.45)]
     nms_threshold: f32,
 }
@@ -759,27 +760,47 @@ pub fn main() -> anyhow::Result<()> {
         let original_image = image::io::Reader::open(&image_name)?
             .decode()
             .map_err(candle::Error::wrap)?;
-        let image = {
-            let data = original_image
-                .resize_exact(640, 640, image::imageops::FilterType::Triangle)
-                .to_rgb8()
-                .into_raw();
-            Tensor::from_vec(data, (640, 640, 3), &Device::Cpu)?.permute((2, 0, 1))?
+        let (width, height) = {
+            let w = original_image.width() as usize;
+            let h = original_image.height() as usize;
+            if w < h {
+                let w = w * 640 / h;
+                // Sizes have to be divisible by 32.
+                (w / 32 * 32, 640)
+            } else {
+                let h = h * 640 / w;
+                (640, h / 32 * 32)
+            }
         };
-        let image = (image.unsqueeze(0)?.to_dtype(DType::F32)? * (1. / 255.))?;
-        let predictions = model.forward(&image)?.squeeze(0)?;
+        let image_t = {
+            let img = original_image.resize_exact(
+                width as u32,
+                height as u32,
+                image::imageops::FilterType::CatmullRom,
+            );
+            let data = img.to_rgb8().into_raw();
+            Tensor::from_vec(
+                data,
+                (img.height() as usize, img.width() as usize, 3),
+                &Device::Cpu,
+            )?
+            .permute((2, 0, 1))?
+        };
+        println!("{image_t:?}");
+        let image_t = (image_t.unsqueeze(0)?.to_dtype(DType::F32)? * (1. / 255.))?;
+        let predictions = model.forward(&image_t)?.squeeze(0)?;
         println!("generated predictions {predictions:?}");
-        let image = report(
+        let image_t = report(
             &predictions,
             original_image,
-            640,
-            640,
+            width,
+            height,
             args.confidence_threshold,
             args.nms_threshold,
         )?;
         image_name.set_extension("pp.jpg");
         println!("writing {image_name:?}");
-        image.save(image_name)?
+        image_t.save(image_name)?
     }
     Ok(())
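
For context on the last hunk: instead of force-resizing every input to 640x640 and distorting it, the longer side is now scaled to 640 and the shorter side is scaled proportionally, then rounded down to the nearest multiple of 32 so the downsampled feature maps divide evenly. A minimal sketch of that sizing rule, with `target_size` as a hypothetical helper name and the asserted values worked out by hand:

fn target_size(w: usize, h: usize) -> (usize, usize) {
    if w < h {
        // Portrait: the height becomes 640, the width keeps the aspect ratio
        // and is rounded down to a multiple of 32.
        let w = w * 640 / h;
        (w / 32 * 32, 640)
    } else {
        // Landscape or square: the width becomes 640 instead.
        let h = h * 640 / w;
        (640, h / 32 * 32)
    }
}

fn main() {
    // A 1280x720 frame maps to 640x352 rather than being squashed to 640x640.
    assert_eq!(target_size(1280, 720), (640, 352));
    // A 480x640 portrait image maps to 480x640 (480 is already a multiple of 32).
    assert_eq!(target_size(480, 640), (480, 640));
}

The same `width`/`height` pair is then passed to `report`, so the predicted boxes can be mapped back onto the original, undistorted image.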