Mirror of https://github.com/huggingface/candle.git
Improve the aspect ratio handling on yolo-v8. (#549)

* Fix the aspect ratio handling in yolo-v8.

* Typo.
@@ -424,6 +424,7 @@ impl YoloV8Neck {
     }

     fn forward(&self, p3: &Tensor, p4: &Tensor, p5: &Tensor) -> Result<(Tensor, Tensor, Tensor)> {
+        println!("{p3:?} {p4:?} {p5:?}");
         let x = self
             .n1
             .forward(&Tensor::cat(&[&self.up.forward(p5)?, p4], 1)?)?;
@@ -707,11 +708,11 @@ struct Args {
     images: Vec<String>,

     /// Threshold for the model confidence level.
-    #[arg(long, default_value_t = 0.5)]
+    #[arg(long, default_value_t = 0.25)]
     confidence_threshold: f32,

     /// Threshold for non-maximum suppression.
-    #[arg(long, default_value_t = 0.4)]
+    #[arg(long, default_value_t = 0.45)]
     nms_threshold: f32,
 }

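For context, the changed defaults are plain clap arguments, so they can still be overridden on the command line. A minimal, self-contained sketch of how these two fields parse, assuming clap's derive feature and omitting the example's other Args fields:

use clap::Parser;

#[derive(Parser, Debug)]
struct Args {
    /// Threshold for the model confidence level.
    #[arg(long, default_value_t = 0.25)]
    confidence_threshold: f32,

    /// Threshold for non-maximum suppression.
    #[arg(long, default_value_t = 0.45)]
    nms_threshold: f32,
}

fn main() {
    // Passing e.g. `--confidence-threshold 0.5` restores the old default.
    let args = Args::parse();
    println!("conf={} nms={}", args.confidence_threshold, args.nms_threshold);
}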
@@ -759,27 +760,47 @@ pub fn main() -> anyhow::Result<()> {
         let original_image = image::io::Reader::open(&image_name)?
             .decode()
             .map_err(candle::Error::wrap)?;
-        let image = {
-            let data = original_image
-                .resize_exact(640, 640, image::imageops::FilterType::Triangle)
-                .to_rgb8()
-                .into_raw();
-            Tensor::from_vec(data, (640, 640, 3), &Device::Cpu)?.permute((2, 0, 1))?
+        let (width, height) = {
+            let w = original_image.width() as usize;
+            let h = original_image.height() as usize;
+            if w < h {
+                let w = w * 640 / h;
+                // Sizes have to be divisible by 32.
+                (w / 32 * 32, 640)
+            } else {
+                let h = h * 640 / w;
+                (640, h / 32 * 32)
+            }
         };
-        let image = (image.unsqueeze(0)?.to_dtype(DType::F32)? * (1. / 255.))?;
-        let predictions = model.forward(&image)?.squeeze(0)?;
+        let image_t = {
+            let img = original_image.resize_exact(
+                width as u32,
+                height as u32,
+                image::imageops::FilterType::CatmullRom,
+            );
+            let data = img.to_rgb8().into_raw();
+            Tensor::from_vec(
+                data,
+                (img.height() as usize, img.width() as usize, 3),
+                &Device::Cpu,
+            )?
+            .permute((2, 0, 1))?
+        };
+        println!("{image_t:?}");
+        let image_t = (image_t.unsqueeze(0)?.to_dtype(DType::F32)? * (1. / 255.))?;
+        let predictions = model.forward(&image_t)?.squeeze(0)?;
         println!("generated predictions {predictions:?}");
-        let image = report(
+        let image_t = report(
             &predictions,
             original_image,
-            640,
-            640,
+            width,
+            height,
             args.confidence_threshold,
             args.nms_threshold,
         )?;
         image_name.set_extension("pp.jpg");
         println!("writing {image_name:?}");
-        image.save(image_name)?
+        image_t.save(image_name)?
     }

     Ok(())
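The heart of the change is the new dimension computation: instead of unconditionally resizing to 640x640 (which distorts non-square images), the longer side is scaled to 640 and the shorter side is rounded down to a multiple of 32. A standalone sketch of that logic with worked examples; target_dims is a hypothetical helper for illustration, not part of the example's code:

// Scale the longer side to 640, round the shorter side down to a
// multiple of 32, mirroring the (width, height) block in the diff above.
fn target_dims(w: usize, h: usize) -> (usize, usize) {
    if w < h {
        let w = w * 640 / h;
        (w / 32 * 32, 640)
    } else {
        let h = h * 640 / w;
        (640, h / 32 * 32)
    }
}

fn main() {
    // A 1280x720 landscape image scales to 640x360; 360 rounds down to 352.
    assert_eq!(target_dims(1280, 720), (640, 352));
    // A 480x640 portrait image keeps height 640; 480 is already a multiple of 32.
    assert_eq!(target_dims(480, 640), (480, 640));
}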