Add a basic metal example with capture (#2324)

* Add some tracing.

* Get the trace to work.
This commit is contained in:
Laurent Mazare
2024-07-09 12:38:11 +02:00
committed by GitHub
parent 9cd54aa5d4
commit 25960676ca
3 changed files with 39 additions and 1 deletions

View File

@ -48,3 +48,7 @@ metal = ["dep:metal", "dep:candle-metal-kernels"]
[[bench]]
name = "bench_main"
harness = false
[[example]]
name = "metal_basics"
required-features = ["metal"]

View File

@ -0,0 +1,28 @@
#[cfg(feature = "accelerate")]
extern crate accelerate_src;
#[cfg(feature = "mkl")]
extern crate intel_mkl_src;
use anyhow::Result;
use candle_core::{Device, Tensor};
fn main() -> Result<()> {
// This requires the code to be run with MTL_CAPTURE_ENABLED=1
let device = Device::new_metal(0)?;
let metal_device = match &device {
Device::Metal(m) => m,
_ => anyhow::bail!("unexpected device"),
};
metal_device.capture("/tmp/candle.gputrace")?;
// This first synchronize ensures that a new command buffer gets created after setting up the
// capture scope.
device.synchronize()?;
let x = Tensor::randn(0f32, 1.0, (128, 128), &device)?;
let x1 = x.add(&x)?;
println!("{x1:?}");
// This second synchronize ensures that the command buffer gets commited before the end of the
// capture scope.
device.synchronize()?;
Ok(())
}

View File

@ -273,7 +273,13 @@ impl MetalDevice {
let descriptor = metal::CaptureDescriptor::new();
descriptor.set_destination(metal::MTLCaptureDestination::GpuTraceDocument);
descriptor.set_capture_device(self);
descriptor.set_output_url(path);
// The [set_output_url] call requires an absolute path so we convert it if needed.
if path.as_ref().is_absolute() {
descriptor.set_output_url(path);
} else {
let path = std::env::current_dir()?.join(path);
descriptor.set_output_url(path);
}
capture
.start_capture(&descriptor)