mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 02:58:50 +00:00
Add a basic metal example with capture (#2324)
* Add some tracing. * Get the trace to work.
This commit is contained in:
@ -48,3 +48,7 @@ metal = ["dep:metal", "dep:candle-metal-kernels"]
|
|||||||
[[bench]]
|
[[bench]]
|
||||||
name = "bench_main"
|
name = "bench_main"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "metal_basics"
|
||||||
|
required-features = ["metal"]
|
||||||
|
28
candle-core/examples/metal_basics.rs
Normal file
28
candle-core/examples/metal_basics.rs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#[cfg(feature = "accelerate")]
|
||||||
|
extern crate accelerate_src;
|
||||||
|
|
||||||
|
#[cfg(feature = "mkl")]
|
||||||
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use candle_core::{Device, Tensor};
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
// This requires the code to be run with MTL_CAPTURE_ENABLED=1
|
||||||
|
let device = Device::new_metal(0)?;
|
||||||
|
let metal_device = match &device {
|
||||||
|
Device::Metal(m) => m,
|
||||||
|
_ => anyhow::bail!("unexpected device"),
|
||||||
|
};
|
||||||
|
metal_device.capture("/tmp/candle.gputrace")?;
|
||||||
|
// This first synchronize ensures that a new command buffer gets created after setting up the
|
||||||
|
// capture scope.
|
||||||
|
device.synchronize()?;
|
||||||
|
let x = Tensor::randn(0f32, 1.0, (128, 128), &device)?;
|
||||||
|
let x1 = x.add(&x)?;
|
||||||
|
println!("{x1:?}");
|
||||||
|
// This second synchronize ensures that the command buffer gets commited before the end of the
|
||||||
|
// capture scope.
|
||||||
|
device.synchronize()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
@ -273,7 +273,13 @@ impl MetalDevice {
|
|||||||
let descriptor = metal::CaptureDescriptor::new();
|
let descriptor = metal::CaptureDescriptor::new();
|
||||||
descriptor.set_destination(metal::MTLCaptureDestination::GpuTraceDocument);
|
descriptor.set_destination(metal::MTLCaptureDestination::GpuTraceDocument);
|
||||||
descriptor.set_capture_device(self);
|
descriptor.set_capture_device(self);
|
||||||
|
// The [set_output_url] call requires an absolute path so we convert it if needed.
|
||||||
|
if path.as_ref().is_absolute() {
|
||||||
descriptor.set_output_url(path);
|
descriptor.set_output_url(path);
|
||||||
|
} else {
|
||||||
|
let path = std::env::current_dir()?.join(path);
|
||||||
|
descriptor.set_output_url(path);
|
||||||
|
}
|
||||||
|
|
||||||
capture
|
capture
|
||||||
.start_capture(&descriptor)
|
.start_capture(&descriptor)
|
||||||
|
Reference in New Issue
Block a user