mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 18:48:51 +00:00
Sketch a fast cuda kernel for reduce-sum. (#109)
* Sketch a fast cuda kernel for reduce-sum.
* Sketch the rust support code for the fast sum kernel.
* More work on the fast kernel.
* Add some testing ground.
* A couple fixes for the fast sum kernel.
This commit is contained in:
15
candle-core/examples/cuda_basics.rs
Normal file
15
candle-core/examples/cuda_basics.rs
Normal file
@ -0,0 +1,15 @@
|
||||
#[cfg(feature = "mkl")]
|
||||
extern crate intel_mkl_src;
|
||||
|
||||
use anyhow::Result;
|
||||
use candle::{Device, Tensor};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let device = Device::new_cuda(0)?;
|
||||
let t = Tensor::new(&[[1f32, 2., 3., 4.2]], &device)?;
|
||||
let sum = t.sum(&[0])?;
|
||||
println!("{sum}");
|
||||
let sum = t.sum(&[1])?;
|
||||
println!("{sum}");
|
||||
Ok(())
|
||||
}
|
Reference in New Issue
Block a user