mirror of
https://github.com/huggingface/candle.git
synced 2025-06-18 19:47:12 +00:00
Support for UG kernels. (#2579)
* Support for UG kernels.
* Add a dedicated test.
This commit is contained in:
@ -51,6 +51,27 @@ impl CudaDevice {
|
||||
self.device.clone()
|
||||
}
|
||||
|
||||
pub fn compile(
|
||||
&self,
|
||||
func_name: &'static str,
|
||||
kernel: ug::lang::ssa::Kernel,
|
||||
) -> Result<CudaFunction> {
|
||||
let mut buf = vec![];
|
||||
ug_cuda::code_gen::gen(&mut buf, func_name, &kernel)?;
|
||||
let cuda_code = String::from_utf8(buf)?;
|
||||
let opts = cudarc::nvrtc::CompileOptions {
|
||||
use_fast_math: Some(true),
|
||||
..Default::default()
|
||||
};
|
||||
let ptx = cudarc::nvrtc::safe::compile_ptx_with_opts(cuda_code, opts).w()?;
|
||||
self.device.load_ptx(ptx, "ug", &[func_name]).w()?;
|
||||
let func = match self.device.get_func("ug", func_name) {
|
||||
Some(func) => func,
|
||||
None => crate::bail!("unknown function ug::{func_name}"),
|
||||
};
|
||||
Ok(func)
|
||||
}
|
||||
|
||||
/// Returns the value of this device's `id` field.
pub fn id(&self) -> DeviceId {
    self.id
}
|
||||
|
Reference in New Issue
Block a user