feat: parse Cuda compute cap from env (#1066)
* feat: add support for multiple compute caps
* Revert to one compute cap
* fmt
* fix
@@ -12,6 +12,9 @@ compute_cap
 8.9
 ```
 
+You can also compile the Cuda kernels for a specific compute cap using the
+`CUDA_COMPUTE_CAP=<compute cap>` environment variable.
+
 If any of the above commands errors out, please make sure to update your Cuda version.
 
 2. Create a new app and add [`candle-core`](https://github.com/huggingface/candle/tree/main/candle-core) with Cuda support.
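The environment-variable override documented above is wired into the build scripts further down in this commit. As a rough sketch of the lookup order (illustrative code only, not the candle build script itself; the `cargo build --features cuda` invocation in the comment is just an example):

```rust
// Minimal sketch: honor CUDA_COMPUTE_CAP if set, otherwise ask nvidia-smi.
fn compute_cap_sketch() -> Result<usize, Box<dyn std::error::Error>> {
    if let Ok(cap) = std::env::var("CUDA_COMPUTE_CAP") {
        // e.g. `CUDA_COMPUTE_CAP=89 cargo build --features cuda` (illustrative invocation)
        return Ok(cap.parse()?);
    }
    let out = std::process::Command::new("nvidia-smi")
        .args(["--query-gpu=compute_cap", "--format=csv"])
        .output()?;
    let stdout = String::from_utf8(out.stdout)?;
    // The CSV output has a "compute_cap" header line followed by e.g. "8.9".
    let cap = stdout
        .lines()
        .nth(1)
        .ok_or("missing compute_cap line in nvidia-smi output")?
        .trim()
        .replace('.', "");
    Ok(cap.parse()?)
}

fn main() {
    match compute_cap_sketch() {
        Ok(cap) => println!("targeting sm_{cap}"),
        Err(e) => eprintln!("could not determine compute cap: {e}"),
    }
}
```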
@@ -84,12 +84,19 @@ fn main() -> Result<()> {
             (kernel_dir.join(f), obj_file)
         })
         .collect();
+    let out_modified: Result<_, _> = out_file.metadata().and_then(|m| m.modified());
     let should_compile = if out_file.exists() {
-        cu_files.iter().any(|(cu_file, _)| {
-            let out_modified = out_file.metadata().unwrap().modified().unwrap();
-            let in_modified = cu_file.metadata().unwrap().modified().unwrap();
-            in_modified.duration_since(out_modified).is_ok()
-        })
+        kernel_dir
+            .read_dir()
+            .expect("kernels folder should exist")
+            .any(|entry| {
+                if let (Ok(entry), Ok(out_modified)) = (entry, &out_modified) {
+                    let in_modified = entry.metadata().unwrap().modified().unwrap();
+                    in_modified.duration_since(*out_modified).is_ok()
+                } else {
+                    true
+                }
+            })
     } else {
         true
     };
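A note on the rebuild check above: `SystemTime::duration_since` returns `Ok` only when the receiver is not earlier than its argument, so `in_modified.duration_since(*out_modified).is_ok()` is true exactly when a kernel source is at least as new as the existing output, which triggers a recompile. A tiny standalone illustration of that standard-library behavior (the variable names are stand-ins for the file mtimes the build script reads from metadata):

```rust
use std::time::{Duration, SystemTime};

fn main() {
    // Stand-ins for file modification times.
    let out_modified = SystemTime::now();
    let in_modified = out_modified + Duration::from_secs(5);

    // Ok(..): the input is not older than the output -> recompile.
    assert!(in_modified.duration_since(out_modified).is_ok());
    // Err(..): the output is already newer than the input -> skip.
    assert!(out_modified.duration_since(in_modified).is_err());
    println!("duration_since check passed");
}
```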
@@ -100,12 +107,19 @@ fn main() -> Result<()> {
         let mut command = std::process::Command::new("nvcc");
         command
             .arg("-std=c++17")
+            .arg("-O3")
+            .arg("-U__CUDA_NO_HALF_OPERATORS__")
+            .arg("-U__CUDA_NO_HALF_CONVERSIONS__")
+            .arg("-U__CUDA_NO_HALF2_OPERATORS__")
+            .arg("-U__CUDA_NO_BFLOAT16_CONVERSIONS__")
             .arg(format!("--gpu-architecture=sm_{compute_cap}"))
             .arg("-c")
             .args(["-o", obj_file.to_str().unwrap()])
             .args(["--default-stream", "per-thread"])
             .arg("-Icutlass/include")
             .arg("--expt-relaxed-constexpr")
+            .arg("--expt-extended-lambda")
+            .arg("--use_fast_math")
             .arg("--verbose");
         if let Ok(ccbin_path) = &ccbin_env {
             command
@@ -203,13 +217,21 @@ fn set_cuda_include_dir() -> Result<()> {
 
 #[allow(unused)]
 fn compute_cap() -> Result<usize> {
-    // Grab compute code from nvidia-smi
-    let mut compute_cap = {
+    println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");
+
+    // Try to parse compute caps from env
+    let mut compute_cap = if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
+        println!("cargo:rustc-env=CUDA_COMPUTE_CAP={compute_cap_str}");
+        compute_cap_str
+            .parse::<usize>()
+            .context("Could not parse compute cap")?
+    } else {
+        // Use nvidia-smi to get the current compute cap
         let out = std::process::Command::new("nvidia-smi")
             .arg("--query-gpu=compute_cap")
             .arg("--format=csv")
             .output()
             .context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?;
         let out = std::str::from_utf8(&out.stdout).context("stdout is not a utf8 string")?;
         let mut lines = out.lines();
         assert_eq!(
@@ -220,16 +242,19 @@ fn compute_cap() -> Result<usize> {
             .next()
             .context("missing line in stdout")?
             .replace('.', "");
-        cap.parse::<usize>()
-            .with_context(|| format!("cannot parse as int {cap}"))?
+        let cap = cap
+            .parse::<usize>()
+            .with_context(|| format!("cannot parse as int {cap}"))?;
+        println!("cargo:rustc-env=CUDA_COMPUTE_CAP={cap}");
+        cap
     };
 
     // Grab available GPU codes from nvcc and select the highest one
-    let max_nvcc_code = {
+    let (supported_nvcc_codes, max_nvcc_code) = {
         let out = std::process::Command::new("nvcc")
             .arg("--list-gpu-code")
             .output()
             .expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
         let out = std::str::from_utf8(&out.stdout).unwrap();
 
         let out = out.lines().collect::<Vec<&str>>();
@@ -243,30 +268,21 @@ fn compute_cap() -> Result<usize> {
             }
         }
         codes.sort();
-        if !codes.contains(&compute_cap) {
-            anyhow::bail!(
-                "nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {codes:?}."
-            );
-        }
-        *codes.last().unwrap()
+        let max_nvcc_code = *codes.last().context("no gpu codes parsed from nvcc")?;
+        (codes, max_nvcc_code)
     };
 
-    // If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
-    // then choose the highest gpu code in nvcc
-    if compute_cap > max_nvcc_code {
-        println!(
-            "cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}."
+    // Check that nvcc supports the asked compute caps
+    if !supported_nvcc_codes.contains(&compute_cap) {
+        anyhow::bail!(
+            "nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {supported_nvcc_codes:?}."
+        );
+    }
+    if compute_cap > max_nvcc_code {
+        anyhow::bail!(
+            "CUDA compute cap {compute_cap} is higher than the highest gpu code from nvcc {max_nvcc_code}"
         );
-        compute_cap = max_nvcc_code;
     }
 
-    println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");
-    if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
-        compute_cap = compute_cap_str
-            .parse::<usize>()
-            .with_context(|| format!("cannot parse as usize '{compute_cap_str}'"))?;
-        println!("cargo:warning=Using gpu arch {compute_cap} from $CUDA_COMPUTE_CAP");
-    }
-    println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
     Ok(compute_cap)
 }
@@ -12,5 +12,6 @@ license = "MIT OR Apache-2.0"
 [dependencies]
 
 [build-dependencies]
+anyhow = { version = "1", features = ["backtrace"] }
 glob = "0.3.1"
 rayon = "1.7.0"
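The new `anyhow` build-dependency is what the `.context(...)` and `.with_context(...)` calls in the build scripts rely on. A minimal sketch of that error-handling pattern in a build script (hypothetical values, not candle's actual script):

```rust
use anyhow::{Context, Result};

// Attach a human-readable message to the underlying parse error.
fn parse_cap(s: &str) -> Result<usize> {
    s.parse::<usize>()
        .with_context(|| format!("cannot parse as int {s}"))
}

fn main() -> Result<()> {
    let cap = parse_cap("89")?;
    // Expose the value to the crate being built, mirroring the build scripts above.
    println!("cargo:rustc-env=CUDA_COMPUTE_CAP={cap}");
    Ok(())
}
```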
@@ -1,4 +1,5 @@
 use std::io::Write;
+
 fn main() {
     println!("cargo:rerun-if-changed=build.rs");
 
@@ -23,6 +24,8 @@ fn main() {
 }
 
 mod cuda {
+    use anyhow::{Context, Result};
+
     pub fn set_include_dir() {
         use std::path::PathBuf;
         // NOTE: copied from cudarc build.rs.
@@ -100,107 +103,52 @@ mod cuda {
         include_directories.sort();
         include_directories.dedup();
 
+        let compute_cap = compute_cap().expect("Could not get Cuda compute cap");
+
         #[allow(unused)]
         let include_options: Vec<String> = include_directories
             .into_iter()
             .map(|s| "-I".to_string() + &s.into_os_string().into_string().unwrap())
             .collect::<Vec<_>>();
 
-        // let start = std::time::Instant::now();
-
-        // Grab compute code from nvidia-smi
-        let mut compute_cap = {
-            let out = std::process::Command::new("nvidia-smi")
-                .arg("--query-gpu=compute_cap")
-                .arg("--format=csv")
-                .output()
-                .expect("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.");
-            let out = std::str::from_utf8(&out.stdout).unwrap();
-            let mut lines = out.lines();
-            assert_eq!(lines.next().unwrap(), "compute_cap");
-            let cap = lines.next().unwrap().replace('.', "");
-            cap.parse::<usize>().unwrap()
-        };
-
-        // Grab available GPU codes from nvcc and select the highest one
-        let max_nvcc_code = {
-            let out = std::process::Command::new("nvcc")
-                .arg("--list-gpu-code")
-                .output()
-                .expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
-            let out = std::str::from_utf8(&out.stdout).unwrap();
-
-            let out = out.lines().collect::<Vec<&str>>();
-            let mut codes = Vec::with_capacity(out.len());
-            for code in out {
-                let code = code.split('_').collect::<Vec<&str>>();
-                if !code.is_empty() && code.contains(&"sm") {
-                    if let Ok(num) = code[1].parse::<usize>() {
-                        codes.push(num);
-                    }
-                }
-            }
-            codes.sort();
-            if !codes.contains(&compute_cap) {
-                panic!("nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {codes:?}.");
-            }
-            *codes.last().unwrap()
-        };
-
-        // If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
-        // then choose the highest gpu code in nvcc
-        if compute_cap > max_nvcc_code {
-            println!(
-                "cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}."
-            );
-            compute_cap = max_nvcc_code;
-        }
-
-        println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");
-        if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
-            compute_cap = compute_cap_str.parse::<usize>().unwrap();
-            println!("cargo:warning=Using gpu arch {compute_cap} from $CUDA_COMPUTE_CAP");
-        }
-
-        println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
-
         let ccbin_env = std::env::var("CANDLE_NVCC_CCBIN");
         println!("cargo:rerun-if-env-changed=CANDLE_NVCC_CCBIN");
         let children = kernel_paths
             .par_iter()
             .flat_map(|p| {
                 let mut output = p.clone();
                 output.set_extension("ptx");
                 let output_filename = std::path::Path::new(&out_dir).to_path_buf().join("out").with_file_name(output.file_name().unwrap());
 
                 let ignore = if output_filename.exists() {
                     let out_modified = output_filename.metadata().unwrap().modified().unwrap();
                     let in_modified = p.metadata().unwrap().modified().unwrap();
                     out_modified.duration_since(in_modified).is_ok()
-                }else{
+                } else {
                     false
                 };
-                if ignore{
+                if ignore {
                     None
-                }else{
+                } else {
                     let mut command = std::process::Command::new("nvcc");
                     command.arg(format!("--gpu-architecture=sm_{compute_cap}"))
                         .arg("--ptx")
                         .args(["--default-stream", "per-thread"])
                         .args(["--output-directory", &out_dir])
                         // Flash attention only
                         // .arg("--expt-relaxed-constexpr")
                         .args(&include_options);
                     if let Ok(ccbin_path) = &ccbin_env {
                         command
                             .arg("-allow-unsupported-compiler")
                             .args(["-ccbin", ccbin_path]);
                     }
                     command.arg(p);
                     Some((p, command.spawn()
                         .expect("nvcc failed to start. Ensure that you have CUDA installed and that `nvcc` is in your PATH.").wait_with_output()))
-            }})
-            .collect::<Vec<_>>();
+                }
+            })
+            .collect::<Vec<_>>();
 
         let ptx_paths: Vec<PathBuf> = glob::glob(&format!("{out_dir}/**/*.ptx"))
             .unwrap()
@@ -220,4 +168,76 @@ mod cuda {
         }
         (write, kernel_paths)
     }
+
+    #[allow(unused)]
+    fn compute_cap() -> Result<usize> {
+        println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");
+
+        // Try to parse compute caps from env
+        let mut compute_cap = if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
+            println!("cargo:rustc-env=CUDA_COMPUTE_CAP={compute_cap_str}");
+            compute_cap_str
+                .parse::<usize>()
+                .context("Could not parse code")?
+        } else {
+            // Use nvidia-smi to get the current compute cap
+            let out = std::process::Command::new("nvidia-smi")
+                .arg("--query-gpu=compute_cap")
+                .arg("--format=csv")
+                .output()
+                .context("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.")?;
+            let out = std::str::from_utf8(&out.stdout).context("stdout is not a utf8 string")?;
+            let mut lines = out.lines();
+            assert_eq!(
+                lines.next().context("missing line in stdout")?,
+                "compute_cap"
+            );
+            let cap = lines
+                .next()
+                .context("missing line in stdout")?
+                .replace('.', "");
+            let cap = cap
+                .parse::<usize>()
+                .with_context(|| format!("cannot parse as int {cap}"))?;
+            println!("cargo:rustc-env=CUDA_COMPUTE_CAP={cap}");
+            cap
+        };
+
+        // Grab available GPU codes from nvcc and select the highest one
+        let (supported_nvcc_codes, max_nvcc_code) = {
+            let out = std::process::Command::new("nvcc")
+                .arg("--list-gpu-code")
+                .output()
+                .expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
+            let out = std::str::from_utf8(&out.stdout).unwrap();
+
+            let out = out.lines().collect::<Vec<&str>>();
+            let mut codes = Vec::with_capacity(out.len());
+            for code in out {
+                let code = code.split('_').collect::<Vec<&str>>();
+                if !code.is_empty() && code.contains(&"sm") {
+                    if let Ok(num) = code[1].parse::<usize>() {
+                        codes.push(num);
+                    }
+                }
+            }
+            codes.sort();
+            let max_nvcc_code = *codes.last().context("no gpu codes parsed from nvcc")?;
+            (codes, max_nvcc_code)
+        };
+
+        // Check that nvcc supports the asked compute caps
+        if !supported_nvcc_codes.contains(&compute_cap) {
+            anyhow::bail!(
+                "nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {supported_nvcc_codes:?}."
+            );
+        }
+        if compute_cap > max_nvcc_code {
+            anyhow::bail!(
+                "CUDA compute cap {compute_cap} is higher than the highest gpu code from nvcc {max_nvcc_code}"
+            );
+        }
+
+        Ok(compute_cap)
+    }
 }