mirror of
https://github.com/huggingface/candle.git
synced 2025-06-18 11:37:11 +00:00
Fixing the cached build.
- `rerun-if-changed=src/` covers any modification under `src/` (including file additions).
- No longer rewriting `src/lib.rs` every time (doing so triggers new builds).
- Also using the modified timestamp to trigger kernel recompilation (should prevent skipping modified source files).
- Will also rewrite `src/lib.rs` when a kernel is removed.
This commit is contained in:
@ -3,9 +3,8 @@ fn main() {
|
|||||||
println!("cargo:rerun-if-changed=build.rs");
|
println!("cargo:rerun-if-changed=build.rs");
|
||||||
|
|
||||||
cuda::set_include_dir();
|
cuda::set_include_dir();
|
||||||
let kernel_paths = cuda::build_ptx();
|
let (write, kernel_paths) = cuda::build_ptx();
|
||||||
// println!("cargo:warning=kernels {kernel_paths:?}");
|
if write {
|
||||||
|
|
||||||
let mut file = std::fs::File::create("src/lib.rs").unwrap();
|
let mut file = std::fs::File::create("src/lib.rs").unwrap();
|
||||||
for kernel_path in kernel_paths {
|
for kernel_path in kernel_paths {
|
||||||
let name = kernel_path.file_stem().unwrap().to_str().unwrap();
|
let name = kernel_path.file_stem().unwrap().to_str().unwrap();
|
||||||
@ -21,6 +20,7 @@ fn main() {
|
|||||||
file.write_all(&[b'\n']).unwrap();
|
file.write_all(&[b'\n']).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
mod cuda {
|
mod cuda {
|
||||||
pub fn set_include_dir() {
|
pub fn set_include_dir() {
|
||||||
@ -70,7 +70,7 @@ mod cuda {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn build_ptx() -> Vec<std::path::PathBuf> {
|
pub fn build_ptx() -> (bool, Vec<std::path::PathBuf>) {
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
let out_dir = std::env::var("OUT_DIR").unwrap();
|
let out_dir = std::env::var("OUT_DIR").unwrap();
|
||||||
@ -83,17 +83,13 @@ mod cuda {
|
|||||||
.map(|p| p.unwrap())
|
.map(|p| p.unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
for out_path in glob::glob(&format!("{out_dir}/**/*.ptx")).unwrap() {
|
|
||||||
std::fs::remove_file(out_path.unwrap()).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("cargo:rerun-if-changed=src/");
|
println!("cargo:rerun-if-changed=src/");
|
||||||
for path in &kernel_paths {
|
// for path in &kernel_paths {
|
||||||
println!("cargo:rerun-if-changed={}", path.display());
|
// println!("cargo:rerun-if-changed={}", path.display());
|
||||||
}
|
// }
|
||||||
|
|
||||||
for path in &mut include_directories {
|
for path in &mut include_directories {
|
||||||
println!("cargo:rerun-if-changed={}", path.display());
|
// println!("cargo:rerun-if-changed={}", path.display());
|
||||||
let destination =
|
let destination =
|
||||||
std::format!("{out_dir}/{}", path.file_name().unwrap().to_str().unwrap());
|
std::format!("{out_dir}/{}", path.file_name().unwrap().to_str().unwrap());
|
||||||
std::fs::copy(path.clone(), destination).unwrap();
|
std::fs::copy(path.clone(), destination).unwrap();
|
||||||
@ -110,21 +106,6 @@ mod cuda {
|
|||||||
.map(|s| "-I".to_string() + &s.into_os_string().into_string().unwrap())
|
.map(|s| "-I".to_string() + &s.into_os_string().into_string().unwrap())
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
#[cfg(feature = "ci-check")]
|
|
||||||
{
|
|
||||||
println!("cargo:rustc-env=CUDA_COMPUTE_CAP=ci");
|
|
||||||
|
|
||||||
for mut kernel_path in kernel_paths.into_iter() {
|
|
||||||
kernel_path.set_extension("ptx");
|
|
||||||
|
|
||||||
let mut ptx_path: PathBuf = out_dir.clone().into();
|
|
||||||
ptx_path.push(kernel_path.as_path().file_name().unwrap());
|
|
||||||
std::fs::File::create(ptx_path).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(not(feature = "ci-check"))]
|
|
||||||
{
|
|
||||||
// let start = std::time::Instant::now();
|
// let start = std::time::Instant::now();
|
||||||
|
|
||||||
// Grab compute code from nvidia-smi
|
// Grab compute code from nvidia-smi
|
||||||
@ -169,7 +150,9 @@ mod cuda {
|
|||||||
// If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
|
// If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
|
||||||
// then choose the highest gpu code in nvcc
|
// then choose the highest gpu code in nvcc
|
||||||
if compute_cap > max_nvcc_code {
|
if compute_cap > max_nvcc_code {
|
||||||
println!("cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}.");
|
println!(
|
||||||
|
"cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}."
|
||||||
|
);
|
||||||
compute_cap = max_nvcc_code;
|
compute_cap = max_nvcc_code;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,10 +164,6 @@ mod cuda {
|
|||||||
|
|
||||||
println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
|
println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
|
||||||
|
|
||||||
kernel_paths
|
|
||||||
.iter()
|
|
||||||
.for_each(|p| println!("cargo:rerun-if-changed={}", p.display()));
|
|
||||||
|
|
||||||
let children = kernel_paths
|
let children = kernel_paths
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.flat_map(|p| {
|
.flat_map(|p| {
|
||||||
@ -192,7 +171,14 @@ mod cuda {
|
|||||||
output.set_extension("ptx");
|
output.set_extension("ptx");
|
||||||
let output_filename = std::path::Path::new(&out_dir).to_path_buf().join("out").with_file_name(output.file_name().unwrap());
|
let output_filename = std::path::Path::new(&out_dir).to_path_buf().join("out").with_file_name(output.file_name().unwrap());
|
||||||
|
|
||||||
if output_filename.exists(){
|
let ignore = if output_filename.exists() {
|
||||||
|
let out_modified = output_filename.metadata().unwrap().modified().unwrap();
|
||||||
|
let in_modified = p.metadata().unwrap().modified().unwrap();
|
||||||
|
out_modified.duration_since(in_modified).is_ok()
|
||||||
|
}else{
|
||||||
|
false
|
||||||
|
};
|
||||||
|
if ignore{
|
||||||
None
|
None
|
||||||
}else{
|
}else{
|
||||||
let mut command = std::process::Command::new("nvcc");
|
let mut command = std::process::Command::new("nvcc");
|
||||||
@ -204,13 +190,18 @@ mod cuda {
|
|||||||
// .arg("--expt-relaxed-constexpr")
|
// .arg("--expt-relaxed-constexpr")
|
||||||
.args(&include_options)
|
.args(&include_options)
|
||||||
.arg(p);
|
.arg(p);
|
||||||
// println!(
|
|
||||||
// "cargo:warning={command:?}");
|
|
||||||
Some((p, command.spawn()
|
Some((p, command.spawn()
|
||||||
.expect("nvcc failed to start. Ensure that you have CUDA installed and that `nvcc` is in your PATH.").wait_with_output()))
|
.expect("nvcc failed to start. Ensure that you have CUDA installed and that `nvcc` is in your PATH.").wait_with_output()))
|
||||||
}})
|
}})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let ptx_paths: Vec<PathBuf> = glob::glob(&format!("{out_dir}/**/*.ptx"))
|
||||||
|
.unwrap()
|
||||||
|
.map(|p| p.unwrap())
|
||||||
|
.collect();
|
||||||
|
// We should rewrite `src/lib.rs` only if there are some newly compiled kernels, or removed
|
||||||
|
// some old ones
|
||||||
|
let write = !children.is_empty() || kernel_paths.len() < ptx_paths.len();
|
||||||
for (kernel_path, child) in children {
|
for (kernel_path, child) in children {
|
||||||
let output = child.expect("nvcc failed to run. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
|
let output = child.expect("nvcc failed to run. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
|
||||||
assert!(
|
assert!(
|
||||||
@ -220,13 +211,6 @@ mod cuda {
|
|||||||
String::from_utf8_lossy(&output.stderr)
|
String::from_utf8_lossy(&output.stderr)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
(write, kernel_paths)
|
||||||
// println!(
|
|
||||||
// "cargo:warning=Compiled {:?} cuda kernels in {:?}",
|
|
||||||
// n,
|
|
||||||
// start.elapsed()
|
|
||||||
// );
|
|
||||||
}
|
|
||||||
kernel_paths
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user