Fixing the cached build.

- `rerun-if-changed=src/` covers any modification under `src/` (including file
  additions).
- No longer rewriting `src/lib.rs` on every build (rewriting it triggers new
  builds).
- Also using the modified timestamps to decide when a kernel needs recompiling
  (this should prevent modified source files from being skipped); see the
  sketch below.
- `src/lib.rs` will also be rewritten when a kernel is removed.
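
A minimal sketch of that timestamp check, outside the build script: the helper name and the example paths are illustrative, but the comparison mirrors the `ignore` computation added to `build_ptx` in the diff below.

    // Illustrative helper (not part of build.rs): reuse a cached .ptx only when it
    // is at least as new as the .cu source, mirroring the `ignore` check in the diff.
    use std::path::Path;

    fn kernel_is_up_to_date(src: &Path, ptx: &Path) -> bool {
        if !ptx.exists() {
            return false;
        }
        let out_modified = ptx.metadata().unwrap().modified().unwrap();
        let in_modified = src.metadata().unwrap().modified().unwrap();
        // Ok(_) means the output mtime is >= the source mtime, so nvcc can be skipped.
        out_modified.duration_since(in_modified).is_ok()
    }

    fn main() {
        // Example paths; any .cu/.ptx pair works.
        let skip = kernel_is_up_to_date(Path::new("src/affine.cu"), Path::new("out/affine.ptx"));
        println!("skip nvcc: {skip}");
    }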
Nicolas Patry
2023-07-05 18:00:58 +02:00
parent 4e80319147
commit fefdc0228a

build.rs

@@ -3,22 +3,22 @@ fn main() {
     println!("cargo:rerun-if-changed=build.rs");
     cuda::set_include_dir();
-    let kernel_paths = cuda::build_ptx();
-    // println!("cargo:warning=kernels {kernel_paths:?}");
-
-    let mut file = std::fs::File::create("src/lib.rs").unwrap();
-    for kernel_path in kernel_paths {
-        let name = kernel_path.file_stem().unwrap().to_str().unwrap();
-        file.write_all(
-            format!(
-                r#"pub const {}: &str = include_str!(concat!(env!("OUT_DIR"), "/{}.ptx"));"#,
-                name.to_uppercase().replace('.', "_"),
-                name
-            )
-            .as_bytes(),
-        )
-        .unwrap();
-        file.write_all(&[b'\n']).unwrap();
+    let (write, kernel_paths) = cuda::build_ptx();
+    if write {
+        let mut file = std::fs::File::create("src/lib.rs").unwrap();
+        for kernel_path in kernel_paths {
+            let name = kernel_path.file_stem().unwrap().to_str().unwrap();
+            file.write_all(
+                format!(
+                    r#"pub const {}: &str = include_str!(concat!(env!("OUT_DIR"), "/{}.ptx"));"#,
+                    name.to_uppercase().replace('.', "_"),
+                    name
+                )
+                .as_bytes(),
+            )
+            .unwrap();
+            file.write_all(&[b'\n']).unwrap();
+        }
     }
 }
@@ -70,7 +70,7 @@ mod cuda {
         );
     }
-    pub fn build_ptx() -> Vec<std::path::PathBuf> {
+    pub fn build_ptx() -> (bool, Vec<std::path::PathBuf>) {
         use rayon::prelude::*;
         use std::path::PathBuf;
         let out_dir = std::env::var("OUT_DIR").unwrap();
@@ -83,17 +83,13 @@ mod cuda {
             .map(|p| p.unwrap())
             .collect();
 
-        for out_path in glob::glob(&format!("{out_dir}/**/*.ptx")).unwrap() {
-            std::fs::remove_file(out_path.unwrap()).unwrap();
-        }
-
         println!("cargo:rerun-if-changed=src/");
-        for path in &kernel_paths {
-            println!("cargo:rerun-if-changed={}", path.display());
-        }
+        // for path in &kernel_paths {
+        //     println!("cargo:rerun-if-changed={}", path.display());
+        // }
         for path in &mut include_directories {
-            println!("cargo:rerun-if-changed={}", path.display());
+            // println!("cargo:rerun-if-changed={}", path.display());
             let destination =
                 std::format!("{out_dir}/{}", path.file_name().unwrap().to_str().unwrap());
             std::fs::copy(path.clone(), destination).unwrap();
@@ -110,123 +106,111 @@ mod cuda {
             .map(|s| "-I".to_string() + &s.into_os_string().into_string().unwrap())
             .collect::<Vec<_>>();
 
-        #[cfg(feature = "ci-check")]
-        {
-            println!("cargo:rustc-env=CUDA_COMPUTE_CAP=ci");
-            for mut kernel_path in kernel_paths.into_iter() {
-                kernel_path.set_extension("ptx");
-
-                let mut ptx_path: PathBuf = out_dir.clone().into();
-                ptx_path.push(kernel_path.as_path().file_name().unwrap());
-                std::fs::File::create(ptx_path).unwrap();
-            }
-        }
-
-        #[cfg(not(feature = "ci-check"))]
-        {
         // let start = std::time::Instant::now();
 
         // Grab compute code from nvidia-smi
         let mut compute_cap = {
             let out = std::process::Command::new("nvidia-smi")
                 .arg("--query-gpu=compute_cap")
                 .arg("--format=csv")
                 .output()
                 .expect("`nvidia-smi` failed. Ensure that you have CUDA installed and that `nvidia-smi` is in your PATH.");
             let out = std::str::from_utf8(&out.stdout).unwrap();
             let mut lines = out.lines();
             assert_eq!(lines.next().unwrap(), "compute_cap");
             let cap = lines.next().unwrap().replace('.', "");
             cap.parse::<usize>().unwrap()
         };
 
         // Grab available GPU codes from nvcc and select the highest one
         let max_nvcc_code = {
             let out = std::process::Command::new("nvcc")
                 .arg("--list-gpu-code")
                 .output()
                 .expect("`nvcc` failed. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
             let out = std::str::from_utf8(&out.stdout).unwrap();
             let out = out.lines().collect::<Vec<&str>>();
             let mut codes = Vec::with_capacity(out.len());
             for code in out {
                 let code = code.split('_').collect::<Vec<&str>>();
                 if !code.is_empty() && code.contains(&"sm") {
                     if let Ok(num) = code[1].parse::<usize>() {
                         codes.push(num);
                     }
                 }
             }
             codes.sort();
             if !codes.contains(&compute_cap) {
                 panic!("nvcc cannot target gpu arch {compute_cap}. Available nvcc targets are {codes:?}.");
             }
             *codes.last().unwrap()
         };
 
         // If nvidia-smi compute_cap is higher than the highest gpu code from nvcc,
         // then choose the highest gpu code in nvcc
         if compute_cap > max_nvcc_code {
-            println!("cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}.");
+            println!(
+                "cargo:warning=Lowering gpu arch {compute_cap} to max nvcc target {max_nvcc_code}."
+            );
             compute_cap = max_nvcc_code;
         }
 
         println!("cargo:rerun-if-env-changed=CUDA_COMPUTE_CAP");
         if let Ok(compute_cap_str) = std::env::var("CUDA_COMPUTE_CAP") {
             compute_cap = compute_cap_str.parse::<usize>().unwrap();
             println!("cargo:warning=Using gpu arch {compute_cap} from $CUDA_COMPUTE_CAP");
         }
 
         println!("cargo:rustc-env=CUDA_COMPUTE_CAP=sm_{compute_cap}");
 
-        kernel_paths
-            .iter()
-            .for_each(|p| println!("cargo:rerun-if-changed={}", p.display()));
-
         let children = kernel_paths
             .par_iter()
             .flat_map(|p| {
                 let mut output = p.clone();
                 output.set_extension("ptx");
                 let output_filename = std::path::Path::new(&out_dir).to_path_buf().join("out").with_file_name(output.file_name().unwrap());
-                if output_filename.exists(){
+                let ignore = if output_filename.exists() {
+                    let out_modified = output_filename.metadata().unwrap().modified().unwrap();
+                    let in_modified = p.metadata().unwrap().modified().unwrap();
+                    out_modified.duration_since(in_modified).is_ok()
+                }else{
+                    false
+                };
+                if ignore{
                     None
                 }else{
                     let mut command = std::process::Command::new("nvcc");
                     command.arg(format!("--gpu-architecture=sm_{compute_cap}"))
                         .arg("--ptx")
                         .args(["--default-stream", "per-thread"])
                         .args(["--output-directory", &out_dir])
                         // Flash attention only
                         // .arg("--expt-relaxed-constexpr")
                         .args(&include_options)
                         .arg(p);
-                    // println!(
-                    // "cargo:warning={command:?}");
                     Some((p, command.spawn()
                         .expect("nvcc failed to start. Ensure that you have CUDA installed and that `nvcc` is in your PATH.").wait_with_output()))
                 }})
             .collect::<Vec<_>>();
 
+        let ptx_paths: Vec<PathBuf> = glob::glob(&format!("{out_dir}/**/*.ptx"))
+            .unwrap()
+            .map(|p| p.unwrap())
+            .collect();
+        // We should rewrite `src/lib.rs` only if there are some newly compiled kernels, or removed
+        // some old ones
+        let write = !children.is_empty() || kernel_paths.len() < ptx_paths.len();
         for (kernel_path, child) in children {
             let output = child.expect("nvcc failed to run. Ensure that you have CUDA installed and that `nvcc` is in your PATH.");
             assert!(
                 output.status.success(),
                 "nvcc error while compiling {kernel_path:?}:\n\n# stdout\n{:#}\n\n# stderr\n{:#}",
                 String::from_utf8_lossy(&output.stdout),
                 String::from_utf8_lossy(&output.stderr)
             );
         }
-
-        // println!(
-        // "cargo:warning=Compiled {:?} cuda kernels in {:?}",
-        // n,
-        // start.elapsed()
-        // );
-        }
-        kernel_paths
+        (write, kernel_paths)
     }
 }
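
For context on the generated file: each line that build.rs writes into `src/lib.rs` (first hunk above) embeds one compiled PTX file as a string constant, which downstream code then hands to its CUDA module loader at runtime. A stand-alone illustration, with the `AFFINE` constant and its value as placeholders rather than anything introduced by this commit:

    // Placeholder mirroring one generated line; the real constant is produced by
    // build.rs and points into OUT_DIR, e.g.:
    // pub const AFFINE: &str = include_str!(concat!(env!("OUT_DIR"), "/affine.ptx"));
    pub const AFFINE: &str = "// compiled PTX text would be embedded here";

    fn main() {
        // A consumer treats the constant as the PTX source for that kernel and
        // passes it to whatever CUDA loader it uses.
        println!("AFFINE ptx is {} bytes", AFFINE.len());
    }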