mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 04:00:28 +00:00
First pass (Quantized scaffolding work done + quantized example scaffolding).
This commit is contained in:
@ -232,6 +232,7 @@ fn main() -> anyhow::Result<()> {
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
let args = Args::parse();
|
||||
let device = candle_examples::device(false)?;
|
||||
let temperature = if args.temperature == 0. {
|
||||
None
|
||||
} else {
|
||||
@ -276,10 +277,10 @@ fn main() -> anyhow::Result<()> {
|
||||
&format_size(total_size_in_bytes),
|
||||
start.elapsed().as_secs_f32(),
|
||||
);
|
||||
ModelWeights::from_gguf(model, &mut file)?
|
||||
ModelWeights::from_gguf(model, &mut file, &device)?
|
||||
}
|
||||
Some("ggml" | "bin") | Some(_) | None => {
|
||||
let model = ggml_file::Content::read(&mut file)?;
|
||||
let model = ggml_file::Content::read(&mut file, &device)?;
|
||||
let mut total_size_in_bytes = 0;
|
||||
for (_, tensor) in model.tensors.iter() {
|
||||
let elem_count = tensor.shape().elem_count();
|
||||
|
@ -3,16 +3,27 @@ pub mod imagenet;
|
||||
pub mod token_output_stream;
|
||||
|
||||
use candle::{Device, Result, Tensor};
|
||||
use candle::utils::{cuda_is_available, metal_is_available};
|
||||
|
||||
pub fn device(cpu: bool) -> Result<Device> {
|
||||
if cpu {
|
||||
Ok(Device::Cpu)
|
||||
} else {
|
||||
let device = Device::cuda_if_available(0)?;
|
||||
if !device.is_cuda() {
|
||||
println!("Running on CPU, to run on GPU, build this example with `--features cuda`");
|
||||
if cuda_is_available(){
|
||||
Ok(Device::new_cuda(0)?)
|
||||
}else if metal_is_available(){
|
||||
Ok(Device::new_metal(0)?)
|
||||
}else{
|
||||
#[cfg(all(target_os="macos", target_arch="aarch64"))]
|
||||
{
|
||||
println!("Running on CPU, to run on GPU(metal), build this example with `--features metal`");
|
||||
}
|
||||
#[cfg(not(all(target_os="macos", target_arch="aarch64")))]
|
||||
{
|
||||
println!("Running on CPU, to run on GPU, build this example with `--features cuda`");
|
||||
}
|
||||
Ok(Device::Cpu)
|
||||
}
|
||||
Ok(device)
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user