Add quantized tensors. (#458)

* Add quantized tensors.

* Implement the debug trait for QTensor.

* Add the QMatMul custom op.
This commit is contained in:
Laurent Mazare
2023-08-15 22:45:53 +01:00
committed by GitHub
parent b8263aa15c
commit ca449f9ee1
3 changed files with 140 additions and 108 deletions

View File

@@ -3,7 +3,6 @@ use clap::Parser;
use std::fs::File;
use candle::quantized::ggml_file::Content;
-use candle::{DType, Device};
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
@@ -18,7 +17,7 @@ fn main() -> Result<()> {
let mut file = File::open(args.model)?;
let start = std::time::Instant::now();
-let model = Content::read(&mut file, DType::F16, &Device::Cpu)?;
+let model = Content::read(&mut file)?;
println!(
"Loaded {:?} tensors in {:?}",