Q6K quantization (#495)

* Print the detected arch options.

* Add the q6k quantization.

* Add a currently broken test.

* Bugfix.

* Bugfix.

* Another bugfix.

* Another bugfix + get the test to work.
This commit is contained in:
Laurent Mazare
2023-08-17 22:22:57 +01:00
committed by GitHub
parent fc81af1712
commit 557b2c28dd
4 changed files with 257 additions and 2 deletions

View File

@@ -348,6 +348,14 @@ fn main() -> anyhow::Result<()> {
None
};
println!(
"avx: {}, neon: {}, simd128: {}, f16c: {}",
candle::utils::with_avx(),
candle::utils::with_neon(),
candle::utils::with_simd128(),
candle::utils::with_f16c()
);
let mut file = std::fs::File::open(&args.model()?)?;
let start = std::time::Instant::now();
let model = Content::read(&mut file)?;