mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 18:28:24 +00:00
Add a toggle for F16/BF16 accumulation in gemm. (#2141)
* Add a toggle to control f16/bf16 gemm precision.
* Use the faster variant in the quantized example.
* Bugfix.
This commit is contained in:
@@ -374,6 +374,9 @@ fn main() -> anyhow::Result<()> {
    #[cfg(feature = "cuda")]
    candle::quantized::cuda::set_force_dmmv(args.force_dmmv);

    candle::cuda::set_gemm_reduced_precision_f16(true);
    candle::cuda::set_gemm_reduced_precision_bf16(true);

    let _guard = if args.tracing {
        let (chrome_layer, guard) = ChromeLayerBuilder::new().build();
        tracing_subscriber::registry().with(chrome_layer).init();
Reference in New Issue
Block a user