mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Switch the default to using the faster kernels. (#1978)
* Switch the default to using the faster kernels.
* Add the force-dmmv flag.
This commit is contained in:
@ -196,6 +196,10 @@ struct Args {
|
||||
/// The context size to consider for the repeat penalty.
|
||||
#[arg(long, default_value_t = 64)]
|
||||
repeat_last_n: usize,
|
||||
|
||||
/// Use the slower dmmv cuda kernel.
|
||||
#[arg(long)]
|
||||
force_dmmv: bool,
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
@ -203,6 +207,9 @@ fn main() -> Result<()> {
|
||||
use tracing_subscriber::prelude::*;
|
||||
|
||||
let args = Args::parse();
|
||||
#[cfg(feature = "cuda")]
|
||||
candle::quantized::cuda::set_force_dmmv(args.force_dmmv);
|
||||
|
||||
let _guard = if args.tracing {
|
||||
let (chrome_layer, guard) = ChromeLayerBuilder::new().build();
|
||||
tracing_subscriber::registry().with(chrome_layer).init();
|
||||
|
Reference in New Issue
Block a user