mirror of
https://github.com/huggingface/candle.git
synced 2025-06-19 11:56:45 +00:00
Tweak the kv-cache flag.
This commit is contained in:
@ -448,9 +448,9 @@ struct Args {
|
|||||||
#[arg(long, default_value_t = 100)]
|
#[arg(long, default_value_t = 100)]
|
||||||
sample_len: usize,
|
sample_len: usize,
|
||||||
|
|
||||||
/// Enable the key-value cache.
|
/// Disable the key-value cache.
|
||||||
#[arg(long, default_value_t = true)]
|
#[arg(long)]
|
||||||
use_kv_cache: bool,
|
no_kv_cache: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
@ -464,7 +464,7 @@ async fn main() -> Result<()> {
|
|||||||
Device::new_cuda(0)?
|
Device::new_cuda(0)?
|
||||||
};
|
};
|
||||||
let config = Config::config_7b();
|
let config = Config::config_7b();
|
||||||
let cache = Cache::new(args.use_kv_cache, &config, &device);
|
let cache = Cache::new(!args.no_kv_cache, &config, &device);
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
let (llama, tokenizer_filename) = match args.npy {
|
let (llama, tokenizer_filename) = match args.npy {
|
||||||
Some(npy) => {
|
Some(npy) => {
|
||||||
|
Reference in New Issue
Block a user