Tweak some quantized args (#692)

* Print the args + change the default temp/repeat penalty. * Minor formatting tweak.
2025-06-16 18:48:51 +00:00 · 2023-08-31 18:25:21 +02:00
parent 7509c98970
commit 7cef35c84d
1 changed files with 14 additions and 5 deletions
--- a/candle-examples/examples/quantized/main.rs
+++ b/candle-examples/examples/quantized/main.rs
@ -67,9 +67,9 @@ struct Args {
    #[arg(long)]
    tokenizer: Option<String>,

-    /// The temperature used to generate samples.
-    #[arg(long)]
-    temperature: Option<f64>,
+    /// The temperature used to generate samples, use 0 for greedy sampling.
+    #[arg(long, default_value_t = 0.8)]
+    temperature: f64,

    /// The seed to use when generating random samples.
    #[arg(long, default_value_t = 299792458)]
@ -84,7 +84,7 @@ struct Args {
    verbose_prompt: bool,

    /// Penalty to be applied for repeating tokens, 1. means no penalty.
-    #[arg(long, default_value_t = 1.0)]
+    #[arg(long, default_value_t = 1.1)]
    repeat_penalty: f32,

    /// The context size to consider for the repeat penalty.
@ -188,6 +188,11 @@ fn main() -> anyhow::Result<()> {
    use tracing_subscriber::prelude::*;

    let args = Args::parse();
+    let temperature = if args.temperature == 0. {
+        None
+    } else {
+        Some(args.temperature)
+    };
    let _guard = if args.tracing {
        let (chrome_layer, guard) = ChromeLayerBuilder::new().build();
        tracing_subscriber::registry().with(chrome_layer).init();
@ -203,6 +208,10 @@ fn main() -> anyhow::Result<()> {
        candle::utils::with_simd128(),
        candle::utils::with_f16c()
    );
+    println!(
+        "temp: {:.2} repeat-penalty: {:.2} repeat-last-n: {}",
+        args.temperature, args.repeat_penalty, args.repeat_last_n
+    );

    let model_path = args.model()?;
    let mut file = std::fs::File::open(&model_path)?;
@ -301,7 +310,7 @@ fn main() -> anyhow::Result<()> {
            prompt_tokens
        };
        let mut all_tokens = vec![];
-        let mut logits_processor = LogitsProcessor::new(args.seed, args.temperature);
+        let mut logits_processor = LogitsProcessor::new(args.seed, temperature);

        let start_prompt_processing = std::time::Instant::now();
        let mut next_token = {