Use the whisper-v3 tokenizer now that it has been added. (#1337)

* Use the whisper-v3 tokenizer now that it has been added.

* Use the appropriate nospeech token.
This commit is contained in:
Laurent Mazare
2023-11-16 22:10:31 +00:00
committed by GitHub
parent a1f41ab37b
commit 9ab3f9729f
3 changed files with 16 additions and 8 deletions

View File

@@ -129,7 +129,13 @@ impl Decoder {
let transcribe_token = token_id(&tokenizer, m::TRANSCRIBE_TOKEN)?;
let translate_token = token_id(&tokenizer, m::TRANSLATE_TOKEN)?;
let eot_token = token_id(&tokenizer, m::EOT_TOKEN)?;
-let no_speech_token = token_id(&tokenizer, m::NO_SPEECH_TOKEN)?;
+let no_speech_token = m::NO_SPEECH_TOKENS
+.iter()
+.find_map(|token| token_id(&tokenizer, token).ok());
+let no_speech_token = match no_speech_token {
+None => anyhow::bail!("unable to find any non-speech token"),
+Some(n) => n,
+};
let seed = 299792458;
Ok(Self {
model,