Use the whisper-v3 tokenizer now that it has been added. (#1337)

* Use the whisper-v3 tokenizer now that it has been added.

* Use the appropriate nospeech token.
This commit is contained in:
Laurent Mazare
2023-11-16 22:10:31 +00:00
committed by GitHub
parent a1f41ab37b
commit 9ab3f9729f
3 changed files with 16 additions and 8 deletions

View File

@@ -43,4 +43,4 @@ pub const TRANSCRIBE_TOKEN: &str = "<|transcribe|>";
// Special-token marker strings in `<|...|>` form. The accompanying commit
// message refers to the whisper-v3 tokenizer; how these strings are consumed
// (presumably looked up in the tokenizer vocabulary) is not visible in this hunk.

/// Marker string `<|translate|>`.
pub const TRANSLATE_TOKEN: &str = "<|translate|>";
/// Marker string `<|notimestamps|>`.
pub const NO_TIMESTAMPS_TOKEN: &str = "<|notimestamps|>";
/// Marker string `<|endoftext|>`.
pub const EOT_TOKEN: &str = "<|endoftext|>";
// NOTE(review): the hunk header reports 4 lines on each side while 5 lines are
// shown, so this single-string constant looks like the removed (pre-change)
// line and the array below looks like its replacement — confirm against the
// raw diff before treating both as coexisting definitions.
pub const NO_SPEECH_TOKEN: &str = "<|nocaptions|>";
/// Two candidate spellings of the no-speech marker, in this order:
/// `<|nocaptions|>` then `<|nospeech|>`.
// NOTE(review): presumably callers try each entry until one exists in the
// loaded tokenizer, and `<|nospeech|>` is the whisper-v3 spelling — verify
// against the call sites, which are not visible here.
pub const NO_SPEECH_TOKENS: [&str; 2] = ["<|nocaptions|>", "<|nospeech|>"];