mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
More multilingual support for whisper. (#419)
* More multilingual support for whisper.
* Use the language token appropriately.
This commit is contained in:
@@ -16,11 +16,21 @@ pub struct Config {
|
||||
// pub n_text_state: usize,
|
||||
pub decoder_attention_heads: usize, // n_text_head
|
||||
pub decoder_layers: usize, // n_text_layer
|
||||
pub suppress_tokens: Vec<u32>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
#[allow(dead_code)]
|
||||
pub fn tiny_en() -> Self {
|
||||
let suppress_tokens = vec![
|
||||
1, 2, 7, 8, 9, 10, 14, 25, 26, 27, 28, 29, 31, 58, 59, 60, 61, 62, 63, 90, 91, 92, 93,
|
||||
357, 366, 438, 532, 685, 705, 796, 930, 1058, 1220, 1267, 1279, 1303, 1343, 1377, 1391,
|
||||
1635, 1782, 1875, 2162, 2361, 2488, 3467, 4008, 4211, 4600, 4808, 5299, 5855, 6329,
|
||||
7203, 9609, 9959, 10563, 10786, 11420, 11709, 11907, 13163, 13697, 13700, 14808, 15306,
|
||||
16410, 16791, 17992, 19203, 19510, 20724, 22305, 22935, 27007, 30109, 30420, 33409,
|
||||
34949, 40283, 40493, 40549, 47282, 49146, 50257, 50357, 50358, 50359, 50360, 50361,
|
||||
50362,
|
||||
];
|
||||
Self {
|
||||
num_mel_bins: 80,
|
||||
vocab_size: 51864,
|
||||
@@ -32,6 +42,7 @@ impl Config {
|
||||
// n_text_state: 384,
|
||||
decoder_attention_heads: 6,
|
||||
decoder_layers: 4,
|
||||
suppress_tokens,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user