mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 11:08:52 +00:00
Faster repeat penalty (#1940)
* Avoid the attention mask where possible.
* Faster repeat penalty.
This commit is contained in:
@@ -3,9 +3,13 @@ use candle::{Result, Tensor};
|
|||||||
pub fn apply_repeat_penalty(logits: &Tensor, penalty: f32, context: &[u32]) -> Result<Tensor> {
|
pub fn apply_repeat_penalty(logits: &Tensor, penalty: f32, context: &[u32]) -> Result<Tensor> {
|
||||||
let device = logits.device();
|
let device = logits.device();
|
||||||
let mut logits = logits.to_vec1::<f32>()?;
|
let mut logits = logits.to_vec1::<f32>()?;
|
||||||
let context: std::collections::HashSet<_> = context.iter().collect();
|
let mut already_seen = std::collections::HashSet::new();
|
||||||
for (token_id, logit) in logits.iter_mut().enumerate() {
|
for token_id in context {
|
||||||
if context.contains(&(token_id as u32)) {
|
if already_seen.contains(token_id) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
already_seen.insert(token_id);
|
||||||
|
if let Some(logit) = logits.get_mut(*token_id as usize) {
|
||||||
if *logit >= 0. {
|
if *logit >= 0. {
|
||||||
*logit /= penalty
|
*logit /= penalty
|
||||||
} else {
|
} else {
|
||||||
|
Reference in New Issue
Block a user