mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Remove the end of text tokens. (#289)
This commit is contained in:
@@ -266,7 +266,8 @@ fn run_eval(tokenizer: Tokenizer, config_path: &std::path::PathBuf, args: Args)
|
||||
let file = std::io::BufReader::new(file);
|
||||
let mut tokens = vec![];
|
||||
for line in file.lines() {
|
||||
- let line = tokenizer.encode(line?, false).map_err(E::msg)?;
|
||||
+ let line = line?.replace("<|endoftext|>", "");
|
||||
+ let line = tokenizer.encode(line, false).map_err(E::msg)?;
|
||||
tokens.push(line.get_ids().to_vec())
|
||||
}
|
||||
let tokens = tokens.concat();
|
||||
|
Reference in New Issue
Block a user