Streaming mode for reporting the generated tokens (#1007)

* Token streaming.

* Use the token output stream.

* Flush the output.

* Ensure that the last characters get reported.
This commit is contained in:
Laurent Mazare
2023-09-30 16:04:11 +02:00
committed by GitHub
parent 4021272875
commit 06207332bc
4 changed files with 96 additions and 11 deletions

View File

@@ -25,6 +25,7 @@ rayon = { workspace = true }
safetensors = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
+tokenizers = { workspace = true, features = ["onig"] }
[dev-dependencies]
anyhow = { workspace = true }
@@ -35,7 +36,6 @@ imageproc = { workspace = true }
memmap2 = { workspace = true }
rand = { workspace = true }
rusttype = { workspace = true }
-tokenizers = { workspace = true, features = ["onig"] }
tracing = { workspace = true }
tracing-chrome = { workspace = true }
tracing-subscriber = { workspace = true }