mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 02:38:10 +00:00
Add a flag to force running the quantized model on CPUs. (#1778)

* Add a flag to force running the quantized model on CPUs.
* Add encodec to the readme.
This commit is contained in:
@@ -83,6 +83,8 @@ We also provide a some command line based examples using state of the art models
 - [Replit-code-v1.5](./candle-examples/examples/replit-code/): a 3.3b LLM specialized for code completion.
 - [Yi-6B / Yi-34B](./candle-examples/examples/yi/): two bilingual
   (English/Chinese) general LLMs with 6b and 34b parameters.
+- [EnCodec](./candle-examples/examples/encodec/): high-quality audio compression
+  model using residual vector quantization.
 - [Quantized LLaMA](./candle-examples/examples/quantized/): quantized version of
   the LLaMA model using the same quantization techniques as
   [llama.cpp](https://github.com/ggerganov/llama.cpp).
@@ -210,13 +212,15 @@ If you have an addition to this list, please submit a pull request.
 - Text to text.
     - T5 and its variants: FlanT5, UL2, MADLAD400 (translation), CoEdit (Grammar correction).
     - Marian MT (Machine Translation).
-    - Whisper (multi-lingual support).
 - Text to image.
     - Stable Diffusion v1.5, v2.1, XL v1.0.
     - Wurstchen v2.
 - Image to text.
     - BLIP.
     - TrOCR.
+- Audio.
+    - Whisper, multi-lingual text-to-speech.
+    - EnCodec, audio compression model.
 - Computer Vision Models.
     - DINOv2, ConvMixer, EfficientNet, ResNet, ViT, VGG, RepVGG, ConvNeXT,
       ConvNeXTv2.
@@ -216,6 +216,10 @@ struct Args {
     #[arg(long)]
     split_prompt: bool,

+    /// Run on CPU rather than GPU even if a GPU is available.
+    #[arg(long)]
+    cpu: bool,
+
     /// Penalty to be applied for repeating tokens, 1. means no penalty.
     #[arg(long, default_value_t = 1.1)]
     repeat_penalty: f32,
@@ -365,7 +369,7 @@ fn main() -> anyhow::Result<()> {
     let model_path = args.model()?;
     let mut file = std::fs::File::open(&model_path)?;
     let start = std::time::Instant::now();
-    let device = candle_examples::device(false)?;
+    let device = candle_examples::device(args.cpu)?;

     let mut model = match model_path.extension().and_then(|v| v.to_str()) {
         Some("gguf") => {
|
Reference in New Issue
Block a user