Mirror of https://github.com/huggingface/candle.git (synced 2025-06-17 02:58:50 +00:00)
Fixes for running Phi-4 quantized. (#2714)
This commit is contained in:
@ -28,6 +28,8 @@ enum Which {
|
|||||||
/// Alternative implementation of phi-3, based on llama.
|
/// Alternative implementation of phi-3, based on llama.
|
||||||
#[value(name = "phi-3b")]
|
#[value(name = "phi-3b")]
|
||||||
Phi3b,
|
Phi3b,
|
||||||
|
#[value(name = "phi-4")]
|
||||||
|
Phi4,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
@ -104,6 +106,7 @@ impl Args {
|
|||||||
let repo = match self.which {
|
let repo = match self.which {
|
||||||
Which::Phi2 => "microsoft/phi-2",
|
Which::Phi2 => "microsoft/phi-2",
|
||||||
Which::Phi3 | Which::Phi3b => "microsoft/Phi-3-mini-4k-instruct",
|
Which::Phi3 | Which::Phi3b => "microsoft/Phi-3-mini-4k-instruct",
|
||||||
|
Which::Phi4 => "microsoft/phi-4",
|
||||||
};
|
};
|
||||||
let api = api.model(repo.to_string());
|
let api = api.model(repo.to_string());
|
||||||
api.get("tokenizer.json")?
|
api.get("tokenizer.json")?
|
||||||
@ -128,6 +131,7 @@ impl Args {
|
|||||||
"Phi-3-mini-4k-instruct-q4.gguf",
|
"Phi-3-mini-4k-instruct-q4.gguf",
|
||||||
"5eef2ce24766d31909c0b269fe90c817a8f263fb",
|
"5eef2ce24766d31909c0b269fe90c817a8f263fb",
|
||||||
),
|
),
|
||||||
|
Which::Phi4 => ("microsoft/phi-4-gguf", "phi-4-q4.gguf", "main"),
|
||||||
};
|
};
|
||||||
let api = hf_hub::api::sync::Api::new()?;
|
let api = hf_hub::api::sync::Api::new()?;
|
||||||
api.repo(hf_hub::Repo::with_revision(
|
api.repo(hf_hub::Repo::with_revision(
|
||||||
@ -216,7 +220,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
);
|
);
|
||||||
match args.which {
|
match args.which {
|
||||||
Which::Phi2 => Model::Phi2(Phi2::from_gguf(model, &mut file, &device)?),
|
Which::Phi2 => Model::Phi2(Phi2::from_gguf(model, &mut file, &device)?),
|
||||||
Which::Phi3 => Model::Phi3(Phi3::from_gguf(
|
Which::Phi3 | Which::Phi4 => Model::Phi3(Phi3::from_gguf(
|
||||||
args.use_flash_attn,
|
args.use_flash_attn,
|
||||||
model,
|
model,
|
||||||
&mut file,
|
&mut file,
|
||||||
|
@ -127,7 +127,7 @@ impl LayerWeights {
|
|||||||
.reshape((b_sz, seq_len, self.n_head, self.head_dim))?
|
.reshape((b_sz, seq_len, self.n_head, self.head_dim))?
|
||||||
.transpose(1, 2)?;
|
.transpose(1, 2)?;
|
||||||
let k = k
|
let k = k
|
||||||
.reshape((b_sz, seq_len, self.n_head, self.head_dim))?
|
.reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
|
||||||
.transpose(1, 2)?;
|
.transpose(1, 2)?;
|
||||||
let v = v
|
let v = v
|
||||||
.reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
|
.reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
|
||||||
|
Reference in New Issue
Block a user