Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
models : fix Phi-3 SWA parameters
ggml-ci
  • Loading branch information
ggerganov committed May 17, 2025
commit a4aafa5374975253e6d6e2a519251e91849224be
5 changes: 2 additions & 3 deletions src/llama-graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1316,9 +1316,8 @@ llm_graph_input_attn_kv_unified_iswa * llm_graph_context::build_attn_inp_kv_unif
inp->self_kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask;
}

{
GGML_ASSERT(hparams.n_swa_pattern > 1 && "Use llama_kv_cache_unified for non-SWA");
GGML_ASSERT(hparams.n_swa > 0 && "Use llama_kv_cache_unified for non-SWA");
if (hparams.n_swa_pattern > 1) {
GGML_ASSERT(hparams.n_swa > 0 && "Use llama_kv_cache_unified for non-SWA");

const auto n_kv = kv_self->get_kv_swa()->get_n();

Expand Down
6 changes: 3 additions & 3 deletions src/llama-graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,10 @@ class llm_graph_input_attn_kv_unified : public llm_graph_input_i {

void set_input(const llama_ubatch * ubatch) override;

ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }
ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; }

ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch]
ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch]
ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch]
ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch]

const llama_hparams & hparams;
const llama_cparams & cparams;
Expand Down
22 changes: 18 additions & 4 deletions src/llama-model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,20 +856,34 @@ void llama_model::load_hparams(llama_model_loader & ml) {
// for backward compatibility ; see: https://github.com/ggerganov/llama.cpp/pull/8931
if ((hparams.n_layer == 32 || hparams.n_layer == 40) && hparams.n_ctx_train == 4096) {
// default value for Phi-3-mini-4k-instruct and Phi-3-medium-4k-instruct
LLAMA_LOG_WARN("%s: assuming n_swa = 2047 for Phi-3-mini-4k-instruct and Phi-3-medium-4k-instruct\n", __func__);

hparams.n_swa = 2047;
} else if (hparams.n_layer == 32 && hparams.n_head_kv(0) == 32 && hparams.n_ctx_train == 131072) {
// default value for Phi-3-mini-128k-instruct
// note: this seems incorrect because the window is bigger than the train context?
hparams.n_swa = 262144;
LLAMA_LOG_WARN("%s: assuming n_swa = n_ctx_train for Phi-3-mini-128k-instruct\n", __func__);

hparams.n_swa = hparams.n_ctx_train;
hparams.n_swa_pattern = 1;
} else if (hparams.n_layer == 40 && hparams.n_ctx_train == 131072) {
// default value for Phi-3-medium-128k-instruct
// note: this seems incorrect because the window is equal to the train context?
hparams.n_swa = 131072;
LLAMA_LOG_WARN("%s: assuming n_swa = n_ctx_train for Phi-3-medium-128k-instruct\n", __func__);

hparams.n_swa = hparams.n_ctx_train;
hparams.n_swa_pattern = 1;
}

bool found_swa = ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
if (!found_swa && hparams.n_swa == 0) {
throw std::runtime_error("invalid value for sliding_window");
}

if (hparams.n_swa > hparams.n_ctx_train) {
LLAMA_LOG_WARN("%s: unexpected n_swa: %d > %d, clamping to n_ctx_train\n", __func__, hparams.n_swa, hparams.n_ctx_train);

hparams.n_swa = hparams.n_ctx_train;
hparams.n_swa_pattern = 1;
}
} break;
case LLM_ARCH_PHIMOE:
{
Expand Down