-
Notifications
You must be signed in to change notification settings - Fork 13.9k
Modern Bert Support #15641
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Modern Bert Support #15641
Changes from 1 commit
6151592
6643c5a
ac67fc6
cc40378
41b6864
cc3d7ab
4ceb828
18c0c23
bffe3c9
8f32843
9805635
40249dd
853f344
2a1c750
c73eb68
ca353d3
6d86944
39c0291
e101005
044bc7d
e296a0b
2bacfb0
4e7c879
20d448a
db4f565
da0604a
43a2980
e368442
7036cc8
2522ce8
e043815
35667f2
3cdd650
86adde6
46f2182
33eed31
61a0b03
3bbf671
f362878
3976d77
ff9f8c2
97e1de4
4187cf5
e3ac2ae
72f1f51
952c302
2ea2862
da3a1c9
89431b6
43332bf
b442b43
94e7ece
30fe2a7
c386eb0
727008f
93c1744
7b956a3
9b0f38b
c9fa285
e1abf73
edbe4d2
1f54cf4
0082680
9715c2a
1d01245
a6306ce
3581b68
7c15ba5
070b30b
7a1f06a
04167d9
7e8e1a0
b66c2fd
a9441fb
9e078b8
e0ca150
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -855,7 +855,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { | |
| type = LLM_TYPE_149M; break; // modern-bert-base | ||
| case 28: | ||
| type = LLM_TYPE_395M; break; // modern-bert-large | ||
| default: type = LLM_TYPE_UNKNOWN; | ||
| default: type = LLM_TYPE_UNKNOWN; | ||
| } | ||
| } break; | ||
| case LLM_ARCH_JINA_BERT_V2: | ||
|
|
@@ -2993,11 +2993,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) { | |
| layer.layer_out_norm_b = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd}, 0); | ||
| } | ||
| } break; | ||
| case LLM_ARCH_MODERN_BERT: | ||
| case LLM_ARCH_MODERN_BERT: | ||
| { | ||
| tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); | ||
| tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0); | ||
|
|
||
| output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); | ||
|
|
||
| for(int i = 0; i < n_layer; ++i) { | ||
|
|
@@ -3006,15 +3006,15 @@ bool llama_model::load_tensors(llama_model_loader & ml) { | |
| if ( i != 0 ) { | ||
| layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); | ||
| } else{ | ||
| // layer 0 uses identity | ||
| // layer 0 uses identity | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this true only for some models?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I believe it's the same case for the granite small embd model, but this is defined in the transformers implementation of modern bert |
||
| layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, TENSOR_NOT_REQUIRED); | ||
| } | ||
|
|
||
|
|
||
| layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, 3 * n_embd }, 0); | ||
| layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); | ||
|
|
||
| layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, 2 * n_ff}, 0); | ||
| layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, 2 * n_ff}, 0); | ||
| layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0); | ||
| layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); | ||
| } | ||
|
|
@@ -8209,7 +8209,7 @@ struct llm_build_modern_bert : public llm_graph_context { | |
|
|
||
| ggml_tensor * cur = nullptr; | ||
| ggml_tensor * inpL = nullptr; | ||
| ggml_tensor * inp_pos = build_inp_pos(); | ||
| ggml_tensor * inp_pos = build_inp_pos(); | ||
|
|
||
| // construct input embeddings (token, type, position) | ||
| inpL = build_inp_embd(model.tok_embd); | ||
|
|
@@ -8221,7 +8221,7 @@ struct llm_build_modern_bert : public llm_graph_context { | |
|
|
||
| ggml_tensor * inp_out_ids = build_inp_out_ids(); | ||
|
|
||
| auto * inp_attn = build_attn_inp_no_cache(); | ||
| auto * inp_attn = build_attn_inp_no_cache(); | ||
|
|
||
| for (int il = 0; il < n_layer; ++il) { | ||
| ggml_tensor * cur = inpL; | ||
|
|
@@ -19831,7 +19831,7 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params, | |
| case LLM_ARCH_NOMIC_BERT_MOE: | ||
| case LLM_ARCH_NEO_BERT: | ||
| case LLM_ARCH_WAVTOKENIZER_DEC: | ||
| case LLM_ARCH_MODERN_BERT: | ||
| case LLM_ARCH_MODERN_BERT: | ||
| case LLM_ARCH_GEMMA_EMBEDDING: | ||
| case LLM_ARCH_DREAM: | ||
| case LLM_ARCH_LLADA: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.