@@ -3472,6 +3472,7 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "ROPE",
     "ROPE_BACK",
     "ALIBI",
+    "CLAMP",
     "CONV_1D_1S",
     "CONV_1D_2S",
@@ -3482,7 +3483,8 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "MAP_BINARY",
 };
 
-static_assert(GGML_OP_COUNT == 50, "GGML_OP_COUNT != 50");
+static_assert(GGML_OP_COUNT == 51, "GGML_OP_COUNT != 51");
+
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3532,6 +3534,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "rope(x)",
     "rope_back(x)",
     "alibi(x)",
+    "clamp(x)",
     "conv_1d_1s(x)",
     "conv_1d_2s(x)",
@@ -3542,7 +3545,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "f(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 50, "GGML_OP_COUNT != 50");
+static_assert(GGML_OP_COUNT == 51, "GGML_OP_COUNT != 51");
 
 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -6214,7 +6217,8 @@ struct ggml_tensor * ggml_alibi(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         int                   n_past,
-        int                   n_head) {
+        int                   n_head,
+        float                 bias_max) {
     GGML_ASSERT(n_past >= 0);
 
     bool is_node = false;
@@ -6233,6 +6237,8 @@ struct ggml_tensor * ggml_alibi(
 
     ((int32_t *) b->data)[0] = n_past;
     ((int32_t *) b->data)[1] = n_head;
+    GGML_ASSERT(sizeof(float) == sizeof(int32_t));
+    ((float *) b->data)[2] = bias_max;
 
     ggml_scratch_load(ctx);
 
@@ -6244,6 +6250,40 @@ struct ggml_tensor * ggml_alibi(
     return result;
 }
 
+// ggml_clamp
+
+struct ggml_tensor * ggml_clamp(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        float                 min,
+        float                 max) {
+    bool is_node = false;
+
+    if (a->grad) {
+        GGML_ASSERT(false); // TODO: implement backward
+        is_node = true;
+    }
+
+    // TODO: when implementing backward, fix this:
+    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
+    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
+
+    ((float *) b->data)[0] = min;
+    ((float *) b->data)[1] = max;
+
+    ggml_scratch_load(ctx);
+
+    result->op   = GGML_OP_CLAMP;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->src1 = b;
+
+    return result;
+}
+
+
 // ggml_conv_1d_1s
 
 struct ggml_tensor * ggml_conv_1d_1s(
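A minimal usage sketch of the new operator (not part of the commit; the tensor size and bounds are illustrative, and `ctx` is assumed to be an initialized ggml_context):

    // Clamp every element of an F32 tensor into [-30.0, 30.0].
    struct ggml_tensor * a   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    struct ggml_tensor * cur = ggml_clamp(ctx, a, -30.0f, 30.0f);

Since ggml_clamp returns a ggml_view_tensor of `a` (see the TODO above), the clamp is applied in place over a's buffer when the graph is evaluated.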
@@ -10553,6 +10593,7 @@ static void ggml_compute_forward_diag_mask_f32(
 
     const int  n_past  = ((int32_t *) src1->data)[0];
     const bool inplace = (bool)((int32_t *) src1->data)[1];
+
     assert(n_past >= 0);
 
     if (!inplace && (params->type == GGML_TASK_INIT)) {
@@ -10723,14 +10764,15 @@ static void ggml_compute_forward_alibi_f32(
         struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(src1->type == GGML_TYPE_I32);
-    assert(ggml_nelements(src1) == 2);
+    assert(ggml_nelements(src1) == 3);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
 
-    const int n_past = ((int32_t *) src1->data)[0];
-    const int n_head = ((int32_t *) src1->data)[1];
+    const int   n_past   = ((int32_t *) src1->data)[0];
+    const int   n_head   = ((int32_t *) src1->data)[1];
+    const float max_bias = ((float *) src1->data)[2];
 
 
     assert(n_past >= 0);
@@ -10753,8 +10795,8 @@ static void ggml_compute_forward_alibi_f32(
     // add alibi to src0 (KQ_scaled)
     const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
 
-    const float m0 = powf(2.0f, -8.0f/n_heads_log2_floor);
-    const float m1 = powf(2.0f, -4.0f/n_heads_log2_floor);
+    const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
 
     for (int i = 0; i < ne0; i++) {
         for (int j = 0; j < ne1; j++) {
@@ -10772,28 +10814,29 @@ static void ggml_compute_forward_alibi_f32(
                 m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
             }
 
-            pdst[0] = i * m_k + src[0];
+            pdst[0] = (i - ne0 + 1) * m_k + src[0];
+
         }
     }
 }
 
-
 static void ggml_compute_forward_alibi_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(src1->type == GGML_TYPE_I32);
-    assert(ggml_nelements(src1) == 2);
+    assert(ggml_nelements(src1) == 3);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
 
-    const int n_past = ((int32_t *) src1->data)[0];
-    const int n_head = ((int32_t *) src1->data)[1];
+    const int   n_past   = ((int32_t *) src1->data)[0];
+    const int   n_head   = ((int32_t *) src1->data)[1];
+    const float max_bias = ((float *) src1->data)[2];
 
 
    assert(n_past >= 0);
@@ -10816,8 +10859,8 @@ static void ggml_compute_forward_alibi_f16(
     // add alibi to src0 (KQ_scaled)
     const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
 
-    const float m0 = powf(2.0f, -8.0f/n_heads_log2_floor);
-    const float m1 = powf(2.0f, -4.0f/n_heads_log2_floor);
+    const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
 
     for (int i = 0; i < ne0; i++) {
         for (int j = 0; j < ne1; j++) {
@@ -10836,7 +10879,7 @@ static void ggml_compute_forward_alibi_f16(
             }
 
             // we return F32
-            pdst[0] = i * m_k + GGML_FP16_TO_FP32(src[0]);
+            pdst[0] = (i - ne0 + 1) * m_k + GGML_FP16_TO_FP32(src[0]);
         }
     }
 }
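With bias_max = 8.0f the new slope computation reproduces the constants that were previously hard-coded, and the (i - ne0 + 1) offset shifts the bias so that the newest position receives zero bias, matching the ALiBi formulation q·K + m * [-(L-1), ..., -1, 0]. A worked check with illustrative numbers:

    // n_head = 8  =>  n_heads_log2_floor = 8
    // old: m0 = 2^(-8/8)     = 0.5    m1 = 2^(-4/8)         = 2^-0.5 ~= 0.7071
    // new: m0 = 2^(-(8.0)/8) = 0.5    m1 = 2^(-(8.0/2.0)/8) = 2^-0.5 ~= 0.7071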
@@ -10872,6 +10915,77 @@ static void ggml_compute_forward_alibi(
     }
 }
 
+
+// ggml_compute_forward_clamp
+
+static void ggml_compute_forward_clamp_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(src1->type == GGML_TYPE_I32);
+    assert(ggml_nelements(src1) == 2);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const float min = ((float *) src1->data)[0];
+    const float max = ((float *) src1->data)[1];
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    const size_t nb00 = src0->nb[0];
+    const size_t nb01 = src0->nb[1];
+
+    const size_t nb0 = dst->nb[0];
+    const size_t nb1 = dst->nb[1];
+
+    GGML_ASSERT( nb0 == sizeof(float));
+    GGML_ASSERT(nb00 == sizeof(float));
+
+    for (int j = ith; j < n; j += nth) {
+        float * dst_ptr  = (float *) ((char *)  dst->data + j*nb1);
+        float * src0_ptr = (float *) ((char *) src0->data + j*nb01);
+
+        for (int i = 0; i < nc; i++) {
+            dst_ptr[i] = MAX(MIN(src0_ptr[i], max), min);
+        }
+    }
+}
+
+static void ggml_compute_forward_clamp(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_clamp_f32(params, src0, src1, dst);
+            } break;
+        case GGML_TYPE_F16:
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_Q4_1:
+        case GGML_TYPE_Q5_0:
+        case GGML_TYPE_Q5_1:
+        case GGML_TYPE_Q8_0:
+        case GGML_TYPE_Q8_1:
+        case GGML_TYPE_I8:
+        case GGML_TYPE_I16:
+        case GGML_TYPE_I32:
+        case GGML_TYPE_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+
 // ggml_compute_forward_rope
 
 static void ggml_compute_forward_rope_f32(
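Only the F32 path is implemented; every other type in the dispatch above hits GGML_ASSERT(false). The inner loop is a plain scalar clamp, equivalent to this sketch (the helper name is hypothetical) built on the MIN/MAX macros defined earlier in ggml.c:

    // Same semantics as the loop body above: bound x to [lo, hi].
    static inline float ggml_clamp_f32_one(float x, float lo, float hi) {
        return MAX(MIN(x, hi), lo);
    }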
@@ -12853,6 +12967,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_alibi(params, tensor->src0, tensor->src1, tensor);
             } break;
+        case GGML_OP_CLAMP:
+            {
+                ggml_compute_forward_clamp(params, tensor->src0, tensor->src1, tensor);
+            } break;
         case GGML_OP_CONV_1D_1S:
             {
                 ggml_compute_forward_conv_1d_1s(params, tensor->src0, tensor->src1, tensor);
@@ -13160,6 +13278,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
+        case GGML_OP_CLAMP:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
         case GGML_OP_SILU:
             {
                 // necessary for llama
@@ -14039,6 +14161,10 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                     {
                         node->n_tasks = 1; //TODO
                     } break;
+                case GGML_OP_CLAMP:
+                    {
+                        node->n_tasks = 1; //TODO
+                    } break;
                 case GGML_OP_CONV_1D_1S:
                 case GGML_OP_CONV_1D_2S:
                     {
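Putting both changes together, a hedged end-to-end sketch of how a caller might exercise the new parameter and operator (names, shapes, and values are illustrative; ggml_build_forward and ggml_graph_compute are the existing graph API referenced in the hunk above):

    // Assumes an initialized ctx and an F32 tensor KQ_scaled shaped like
    // the attention scores that ggml_alibi expects.
    struct ggml_tensor * biased  = ggml_alibi(ctx, KQ_scaled,
                                              /*n_past=*/0, /*n_head=*/8,
                                              /*bias_max=*/8.0f);
    struct ggml_tensor * clamped = ggml_clamp(ctx, biased, -30.0f, 30.0f);

    struct ggml_cgraph gf = ggml_build_forward(clamped);
    ggml_graph_compute(ctx, &gf);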