
Commit 3de84b2

ggml : add ggml_clamp() (#1539)
* ggml : add ggml_clamp()
* ggml : indentation
1 parent affc76e commit 3de84b2
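In short: the new ggml_clamp(ctx, a, min, max) limits every element of a to the closed range [min, max] (element-wise, dst[i] = MAX(MIN(x[i], max), min)) and returns the result as an in-place view of a. The commit also replaces the hard-coded ALiBi bias exponent 8.0f in ggml_alibi() with a new bias_max parameter.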

2 files changed: +154, -18 lines


ggml.c

+142, -16
@@ -3472,6 +3472,7 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "ROPE",
     "ROPE_BACK",
     "ALIBI",
+    "CLAMP",
     "CONV_1D_1S",
     "CONV_1D_2S",

@@ -3482,7 +3483,8 @@ static const char * GGML_OP_LABEL[GGML_OP_COUNT] = {
     "MAP_BINARY",
 };

-static_assert(GGML_OP_COUNT == 50, "GGML_OP_COUNT != 50");
+static_assert(GGML_OP_COUNT == 51, "GGML_OP_COUNT != 51");
+

 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",

@@ -3532,6 +3534,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "rope(x)",
     "rope_back(x)",
     "alibi(x)",
+    "clamp(x)",
     "conv_1d_1s(x)",
     "conv_1d_2s(x)",

@@ -3542,7 +3545,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "f(x,y)",
 };

-static_assert(GGML_OP_COUNT == 50, "GGML_OP_COUNT != 50");
+static_assert(GGML_OP_COUNT == 51, "GGML_OP_COUNT != 51");

 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");

@@ -6214,7 +6217,8 @@ struct ggml_tensor * ggml_alibi(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
-        int n_head) {
+        int n_head,
+        float bias_max) {
     GGML_ASSERT(n_past >= 0);
     bool is_node = false;

@@ -6233,6 +6237,8 @@ struct ggml_tensor * ggml_alibi(

     ((int32_t *) b->data)[0] = n_past;
     ((int32_t *) b->data)[1] = n_head;
+    GGML_ASSERT(sizeof(float) == sizeof(int32_t));
+    (((float *) b->data)[2]) = bias_max;

     ggml_scratch_load(ctx);

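For context, here is a minimal standalone sketch (not part of the commit) of the parameter-packing pattern used above: scalar op parameters live in a small GGML_TYPE_I32 tensor, and the float bias_max is stored by writing through a float pointer into one of the 4-byte slots, which is what the added sizeof assertion guards.

```c
#include <assert.h>
#include <stdint.h>

int main(void) {
    // same precondition the commit asserts before reinterpreting a slot
    assert(sizeof(float) == sizeof(int32_t));

    int32_t data[3];               // stands in for b->data (GGML_TYPE_I32, 3 elements)
    ((int32_t *) data)[0] = 0;     // n_past
    ((int32_t *) data)[1] = 32;    // n_head (hypothetical value)
    ((float   *) data)[2] = 8.0f;  // bias_max, written through a float pointer

    // the compute function reads it back the same way
    const float max_bias = ((float *) data)[2];
    return max_bias == 8.0f ? 0 : 1;
}
```
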
@@ -6244,6 +6250,40 @@ struct ggml_tensor * ggml_alibi(
     return result;
 }

+// ggml_clamp
+
+struct ggml_tensor * ggml_clamp(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        float min,
+        float max) {
+    bool is_node = false;
+
+    if (a->grad) {
+        GGML_ASSERT(false); // TODO: implement backward
+        is_node = true;
+    }
+
+    // TODO: when implement backward, fix this:
+    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
+    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
+
+    ((float *) b->data)[0] = min;
+    ((float *) b->data)[1] = max;
+
+    ggml_scratch_load(ctx);
+
+    result->op   = GGML_OP_CLAMP;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->src1 = b;
+
+    return result;
+}
+
 // ggml_conv_1d_1s

 struct ggml_tensor * ggml_conv_1d_1s(

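A hedged usage sketch (not from this commit; the tensor size and bounds are arbitrary) showing how the new op is meant to be called. ggml_clamp() returns a view of a, so the clamping happens in place when the graph is computed.

```c
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    for (int i = 0; i < 8; i++) {
        ((float *) a->data)[i] = (float) (i - 4);   // -4 .. 3
    }

    // returns view(a); values are limited to [-2, 2] once the graph runs
    struct ggml_tensor * c = ggml_clamp(ctx, a, -2.0f, 2.0f);

    struct ggml_cgraph gf = ggml_build_forward(c);
    ggml_graph_compute(ctx, &gf);

    ggml_free(ctx);
    return 0;
}
```
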
@@ -10553,6 +10593,7 @@ static void ggml_compute_forward_diag_mask_f32(

     const int n_past = ((int32_t *) src1->data)[0];
     const bool inplace = (bool)((int32_t *) src1->data)[1];
+
     assert(n_past >= 0);

     if (!inplace && (params->type == GGML_TASK_INIT)) {

@@ -10723,14 +10764,15 @@ static void ggml_compute_forward_alibi_f32(
         struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(src1->type == GGML_TYPE_I32);
-    assert(ggml_nelements(src1) == 2);
+    assert(ggml_nelements(src1) == 3);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }

-    const int n_past = ((int32_t *) src1->data)[0];
-    const int n_head = ((int32_t *) src1->data)[1];
+    const int   n_past   = ((int32_t *) src1->data)[0];
+    const int   n_head   = ((int32_t *) src1->data)[1];
+    const float max_bias = ((float *) src1->data)[2];

     assert(n_past >= 0);

@@ -10753,8 +10795,8 @@ static void ggml_compute_forward_alibi_f32(
     // add alibi to src0 (KQ_scaled)
     const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));

-    const float m0 = powf(2.0f, -8.0f / n_heads_log2_floor);
-    const float m1 = powf(2.0f, -4.0f / n_heads_log2_floor);
+    const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

     for (int i = 0; i < ne0; i++) {
         for (int j = 0; j < ne1; j++) {

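The change above generalizes the previously hard-coded exponents (-8.0f and -4.0f) to the new max_bias parameter; with max_bias = 8.0f the original behaviour is preserved. A standalone sketch (not from the commit) of the per-head slope this selects, following the same formula:

```c
#include <math.h>

// ALiBi slope for head k of n_head heads, as computed above;
// with n_head = 8 and max_bias = 8.0f this gives 1/2, 1/4, ..., 1/256.
static float alibi_slope(int k, int n_head, float max_bias) {
    const int   n_heads_log2_floor = 1 << (int) floor(log2(n_head));
    const float m0 = powf(2.0f, -(max_bias)        / n_heads_log2_floor);
    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

    return k < n_heads_log2_floor
        ? powf(m0, k + 1)                            // first 2^floor(log2(n_head)) heads
        : powf(m1, 2*(k - n_heads_log2_floor) + 1);  // remaining heads
}
```
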
@@ -10772,28 +10814,29 @@ static void ggml_compute_forward_alibi_f32(
                     m_k = powf(m1, 2 * (k - n_heads_log2_floor) + 1);
                 }

-                pdst[0] = i * m_k + src[0];
+                pdst[0] = (i-ne0+1) * m_k + src[0];
+
             }
         }
     }
 }

-
 static void ggml_compute_forward_alibi_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(src1->type == GGML_TYPE_I32);
-    assert(ggml_nelements(src1) == 2);
+    assert(ggml_nelements(src1) == 3);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }

-    const int n_past = ((int32_t *) src1->data)[0];
-    const int n_head = ((int32_t *) src1->data)[1];
+    const int   n_past   = ((int32_t *) src1->data)[0];
+    const int   n_head   = ((int32_t *) src1->data)[1];
+    const float max_bias = ((float *) src1->data)[2];

     assert(n_past >= 0);

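The pdst[0] change in the hunk above shifts where the bias is zero: with the new (i - ne0 + 1) factor, the offsets for a row of length ne0 run from -(ne0 - 1) up to 0 at the last column, instead of from 0 up to (ne0 - 1). A small numeric illustration (assumed values: ne0 = 4, m_k = 0.25):

```c
#include <stdio.h>

int main(void) {
    const int   ne0 = 4;      // hypothetical row length
    const float m_k = 0.25f;  // hypothetical per-head slope
    for (int i = 0; i < ne0; i++) {
        // old bias: i * m_k            ->  0.00  0.25  0.50  0.75
        // new bias: (i - ne0 + 1)*m_k  -> -0.75 -0.50 -0.25  0.00
        printf("i=%d old=%5.2f new=%5.2f\n", i, i * m_k, (i - ne0 + 1) * m_k);
    }
    return 0;
}
```
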
@@ -10816,8 +10859,8 @@ static void ggml_compute_forward_alibi_f16(
     // add alibi to src0 (KQ_scaled)
     const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));

-    const float m0 = powf(2.0f, -8.0f / n_heads_log2_floor);
-    const float m1 = powf(2.0f, -4.0f / n_heads_log2_floor);
+    const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
+    const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);

     for (int i = 0; i < ne0; i++) {
         for (int j = 0; j < ne1; j++) {

@@ -10836,7 +10879,7 @@ static void ggml_compute_forward_alibi_f16(
                 }

                 // we return F32
-                pdst[0] = i * m_k + GGML_FP16_TO_FP32(src[0]);
+                pdst[0] = (i-ne0+1) * m_k + GGML_FP16_TO_FP32(src[0]);
             }
         }
     }

@@ -10872,6 +10915,77 @@ static void ggml_compute_forward_alibi(
     }
 }

+
+// ggml_compute_forward_clamp
+
+static void ggml_compute_forward_clamp_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(src1->type == GGML_TYPE_I32);
+    assert(ggml_nelements(src1) == 2);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int min = ((float *) src1->data)[0];
+    const int max = ((float *) src1->data)[1];
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    const size_t nb00 = src0->nb[0];
+    const size_t nb01 = src0->nb[1];
+
+    const size_t nb0 = dst->nb[0];
+    const size_t nb1 = dst->nb[1];
+
+    GGML_ASSERT( nb0 == sizeof(float));
+    GGML_ASSERT(nb00 == sizeof(float));
+
+    for (int j = ith; j < n; j += nth) {
+        float * dst_ptr  = (float *) ((char *)  dst->data + j*nb1);
+        float * src0_ptr = (float *) ((char *) src0->data + j*nb01);
+
+        for (int i = 0; i < nc; i++) {
+            dst_ptr[i] = MAX(MIN(src0_ptr[i], max), min);
+        }
+    }
+}
+
+static void ggml_compute_forward_clamp(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+        struct ggml_tensor * dst) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_clamp_f32(params, src0, src1, dst);
+            } break;
+        case GGML_TYPE_F16:
+        case GGML_TYPE_Q4_0:
+        case GGML_TYPE_Q4_1:
+        case GGML_TYPE_Q5_0:
+        case GGML_TYPE_Q5_1:
+        case GGML_TYPE_Q8_0:
+        case GGML_TYPE_Q8_1:
+        case GGML_TYPE_I8:
+        case GGML_TYPE_I16:
+        case GGML_TYPE_I32:
+        case GGML_TYPE_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
 // ggml_compute_forward_rope

 static void ggml_compute_forward_rope_f32(

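A standalone illustration (not from the commit) of the `for (int j = ith; j < n; j += nth)` partitioning used in ggml_compute_forward_clamp_f32: thread ith of nth handles every nth row, starting at row ith. Note that the scheduler change further below assigns n_tasks = 1 to GGML_OP_CLAMP, so in this commit the kernel effectively runs with nth = 1.

```c
#include <stdio.h>

int main(void) {
    const int n   = 10;   // number of rows (ggml_nrows(src0))
    const int nth = 4;    // hypothetical thread count
    for (int ith = 0; ith < nth; ith++) {
        printf("thread %d rows:", ith);
        for (int j = ith; j < n; j += nth) {
            printf(" %d", j);   // thread 0 -> 0 4 8, thread 1 -> 1 5 9, ...
        }
        printf("\n");
    }
    return 0;
}
```
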
@@ -12853,6 +12967,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_alibi(params, tensor->src0, tensor->src1, tensor);
             } break;
+        case GGML_OP_CLAMP:
+            {
+                ggml_compute_forward_clamp(params, tensor->src0, tensor->src1, tensor);
+            } break;
         case GGML_OP_CONV_1D_1S:
             {
                 ggml_compute_forward_conv_1d_1s(params, tensor->src0, tensor->src1, tensor);

@@ -13160,6 +13278,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
+        case GGML_OP_CLAMP:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
         case GGML_OP_SILU:
             {
                 // necessary for llama

@@ -14039,6 +14161,10 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                     {
                         node->n_tasks = 1; //TODO
                     } break;
+                case GGML_OP_CLAMP:
+                    {
+                        node->n_tasks = 1; //TODO
+                    } break;
                 case GGML_OP_CONV_1D_1S:
                 case GGML_OP_CONV_1D_2S:
                     {

ggml.h

+12, -2
@@ -313,6 +313,7 @@ extern "C" {
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
+        GGML_OP_CLAMP,
         GGML_OP_CONV_1D_1S,
         GGML_OP_CONV_1D_2S,

@@ -849,7 +850,7 @@ extern "C" {
             int n_past);

     // in-place, returns view(a)
-    GGML_API struct ggml_tensor * gml_diag_mask_zero_inplace(
+    GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int n_past);

@@ -897,7 +898,16 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int n_past,
-            int n_head);
+            int n_head,
+            float bias_max);
+
+    // clamp
+    // in-place, returns view(a)
+    struct ggml_tensor * ggml_clamp(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            float min,
+            float max);

     // padding = 1
     // TODO: we don't support extra parameters for now
