@@ -1274,7 +1274,12 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
1274
1274
}
1275
1275
float sumlx = 0 ;
1276
1276
float suml2 = 0 ;
1277
+ #ifdef HAVE_BUGGY_APPLE_LINKER
1278
+ // use 'volatile' to prevent loop unrolling and work around a bug in Apple ld64 1015.7
1279
+ for (volatile int i = 0 ; i < n ; ++ i ) {
1280
+ #else
1277
1281
for (int i = 0 ; i < n ; ++ i ) {
1282
+ #endif
1278
1283
int l = nearest_int (iscale * x [i ]);
1279
1284
l = MAX (- nmax , MIN (nmax - 1 , l ));
1280
1285
L [i ] = l + nmax ;
@@ -1649,7 +1654,12 @@ static float make_qkx3_quants(int n, int nmax, const float * restrict x, const f
1649
1654
float max = x [0 ];
1650
1655
float sum_w = weights ? weights [0 ] : x [0 ]* x [0 ];
1651
1656
float sum_x = sum_w * x [0 ];
1657
+ #ifdef HAVE_BUGGY_APPLE_LINKER
1658
+ // use 'volatile' to prevent loop unrolling and work around a bug in Apple ld64 1015.7
1659
+ for (volatile int i = 1 ; i < n ; ++ i ) {
1660
+ #else
1652
1661
for (int i = 1 ; i < n ; ++ i ) {
1662
+ #endif
1653
1663
if (x [i ] < min ) min = x [i ];
1654
1664
if (x [i ] > max ) max = x [i ];
1655
1665
float w = weights ? weights [i ] : x [i ]* x [i ];
@@ -1660,7 +1670,7 @@ static float make_qkx3_quants(int n, int nmax, const float * restrict x, const f
1660
1670
min = 0 ;
1661
1671
}
1662
1672
if (max <= min ) {
1663
- for ( int i = 0 ; i < n ; ++ i ) L [ i ] = 0 ;
1673
+ memset ( L , 0 , n ) ;
1664
1674
* the_min = - min ;
1665
1675
return 0.f ;
1666
1676
}
@@ -1862,7 +1872,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
1862
1872
1863
1873
size_t quantize_q2_K (const float * src , void * dst , int nrow , int n_per_row , int64_t * hist , const float * quant_weights ) {
1864
1874
(void )hist ;
1865
- int row_size = ggml_row_size (GGML_TYPE_Q2_K , n_per_row );
1875
+ size_t row_size = ggml_row_size (GGML_TYPE_Q2_K , n_per_row );
1866
1876
if (!quant_weights ) {
1867
1877
quantize_row_q2_K_reference (src , dst , nrow * n_per_row );
1868
1878
}
@@ -2181,7 +2191,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
2181
2191
2182
2192
size_t quantize_q3_K (const float * src , void * dst , int nrow , int n_per_row , int64_t * hist , const float * quant_weights ) {
2183
2193
(void )hist ;
2184
- int row_size = ggml_row_size (GGML_TYPE_Q3_K , n_per_row );
2194
+ size_t row_size = ggml_row_size (GGML_TYPE_Q3_K , n_per_row );
2185
2195
if (!quant_weights ) {
2186
2196
quantize_row_q3_K_reference (src , dst , nrow * n_per_row );
2187
2197
}
@@ -2448,7 +2458,7 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
2448
2458
2449
2459
size_t quantize_q4_K (const float * src , void * dst , int nrow , int n_per_row , int64_t * hist , const float * quant_weights ) {
2450
2460
(void )hist ;
2451
- int row_size = ggml_row_size (GGML_TYPE_Q4_K , n_per_row );
2461
+ size_t row_size = ggml_row_size (GGML_TYPE_Q4_K , n_per_row );
2452
2462
if (!quant_weights ) {
2453
2463
quantize_row_q4_K_reference (src , dst , nrow * n_per_row );
2454
2464
}
@@ -2771,7 +2781,7 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
2771
2781
2772
2782
size_t quantize_q5_K (const float * src , void * dst , int nrow , int n_per_row , int64_t * hist , const float * quant_weights ) {
2773
2783
(void )hist ;
2774
- int row_size = ggml_row_size (GGML_TYPE_Q5_K , n_per_row );
2784
+ size_t row_size = ggml_row_size (GGML_TYPE_Q5_K , n_per_row );
2775
2785
if (!quant_weights ) {
2776
2786
quantize_row_q5_K_reference (src , dst , nrow * n_per_row );
2777
2787
}
@@ -3025,7 +3035,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
3025
3035
3026
3036
size_t quantize_q6_K (const float * src , void * dst , int nrow , int n_per_row , int64_t * hist , const float * quant_weights ) {
3027
3037
(void )hist ;
3028
- int row_size = ggml_row_size (GGML_TYPE_Q6_K , n_per_row );
3038
+ size_t row_size = ggml_row_size (GGML_TYPE_Q6_K , n_per_row );
3029
3039
if (!quant_weights ) {
3030
3040
quantize_row_q6_K_reference (src , dst , nrow * n_per_row );
3031
3041
}
@@ -3072,7 +3082,7 @@ size_t quantize_q4_0(const float * src, void * dst, int nrow, int n_per_row, int
3072
3082
if (!quant_weights ) {
3073
3083
return ggml_quantize_q4_0 (src , dst , nrow * n_per_row , n_per_row , hist );
3074
3084
}
3075
- int row_size = ggml_row_size (GGML_TYPE_Q4_0 , n_per_row );
3085
+ size_t row_size = ggml_row_size (GGML_TYPE_Q4_0 , n_per_row );
3076
3086
char * qrow = (char * )dst ;
3077
3087
for (int row = 0 ; row < nrow ; ++ row ) {
3078
3088
quantize_row_q4_0_impl (src , (block_q4_0 * )qrow , n_per_row , quant_weights );
@@ -3116,7 +3126,7 @@ size_t quantize_q4_1(const float * src, void * dst, int nrow, int n_per_row, int
3116
3126
if (!quant_weights ) {
3117
3127
return ggml_quantize_q4_1 (src , dst , nrow * n_per_row , n_per_row , hist );
3118
3128
}
3119
- int row_size = ggml_row_size (GGML_TYPE_Q4_1 , n_per_row );
3129
+ size_t row_size = ggml_row_size (GGML_TYPE_Q4_1 , n_per_row );
3120
3130
char * qrow = (char * )dst ;
3121
3131
for (int row = 0 ; row < nrow ; ++ row ) {
3122
3132
quantize_row_q4_1_impl (src , (block_q4_1 * )qrow , n_per_row , quant_weights );
@@ -3169,7 +3179,7 @@ size_t quantize_q5_0(const float * src, void * dst, int nrow, int n_per_row, int
3169
3179
if (!quant_weights ) {
3170
3180
return ggml_quantize_q5_0 (src , dst , nrow * n_per_row , n_per_row , hist );
3171
3181
}
3172
- int row_size = ggml_row_size (GGML_TYPE_Q5_0 , n_per_row );
3182
+ size_t row_size = ggml_row_size (GGML_TYPE_Q5_0 , n_per_row );
3173
3183
char * qrow = (char * )dst ;
3174
3184
for (int row = 0 ; row < nrow ; ++ row ) {
3175
3185
quantize_row_q5_0_impl (src , (block_q5_0 * )qrow , n_per_row , quant_weights );
@@ -3221,7 +3231,7 @@ size_t quantize_q5_1(const float * src, void * dst, int nrow, int n_per_row, int
3221
3231
if (!quant_weights ) {
3222
3232
return ggml_quantize_q5_1 (src , dst , nrow * n_per_row , n_per_row , hist );
3223
3233
}
3224
- int row_size = ggml_row_size (GGML_TYPE_Q5_1 , n_per_row );
3234
+ size_t row_size = ggml_row_size (GGML_TYPE_Q5_1 , n_per_row );
3225
3235
char * qrow = (char * )dst ;
3226
3236
for (int row = 0 ; row < nrow ; ++ row ) {
3227
3237
quantize_row_q5_1_impl (src , (block_q5_1 * )qrow , n_per_row , quant_weights );
@@ -8565,7 +8575,7 @@ static int iq2_compare_func(const void * left, const void * right) {
8565
8575
return l [0 ] < r [0 ] ? -1 : l [0 ] > r [0 ] ? 1 : l [1 ] < r [1 ] ? -1 : l [1 ] > r [1 ] ? 1 : 0 ;
8566
8576
}
8567
8577
8568
- static void q2xs_init_impl (int grid_size ) {
8578
+ void iq2xs_init_impl (int grid_size ) {
8569
8579
const int gindex = iq2_data_index (grid_size );
8570
8580
if (iq2_data [gindex ].grid ) {
8571
8581
return ;
@@ -8720,19 +8730,7 @@ static void q2xs_init_impl(int grid_size) {
8720
8730
free (dist2 );
8721
8731
}
8722
8732
8723
- void ggml_init_iq2_quantization (enum ggml_type type ) {
8724
- if (type == GGML_TYPE_IQ2_XXS ) {
8725
- q2xs_init_impl (256 );
8726
- }
8727
- else if (type == GGML_TYPE_IQ2_XS ) {
8728
- q2xs_init_impl (512 );
8729
- }
8730
- else {
8731
- fprintf (stderr , "======================== Why are you calling %s with type %d?\n" , __func__ , (int )type );
8732
- }
8733
- }
8734
-
8735
- static void q2xs_deinit_impl (int grid_size ) {
8733
+ void iq2xs_free_impl (int grid_size ) {
8736
8734
GGML_ASSERT (grid_size == 256 || grid_size == 512 || grid_size == 1024 );
8737
8735
const int gindex = iq2_data_index (grid_size );
8738
8736
if (iq2_data [gindex ].grid ) {
@@ -8742,18 +8740,6 @@ static void q2xs_deinit_impl(int grid_size) {
8742
8740
}
8743
8741
}
8744
8742
8745
- void ggml_deinit_iq2_quantization (enum ggml_type type ) {
8746
- if (type == GGML_TYPE_IQ2_XXS ) {
8747
- q2xs_deinit_impl (256 );
8748
- }
8749
- else if (type == GGML_TYPE_IQ2_XS ) {
8750
- q2xs_deinit_impl (512 );
8751
- }
8752
- else {
8753
- fprintf (stderr , "======================== Why are you calling %s with type %d?\n" , __func__ , (int )type );
8754
- }
8755
- }
8756
-
8757
8743
static int iq2_find_best_neighbour (const uint16_t * restrict neighbours , const uint64_t * restrict grid ,
8758
8744
const float * restrict xval , const float * restrict weight , float scale , int8_t * restrict L ) {
8759
8745
int num_neighbors = neighbours [0 ];
@@ -8786,10 +8772,10 @@ static void quantize_row_iq2_xxs_impl(const float * restrict x, void * restrict
8786
8772
const int * kmap_q2xs = iq2_data [gindex ].map ;
8787
8773
const uint16_t * kneighbors_q2xs = iq2_data [gindex ].neighbours ;
8788
8774
8789
- GGML_ASSERT (quant_weights );
8790
- GGML_ASSERT (kgrid_q2xs );
8791
- GGML_ASSERT (kmap_q2xs );
8792
- GGML_ASSERT (kneighbors_q2xs );
8775
+ GGML_ASSERT (quant_weights && "missing quantization weights" );
8776
+ GGML_ASSERT (kgrid_q2xs && "forgot to call ggml_quantize_init()?" );
8777
+ GGML_ASSERT (kmap_q2xs && "forgot to call ggml_quantize_init()?" );
8778
+ GGML_ASSERT (kneighbors_q2xs && "forgot to call ggml_quantize_init()?" );
8793
8779
GGML_ASSERT (n %QK_K == 0 );
8794
8780
8795
8781
const int kMaxQ = 3 ;
@@ -9005,10 +8991,10 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
9005
8991
const int * kmap_q2xs = iq2_data [gindex ].map ;
9006
8992
const uint16_t * kneighbors_q2xs = iq2_data [gindex ].neighbours ;
9007
8993
9008
- GGML_ASSERT (quant_weights );
9009
- GGML_ASSERT (kmap_q2xs );
9010
- GGML_ASSERT (kgrid_q2xs );
9011
- GGML_ASSERT (kneighbors_q2xs );
8994
+ GGML_ASSERT (quant_weights && "missing quantization weights" );
8995
+ GGML_ASSERT (kmap_q2xs && "forgot to call ggml_quantize_init()?" );
8996
+ GGML_ASSERT (kgrid_q2xs && "forgot to call ggml_quantize_init()?" );
8997
+ GGML_ASSERT (kneighbors_q2xs && "forgot to call ggml_quantize_init()?" );
9012
8998
GGML_ASSERT (n %QK_K == 0 );
9013
8999
9014
9000
const int kMaxQ = 3 ;
0 commit comments