@@ -192,6 +192,10 @@ static void examine_simple_variable(PlannerInfo *root, Var *var,
192
192
static bool get_variable_range (PlannerInfo * root , VariableStatData * vardata ,
193
193
Oid sortop , Oid collation ,
194
194
Datum * min , Datum * max );
195
+ static void get_stats_slot_range (AttStatsSlot * sslot ,
196
+ Oid opfuncoid , FmgrInfo * opproc ,
197
+ Oid collation , int16 typLen , bool typByVal ,
198
+ Datum * min , Datum * max , bool * p_have_data );
195
199
static bool get_actual_variable_range (PlannerInfo * root ,
196
200
VariableStatData * vardata ,
197
201
Oid sortop , Oid collation ,
@@ -679,7 +683,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
679
683
* compute the resulting contribution to selectivity.
680
684
*/
681
685
hist_selec = ineq_histogram_selectivity (root , vardata ,
682
- & opproc , isgt , iseq ,
686
+ operator , & opproc , isgt , iseq ,
683
687
collation ,
684
688
constval , consttype );
685
689
@@ -1019,6 +1023,9 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
1019
1023
* satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
1020
1024
* The isgt and iseq flags distinguish which of the four cases apply.
1021
1025
*
1026
+ * While opproc could be looked up from the operator OID, common callers
1027
+ * also need to call it separately, so we make the caller pass both.
1028
+ *
1022
1029
* Returns -1 if there is no histogram (valid results will always be >= 0).
1023
1030
*
1024
1031
* Note that the result disregards both the most-common-values (if any) and
@@ -1030,7 +1037,7 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
1030
1037
double
1031
1038
ineq_histogram_selectivity (PlannerInfo * root ,
1032
1039
VariableStatData * vardata ,
1033
- FmgrInfo * opproc , bool isgt , bool iseq ,
1040
+ Oid opoid , FmgrInfo * opproc , bool isgt , bool iseq ,
1034
1041
Oid collation ,
1035
1042
Datum constval , Oid consttype )
1036
1043
{
@@ -1042,31 +1049,30 @@ ineq_histogram_selectivity(PlannerInfo *root,
1042
1049
/*
1043
1050
* Someday, ANALYZE might store more than one histogram per rel/att,
1044
1051
* corresponding to more than one possible sort ordering defined for the
1045
- * column type. However, to make that work we will need to figure out
1046
- * which staop to search for --- it's not necessarily the one we have at
1047
- * hand! (For example, we might have a '<=' operator rather than the '<'
1048
- * operator that will appear in staop.) The collation might not agree
1049
- * either. For now, just assume that whatever appears in pg_statistic is
1050
- * sorted the same way our operator sorts, or the reverse way if isgt is
1051
- * true. This could result in a bogus estimate, but it still seems better
1052
- * than falling back to the default estimate .
1052
+ * column type. Right now, we know there is only one, so just grab it and
1053
+ * see if it matches the query.
1054
+ *
1055
+ * Note that we can't use opoid as search argument; the staop appearing in
1056
+ * pg_statistic will be for the relevant '<' operator, but what we have
1057
+ * might be some other inequality operator such as '>='. (Even if opoid
1058
+ * is a '<' operator, it could be cross-type.) Hence we must use
1059
+ * comparison_ops_are_compatible() to see if the operators match .
1053
1060
*/
1054
1061
if (HeapTupleIsValid (vardata -> statsTuple ) &&
1055
1062
statistic_proc_security_check (vardata , opproc -> fn_oid ) &&
1056
1063
get_attstatsslot (& sslot , vardata -> statsTuple ,
1057
1064
STATISTIC_KIND_HISTOGRAM , InvalidOid ,
1058
1065
ATTSTATSSLOT_VALUES ))
1059
1066
{
1060
- if (sslot .nvalues > 1 )
1067
+ if (sslot .nvalues > 1 &&
1068
+ sslot .stacoll == collation &&
1069
+ comparison_ops_are_compatible (sslot .staop , opoid ))
1061
1070
{
1062
1071
/*
1063
1072
* Use binary search to find the desired location, namely the
1064
1073
* right end of the histogram bin containing the comparison value,
1065
1074
* which is the leftmost entry for which the comparison operator
1066
- * succeeds (if isgt) or fails (if !isgt). (If the given operator
1067
- * isn't actually sort-compatible with the histogram, you'll get
1068
- * garbage results ... but probably not any more garbage-y than
1069
- * you would have from the old linear search.)
1075
+ * succeeds (if isgt) or fails (if !isgt).
1070
1076
*
1071
1077
* In this loop, we pay no attention to whether the operator iseq
1072
1078
* or not; that detail will be mopped up below. (We cannot tell,
@@ -1332,6 +1338,50 @@ ineq_histogram_selectivity(PlannerInfo *root,
1332
1338
hist_selec = 1.0 - cutoff ;
1333
1339
}
1334
1340
}
1341
+ else if (sslot .nvalues > 1 )
1342
+ {
1343
+ /*
1344
+ * If we get here, we have a histogram but it's not sorted the way
1345
+ * we want. Do a brute-force search to see how many of the
1346
+ * entries satisfy the comparison condition, and take that
1347
+ * fraction as our estimate. (This is identical to the inner loop
1348
+ * of histogram_selectivity; maybe share code?)
1349
+ */
1350
+ LOCAL_FCINFO (fcinfo , 2 );
1351
+ int nmatch = 0 ;
1352
+
1353
+ InitFunctionCallInfoData (* fcinfo , opproc , 2 , collation ,
1354
+ NULL , NULL );
1355
+ fcinfo -> args [0 ].isnull = false;
1356
+ fcinfo -> args [1 ].isnull = false;
1357
+ fcinfo -> args [1 ].value = constval ;
1358
+ for (int i = 0 ; i < sslot .nvalues ; i ++ )
1359
+ {
1360
+ Datum fresult ;
1361
+
1362
+ fcinfo -> args [0 ].value = sslot .values [i ];
1363
+ fcinfo -> isnull = false;
1364
+ fresult = FunctionCallInvoke (fcinfo );
1365
+ if (!fcinfo -> isnull && DatumGetBool (fresult ))
1366
+ nmatch ++ ;
1367
+ }
1368
+ hist_selec = ((double ) nmatch ) / ((double ) sslot .nvalues );
1369
+
1370
+ /*
1371
+ * As above, clamp to a hundredth of the histogram resolution.
1372
+ * This case is surely even less trustworthy than the normal one,
1373
+ * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the
1374
+ * clamp should be more restrictive in this case?)
1375
+ */
1376
+ {
1377
+ double cutoff = 0.01 / (double ) (sslot .nvalues - 1 );
1378
+
1379
+ if (hist_selec < cutoff )
1380
+ hist_selec = cutoff ;
1381
+ else if (hist_selec > 1.0 - cutoff )
1382
+ hist_selec = 1.0 - cutoff ;
1383
+ }
1384
+ }
1335
1385
1336
1386
free_attstatsslot (& sslot );
1337
1387
}
@@ -5363,8 +5413,8 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
5363
5413
int16 typLen ;
5364
5414
bool typByVal ;
5365
5415
Oid opfuncoid ;
5416
+ FmgrInfo opproc ;
5366
5417
AttStatsSlot sslot ;
5367
- int i ;
5368
5418
5369
5419
/*
5370
5420
* XXX It's very tempting to try to use the actual column min and max, if
@@ -5395,78 +5445,56 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
5395
5445
(opfuncoid = get_opcode (sortop ))))
5396
5446
return false;
5397
5447
5448
+ opproc .fn_oid = InvalidOid ; /* mark this as not looked up yet */
5449
+
5398
5450
get_typlenbyval (vardata -> atttype , & typLen , & typByVal );
5399
5451
5400
5452
/*
5401
- * If there is a histogram, grab the first and last values.
5402
- *
5403
- * If there is a histogram that is sorted with some other operator than
5404
- * the one we want, fail --- this suggests that there is data we can't
5405
- * use. XXX consider collation too.
5453
+ * If there is a histogram with the ordering we want, grab the first and
5454
+ * last values.
5406
5455
*/
5407
5456
if (get_attstatsslot (& sslot , vardata -> statsTuple ,
5408
5457
STATISTIC_KIND_HISTOGRAM , sortop ,
5409
5458
ATTSTATSSLOT_VALUES ))
5410
5459
{
5411
- if (sslot .nvalues > 0 )
5460
+ if (sslot .stacoll == collation && sslot . nvalues > 0 )
5412
5461
{
5413
5462
tmin = datumCopy (sslot .values [0 ], typByVal , typLen );
5414
5463
tmax = datumCopy (sslot .values [sslot .nvalues - 1 ], typByVal , typLen );
5415
5464
have_data = true;
5416
5465
}
5417
5466
free_attstatsslot (& sslot );
5418
5467
}
5419
- else if (get_attstatsslot (& sslot , vardata -> statsTuple ,
5420
- STATISTIC_KIND_HISTOGRAM , InvalidOid ,
5421
- 0 ))
5468
+
5469
+ /*
5470
+ * Otherwise, if there is a histogram with some other ordering, scan it
5471
+ * and get the min and max values according to the ordering we want. This
5472
+ * of course may not find values that are really extremal according to our
5473
+ * ordering, but it beats ignoring available data.
5474
+ */
5475
+ if (!have_data &&
5476
+ get_attstatsslot (& sslot , vardata -> statsTuple ,
5477
+ STATISTIC_KIND_HISTOGRAM , InvalidOid ,
5478
+ ATTSTATSSLOT_VALUES ))
5422
5479
{
5480
+ get_stats_slot_range (& sslot , opfuncoid , & opproc ,
5481
+ collation , typLen , typByVal ,
5482
+ & tmin , & tmax , & have_data );
5423
5483
free_attstatsslot (& sslot );
5424
- return false;
5425
5484
}
5426
5485
5427
5486
/*
5428
5487
* If we have most-common-values info, look for extreme MCVs. This is
5429
5488
* needed even if we also have a histogram, since the histogram excludes
5430
- * the MCVs. However, usually the MCVs will not be the extreme values, so
5431
- * avoid unnecessary data copying.
5489
+ * the MCVs.
5432
5490
*/
5433
5491
if (get_attstatsslot (& sslot , vardata -> statsTuple ,
5434
5492
STATISTIC_KIND_MCV , InvalidOid ,
5435
5493
ATTSTATSSLOT_VALUES ))
5436
5494
{
5437
- bool tmin_is_mcv = false;
5438
- bool tmax_is_mcv = false;
5439
- FmgrInfo opproc ;
5440
-
5441
- fmgr_info (opfuncoid , & opproc );
5442
-
5443
- for (i = 0 ; i < sslot .nvalues ; i ++ )
5444
- {
5445
- if (!have_data )
5446
- {
5447
- tmin = tmax = sslot .values [i ];
5448
- tmin_is_mcv = tmax_is_mcv = have_data = true;
5449
- continue ;
5450
- }
5451
- if (DatumGetBool (FunctionCall2Coll (& opproc ,
5452
- collation ,
5453
- sslot .values [i ], tmin )))
5454
- {
5455
- tmin = sslot .values [i ];
5456
- tmin_is_mcv = true;
5457
- }
5458
- if (DatumGetBool (FunctionCall2Coll (& opproc ,
5459
- collation ,
5460
- tmax , sslot .values [i ])))
5461
- {
5462
- tmax = sslot .values [i ];
5463
- tmax_is_mcv = true;
5464
- }
5465
- }
5466
- if (tmin_is_mcv )
5467
- tmin = datumCopy (tmin , typByVal , typLen );
5468
- if (tmax_is_mcv )
5469
- tmax = datumCopy (tmax , typByVal , typLen );
5495
+ get_stats_slot_range (& sslot , opfuncoid , & opproc ,
5496
+ collation , typLen , typByVal ,
5497
+ & tmin , & tmax , & have_data );
5470
5498
free_attstatsslot (& sslot );
5471
5499
}
5472
5500
@@ -5475,6 +5503,62 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata,
5475
5503
return have_data ;
5476
5504
}
5477
5505
5506
+ /*
5507
+ * get_stats_slot_range: scan sslot for min/max values
5508
+ *
5509
+ * Subroutine for get_variable_range: update min/max/have_data according
5510
+ * to what we find in the statistics array.
5511
+ */
5512
+ static void
5513
+ get_stats_slot_range (AttStatsSlot * sslot , Oid opfuncoid , FmgrInfo * opproc ,
5514
+ Oid collation , int16 typLen , bool typByVal ,
5515
+ Datum * min , Datum * max , bool * p_have_data )
5516
+ {
5517
+ Datum tmin = * min ;
5518
+ Datum tmax = * max ;
5519
+ bool have_data = * p_have_data ;
5520
+ bool found_tmin = false;
5521
+ bool found_tmax = false;
5522
+
5523
+ /* Look up the comparison function, if we didn't already do so */
5524
+ if (opproc -> fn_oid != opfuncoid )
5525
+ fmgr_info (opfuncoid , opproc );
5526
+
5527
+ /* Scan all the slot's values */
5528
+ for (int i = 0 ; i < sslot -> nvalues ; i ++ )
5529
+ {
5530
+ if (!have_data )
5531
+ {
5532
+ tmin = tmax = sslot -> values [i ];
5533
+ found_tmin = found_tmax = true;
5534
+ * p_have_data = have_data = true;
5535
+ continue ;
5536
+ }
5537
+ if (DatumGetBool (FunctionCall2Coll (opproc ,
5538
+ collation ,
5539
+ sslot -> values [i ], tmin )))
5540
+ {
5541
+ tmin = sslot -> values [i ];
5542
+ found_tmin = true;
5543
+ }
5544
+ if (DatumGetBool (FunctionCall2Coll (opproc ,
5545
+ collation ,
5546
+ tmax , sslot -> values [i ])))
5547
+ {
5548
+ tmax = sslot -> values [i ];
5549
+ found_tmax = true;
5550
+ }
5551
+ }
5552
+
5553
+ /*
5554
+ * Copy the slot's values, if we found new extreme values.
5555
+ */
5556
+ if (found_tmin )
5557
+ * min = datumCopy (tmin , typByVal , typLen );
5558
+ if (found_tmax )
5559
+ * max = datumCopy (tmax , typByVal , typLen );
5560
+ }
5561
+
5478
5562
5479
5563
/*
5480
5564
* get_actual_variable_range
0 commit comments