summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorPeter Eisentraut2019-03-22 11:09:32 +0000
committerPeter Eisentraut2019-03-22 11:12:43 +0000
commit5e1963fb764e9cc092e0f7b58b28985c311431d9 (patch)
tree544492f24e3d48d00bd2a19c11663f84f1e18ce4 /src/include
parent2ab6d28d233af17987ea323e3235b2bda89b4f2e (diff)
Collations with nondeterministic comparison
This adds a flag "deterministic" to collations. If that is false, such a collation disables various optimizations that assume that strings are equal only if they are byte-wise equal. That then allows use cases such as case-insensitive or accent-insensitive comparisons or handling of strings with different Unicode normal forms. This functionality is only supported with the ICU provider. At least glibc doesn't appear to have any locales that work in a nondeterministic way, so it's not worth supporting this for the libc provider. The term "deterministic comparison" in this context is from Unicode Technical Standard #10 (https://unicode.org/reports/tr10/#Deterministic_Comparison). This patch makes changes in three areas: - CREATE COLLATION DDL changes and system catalog changes to support this new flag. - Many executor nodes and auxiliary code are extended to track collations. Previously, this code would just throw away collation information, because the eventually-called user-defined functions didn't use it since they only cared about equality, which didn't need collation information. - String data type functions that do equality comparisons and hashing are changed to take the (non-)deterministic flag into account. For comparison, this just means skipping various shortcuts and tie breakers that use byte-wise comparison. For hashing, we first need to convert the input string to a canonical "sort key" using the ICU analogue of strxfrm(). Reviewed-by: Daniel Verite <[email protected]> Reviewed-by: Peter Geoghegan <[email protected]> Discussion: https://www.postgresql.org/message-id/flat/[email protected]
Diffstat (limited to 'src/include')
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_collation.h2
-rw-r--r--src/include/executor/executor.h4
-rw-r--r--src/include/executor/hashjoin.h1
-rw-r--r--src/include/executor/nodeHash.h2
-rw-r--r--src/include/nodes/execnodes.h3
-rw-r--r--src/include/nodes/plannodes.h7
-rw-r--r--src/include/optimizer/planmain.h2
-rw-r--r--src/include/optimizer/tlist.h1
-rw-r--r--src/include/partitioning/partbounds.h1
-rw-r--r--src/include/utils/lsyscache.h1
-rw-r--r--src/include/utils/pg_locale.h1
12 files changed, 24 insertions, 3 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 45c33b63d61..d4dfe237c99 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201903211
+#define CATALOG_VERSION_NO 201903221
#endif
diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h
index 10fe711a912..4d2fcb3858a 100644
--- a/src/include/catalog/pg_collation.h
+++ b/src/include/catalog/pg_collation.h
@@ -33,6 +33,7 @@ CATALOG(pg_collation,3456,CollationRelationId)
Oid collnamespace; /* OID of namespace containing collation */
Oid collowner; /* owner of collation */
char collprovider; /* see constants below */
+ bool collisdeterministic BKI_DEFAULT(t);
int32 collencoding; /* encoding for this collation; -1 = "all" */
NameData collcollate; /* LC_COLLATE setting */
NameData collctype; /* LC_CTYPE setting */
@@ -61,6 +62,7 @@ typedef FormData_pg_collation *Form_pg_collation;
extern Oid CollationCreate(const char *collname, Oid collnamespace,
Oid collowner,
char collprovider,
+ bool collisdeterministic,
int32 collencoding,
const char *collcollate, const char *collctype,
const char *collversion,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 9003f2ce583..0cf7aa3495f 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -111,6 +111,7 @@ extern ExprState *execTuplesMatchPrepare(TupleDesc desc,
int numCols,
const AttrNumber *keyColIdx,
const Oid *eqOperators,
+ const Oid *collations,
PlanState *parent);
extern void execTuplesHashPrepare(int numCols,
const Oid *eqOperators,
@@ -121,6 +122,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
+ Oid *collations,
long nbuckets, Size additionalsize,
MemoryContext tablecxt,
MemoryContext tempcxt, bool use_variable_hash_iv);
@@ -129,6 +131,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent,
int numCols, AttrNumber *keyColIdx,
const Oid *eqfuncoids,
FmgrInfo *hashfunctions,
+ Oid *collations,
long nbuckets, Size additionalsize,
MemoryContext metacxt,
MemoryContext tablecxt,
@@ -257,6 +260,7 @@ extern ExprState *ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
int numCols,
const AttrNumber *keyColIdx,
const Oid *eqfunctions,
+ const Oid *collations,
PlanState *parent);
extern ProjectionInfo *ExecBuildProjectionInfo(List *targetList,
ExprContext *econtext,
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index e7bf158c1bd..2c94b926d37 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -337,6 +337,7 @@ typedef struct HashJoinTableData
FmgrInfo *outer_hashfunctions; /* lookup data for hash functions */
FmgrInfo *inner_hashfunctions; /* lookup data for hash functions */
bool *hashStrict; /* is each hash join operator strict? */
+ Oid *collations;
Size spaceUsed; /* memory space currently used by tuples */
Size spaceAllowed; /* upper limit for space used */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index 1309b32b903..12337660235 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -24,7 +24,7 @@ extern Node *MultiExecHash(HashState *node);
extern void ExecEndHash(HashState *node);
extern void ExecReScanHash(HashState *node);
-extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators,
+extern HashJoinTable ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
bool keepNulls);
extern void ExecParallelHashTableAlloc(HashJoinTable hashtable,
int batchno);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 62eb1a06eef..869c303e157 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -693,6 +693,7 @@ typedef struct TupleHashTableData
AttrNumber *keyColIdx; /* attr numbers of key columns */
FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
ExprState *tab_eq_func; /* comparator for table datatype(s) */
+ Oid *tab_collations; /* collations for hash and comparison */
MemoryContext tablecxt; /* memory context containing table */
MemoryContext tempcxt; /* context for function evaluations */
Size entrysize; /* actual size to make each hash entry */
@@ -862,6 +863,7 @@ typedef struct SubPlanState
AttrNumber *keyColIdx; /* control data for hash tables */
Oid *tab_eq_funcoids; /* equality func oids for table
* datatype(s) */
+ Oid *tab_collations; /* collations for hash and comparison */
FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
FmgrInfo *tab_eq_funcs; /* equality functions for table datatype(s) */
FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
@@ -1872,6 +1874,7 @@ typedef struct HashJoinState
List *hj_OuterHashKeys; /* list of ExprState nodes */
List *hj_InnerHashKeys; /* list of ExprState nodes */
List *hj_HashOperators; /* list of operator OIDs */
+ List *hj_Collations;
HashJoinTable hj_HashTable;
uint32 hj_CurHashValue;
int hj_CurBucketNo;
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index d66a187a530..24740c31e3d 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -297,6 +297,7 @@ typedef struct RecursiveUnion
* duplicate-ness */
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
+ Oid *dupCollations;
long numGroups; /* estimated number of groups in input */
} RecursiveUnion;
@@ -773,6 +774,7 @@ typedef struct Group
int numCols; /* number of grouping columns */
AttrNumber *grpColIdx; /* their indexes in the target list */
Oid *grpOperators; /* equality operators to compare with */
+ Oid *grpCollations;
} Group;
/* ---------------
@@ -797,6 +799,7 @@ typedef struct Agg
int numCols; /* number of grouping columns */
AttrNumber *grpColIdx; /* their indexes in the target list */
Oid *grpOperators; /* equality operators to compare with */
+ Oid *grpCollations;
long numGroups; /* estimated number of groups in input */
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
/* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */
@@ -815,9 +818,11 @@ typedef struct WindowAgg
int partNumCols; /* number of columns in partition clause */
AttrNumber *partColIdx; /* their indexes in the target list */
Oid *partOperators; /* equality operators for partition columns */
+ Oid *partCollations; /* collations for partition columns */
int ordNumCols; /* number of columns in ordering clause */
AttrNumber *ordColIdx; /* their indexes in the target list */
Oid *ordOperators; /* equality operators for ordering columns */
+ Oid *ordCollations; /* collations for ordering columns */
int frameOptions; /* frame_clause options, see WindowDef */
Node *startOffset; /* expression for starting bound, if any */
Node *endOffset; /* expression for ending bound, if any */
@@ -839,6 +844,7 @@ typedef struct Unique
int numCols; /* number of columns to check for uniqueness */
AttrNumber *uniqColIdx; /* their indexes in the target list */
Oid *uniqOperators; /* equality operators to compare with */
+ Oid *uniqCollations; /* collations for equality comparisons */
} Unique;
/* ------------
@@ -913,6 +919,7 @@ typedef struct SetOp
* duplicate-ness */
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
+ Oid *dupCollations;
AttrNumber flagColIdx; /* where is the flag column, if any */
int firstFlag; /* flag value for first input relation */
long numGroups; /* estimated number of groups in input */
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 3bbdb5e2f74..b093a3c8ac2 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -53,7 +53,7 @@ extern bool is_projection_capable_plan(Plan *plan);
extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
extern Agg *make_agg(List *tlist, List *qual,
AggStrategy aggstrategy, AggSplit aggsplit,
- int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators,
+ int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
List *groupingSets, List *chain,
double dNumGroups, Plan *lefttree);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount);
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 58db79203bc..46d614f4fb6 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -32,6 +32,7 @@ extern bool tlist_same_collations(List *tlist, List *colCollations, bool junkOK)
extern void apply_tlist_labeling(List *dest_tlist, List *src_tlist);
extern Oid *extract_grouping_ops(List *groupClause);
+extern Oid *extract_grouping_collations(List *groupClause, List *tlist);
extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
extern bool grouping_is_sortable(List *groupClause);
extern bool grouping_is_hashable(List *groupClause);
diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h
index b1ae39ad635..683e1574eae 100644
--- a/src/include/partitioning/partbounds.h
+++ b/src/include/partitioning/partbounds.h
@@ -77,6 +77,7 @@ typedef struct PartitionBoundInfoData
extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b);
extern uint64 compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc,
+ Oid *partcollation,
Datum *values, bool *isnull);
extern List *get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 16b0b1d2dcc..b9a9ecb7cc3 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -90,6 +90,7 @@ extern Oid get_atttype(Oid relid, AttrNumber attnum);
extern void get_atttypetypmodcoll(Oid relid, AttrNumber attnum,
Oid *typid, int32 *typmod, Oid *collid);
extern char *get_collation_name(Oid colloid);
+extern bool get_collation_isdeterministic(Oid colloid);
extern char *get_constraint_name(Oid conoid);
extern char *get_language_name(Oid langoid, bool missing_ok);
extern Oid get_opclass_family(Oid opclass);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 606952afd7d..a342a625490 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -82,6 +82,7 @@ extern void cache_locale_time(void);
struct pg_locale_struct
{
char provider;
+ bool deterministic;
union
{
#ifdef HAVE_LOCALE_T