diff options
author | Teodor Sigaev | 2016-03-16 15:59:21 +0000 |
---|---|---|
committer | Teodor Sigaev | 2016-03-16 15:59:21 +0000 |
commit | f576b17cd6ba653bdace1f0da9a3b57f4984e460 (patch) | |
tree | db0272738b722363da7c86b0e3851cc541417b7c /contrib/pg_trgm/trgm.h | |
parent | 1c4f001b79878deb9475e5b32ff4ef87773c9f23 (diff) |
Add word_similarity to pg_trgm contrib module.
The patch introduces the concept of similarity between a string and just a
single word of another string.
The extension version is not changed because 1.2 was already introduced in the
9.6 release cycle, so that version has not yet been publicly released.
Author: Alexander Korotkov, Artur Zakirov
Diffstat (limited to 'contrib/pg_trgm/trgm.h')
-rw-r--r-- | contrib/pg_trgm/trgm.h | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h index 046cf58e134..8cd88e763c1 100644 --- a/contrib/pg_trgm/trgm.h +++ b/contrib/pg_trgm/trgm.h @@ -26,13 +26,14 @@ #define DIVUNION /* operator strategy numbers */ -#define SimilarityStrategyNumber 1 -#define DistanceStrategyNumber 2 -#define LikeStrategyNumber 3 -#define ILikeStrategyNumber 4 -#define RegExpStrategyNumber 5 -#define RegExpICaseStrategyNumber 6 - +#define SimilarityStrategyNumber 1 +#define DistanceStrategyNumber 2 +#define LikeStrategyNumber 3 +#define ILikeStrategyNumber 4 +#define RegExpStrategyNumber 5 +#define RegExpICaseStrategyNumber 6 +#define WordSimilarityStrategyNumber 7 +#define WordDistanceStrategyNumber 8 typedef char trgm[3]; @@ -103,15 +104,28 @@ typedef char *BITVECP; #define GETARR(x) ( (trgm*)( (char*)x+TRGMHDRSIZE ) ) #define ARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) ) +/* + * If DIVUNION is defined then similarity formula is: + * count / (len1 + len2 - count) + * else if DIVUNION is not defined then similarity formula is: + * count / max(len1, len2) + */ +#ifdef DIVUNION +#define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) ((len1) + (len2) - (count))) +#else +#define CALCSML(count, len1, len2) ((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2))) +#endif + typedef struct TrgmPackedGraph TrgmPackedGraph; extern double similarity_threshold; +extern double word_similarity_threshold; extern uint32 trgm2int(trgm *ptr); extern void compact_trigram(trgm *tptr, char *str, int bytelen); extern TRGM *generate_trgm(char *str, int slen); extern TRGM *generate_wildcard_trgm(const char *str, int slen); -extern float4 cnt_sml(TRGM *trg1, TRGM *trg2); +extern float4 cnt_sml(TRGM *trg1, TRGM *trg2, bool inexact); extern bool trgm_contained_by(TRGM *trg1, TRGM *trg2); extern bool *trgm_presence_map(TRGM *query, TRGM *key); extern TRGM *createTrgmNFA(text *text_re, Oid collation, |