diff options
| author | Teodor Sigaev | 2016-04-07 15:44:18 +0000 |
|---|---|---|
| committer | Teodor Sigaev | 2016-04-07 15:44:18 +0000 |
| commit | bb140506df605fab58f48926ee1db1f80bdafb59 (patch) | |
| tree | 581f9aeb71e3596000af3b4904e0c62a372d77b3 /src/include | |
| parent | 015e88942aa50f0d419ddac00e63bb06d6e62e86 (diff) | |
Phrase full text search.
This patch introduces a new text search operator (<-> or <DISTANCE>) into tsquery.
The on-disk and binary in/out formats of tsquery are backward compatible.
It has two side effects:
- the ordering of tsquery values changes, so users who have a btree index over tsquery
should reindex it
- fewer parentheses appear in tsquery output, so tsquery becomes more
readable
Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov
Reviewers: Alexander Korotkov, Artur Zakirov
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
| -rw-r--r-- | src/include/catalog/pg_operator.h | 3 | ||||
| -rw-r--r-- | src/include/catalog/pg_proc.h | 7 | ||||
| -rw-r--r-- | src/include/tsearch/ts_public.h | 22 | ||||
| -rw-r--r-- | src/include/tsearch/ts_type.h | 30 | ||||
| -rw-r--r-- | src/include/tsearch/ts_utils.h | 15 |
6 files changed, 61 insertions, 18 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 0edc6cbafe7..6d254ba133c 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201604062 +#define CATALOG_VERSION_NO 201604071 #endif diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index b3daff28e3f..a5e4a02ebc8 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 ( "&&" PGNSP PGUID b f f 3615 3615 3615 0 0 tsque DESCR("AND-concatenate"); DATA(insert OID = 3681 ( "||" PGNSP PGUID b f f 3615 3615 3615 0 0 tsquery_or - - )); DESCR("OR-concatenate"); +/* <-> operation calls tsquery_phrase, but function is polymorphic. So, point to OID of the tsquery_phrase */ +DATA(insert OID = 5005 ( "<->" PGNSP PGUID b f f 3615 3615 3615 0 0 5003 - - )); +DESCR("phrase-concatenate"); DATA(insert OID = 3682 ( "!!" 
PGNSP PGUID l f f 0 3615 3615 0 0 tsquery_not - - )); DESCR("NOT tsquery"); DATA(insert OID = 3693 ( "@>" PGNSP PGUID b f f 3615 3615 16 3694 0 tsq_mcontains contsel contjoinsel )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index d7dbc739280..c351594be46 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4607,6 +4607,9 @@ DESCR("less-equal-greater"); DATA(insert OID = 3669 ( tsquery_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ )); DATA(insert OID = 3670 ( tsquery_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ )); +DATA(insert OID = 5003 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_phrase _null_ _null_ _null_ )); +DATA(insert OID = 5004 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3615 "3615 3615 23" _null_ _null_ _null_ _null_ _null_ tsquery_phrase_distance _null_ _null_ _null_ )); +DESCR("phrase-concatenate with distance"); DATA(insert OID = 3671 ( tsquery_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ )); DATA(insert OID = 3691 ( tsq_mcontains PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ )); @@ -4726,12 +4729,16 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 DESCR("make tsquery"); DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid 
_null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ )); DESCR("transform to tsvector"); DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ )); DESCR("make tsquery"); DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ )); DESCR("transform to tsquery"); +DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ )); +DESCR("transform to tsquery"); DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ )); DESCR("trigger for automatic update of tsvector column"); diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 6f7a891ae84..9364eee438c 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -34,16 +34,17 @@ typedef struct */ typedef struct { - uint32 selected:1, - in:1, - replace:1, - repeated:1, - skip:1, - unused:3, - type:8, - len:16; - char *word; - QueryOperand *item; + uint32 selected: 1, + in: 1, + replace: 1, + repeated: 1, + skip: 1, + unused: 3, + type: 8, + len: 16; + WordEntryPos pos; + char *word; + QueryOperand *item; } HeadlineWordEntry; typedef struct @@ -51,6 +52,7 @@ typedef struct HeadlineWordEntry *words; int32 lenwords; int32 curwords; + int32 vectorpos; /* positions a-la tsvector */ char *startsel; char *stopsel; char *fragdelim; diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h index bc99524dc08..5f4e5961939 100644 --- 
a/src/include/tsearch/ts_type.h +++ b/src/include/tsearch/ts_type.h @@ -49,6 +49,8 @@ typedef struct #define MAXSTRLEN ( (1<<11) - 1) #define MAXSTRPOS ( (1<<20) - 1) +extern int comparePos(const void *a, const void *b); + /* * Equivalent to * typedef struct { @@ -213,15 +215,33 @@ typedef struct } QueryOperand; -/* Legal values for QueryOperator.operator */ -#define OP_NOT 1 -#define OP_AND 2 -#define OP_OR 3 +/* + * Legal values for QueryOperator.operator. + * They should be ordered by priority! We assume that phrase + * has highest priority, but this agreement is only + * for query transformation! That's need to simplify + * algorithm of query transformation. + */ +#define OP_OR 1 +#define OP_AND 2 +#define OP_NOT 3 +#define OP_PHRASE 4 +#define OP_NOT_PHRASE 5 /* + * OP_PHRASE negation operations must have greater + * priority in order to force infix() to surround + * the whole OP_PHRASE expression with parentheses. + */ + +#define TOP_PRIORITY 6 /* highest priority for val nodes */ + +#define OP_PRIORITY(x) (x) +#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper) typedef struct { QueryItemType type; int8 oper; /* see above */ + int16 distance; /* distance between agrs for OP_PHRASE */ uint32 left; /* pointer to left operand. 
Right operand is * item + 1, left operand is placed * item+item->left */ @@ -304,6 +324,8 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS); extern Datum tsquery_and(PG_FUNCTION_ARGS); extern Datum tsquery_or(PG_FUNCTION_ARGS); +extern Datum tsquery_phrase(PG_FUNCTION_ARGS); +extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS); extern Datum tsquery_not(PG_FUNCTION_ARGS); extern Datum tsquery_rewrite(PG_FUNCTION_ARGS); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index 88533a64235..855bbfecd64 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf, extern void pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix); extern void pushStop(TSQueryParserState state); -extern void pushOperator(TSQueryParserState state, int8 oper); +extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance); /* * parse plain text and lexize words @@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs); /* * Common check function for tsvector @@ tsquery */ +typedef struct ExecPhraseData +{ + int npos; + bool allocated; + WordEntryPos *pos; +} ExecPhraseData; + extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, - bool (*chkcond) (void *checkval, QueryOperand *val)); + bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *)); extern bool tsquery_requires_match(QueryItem *curitem); /* @@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS); extern Datum to_tsquery(PG_FUNCTION_ARGS); extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS); extern Datum plainto_tsquery(PG_FUNCTION_ARGS); +extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS); +extern Datum phraseto_tsquery(PG_FUNCTION_ARGS); /* * GiST support function @@ -169,7 +178,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS); * TSQuery Utilities */ extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len); 
-extern QueryItem *clean_fakeval(QueryItem *ptr, int32 *len); +extern TSQuery cleanup_fakeval_and_phrase(TSQuery in); typedef struct QTNode { |
