summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorTeodor Sigaev2016-04-07 15:44:18 +0000
committerTeodor Sigaev2016-04-07 15:44:18 +0000
commitbb140506df605fab58f48926ee1db1f80bdafb59 (patch)
tree581f9aeb71e3596000af3b4904e0c62a372d77b3 /src/include
parent015e88942aa50f0d419ddac00e63bb06d6e62e86 (diff)
Phrase full text search.
Patch introduces new text search operator (<-> or <DISTANCE>) into tsquery. On-disk and binary in/out format of tsquery are backward compatible. It has two side effect: - change order for tsquery, so, users, who has a btree index over tsquery, should reindex it - less number of parenthesis in tsquery output, and tsquery becomes more readable Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov Reviewers: Alexander Korotkov, Artur Zakirov
Diffstat (limited to 'src/include')
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_operator.h3
-rw-r--r--src/include/catalog/pg_proc.h7
-rw-r--r--src/include/tsearch/ts_public.h22
-rw-r--r--src/include/tsearch/ts_type.h30
-rw-r--r--src/include/tsearch/ts_utils.h15
6 files changed, 61 insertions, 18 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 0edc6cbafe7..6d254ba133c 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201604062
+#define CATALOG_VERSION_NO 201604071
#endif
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index b3daff28e3f..a5e4a02ebc8 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 ( "&&" PGNSP PGUID b f f 3615 3615 3615 0 0 tsque
DESCR("AND-concatenate");
DATA(insert OID = 3681 ( "||" PGNSP PGUID b f f 3615 3615 3615 0 0 tsquery_or - - ));
DESCR("OR-concatenate");
+/* <-> operation calls tsquery_phrase, but function is polymorphic. So, point to OID of the tsquery_phrase */
+DATA(insert OID = 5005 ( "<->" PGNSP PGUID b f f 3615 3615 3615 0 0 5003 - - ));
+DESCR("phrase-concatenate");
DATA(insert OID = 3682 ( "!!" PGNSP PGUID l f f 0 3615 3615 0 0 tsquery_not - - ));
DESCR("NOT tsquery");
DATA(insert OID = 3693 ( "@>" PGNSP PGUID b f f 3615 3615 16 3694 0 tsq_mcontains contsel contjoinsel ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index d7dbc739280..c351594be46 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4607,6 +4607,9 @@ DESCR("less-equal-greater");
DATA(insert OID = 3669 ( tsquery_and PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ ));
DATA(insert OID = 3670 ( tsquery_or PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ ));
+DATA(insert OID = 5003 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_phrase _null_ _null_ _null_ ));
+DATA(insert OID = 5004 ( tsquery_phrase PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3615 "3615 3615 23" _null_ _null_ _null_ _null_ _null_ tsquery_phrase_distance _null_ _null_ _null_ ));
+DESCR("phrase-concatenate with distance");
DATA(insert OID = 3671 ( tsquery_not PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ ));
DATA(insert OID = 3691 ( tsq_mcontains PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ ));
@@ -4726,12 +4729,16 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2
DESCR("make tsquery");
DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
DESCR("transform to tsvector");
DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
DESCR("make tsquery");
DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 3752 ( tsvector_update_trigger PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
DESCR("trigger for automatic update of tsvector column");
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 6f7a891ae84..9364eee438c 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -34,16 +34,17 @@ typedef struct
*/
typedef struct
{
- uint32 selected:1,
- in:1,
- replace:1,
- repeated:1,
- skip:1,
- unused:3,
- type:8,
- len:16;
- char *word;
- QueryOperand *item;
+ uint32 selected: 1,
+ in: 1,
+ replace: 1,
+ repeated: 1,
+ skip: 1,
+ unused: 3,
+ type: 8,
+ len: 16;
+ WordEntryPos pos;
+ char *word;
+ QueryOperand *item;
} HeadlineWordEntry;
typedef struct
@@ -51,6 +52,7 @@ typedef struct
HeadlineWordEntry *words;
int32 lenwords;
int32 curwords;
+ int32 vectorpos; /* positions a-la tsvector */
char *startsel;
char *stopsel;
char *fragdelim;
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index bc99524dc08..5f4e5961939 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -49,6 +49,8 @@ typedef struct
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
+extern int comparePos(const void *a, const void *b);
+
/*
* Equivalent to
* typedef struct {
@@ -213,15 +215,33 @@ typedef struct
} QueryOperand;
-/* Legal values for QueryOperator.operator */
-#define OP_NOT 1
-#define OP_AND 2
-#define OP_OR 3
+/*
+ * Legal values for QueryOperator.operator.
+ * They should be ordered by priority! We assume that phrase
+ * has highest priority, but this agreement is only
+ * for query transformation! That's need to simplify
+ * algorithm of query transformation.
+ */
+#define OP_OR 1
+#define OP_AND 2
+#define OP_NOT 3
+#define OP_PHRASE 4
+#define OP_NOT_PHRASE 5 /*
+ * OP_PHRASE negation operations must have greater
+ * priority in order to force infix() to surround
+ * the whole OP_PHRASE expression with parentheses.
+ */
+
+#define TOP_PRIORITY 6 /* highest priority for val nodes */
+
+#define OP_PRIORITY(x) (x)
+#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
typedef struct
{
QueryItemType type;
int8 oper; /* see above */
+ int16 distance; /* distance between agrs for OP_PHRASE */
uint32 left; /* pointer to left operand. Right operand is
* item + 1, left operand is placed
* item+item->left */
@@ -304,6 +324,8 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
extern Datum tsquery_and(PG_FUNCTION_ARGS);
extern Datum tsquery_or(PG_FUNCTION_ARGS);
+extern Datum tsquery_phrase(PG_FUNCTION_ARGS);
+extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS);
extern Datum tsquery_not(PG_FUNCTION_ARGS);
extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 88533a64235..855bbfecd64 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf,
extern void pushValue(TSQueryParserState state,
char *strval, int lenval, int16 weight, bool prefix);
extern void pushStop(TSQueryParserState state);
-extern void pushOperator(TSQueryParserState state, int8 oper);
+extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
/*
* parse plain text and lexize words
@@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs);
/*
* Common check function for tsvector @@ tsquery
*/
+typedef struct ExecPhraseData
+{
+ int npos;
+ bool allocated;
+ WordEntryPos *pos;
+} ExecPhraseData;
+
extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
- bool (*chkcond) (void *checkval, QueryOperand *val));
+ bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
extern bool tsquery_requires_match(QueryItem *curitem);
/*
@@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum to_tsquery(PG_FUNCTION_ARGS);
extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS);
extern Datum plainto_tsquery(PG_FUNCTION_ARGS);
+extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS);
+extern Datum phraseto_tsquery(PG_FUNCTION_ARGS);
/*
* GiST support function
@@ -169,7 +178,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS);
* TSQuery Utilities
*/
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
-extern QueryItem *clean_fakeval(QueryItem *ptr, int32 *len);
+extern TSQuery cleanup_fakeval_and_phrase(TSQuery in);
typedef struct QTNode
{