diff options
| | | |
|---|---|---|
| author | Tom Lane | 2001-02-13 00:02:36 +0000 |
| committer | Tom Lane | 2001-02-13 00:02:36 +0000 |
| commit | f7a839bc2ba3f15d48006fe931499d4d9cfb314f (patch) | |
| tree | 94bed4f73922d577c2466149fe703f94d6f77722 /src/backend/regex/regcomp.c | |
| parent | f4e4c7291ea691e558285fafbd6648ba85af5f26 (diff) | |
Clean up portability problems in regexp package: change all routine
definitions from K&R to ANSI C style, and fix broken assumption that
int and long are the same datatype. This repairs problems observed
on Alpha with regexps having between 32 and 63 states.
Diffstat (limited to 'src/backend/regex/regcomp.c')
-rw-r--r-- | src/backend/regex/regcomp.c | 459 |
1 file changed, 129 insertions, 330 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 3d9ff83de83..b45a3c52375 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -37,19 +37,11 @@ * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 */ -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; - -#endif /* LIBC_SCCS and not lint */ - #include "postgres.h" #include <sys/types.h> -#include <stdio.h> -#include <string.h> #include <ctype.h> #include <limits.h> -#include <stdlib.h> #include <assert.h> #include "regex/regex.h" @@ -78,64 +70,51 @@ struct parse sopno pend[NPAREN]; /* -> ) ([0] unused) */ }; -/* ========= begin header generated by ./mkh ========= */ -#ifdef __cplusplus -extern "C" -{ -#endif - -/* === regcomp.c === */ - static void p_ere(struct parse * p, int stop); - static void p_ere_exp(struct parse * p); - static void p_str(struct parse * p); - static void p_bre(struct parse * p, int end1, int end2); - static int p_simp_re(struct parse * p, int starordinary); - static int p_count(struct parse * p); - static void p_bracket(struct parse * p); - static void p_b_term(struct parse * p, cset *cs); - static void p_b_cclass(struct parse * p, cset *cs); - static void p_b_eclass(struct parse * p, cset *cs); - static pg_wchar p_b_symbol(struct parse * p); - static char p_b_coll_elem(struct parse * p, int endc); +static void p_ere(struct parse * p, int stop); +static void p_ere_exp(struct parse * p); +static void p_str(struct parse * p); +static void p_bre(struct parse * p, int end1, int end2); +static int p_simp_re(struct parse * p, int starordinary); +static int p_count(struct parse * p); +static void p_bracket(struct parse * p); +static void p_b_term(struct parse * p, cset *cs); +static void p_b_cclass(struct parse * p, cset *cs); +static void p_b_eclass(struct parse * p, cset *cs); +static pg_wchar p_b_symbol(struct parse * p); +static char p_b_coll_elem(struct parse * p, int endc); #ifdef MULTIBYTE - static 
unsigned char othercase(int ch); +static unsigned char othercase(int ch); #else - static char othercase(int ch); -#endif - static void bothcases(struct parse * p, int ch); - static void ordinary(struct parse * p, int ch); - static void nonnewline(struct parse * p); - static void repeat(struct parse * p, sopno start, int from, int to); - static int seterr(struct parse * p, int e); - static cset *allocset(struct parse * p); - static void freeset(struct parse * p, cset *cs); - static int freezeset(struct parse * p, cset *cs); - static int firstch(struct parse * p, cset *cs); - static int nch(struct parse * p, cset *cs); - static void mcadd(struct parse * p, cset *cs, char *cp); - static void mcinvert(struct parse * p, cset *cs); - static void mccase(struct parse * p, cset *cs); - static int isinsets(struct re_guts * g, int c); - static int samesets(struct re_guts * g, int c1, int c2); - static void categorize(struct parse * p, struct re_guts * g); - static sopno dupl(struct parse * p, sopno start, sopno finish); - static void doemit(struct parse * p, sop op, size_t opnd); - static void doinsert(struct parse * p, sop op, size_t opnd, sopno pos); - static void dofwd(struct parse * p, sopno pos, sop value); - static void enlarge(struct parse * p, sopno size); - static void stripsnug(struct parse * p, struct re_guts * g); - static void findmust(struct parse * p, struct re_guts * g); - static sopno pluscount(struct parse * p, struct re_guts * g); - static int pg_isdigit(int c); - static int pg_isalpha(int c); - static int pg_isupper(int c); - static int pg_islower(int c); - -#ifdef __cplusplus -} - +static char othercase(int ch); #endif -/* ========= end header generated by ./mkh ========= */ +static void bothcases(struct parse * p, int ch); +static void ordinary(struct parse * p, int ch); +static void nonnewline(struct parse * p); +static void repeat(struct parse * p, sopno start, int from, int to); +static int seterr(struct parse * p, int e); +static cset 
*allocset(struct parse * p); +static void freeset(struct parse * p, cset *cs); +static int freezeset(struct parse * p, cset *cs); +static int firstch(struct parse * p, cset *cs); +static int nch(struct parse * p, cset *cs); +static void mcadd(struct parse * p, cset *cs, char *cp); +static void mcinvert(struct parse * p, cset *cs); +static void mccase(struct parse * p, cset *cs); +static int isinsets(struct re_guts * g, int c); +static int samesets(struct re_guts * g, int c1, int c2); +static void categorize(struct parse * p, struct re_guts * g); +static sopno dupl(struct parse * p, sopno start, sopno finish); +static void doemit(struct parse * p, sop op, size_t opnd); +static void doinsert(struct parse * p, sop op, size_t opnd, sopno pos); +static void dofwd(struct parse * p, sopno pos, sop value); +static void enlarge(struct parse * p, sopno size); +static void stripsnug(struct parse * p, struct re_guts * g); +static void findmust(struct parse * p, struct re_guts * g); +static sopno pluscount(struct parse * p, struct re_guts * g); +static int pg_isdigit(int c); +static int pg_isalpha(int c); +static int pg_isupper(int c); +static int pg_islower(int c); static pg_wchar nuls[10]; /* place to point scanner in event of * error */ @@ -178,22 +157,10 @@ static int never = 0; /* for use in asserts; shuts lint up */ #endif /* - - regcomp - interface for parser and compilation - = extern int regcomp(regex_t *, const char *, int); - = #define REG_BASIC 0000 - = #define REG_EXTENDED 0001 - = #define REG_ICASE 0002 - = #define REG_NOSUB 0004 - = #define REG_NEWLINE 0010 - = #define REG_NOSPEC 0020 - = #define REG_PEND 0040 - = #define REG_DUMP 0200 + * regcomp - interface for parser and compilation */ int /* 0 success, otherwise REG_something */ -pg95_regcomp(preg, pattern, cflags) -regex_t *preg; -const char *pattern; -int cflags; +pg95_regcomp(regex_t *preg, const char *pattern, int cflags) { struct parse pa; struct re_guts *g; @@ -325,13 +292,11 @@ int cflags; } /* - - 
p_ere - ERE parser top level, concatenation and alternation - == static void p_ere(struct parse *p, int stop); + * p_ere - ERE parser top level, concatenation and alternation */ static void -p_ere(p, stop) -struct parse *p; -int stop; /* character this ERE should end at */ +p_ere(struct parse *p, + int stop) /* character this ERE should end at */ { char c; sopno prevback = 0; @@ -374,12 +339,10 @@ int stop; /* character this ERE should end at */ } /* - - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op - == static void p_ere_exp(struct parse *p); + * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op */ static void -p_ere_exp(p) -struct parse *p; +p_ere_exp(struct parse *p) { pg_wchar c; sopno pos; @@ -535,12 +498,10 @@ struct parse *p; } /* - - p_str - string (no metacharacters) "parser" - == static void p_str(struct parse *p); + * p_str - string (no metacharacters) "parser" */ static void -p_str(p) -struct parse *p; +p_str(struct parse *p) { REQUIRE(MORE(), REG_EMPTY); while (MORE()) @@ -548,9 +509,8 @@ struct parse *p; } /* - - p_bre - BRE parser top level, anchoring and concatenation - == static void p_bre(struct parse *p, int end1, \ - == int end2); + * p_bre - BRE parser top level, anchoring and concatenation + * * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -560,10 +520,9 @@ struct parse *p; * The amount of lookahead needed to avoid this kludge is excessive. */ static void -p_bre(p, end1, end2) -struct parse *p; -int end1; /* first terminating character */ -int end2; /* second terminating character */ +p_bre(struct parse *p, + int end1, /* first terminating character */ + int end2) /* second terminating character */ { sopno start = HERE(); int first = 1; /* first subexpression? 
*/ @@ -592,13 +551,11 @@ int end2; /* second terminating character */ } /* - - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(struct parse *p, int starordinary); + * p_simp_re - parse a simple RE, an atom possibly followed by a repetition */ static int /* was the simple RE an unbackslashed $? */ -p_simp_re(p, starordinary) -struct parse *p; -int starordinary; /* is a leading * an ordinary character? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? */ { int c; int count; @@ -731,12 +688,10 @@ int starordinary; /* is a leading * an ordinary character? */ } /* - - p_count - parse a repetition count - == static int p_count(struct parse *p); + * p_count - parse a repetition count */ static int /* the value */ -p_count(p) -struct parse *p; +p_count(struct parse *p) { int count = 0; int ndigits = 0; @@ -752,15 +707,13 @@ struct parse *p; } /* - - p_bracket - parse a bracketed character list - == static void p_bracket(struct parse *p); + * p_bracket - parse a bracketed character list * * Note a significant property of this code: if the allocset() did SETERROR, * no set operations are done. 
*/ static void -p_bracket(p) -struct parse *p; +p_bracket(struct parse *p) { cset *cs = allocset(p); int invert = 0; @@ -850,13 +803,10 @@ struct parse *p; } /* - - p_b_term - parse one term of a bracketed character list - == static void p_b_term(struct parse *p, cset *cs); + * p_b_term - parse one term of a bracketed character list */ static void -p_b_term(p, cs) -struct parse *p; -cset *cs; +p_b_term(struct parse *p, cset *cs) { pg_wchar c; pg_wchar start, @@ -925,13 +875,10 @@ cset *cs; } /* - - p_b_cclass - parse a character-class name and deal with it - == static void p_b_cclass(struct parse *p, cset *cs); + * p_b_cclass - parse a character-class name and deal with it */ static void -p_b_cclass(p, cs) -struct parse *p; -cset *cs; +p_b_cclass(struct parse *p, cset *cs) { pg_wchar *sp = p->next; struct cclass *cp; @@ -964,15 +911,12 @@ cset *cs; } /* - - p_b_eclass - parse an equivalence-class name and deal with it - == static void p_b_eclass(struct parse *p, cset *cs); + * p_b_eclass - parse an equivalence-class name and deal with it * * This implementation is incomplete. 
xxx */ static void -p_b_eclass(p, cs) -struct parse *p; -cset *cs; +p_b_eclass(struct parse *p, cset *cs) { char c; @@ -981,12 +925,10 @@ cset *cs; } /* - - p_b_symbol - parse a character or [..]ed multicharacter collating symbol - == static char p_b_symbol(struct parse *p); + * p_b_symbol - parse a character or [..]ed multicharacter collating symbol */ static pg_wchar /* value of symbol */ -p_b_symbol(p) -struct parse *p; +p_b_symbol(struct parse *p) { pg_wchar value; @@ -1001,13 +943,10 @@ struct parse *p; } /* - - p_b_coll_elem - parse a collating-element name and look it up - == static char p_b_coll_elem(struct parse *p, int endc); + * p_b_coll_elem - parse a collating-element name and look it up */ static char /* value of collating element */ -p_b_coll_elem(p, endc) -struct parse *p; -int endc; /* name ended by endc,']' */ +p_b_coll_elem(struct parse *p, int endc) { pg_wchar *sp = p->next; struct cname *cp; @@ -1035,16 +974,14 @@ int endc; /* name ended by endc,']' */ } /* - - othercase - return the case counterpart of an alphabetic - == static char othercase(int ch); + * othercase - return the case counterpart of an alphabetic */ #ifdef MULTIBYTE static unsigned char /* if no counterpart, return ch */ #else static char /* if no counterpart, return ch */ #endif -othercase(ch) -int ch; +othercase(int ch) { assert(pg_isalpha(ch)); if (pg_isupper(ch)) @@ -1069,15 +1006,12 @@ int ch; } /* - - bothcases - emit a dualcase version of a two-case character - == static void bothcases(struct parse *p, int ch); + * bothcases - emit a dualcase version of a two-case character * * Boy, is this implementation ever a kludge... 
*/ static void -bothcases(p, ch) -struct parse *p; -int ch; +bothcases(struct parse *p, int ch) { pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; @@ -1096,13 +1030,10 @@ int ch; } /* - - ordinary - emit an ordinary character - == static void ordinary(struct parse *p, int ch); + * ordinary - emit an ordinary character */ static void -ordinary(p, ch) -struct parse *p; -int ch; +ordinary(struct parse *p, int ch) { cat_t *cap = p->g->categories; @@ -1121,14 +1052,12 @@ int ch; } /* - - nonnewline - emit REG_NEWLINE version of OANY - == static void nonnewline(struct parse *p); + * nonnewline - emit REG_NEWLINE version of OANY * * Boy, is this implementation ever a kludge... */ static void -nonnewline(p) -struct parse *p; +nonnewline(struct parse *p) { pg_wchar *oldnext = p->next; pg_wchar *oldend = p->end; @@ -1147,15 +1076,13 @@ struct parse *p; } /* - - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(struct parse *p, sopno start, int from, int to); + * repeat - generate code for a bounded repetition, recursively if needed */ static void -repeat(p, start, from, to) -struct parse *p; -sopno start; /* operand from here to end of strip */ -int from; /* repeated from this number */ -int to; /* to this number of times (maybe +repeat(struct parse *p, + sopno start, /* operand from here to end of strip */ + int from, /* repeated from this number */ + int to) /* to this number of times (maybe * INFINITY) */ { sopno finish = HERE(); @@ -1222,13 +1149,10 @@ int to; /* to this number of times (maybe } /* - - seterr - set an error condition - == static int seterr(struct parse *p, int e); + * seterr - set an error condition */ static int /* useless but makes type checking happy */ -seterr(p, e) -struct parse *p; -int e; +seterr(struct parse *p, int e) { if (p->error == 0) /* keep earliest error condition */ p->error = e; @@ -1238,12 +1162,10 @@ int e; } /* - - allocset - allocate a set of characters for [] - == static cset 
*allocset(struct parse *p); + * allocset - allocate a set of characters for [] */ static cset * -allocset(p) -struct parse *p; +allocset(struct parse *p) { int no = p->g->ncsets++; size_t nc; @@ -1296,13 +1218,10 @@ struct parse *p; } /* - - freeset - free a now-unused set - == static void freeset(struct parse *p, cset *cs); + * freeset - free a now-unused set */ static void -freeset(p, cs) -struct parse *p; -cset *cs; +freeset(struct parse *p, cset *cs) { int i; cset *top = &p->g->sets[p->g->ncsets]; @@ -1315,8 +1234,7 @@ cset *cs; } /* - - freezeset - final processing on a set of characters - == static int freezeset(struct parse *p, cset *cs); + * freezeset - final processing on a set of characters * * The main task here is merging identical sets. This is usually a waste * of time (although the hash code minimizes the overhead), but can win @@ -1325,9 +1243,7 @@ cset *cs; * the same value! */ static int /* set number */ -freezeset(p, cs) -struct parse *p; -cset *cs; +freezeset(struct parse *p, cset *cs) { uch h = cs->hash; int i; @@ -1357,13 +1273,10 @@ cset *cs; } /* - - firstch - return first character in a set (which must have at least one) - == static int firstch(struct parse *p, cset *cs); + * firstch - return first character in a set (which must have at least one) */ static int /* character; there is no "none" value */ -firstch(p, cs) -struct parse *p; -cset *cs; +firstch(struct parse *p, cset *cs) { int i; size_t css = (size_t) p->g->csetsize; @@ -1376,13 +1289,10 @@ cset *cs; } /* - - nch - number of characters in a set - == static int nch(struct parse *p, cset *cs); + * nch - number of characters in a set */ static int -nch(p, cs) -struct parse *p; -cset *cs; +nch(struct parse *p, cset *cs) { int i; size_t css = (size_t) p->g->csetsize; @@ -1395,15 +1305,10 @@ cset *cs; } /* - - mcadd - add a collating element to a cset - == static void mcadd(struct parse *p, cset *cs, \ - == char *cp); + * mcadd - add a collating element to a cset */ static void 
-mcadd(p, cs, cp) -struct parse *p; -cset *cs; -char *cp; +mcadd(struct parse *p, cset *cs, char *cp) { size_t oldend = cs->smultis; @@ -1423,106 +1328,34 @@ char *cp; } /* - - mcsub - subtract a collating element from a cset - == static void mcsub(cset *cs, char *cp); - */ -/* -static void -mcsub(cs, cp) -cset *cs; -char *cp; -{ - char *fp = mcfind(cs, cp); - size_t len = strlen(fp); - - assert(fp != NULL); - memmove(fp, fp + len + 1, - cs->smultis - (fp + len + 1 - cs->multis)); - cs->smultis -= len; - - if (cs->smultis == 0) { - free(cs->multis); - cs->multis = NULL; - return; - } - - cs->multis = realloc(cs->multis, cs->smultis); - assert(cs->multis != NULL); -} -*/ - -/* - - mcin - is a collating element in a cset? - == static int mcin(cset *cs, char *cp); - */ -/* -static int -mcin(cs, cp) -cset *cs; -char *cp; -{ - return(mcfind(cs, cp) != NULL); -} -*/ - -/* - - mcfind - find a collating element in a cset - == static char *mcfind(cset *cs, char *cp); - */ -/* -static char * -mcfind(cs, cp) -cset *cs; -char *cp; -{ - char *p; - - if (cs->multis == NULL) - return(NULL); - for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) - if (strcmp(cp, p) == 0) - return(p); - return(NULL); -} -*/ -/* - - mcinvert - invert the list of collating elements in a cset - == static void mcinvert(struct parse *p, cset *cs); + * mcinvert - invert the list of collating elements in a cset * * This would have to know the set of possibilities. Implementation * is deferred. */ static void -mcinvert(p, cs) -struct parse *p; -cset *cs; +mcinvert(struct parse *p, cset *cs) { assert(cs->multis == NULL); /* xxx */ } /* - - mccase - add case counterparts of the list of collating elements in a cset - == static void mccase(struct parse *p, cset *cs); + * mccase - add case counterparts of the list of collating elements in a cset * * This would have to know the set of possibilities. Implementation * is deferred. 
*/ static void -mccase(p, cs) -struct parse *p; -cset *cs; +mccase(struct parse *p, cset *cs) { assert(cs->multis == NULL); /* xxx */ } /* - - isinsets - is this character in any sets? - == static int isinsets(struct re_guts *g, int c); + * isinsets - is this character in any sets? */ static int /* predicate */ -isinsets(g, c) -struct re_guts *g; -int c; +isinsets(struct re_guts *g, int c) { uch *col; int i; @@ -1536,14 +1369,10 @@ int c; } /* - - samesets - are these two characters in exactly the same sets? - == static int samesets(struct re_guts *g, int c1, int c2); + * samesets - are these two characters in exactly the same sets? */ static int /* predicate */ -samesets(g, c1, c2) -struct re_guts *g; -int c1; -int c2; +samesets(struct re_guts *g, int c1, int c2) { uch *col; int i; @@ -1558,13 +1387,10 @@ int c2; } /* - - categorize - sort out character categories - == static void categorize(struct parse *p, struct re_guts *g); + * categorize - sort out character categories */ static void -categorize(p, g) -struct parse *p; -struct re_guts *g; +categorize(struct parse *p, struct re_guts *g) { cat_t *cats = g->categories; int c; @@ -1587,14 +1413,12 @@ struct re_guts *g; } /* - - dupl - emit a duplicate of a bunch of sops - == static sopno dupl(struct parse *p, sopno start, sopno finish); + * dupl - emit a duplicate of a bunch of sops */ static sopno /* start of duplicate */ -dupl(p, start, finish) -struct parse *p; -sopno start; /* from here */ -sopno finish; /* to this less one */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ { sopno ret = HERE(); sopno len = finish - start; @@ -1611,18 +1435,14 @@ sopno finish; /* to this less one */ } /* - - doemit - emit a strip operator - == static void doemit(struct parse *p, sop op, size_t opnd); + * doemit - emit a strip operator * * It might seem better to implement this as a macro with a function as * hard-case backup, but it's just too big and messy unless there are * some 
changes to the data structures. Maybe later. */ static void -doemit(p, op, opnd) -struct parse *p; -sop op; -size_t opnd; +doemit(struct parse *p, sop op, size_t opnd) { /* avoid making error situations worse */ if (p->error != 0) @@ -1641,15 +1461,10 @@ size_t opnd; } /* - - doinsert - insert a sop into the strip - == static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos); + * doinsert - insert a sop into the strip */ static void -doinsert(p, op, opnd, pos) -struct parse *p; -sop op; -size_t opnd; -sopno pos; +doinsert(struct parse *p, sop op, size_t opnd, sopno pos) { sopno sn; sop s; @@ -1680,14 +1495,10 @@ sopno pos; } /* - - dofwd - complete a forward reference - == static void dofwd(struct parse *p, sopno pos, sop value); + * dofwd - complete a forward reference */ static void -dofwd(p, pos, value) -struct parse *p; -sopno pos; -sop value; +dofwd(struct parse *p, sopno pos, sop value) { /* avoid making error situations worse */ if (p->error != 0) @@ -1698,13 +1509,10 @@ sop value; } /* - - enlarge - enlarge the strip - == static void enlarge(struct parse *p, sopno size); + * enlarge - enlarge the strip */ static void -enlarge(p, size) -struct parse *p; -sopno size; +enlarge(struct parse *p, sopno size) { sop *sp; @@ -1722,13 +1530,10 @@ sopno size; } /* - - stripsnug - compact the strip - == static void stripsnug(struct parse *p, struct re_guts *g); + * stripsnug - compact the strip */ static void -stripsnug(p, g) -struct parse *p; -struct re_guts *g; +stripsnug(struct parse *p, struct re_guts *g) { g->nstates = p->slen; g->strip = (sop *) realloc((char *) p->strip, p->slen * sizeof(sop)); @@ -1740,8 +1545,7 @@ struct re_guts *g; } /* - - findmust - fill in must and mlen with longest mandatory literal string - == static void findmust(struct parse *p, struct re_guts *g); + * findmust - fill in must and mlen with longest mandatory literal string * * This algorithm could do fancy things like analyzing the operands of | * for common subsequences. 
Someday. This code is simple and finds most @@ -1750,9 +1554,7 @@ struct re_guts *g; * Note that must and mlen got initialized during setup. */ static void -findmust(p, g) -struct parse *p; -struct re_guts *g; +findmust(struct parse *p, struct re_guts *g) { sop *scan; sop *start = 0; @@ -1838,13 +1640,10 @@ struct re_guts *g; } /* - - pluscount - count + nesting - == static sopno pluscount(struct parse *p, struct re_guts *g); + * pluscount - count + nesting */ static sopno /* nesting depth */ -pluscount(p, g) -struct parse *p; -struct re_guts *g; +pluscount(struct parse *p, struct re_guts *g) { sop *scan; sop s; @@ -1876,7 +1675,7 @@ struct re_guts *g; } /* - * some ctype functions with none-ascii-char guard + * some ctype functions with non-ascii-char guard */ static int pg_isdigit(int c) |