diff options
author | Marc G. Fournier | 1998-03-15 07:39:04 +0000 |
---|---|---|
committer | Marc G. Fournier | 1998-03-15 07:39:04 +0000 |
commit | 661ecf3c48e16a9add216287eb969d7615e47968 (patch) | |
tree | 91b54d5905aa2e22bd0ae9ea8c6b0f3cab75d3f4 /src/backend | |
parent | 31a925c4d07675bc098a742ee9ca642ec79a40ee (diff) |
From: [email protected]
Included are patches intended for allowing PostgreSQL to handle
multi-byte character sets such as EUC (Extended Unix Code), Unicode and
Mule internal code. With the MB patch you can use multi-byte character
sets in regexp and LIKE. The encoding system is chosen at
compile time.
To enable the MB extension, you need to define a variable "MB" in
Makefile.global or in Makefile.custom. For further information, please
take a look at README.mb under the doc directory.
(Note that, unlike the "jp patch", I no longer use a modified GNU regexp.
Instead, I changed Henry Spencer's regexp that comes with PostgreSQL.)
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/regex/Makefile | 6 | ||||
-rw-r--r-- | src/backend/regex/engine.c | 170 | ||||
-rw-r--r-- | src/backend/regex/regcomp.c | 178 | ||||
-rw-r--r-- | src/backend/regex/regerror.c | 5 | ||||
-rw-r--r-- | src/backend/regex/regexec.c | 25 | ||||
-rw-r--r-- | src/backend/regex/regfree.c | 6 | ||||
-rw-r--r-- | src/backend/regex/utftest.c | 33 | ||||
-rw-r--r-- | src/backend/regex/utils.c | 348 | ||||
-rw-r--r-- | src/backend/regex/wstrcmp.c | 48 | ||||
-rw-r--r-- | src/backend/regex/wstrncmp.c | 83 | ||||
-rw-r--r-- | src/backend/utils/adt/Makefile | 5 | ||||
-rw-r--r-- | src/backend/utils/adt/like.c | 29 |
12 files changed, 819 insertions, 117 deletions
diff --git a/src/backend/regex/Makefile b/src/backend/regex/Makefile index f7ef534bd3d..6f080cfcb51 100644 --- a/src/backend/regex/Makefile +++ b/src/backend/regex/Makefile @@ -4,7 +4,7 @@ # Makefile for regex # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/regex/Makefile,v 1.4 1997/12/20 00:26:58 scrappy Exp $ +# $Header: /cvsroot/pgsql/src/backend/regex/Makefile,v 1.5 1998/03/15 07:38:14 scrappy Exp $ # #------------------------------------------------------------------------- @@ -17,6 +17,10 @@ CFLAGS+=$(INCLUDE_OPT) CFLAGS+=-DPOSIX_MISTAKE OBJS = regcomp.o regerror.o regexec.o regfree.o +ifdef MB +OBJS += utils.o wstrcmp.o wstrncmp.o +CFLAGS += -DMB=$(MB) +endif all: SUBSYS.o diff --git a/src/backend/regex/engine.c b/src/backend/regex/engine.c index 4801361f90f..1964f2a0248 100644 --- a/src/backend/regex/engine.c +++ b/src/backend/regex/engine.c @@ -73,11 +73,11 @@ struct match struct re_guts *g; int eflags; regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - char *offp; /* offsets work from here */ - char *beginp; /* start of string -- virtual NUL precedes */ - char *endp; /* end of string -- virtual NUL here */ - char *coldp; /* can be no match starting before here */ - char **lastpos; /* [nplus+1] */ + pg_wchar *offp; /* offsets work from here */ + pg_wchar *beginp; /* start of string -- virtual NUL precedes */ + pg_wchar *endp; /* end of string -- virtual NUL here */ + pg_wchar *coldp; /* can be no match starting before here */ + pg_wchar **lastpos; /* [nplus+1] */ STATEVARS; states st; /* current states */ states fresh; /* states for a fresh start */ @@ -93,19 +93,19 @@ extern "C" /* === engine.c === */ static int - matcher(struct re_guts * g, char *string, size_t nmatch, + matcher(struct re_guts * g, pg_wchar *string, size_t nmatch, regmatch_t pmatch[], int eflags); - static char * - dissect(struct match * m, char *start, char *stop, + static pg_wchar * + dissect(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno 
stopst); - static char * - backref(struct match * m, char *start, char *stop, + static pg_wchar * + backref(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst, sopno lev); - static char * - fast(struct match * m, char *start, char *stop, + static pg_wchar * + fast(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst); - static char * - slow(struct match * m, char *start, char *stop, sopno startst, sopno stopst); + static pg_wchar * + slow(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst); static states step(struct re_guts * g, sopno start, sopno stop, states bef, int ch, states aft); @@ -116,20 +116,35 @@ extern "C" #define BOW (BOL+4) #define EOW (BOL+5) #define CODEMAX (BOL+5) /* highest code used */ -#define NONCHAR(c) ((c) > CHAR_MAX) -#define NNONCHAR (CODEMAX-CHAR_MAX) + +#ifdef MB +# if MB == MULE_INTERNAL +# define NONCHAR(c) ((c) > 16777216) /* 16777216 == 2^24 == 3 bytes */ +# define NNONCHAR (CODEMAX-16777216) +# elif MB == EUC_JP || MB == EUC_CN || MB == EUC_KR || MB == EUC_TW +# define NONCHAR(c) ((c) > USHRT_MAX) +# define NNONCHAR (CODEMAX-USHRT_MAX) +# elif MB == UNICODE +# define NONCHAR(c) ((c) > USHRT_MAX) +# define NNONCHAR (CODEMAX-USHRT_MAX) +# endif +#else +# define NONCHAR(c) ((c) > CHAR_MAX) +# define NNONCHAR (CODEMAX-CHAR_MAX) +#endif + #ifdef REDEBUG static void - print(struct match * m, char *caption, states st, int ch, FILE *d); + print(struct match * m, pg_wchar *caption, states st, int ch, FILE *d); #endif #ifdef REDEBUG static void - at(struct match * m, char *title, char *start, char *stop, + at(struct match * m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst); #endif #ifdef REDEBUG - static char * - pchar(int ch); + static pg_wchar * + p_char(int ch); #endif #ifdef __cplusplus @@ -150,26 +165,26 @@ extern "C" /* - matcher - the actual matching engine - == static int matcher(struct re_guts *g, char *string, \ + == 
static int matcher(struct re_guts *g, pg_wchar *string, \ == size_t nmatch, regmatch_t pmatch[], int eflags); */ static int /* 0 success, REG_NOMATCH failure */ matcher(g, string, nmatch, pmatch, eflags) struct re_guts *g; -char *string; +pg_wchar *string; size_t nmatch; regmatch_t pmatch[]; int eflags; { - char *endp; + pg_wchar *endp; int i; struct match mv; struct match *m = &mv; - char *dp; + pg_wchar *dp; const sopno gf = g->firststate + 1; /* +1 for OEND */ const sopno gl = g->laststate; - char *start; - char *stop; + pg_wchar *start; + pg_wchar *stop; /* simplify the situation where possible */ if (g->cflags & REG_NOSUB) @@ -182,7 +197,11 @@ int eflags; else { start = string; +#ifdef MB + stop = start + pg_wchar_strlen(start); +#else stop = start + strlen(start); +#endif } if (stop < start) return (REG_INVARG); @@ -192,7 +211,11 @@ int eflags; { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && stop - dp >= g->mlen && +#ifdef MB + memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0) +#else memcmp(dp, g->must, (size_t) g->mlen) == 0) +#endif break; if (dp == stop) /* we didn't find g->must */ return (REG_NOMATCH); @@ -258,8 +281,8 @@ int eflags; else { if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **) malloc((g->nplus + 1) * - sizeof(char *)); + m->lastpos = (pg_wchar **) malloc((g->nplus + 1) * + sizeof(pg_wchar *)); if (g->nplus > 0 && m->lastpos == NULL) { free(m->pmatch); @@ -324,9 +347,9 @@ int eflags; } if (m->pmatch != NULL) - free((char *) m->pmatch); + free((pg_wchar *) m->pmatch); if (m->lastpos != NULL) - free((char *) m->lastpos); + free((pg_wchar *) m->lastpos); STATETEARDOWN(m); return (0); } @@ -336,27 +359,27 @@ int eflags; == static char *dissect(struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst); */ -static char * /* == stop (success) always */ +static pg_wchar * /* == stop (success) always */ dissect(m, start, stop, startst, stopst) struct match *m; -char *start; -char *stop; 
+pg_wchar *start; +pg_wchar *stop; sopno startst; sopno stopst; { int i; sopno ss; /* start sop of current subRE */ sopno es; /* end sop of current subRE */ - char *sp; /* start of string matched by it */ - char *stp; /* string matched by it cannot pass here */ - char *rest; /* start of rest of string */ - char *tail; /* string unmatched by rest of RE */ + pg_wchar *sp; /* start of string matched by it */ + pg_wchar *stp; /* string matched by it cannot pass here */ + pg_wchar *rest; /* start of rest of string */ + pg_wchar *tail; /* string unmatched by rest of RE */ sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ - char *ssp; /* start of string matched by subsubRE */ - char *sep; /* end of string matched by subsubRE */ - char *oldssp; /* previous ssp */ - char *dp; + pg_wchar *ssp; /* start of string matched by subsubRE */ + pg_wchar *sep; /* end of string matched by subsubRE */ + pg_wchar *oldssp; /* previous ssp */ + pg_wchar *dp; AT("diss", start, stop, startst, stopst); sp = start; @@ -536,22 +559,22 @@ sopno stopst; == static char *backref(struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst, sopno lev); */ -static char * /* == stop (success) or NULL (failure) */ +static pg_wchar * /* == stop (success) or NULL (failure) */ backref(m, start, stop, startst, stopst, lev) struct match *m; -char *start; -char *stop; +pg_wchar *start; +pg_wchar *stop; sopno startst; sopno stopst; sopno lev; /* PLUS nesting level */ { int i; sopno ss; /* start sop of current subRE */ - char *sp; /* start of string matched by it */ + pg_wchar *sp; /* start of string matched by it */ sopno ssub; /* start sop of subsubRE */ sopno esub; /* end sop of subsubRE */ - char *ssp; /* start of string matched by subsubRE */ - char *dp; + pg_wchar *ssp; /* start of string matched by subsubRE */ + pg_wchar *dp; size_t len; int hard; sop s; @@ -567,7 +590,7 @@ sopno lev; /* PLUS nesting level */ switch (OP(s = m->g->strip[ss])) { case OCHAR: - if 
(sp == stop || *sp++ != (char) OPND(s)) + if (sp == stop || *sp++ != (pg_wchar) OPND(s)) return (NULL); break; case OANY: @@ -750,23 +773,23 @@ sopno lev; /* PLUS nesting level */ == static char *fast(struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst); */ -static char * /* where tentative match ended, or NULL */ +static pg_wchar * /* where tentative match ended, or NULL */ fast(m, start, stop, startst, stopst) struct match *m; -char *start; -char *stop; +pg_wchar *start; +pg_wchar *stop; sopno startst; sopno stopst; { states st = m->st; states fresh = m->fresh; states tmp = m->tmp; - char *p = start; + pg_wchar *p = start; int c = (start == m->beginp) ? OUT : *(start - 1); int lastc; /* previous c */ int flagch; int i; - char *coldp; /* last p after which no match was + pg_wchar *coldp; /* last p after which no match was * underway */ CLEAR(st); @@ -849,23 +872,23 @@ sopno stopst; == static char *slow(struct match *m, char *start, \ == char *stop, sopno startst, sopno stopst); */ -static char * /* where it ended */ +static pg_wchar * /* where it ended */ slow(m, start, stop, startst, stopst) struct match *m; -char *start; -char *stop; +pg_wchar *start; +pg_wchar *stop; sopno startst; sopno stopst; { states st = m->st; states empty = m->empty; states tmp = m->tmp; - char *p = start; + pg_wchar *p = start; int c = (start == m->beginp) ? 
OUT : *(start - 1); int lastc; /* previous c */ int flagch; int i; - char *matchp; /* last p at which a match ended */ + pg_wchar *matchp; /* last p at which a match ended */ AT("slow", start, stop, startst, stopst); CLEAR(st); @@ -978,8 +1001,8 @@ states aft; /* states already known reachable after */ break; case OCHAR: /* only characters can match */ - assert(!NONCHAR(ch) || ch != (char) OPND(s)); - if (ch == (char) OPND(s)) + assert(!NONCHAR(ch) || ch != (pg_wchar) OPND(s)); + if (ch == (pg_wchar) OPND(s)) FWD(aft, bef, 1); break; case OBOL: @@ -1082,7 +1105,7 @@ states aft; /* states already known reachable after */ static void print(m, caption, st, ch, d) struct match *m; -char *caption; +pg_wchar *caption; states st; int ch; FILE *d; @@ -1109,16 +1132,16 @@ FILE *d; /* - at - print current situation == #ifdef REDEBUG - == static void at(struct match *m, char *title, char *start, char *stop, \ + == static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \ == sopno startst, sopno stopst); == #endif */ static void at(m, title, start, stop, startst, stopst) struct match *m; -char *title; -char *start; -char *stop; +pg_wchar *title; +pg_wchar *start; +pg_wchar *stop; sopno startst; sopno stopst; { @@ -1143,13 +1166,24 @@ sopno stopst; * a matching debug.o, and this is convenient. It all disappears in * the non-debug compilation anyway, so it doesn't matter much. 
*/ -static char * /* -> representation */ + + +static int pg_isprint(int c) +{ +#ifdef MB + return(c >= 0 && c <= UCHAR_MAX && isprint(c)); +#else + return(isprint(c)); +#endif +} + +static pg_wchar * /* -> representation */ pchar(ch) int ch; { - static char pbuf[10]; + static pg_wchar pbuf[10]; - if (isprint(ch) || ch == ' ') + if (pg_isprint(ch) || ch == ' ') sprintf(pbuf, "%c", ch); else sprintf(pbuf, "\\%o", ch); diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index e31f8654049..6b7c472f1b9 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -62,8 +62,8 @@ static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; */ struct parse { - char *next; /* next character in RE */ - char *end; /* end of string (-> NUL normally) */ + pg_wchar *next; /* next character in RE */ + pg_wchar *end; /* end of string (-> NUL normally) */ int error; /* has an error been seen? */ sop *strip; /* malloced strip */ sopno ssize; /* malloced strip size (allocated) */ @@ -93,7 +93,7 @@ extern "C" static void p_b_term(struct parse * p, cset *cs); static void p_b_cclass(struct parse * p, cset *cs); static void p_b_eclass(struct parse * p, cset *cs); - static char p_b_symbol(struct parse * p); + static pg_wchar p_b_symbol(struct parse * p); static char p_b_coll_elem(struct parse * p, int endc); static char othercase(int ch); static void bothcases(struct parse * p, int ch); @@ -120,6 +120,10 @@ extern "C" static void stripsnug(struct parse * p, struct re_guts * g); static void findmust(struct parse * p, struct re_guts * g); static sopno pluscount(struct parse * p, struct re_guts * g); + static int pg_isdigit(int c); + static int pg_isalpha(int c); + static int pg_isupper(int c); + static int pg_islower(int c); #ifdef __cplusplus } @@ -127,7 +131,7 @@ extern "C" #endif /* ========= end header generated by ./mkh ========= */ -static char nuls[10]; /* place to point scanner in event of +static pg_wchar nuls[10]; /* place to point scanner in 
event of * error */ /* @@ -190,6 +194,9 @@ int cflags; struct parse *p = &pa; int i; size_t len; +#ifdef MB + pg_wchar *wcp; +#endif #ifdef REDEBUG #define GOODFLAGS(f) (f) @@ -203,12 +210,31 @@ int cflags; if (cflags & REG_PEND) { +#ifdef MB + wcp = preg->patsave; + if (preg->re_endp < wcp) + return (REG_INVARG); + len = preg->re_endp - wcp; +#else if (preg->re_endp < pattern) return (REG_INVARG); len = preg->re_endp - pattern; +#endif + } + else { +#ifdef MB + wcp = (pg_wchar *)malloc((strlen(pattern)+1) * sizeof(pg_wchar)); + if (wcp == NULL) { + return (REG_ESPACE); + } + preg->patsave = wcp; + (void)pg_mb2wchar((unsigned char *)pattern,wcp); + len = pg_wchar_strlen(wcp); +#else + + len = strlen((char *) pattern); +#endif } - else - len = strlen((char *) pattern); /* do the mallocs early so failure handling is easy */ g = (struct re_guts *) malloc(sizeof(struct re_guts) + @@ -227,7 +253,11 @@ int cflags; /* set things up */ p->g = g; - p->next = (char *) pattern; /* convenience; we do not modify it */ +#ifdef MB + p->next = wcp; +#else + p->next = pattern; /* convenience; we do not modify it */ +#endif p->end = p->next + len; p->error = 0; p->ncsalloc = 0; @@ -342,7 +372,7 @@ static void p_ere_exp(p) struct parse *p; { - char c; + pg_wchar c; sopno pos; int count; int count2; @@ -420,7 +450,7 @@ struct parse *p; break; case '{': /* okay as ordinary except if digit * follows */ - REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); + REQUIRE(!MORE() || !pg_isdigit(PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); @@ -432,7 +462,7 @@ struct parse *p; c = PEEK(); /* we call { a repetition if followed by a digit */ if (!(c == '*' || c == '+' || c == '?' 
|| - (c == '{' && MORE2() && isdigit(PEEK2())))) + (c == '{' && MORE2() && pg_isdigit(PEEK2())))) return; /* no repetition, we're done */ NEXT(); @@ -463,7 +493,7 @@ struct parse *p; count = p_count(p); if (EAT(',')) { - if (isdigit(PEEK())) + if (pg_isdigit(PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); @@ -490,7 +520,7 @@ struct parse *p; return; c = PEEK(); if (!(c == '*' || c == '+' || c == '?' || - (c == '{' && MORE2() && isdigit(PEEK2())))) + (c == '{' && MORE2() && pg_isdigit(PEEK2())))) return; SETERROR(REG_BADRPT); } @@ -568,7 +598,7 @@ int starordinary; /* is a leading * an ordinary character? */ int i; sopno subno; -#define BACKSL (1<<CHAR_BIT) +#define BACKSL (1<<24) pos = HERE(); /* repetion op, if any, covers from here */ @@ -577,7 +607,11 @@ int starordinary; /* is a leading * an ordinary character? */ if (c == '\\') { REQUIRE(MORE(), REG_EESCAPE); +#ifdef MB + c = BACKSL | (pg_wchar) GETNEXT(); +#else c = BACKSL | (unsigned char) GETNEXT(); +#endif } switch (c) { @@ -660,7 +694,7 @@ int starordinary; /* is a leading * an ordinary character? 
*/ count = p_count(p); if (EAT(',')) { - if (MORE() && isdigit(PEEK())) + if (MORE() && pg_isdigit(PEEK())) { count2 = p_count(p); REQUIRE(count <= count2, REG_BADBR); @@ -698,7 +732,7 @@ struct parse *p; int count = 0; int ndigits = 0; - while (MORE() && isdigit(PEEK()) && count <= DUPMAX) + while (MORE() && pg_isdigit(PEEK()) && count <= DUPMAX) { count = count * 10 + (GETNEXT() - '0'); ndigits++; @@ -721,15 +755,27 @@ struct parse *p; { cset *cs = allocset(p); int invert = 0; +#ifdef MB + pg_wchar sp1[] = {'[', ':', '<', ':', ']', ']'}; + pg_wchar sp2[] = {'[', ':', '>', ':', ']', ']'}; +#endif /* Dept of Truly Sickening Special-Case Kludges */ +#ifdef MB + if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp1, 6) == 0) +#else if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) +#endif { EMIT(OBOW, 0); NEXTn(6); return; } +#ifdef MB + if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp2, 6) == 0) +#else if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) +#endif { EMIT(OEOW, 0); NEXTn(6); @@ -757,7 +803,7 @@ struct parse *p; int ci; for (i = p->g->csetsize - 1; i >= 0; i--) - if (CHIN(cs, i) && isalpha(i)) + if (CHIN(cs, i) && pg_isalpha(i)) { ci = othercase(i); if (ci != i) @@ -801,8 +847,8 @@ p_b_term(p, cs) struct parse *p; cset *cs; { - char c; - char start, + pg_wchar c; + pg_wchar start, finish; int i; @@ -857,6 +903,11 @@ cset *cs; finish = start; /* xxx what about signed chars here... 
*/ REQUIRE(start <= finish, REG_ERANGE); +#ifdef MB + if (CHlc(start) != CHlc(finish)) { + SETERROR(REG_ERANGE); + } +#endif for (i = start; i <= finish; i++) CHadd(cs, i); break; @@ -872,17 +923,21 @@ p_b_cclass(p, cs) struct parse *p; cset *cs; { - char *sp = p->next; + pg_wchar *sp = p->next; struct cclass *cp; size_t len; char *u; char c; - while (MORE() && isalpha(PEEK())) + while (MORE() && pg_isalpha(PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) +#ifdef MB + if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') +#else if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') +#endif break; if (cp->name == NULL) { @@ -919,11 +974,11 @@ cset *cs; - p_b_symbol - parse a character or [..]ed multicharacter collating symbol == static char p_b_symbol(struct parse *p); */ -static char /* value of symbol */ +static pg_wchar /* value of symbol */ p_b_symbol(p) struct parse *p; { - char value; + pg_wchar value; REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) @@ -944,7 +999,7 @@ p_b_coll_elem(p, endc) struct parse *p; int endc; /* name ended by endc,']' */ { - char *sp = p->next; + pg_wchar *sp = p->next; struct cname *cp; int len; @@ -957,7 +1012,11 @@ int endc; /* name ended by endc,']' */ } len = p->next - sp; for (cp = cnames; cp->name != NULL; cp++) +#ifdef MB + if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') +#else if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') +#endif return (cp->code); /* known name */ if (len == 1) return (*sp); /* single character */ @@ -973,10 +1032,10 @@ static char /* if no counterpart, return ch */ othercase(ch) int ch; { - assert(isalpha(ch)); - if (isupper(ch)) + assert(pg_isalpha(ch)); + if (pg_isupper(ch)) return (tolower(ch)); - else if (islower(ch)) + else if (pg_islower(ch)) return (toupper(ch)); else /* peculiar, but could happen */ @@ -994,9 +1053,9 @@ bothcases(p, ch) struct parse *p; int ch; { - char *oldnext = 
p->next; - char *oldend = p->end; - char bracket[3]; + pg_wchar *oldnext = p->next; + pg_wchar *oldend = p->end; + pg_wchar bracket[3]; assert(othercase(ch) != ch);/* p_bracket() would recurse */ p->next = bracket; @@ -1021,12 +1080,16 @@ int ch; { cat_t *cap = p->g->categories; - if ((p->g->cflags & REG_ICASE) && isalpha(ch) && othercase(ch) != ch) + if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch) bothcases(p, ch); else { +#ifdef MB + EMIT(OCHAR, (pg_wchar) ch); +#else EMIT(OCHAR, (unsigned char) ch); - if (cap[ch] == 0) +#endif + if (ch >= CHAR_MIN && ch <= CHAR_MAX && cap[ch] == 0) cap[ch] = p->g->ncategories++; } } @@ -1041,9 +1104,9 @@ static void nonnewline(p) struct parse *p; { - char *oldnext = p->next; - char *oldend = p->end; - char bracket[4]; + pg_wchar *oldnext = p->next; + pg_wchar *oldend = p->end; + pg_wchar bracket[4]; p->next = bracket; p->end = bracket + 3; @@ -1674,7 +1737,7 @@ struct re_guts *g; sop *newstart = 0; sopno newlen; sop s; - char *cp; + pg_wchar *cp; sopno i; /* avoid making error situations worse */ @@ -1729,7 +1792,11 @@ struct re_guts *g; return; /* turn it into a character string */ +#ifdef MB + g->must = (pg_wchar *)malloc((size_t) (g->mlen + 1)*sizeof(pg_wchar)); +#else g->must = malloc((size_t) g->mlen + 1); +#endif if (g->must == NULL) { /* argh; just forget it */ g->mlen = 0; @@ -1742,7 +1809,7 @@ struct re_guts *g; while (OP(s = *scan++) != OCHAR) continue; assert(cp < g->must + g->mlen); - *cp++ = (char) OPND(s); + *cp++ = (pg_wchar) OPND(s); } assert(cp == g->must + g->mlen); *cp++ = '\0'; /* just on general principles */ @@ -1785,3 +1852,42 @@ struct re_guts *g; g->iflags |= BAD; return (maxnest); } + +/* + * some ctype functions with none-ascii-char guard + */ +static int pg_isdigit(int c) +{ +#ifdef MB + return(c >= 0 && c <= UCHAR_MAX && isdigit(c)); +#else + return(isdigit(c)); +#endif +} + +static int pg_isalpha(int c) +{ +#ifdef MB + return(c >= 0 && c <= UCHAR_MAX && isalpha(c)); +#else + 
return(isalpha(c)); +#endif +} + +static int pg_isupper(int c) +{ +#ifdef MB + return(c >= 0 && c <= UCHAR_MAX && isupper(c)); +#else + return(isupper(c)); +#endif +} + +static int pg_islower(int c) +{ +#ifdef MB + return(c >= 0 && c <= UCHAR_MAX && islower(c)); +#else + return(islower(c)); +#endif +} diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c index a8ba2443c5f..abdc314a94b 100644 --- a/src/backend/regex/regerror.c +++ b/src/backend/regex/regerror.c @@ -52,6 +52,7 @@ static char sccsid[] = "@(#)regerror.c 8.4 (Berkeley) 3/20/94"; #include <regex/regex.h> #include <regex/utils.h> +#include <regex/regex2.h> /* ========= begin header generated by ./mkh ========= */ #ifdef __cplusplus @@ -214,7 +215,11 @@ char *localbuf; struct rerr *r; for (r = rerrs; r->code != 0; r++) +#ifdef MB + if (pg_char_and_wchar_strcmp(r->name, preg->re_endp) == 0) +#else if (strcmp(r->name, preg->re_endp) == 0) +#endif break; if (r->code == 0) return ("0"); diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index e3ac5cd0644..8a51810a203 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -164,6 +164,11 @@ int eflags; { struct re_guts *g = preg->re_g; +#ifdef MB + pg_wchar *str; + int sts; +#endif + #ifdef REDEBUG #define GOODFLAGS(f) (f) #else @@ -177,8 +182,24 @@ int eflags; return (REG_BADPAT); eflags = GOODFLAGS(eflags); +#ifdef MB + str = (pg_wchar *)malloc((strlen(string)+1) * sizeof(pg_wchar)); + if (!str) { + return(REG_ESPACE); + } + (void)pg_mb2wchar((unsigned char *)string,str); if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & REG_LARGE)) - return (smatcher(g, (char *) string, nmatch, pmatch, eflags)); + sts = smatcher(g, str, nmatch, pmatch, eflags); else - return (lmatcher(g, (char *) string, nmatch, pmatch, eflags)); + sts = lmatcher(g, str, nmatch, pmatch, eflags); + free((char *)str); + return(sts); + +# else + + if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & REG_LARGE)) + return 
(smatcher(g, (pg_wchar *) string, nmatch, pmatch, eflags)); + else + return (lmatcher(g, (pg_wchar *) string, nmatch, pmatch, eflags)); +#endif } diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c index e53fe54e86a..b169c840412 100644 --- a/src/backend/regex/regfree.c +++ b/src/backend/regex/regfree.c @@ -68,7 +68,11 @@ regex_t *preg; return; preg->re_magic = 0; /* mark it invalid */ g->magic = 0; /* mark it invalid */ - +#ifdef MB + if (preg->patsave != NULL) { + free((char *)preg->patsave); + } +#endif if (g->strip != NULL) free((char *) g->strip); if (g->sets != NULL) diff --git a/src/backend/regex/utftest.c b/src/backend/regex/utftest.c new file mode 100644 index 00000000000..28baf7255ef --- /dev/null +++ b/src/backend/regex/utftest.c @@ -0,0 +1,33 @@ +/* + * testing of utf2wchar() + * $Id: utftest.c,v 1.1 1998/03/15 07:38:37 scrappy Exp $ + */ +#include <regex/regex.h> +#include <regex/utils.h> +#include <regex/regex2.h> + +#include <regex/pg_wchar.h> + +main() +{ + /* Example 1 from RFC2044 */ + char utf1[] = {0x41,0xe2,0x89,0xa2,0xce,0x91,0x2e,0}; + /* Example 2 from RFC2044 */ + char utf2[] = {0x48,0x69,0x20,0x4d,0x6f,0x6d,0x20,0xe2,0x98,0xba,0x21,0}; + /* Example 3 from RFC2044 */ + char utf3[] = {0xe6,0x97,0xa5,0xe6,0x9c,0xac,0xe8,0xaa,0x9e,0}; + char *utf[] = {utf1,utf2,utf3}; + pg_wchar ucs[128]; + pg_wchar *p; + int i; + + for (i=0;i<sizeof(utf)/sizeof(char *);i++) { + pg_utf2wchar(utf[i],ucs); + p = ucs; + while(*p) { + printf("%04x ",*p); + p++; + } + printf("\n"); + } +} diff --git a/src/backend/regex/utils.c b/src/backend/regex/utils.c new file mode 100644 index 00000000000..1f904e338ce --- /dev/null +++ b/src/backend/regex/utils.c @@ -0,0 +1,348 @@ +/* + * misc conversion functions between pg_wchar and other encodings. 
+ * Tatsuo Ishii + * $Id: utils.c,v 1.1 1998/03/15 07:38:39 scrappy Exp $ + */ +#include <regex/pg_wchar.h> +/* + * convert EUC to pg_wchar (EUC process code) + * caller should allocate enough space for "to" + */ +static void pg_euc2wchar(const unsigned char *from, pg_wchar *to) +{ + while (*from) { + if (*from == SS2) { + from++; + *to = *from++; + } else if (*from == SS3) { + from++; + *to = *from++ << 8; + *to |= 0x3f & *from++; + } else if (*from & 0x80) { + *to = *from++ << 8; + *to |= *from++; + } else { + *to = *from++; + } + to++; + } + *to = 0; +} + +static void pg_eucjp2wchar(const unsigned char *from, pg_wchar *to) +{ + pg_euc2wchar(from,to); +} + +static void pg_euckr2wchar(const unsigned char *from, pg_wchar *to) +{ + pg_euc2wchar(from,to); +} + +static void pg_eucch2wchar(const unsigned char *from, pg_wchar *to) +{ + while (*from) { + if (*from == SS2) { + from++; + *to = 0x3f00 & (*from++ << 8); + *to = *from++; + } else if (*from == SS3) { + from++; + *to = *from++ << 8; + *to |= 0x3f & *from++; + } else if (*from & 0x80) { + *to = *from++ << 8; + *to |= *from++; + } else { + *to = *from++; + } + to++; + } + *to = 0; +} + +static void pg_euccn2wchar(const unsigned char *from, pg_wchar *to) +{ + while (*from) { + if (*from == SS2) { + from++; + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + } else if (*from == SS3) { + from++; + *to = *from++ << 8; + *to |= 0x3f & *from++; + } else if (*from & 0x80) { + *to = *from++ << 8; + *to |= *from++; + } else { + *to = *from++; + } + to++; + } + *to = 0; +} + +/* + * convert UTF-8 to pg_wchar (UCS-2) + * caller should allocate enough space for "to" + */ +static void pg_utf2wchar(const unsigned char *from, pg_wchar *to) +{ + unsigned char c1,c2,c3; + while (*from) { + if ((*from & 0x80) == 0) { + *to = *from++; + } else if ((*from & 0xe0) == 0xc0) { + c1 = *from++ & 0x1f; + c2 = *from++ & 0x3f; + *to = c1 << 6; + *to |= c2; + } else if ((*from & 0xe0) == 0xe0) { + c1 = *from++ & 0x0f; + c2 = 
*from++ & 0x3f; + c3 = *from++ & 0x3f; + *to = c1 << 12; + *to |= c2 << 6; + *to |= c3; + } + to++; + } + *to = 0; +} + +/* + * convert mule internal code to pg_wchar. + * in this case pg_wchar consists of following 4 bytes: + * + * 0x00(unused) + * 0x00(ASCII)|leading character (one of LC1, LC12, LC2 or LC22) + * 0x00(ASCII,1 byte code)|other than 0x00(2 byte code) + * the lowest byte of the code + * + * note that Type N (variable length byte encoding) cannot be represented by + * this schema. sorry. + * caller should allocate enough space for "to" + */ +static void pg_mule2wchar(const unsigned char *from, pg_wchar *to) +{ + while (*from) { + if (IS_LC1(*from)) { + *to = *from++ << 16; + *to |= *from++; + } else if (IS_LCPRV1(*from)) { + from++; + *to = *from++ << 16; + *to |= *from++; + } else if (IS_LC2(*from)) { + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + } else if (IS_LCPRV2(*from)) { + from++; + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + } else { /* assume ASCII */ + *to = *from++; + } + to++; + } + *to = 0; +} + +/* + * convert EUC to pg_wchar (EUC process code) + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. 
+ */ +static void pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + while (*from && len > 0) { + if (*from == SS2) { + from++; + len--; + *to = 0xff & *from++; + len--; + } else if (*from == SS3) { + from++; + *to = *from++ << 8; + *to |= 0x3f & *from++; + len -= 3; + } else if (*from & 0x80) { + *to = *from++ << 8; + *to |= *from++; + len -= 2; + } else { + *to = *from++; + len--; + } + to++; + } + *to = 0; +} + +static void pg_eucjp2wchar_with_len +(const unsigned char *from, pg_wchar *to, int len) +{ + pg_euc2wchar_with_len(from,to,len); +} + +static void pg_euckr2wchar_with_len +(const unsigned char *from, pg_wchar *to, int len) +{ + pg_euc2wchar_with_len(from,to,len); +} + +static void pg_eucch2wchar_with_len +(const unsigned char *from, pg_wchar *to, int len) +{ + while (*from && len > 0) { + if (*from == SS2) { + from++; + len--; + *to = 0x3f00 & (*from++ << 8); + *to = *from++; + len -= 2; + } else if (*from == SS3) { + from++; + *to = *from++ << 8; + *to |= 0x3f & *from++; + len -= 3; + } else if (*from & 0x80) { + *to = *from++ << 8; + *to |= *from++; + len -= 2; + } else { + *to = *from++; + len--; + } + to++; + } + *to = 0; +} + +static void pg_euccn2wchar_with_len +(const unsigned char *from, pg_wchar *to, int len) +{ + while (*from && len > 0) { + if (*from == SS2) { + from++; + len--; + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + len -= 3; + } else if (*from == SS3) { + from++; + *to = *from++ << 8; + *to |= 0x3f & *from++; + len -= 3; + } else if (*from & 0x80) { + *to = *from++ << 8; + *to |= *from++; + len -= 2; + } else { + *to = *from++; + len--; + } + to++; + } + *to = 0; +} + +/* + * convert UTF-8 to pg_wchar (UCS-2) + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. 
+ */ +static void pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + unsigned char c1,c2,c3; + while (*from && len > 0) { + if ((*from & 0x80) == 0) { + *to = *from++; + len--; + } else if ((*from & 0xe0) == 0xc0) { + c1 = *from++ & 0x1f; + c2 = *from++ & 0x3f; + len -= 2; + *to = c1 << 6; + *to |= c2; + } else if ((*from & 0xe0) == 0xe0) { + c1 = *from++ & 0x0f; + c2 = *from++ & 0x3f; + c3 = *from++ & 0x3f; + len -= 3; + *to = c1 << 12; + *to |= c2 << 6; + *to |= c3; + } + to++; + } + *to = 0; +} + +/* + * convert mule internal code to pg_wchar + * caller should allocate enough space for "to" + * len: length of from. + * "from" not necessarily null terminated. + */ +static void pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + while (*from && len > 0) { + if (IS_LC1(*from)) { + *to = *from++ << 16; + *to |= *from++; + len -= 2; + } else if (IS_LCPRV1(*from)) { + from++; + *to = *from++ << 16; + *to |= *from++; + len -= 3; + } else if (IS_LC2(*from)) { + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + len -= 3; + } else if (IS_LCPRV2(*from)) { + from++; + *to = *from++ << 16; + *to |= *from++ << 8; + *to |= *from++; + len -= 4; + } else { /* assume ASCII */ + *to = (unsigned char)*from++; + len--; + } + to++; + } + *to = 0; +} + +typedef struct { + void (*mb2wchar)(); + void (*mb2wchar_with_len)(); +} pg_wchar_tbl; + +static pg_wchar_tbl pg_wchar_table[] = { + {pg_eucjp2wchar, pg_eucjp2wchar_with_len}, + {pg_eucch2wchar, pg_eucch2wchar_with_len}, + {pg_euckr2wchar, pg_euckr2wchar_with_len}, + {pg_euccn2wchar, pg_euccn2wchar_with_len}, + {pg_utf2wchar, pg_utf2wchar_with_len}, + {pg_mule2wchar, pg_mule2wchar_with_len}}; + +void pg_mb2wchar(const unsigned char *from, pg_wchar *to) +{ + (*pg_wchar_table[MB].mb2wchar)(from,to); +} + +void pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) +{ + (*pg_wchar_table[MB].mb2wchar_with_len)(from,to,len); +} diff --git 
a/src/backend/regex/wstrcmp.c b/src/backend/regex/wstrcmp.c new file mode 100644 index 00000000000..b562f103150 --- /dev/null +++ b/src/backend/regex/wstrcmp.c @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Chris Torek. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <regex/pg_wchar.h>
+
+/*
+ * strcmp() between a NUL terminated byte string s1 and a 0 terminated
+ * pg_wchar string s2.
+ *
+ * Returns 0 when both strings are equal, otherwise the difference between
+ * the first pair of elements that differ.
+ */
+int
+pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2)
+{
+    /*
+     * Read s1 as unsigned char for the comparison as well as for the
+     * result: a plain (signed) char above 0x7f would be sign-extended when
+     * widened to pg_wchar and could never match its wide counterpart.
+     */
+    const unsigned char *p = (const unsigned char *) s1;
+
+    while ((pg_wchar) *p == *s2++)
+        if (*p++ == 0)
+            return (0);
+    return (*p - *(s2 - 1));
+}
diff --git a/src/backend/regex/wstrncmp.c b/src/backend/regex/wstrncmp.c
new file mode 100644
index 00000000000..e7ce52ed7b2
--- /dev/null
+++ b/src/backend/regex/wstrncmp.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1989, 1993
+ *    The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from FreeBSD 2.2.1-RELEASE software.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    This product includes software developed by the University of
+ *    California, Berkeley and its contributors.
+ * 4.
Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <regex/pg_wchar.h>
+
+/*
+ * strncmp() for two 0 terminated pg_wchar strings: compares at most n
+ * elements and stops early at a terminator.
+ *
+ * Returns 0 when the compared parts are equal, otherwise the difference
+ * between the first pair of elements that differ.
+ */
+int
+pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n)
+{
+    if (n == 0)
+        return (0);
+    do {
+        if (*s1 != *s2++)
+            return (*s1 - *(s2 - 1));
+        if (*s1++ == 0)
+            break;
+    } while (--n != 0);
+    return (0);
+}
+
+/*
+ * strncmp() between a NUL terminated byte string s1 and a 0 terminated
+ * pg_wchar string s2: compares at most n elements.
+ *
+ * Returns 0 when the compared parts are equal, otherwise the difference
+ * between the first pair of elements that differ.
+ */
+int
+pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n)
+{
+    /*
+     * s1 is a byte string, so it must be read one unsigned char at a time.
+     * Dereferencing it through a "const pg_wchar *" cast, as the result
+     * expression used to do, fetched sizeof(pg_wchar) bytes from it and
+     * returned a meaningless difference.
+     */
+    const unsigned char *p = (const unsigned char *) s1;
+
+    if (n == 0)
+        return (0);
+    do {
+        if ((pg_wchar) *p != *s2++)
+            return (*p - *(s2 - 1));
+        if (*p++ == 0)
+            break;
+    } while (--n != 0);
+    return (0);
+}
+
+/*
+ * strlen() for a 0 terminated pg_wchar string: number of elements before
+ * the terminator.
+ */
+size_t
+pg_wchar_strlen(const pg_wchar *str)
+{
+    const pg_wchar *s;
+
+    for (s = str; *s; ++s)
+        ;
+    return (s - str);
+}
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index fe7b5dd1d36..75cc755f7cc 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -4,7 +4,7 @@
 # Makefile for utils/adt
 #
 #
IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/utils/adt/Makefile,v 1.10 1997/12/20 00:28:21 scrappy Exp $ +# $Header: /cvsroot/pgsql/src/backend/utils/adt/Makefile,v 1.11 1998/03/15 07:38:42 scrappy Exp $ # #------------------------------------------------------------------------- @@ -14,6 +14,9 @@ include ../../../Makefile.global INCLUDE_OPT = -I../.. CFLAGS+=$(INCLUDE_OPT) +ifdef MB +CFLAGS+=-DMB=$(MB) +endif OBJS = acl.o arrayfuncs.o arrayutils.o bool.o cash.o char.o chunk.o date.o \ datum.o dt.o filename.o float.o geo_ops.o geo_selfuncs.o int.o \ diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 7d4681262d5..27d6ffc0140 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -21,8 +21,9 @@ #include "postgres.h" /* postgres system include file */ #include "utils/palloc.h" #include "utils/builtins.h" /* where the function declarations go */ +#include "regex/pg_wchar.h" -static int like(char *text, char *p); +static int like(pg_wchar *text, pg_wchar *p); /* * interface routines called by the function manager @@ -39,16 +40,22 @@ static int like(char *text, char *p); static bool fixedlen_like(char *s, struct varlena * p, int charlen) { - char *sterm, + pg_wchar *sterm, *pterm; int result; + int len; if (!s || !p) return FALSE; /* be sure sterm is null-terminated */ +#ifdef MB + sterm = (pg_wchar *) palloc((charlen + 1)*sizeof(pg_wchar)); + (void)pg_mb2wchar_with_len((unsigned char *)s,sterm,charlen); +#else sterm = (char *) palloc(charlen + 1); StrNCpy(sterm, s, charlen + 1); +#endif /* * p is a text = varlena, not a string so we have to make a string @@ -56,9 +63,15 @@ fixedlen_like(char *s, struct varlena * p, int charlen) */ /* palloc the length of the text + the null character */ - pterm = (char *) palloc(VARSIZE(p) - VARHDRSZ + 1); - memmove(pterm, VARDATA(p), VARSIZE(p) - VARHDRSZ); - *(pterm + VARSIZE(p) - VARHDRSZ) = (char) NULL; + len = VARSIZE(p) - VARHDRSZ; +#ifdef MB + pterm = (pg_wchar *) 
palloc((len + 1)*sizeof(pg_wchar)); + (void)pg_mb2wchar_with_len((unsigned char *)VARDATA(p),pterm,len); +#else + pterm = (char *) palloc(len + 1); + memmove(pterm, VARDATA(p), len); + *(pterm + len) = (char) NULL; +#endif /* do the regexp matching */ result = like(sterm, pterm); @@ -150,7 +163,7 @@ textnlike(struct varlena * s, struct varlena * p) } -/* $Revision: 1.12 $ +/* $Revision: 1.13 $ ** "like.c" A first attempt at a LIKE operator for Postgres95. ** ** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. @@ -185,7 +198,7 @@ textnlike(struct varlena * s, struct varlena * p) ** Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT. */ static int -DoMatch(char *text, char *p) +DoMatch(pg_wchar *text, pg_wchar *p) { int matched; @@ -228,7 +241,7 @@ DoMatch(char *text, char *p) ** User-level routine. Returns TRUE or FALSE. */ static int -like(char *text, char *p) +like(pg_wchar *text, pg_wchar *p) { if (p[0] == '%' && p[1] == '\0') return TRUE; |