From: [email protected]

Included are patches intended to allow PostgreSQL to handle multi-byte character sets such as EUC (Extended Unix Code), Unicode and Mule internal code. With the MB patch you can use multi-byte character sets in regexp and LIKE. The encoding system chosen is determined at compile time. To enable the MB extension, you need to define a variable "MB" in Makefile.global or in Makefile.custom. For further information please take a look at README.mb under the doc directory. (Note that unlike the "jp patch" I no longer use a modified GNU regexp; I changed Henry Spencer's regexp that comes with PostgreSQL.)
author: Marc G. Fournier 1998-03-15 07:39:04 +0000
committer: Marc G. Fournier 1998-03-15 07:39:04 +0000
commit: 661ecf3c48e16a9add216287eb969d7615e47968 (patch)
tree: 91b54d5905aa2e22bd0ae9ea8c6b0f3cab75d3f4 /src/backend
parent: 31a925c4d07675bc098a742ee9ca642ec79a40ee (diff)
12 files changed, 819 insertions, 117 deletions
diff --git a/src/backend/regex/Makefile b/src/backend/regex/Makefile
index f7ef534bd3d..6f080cfcb51 100644
--- a/src/backend/regex/Makefile
+++ b/src/backend/regex/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for regex
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/regex/Makefile,v 1.4 1997/12/20 00:26:58 scrappy Exp $
+#    $Header: /cvsroot/pgsql/src/backend/regex/Makefile,v 1.5 1998/03/15 07:38:14 scrappy Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -17,6 +17,10 @@ CFLAGS+=$(INCLUDE_OPT)
 CFLAGS+=-DPOSIX_MISTAKE 
 
 OBJS = regcomp.o regerror.o regexec.o regfree.o
+ifdef MB
+OBJS += utils.o wstrcmp.o wstrncmp.o
+CFLAGS += -DMB=$(MB)
+endif
 
 all: SUBSYS.o
 
diff --git a/src/backend/regex/engine.c b/src/backend/regex/engine.c
index 4801361f90f..1964f2a0248 100644
--- a/src/backend/regex/engine.c
+++ b/src/backend/regex/engine.c
@@ -73,11 +73,11 @@ struct match
 	struct re_guts *g;
 	int			eflags;
 	regmatch_t *pmatch;			/* [nsub+1] (0 element unused) */
-	char	   *offp;			/* offsets work from here */
-	char	   *beginp;			/* start of string -- virtual NUL precedes */
-	char	   *endp;			/* end of string -- virtual NUL here */
-	char	   *coldp;			/* can be no match starting before here */
-	char	  **lastpos;		/* [nplus+1] */
+	pg_wchar   *offp;			/* offsets work from here */
+	pg_wchar   *beginp;			/* start of string -- virtual NUL precedes */
+	pg_wchar   *endp;			/* end of string -- virtual NUL here */
+	pg_wchar   *coldp;			/* can be no match starting before here */
+	pg_wchar   **lastpos;		/* [nplus+1] */
 				STATEVARS;
 	states		st;				/* current states */
 	states		fresh;			/* states for a fresh start */
@@ -93,19 +93,19 @@ extern		"C"
 
 /* === engine.c === */
 	static int
-				matcher(struct re_guts * g, char *string, size_t nmatch,
+				matcher(struct re_guts * g, pg_wchar *string, size_t nmatch,
 									regmatch_t pmatch[], int eflags);
-	static char *
-				dissect(struct match * m, char *start, char *stop,
+	static pg_wchar *
+				dissect(struct match * m, pg_wchar *start, pg_wchar *stop,
 									sopno startst, sopno stopst);
-	static char *
-				backref(struct match * m, char *start, char *stop,
+	static pg_wchar *
+				backref(struct match * m, pg_wchar *start, pg_wchar *stop,
 								 sopno startst, sopno stopst, sopno lev);
-	static char *
-				fast(struct match * m, char *start, char *stop,
+	static pg_wchar *
+				fast(struct match * m, pg_wchar *start, pg_wchar *stop,
 								 sopno startst, sopno stopst);
-	static char *
-				slow(struct match * m, char *start, char *stop, sopno startst, sopno stopst);
+	static pg_wchar *
+				slow(struct match * m, pg_wchar *start, pg_wchar *stop, sopno startst, sopno stopst);
 	static		states
 				step(struct re_guts * g, sopno start,
 							 sopno stop, states bef, int ch, states aft);
@@ -116,20 +116,35 @@ extern		"C"
 #define BOW		(BOL+4)
 #define EOW		(BOL+5)
 #define CODEMAX (BOL+5)			/* highest code used */
-#define NONCHAR(c)		((c) > CHAR_MAX)
-#define NNONCHAR		(CODEMAX-CHAR_MAX)
+
+#ifdef MB
+#  if MB == MULE_INTERNAL
+#    define NONCHAR(c)	((c) > 16777216)	/* 16777216 == 2^24 == 3 bytes */
+#    define NNONCHAR	(CODEMAX-16777216)
+#  elif MB == EUC_JP || MB == EUC_CN || MB == EUC_KR || MB == EUC_TW
+#    define NONCHAR(c)	((c) > USHRT_MAX)
+#    define NNONCHAR	(CODEMAX-USHRT_MAX)
+#  elif MB == UNICODE
+#    define NONCHAR(c)	((c) > USHRT_MAX)
+#    define NNONCHAR	(CODEMAX-USHRT_MAX)
+#  endif
+#else
+#  define NONCHAR(c)		((c) > CHAR_MAX)
+#  define NNONCHAR		(CODEMAX-CHAR_MAX)
+#endif
+
 #ifdef REDEBUG
 	static void
-				print(struct match * m, char *caption, states st, int ch, FILE *d);
+				print(struct match * m, pg_wchar *caption, states st, int ch, FILE *d);
 #endif
 #ifdef REDEBUG
 	static void
-				at(struct match * m, char *title, char *start, char *stop,
+				at(struct match * m, pg_wchar *title, pg_wchar *start, pg_wchar *stop,
 							   sopno startst, sopno stopst);
 #endif
 #ifdef REDEBUG
-	static char *
-				pchar(int ch);
+	static pg_wchar *
+				p_char(int ch);
 #endif
 
 #ifdef __cplusplus
@@ -150,26 +165,26 @@ extern		"C"
 
 /*
  - matcher - the actual matching engine
- == static int matcher(struct re_guts *g, char *string, \
+ == static int matcher(struct re_guts *g, pg_wchar *string, \
  ==		size_t nmatch, regmatch_t pmatch[], int eflags);
  */
 static int						/* 0 success, REG_NOMATCH failure */
 matcher(g, string, nmatch, pmatch, eflags)
 struct re_guts *g;
-char	   *string;
+pg_wchar	   *string;
 size_t		nmatch;
 regmatch_t	pmatch[];
 int			eflags;
 {
-	char	   *endp;
+	pg_wchar	   *endp;
 	int			i;
 	struct match mv;
 	struct match *m = &mv;
-	char	   *dp;
+	pg_wchar	   *dp;
 	const sopno gf = g->firststate + 1; /* +1 for OEND */
 	const sopno gl = g->laststate;
-	char	   *start;
-	char	   *stop;
+	pg_wchar	   *start;
+	pg_wchar	   *stop;
 
 	/* simplify the situation where possible */
 	if (g->cflags & REG_NOSUB)
@@ -182,7 +197,11 @@ int			eflags;
 	else
 	{
 		start = string;
+#ifdef MB
+		stop = start + pg_wchar_strlen(start);
+#else
 		stop = start + strlen(start);
+#endif
 	}
 	if (stop < start)
 		return (REG_INVARG);
@@ -192,7 +211,11 @@ int			eflags;
 	{
 		for (dp = start; dp < stop; dp++)
 			if (*dp == g->must[0] && stop - dp >= g->mlen &&
+#ifdef MB
+				memcmp(dp, g->must, (size_t) (g->mlen * sizeof(pg_wchar))) == 0)
+#else
 				memcmp(dp, g->must, (size_t) g->mlen) == 0)
+#endif
 				break;
 		if (dp == stop)			/* we didn't find g->must */
 			return (REG_NOMATCH);
@@ -258,8 +281,8 @@ int			eflags;
 		else
 		{
 			if (g->nplus > 0 && m->lastpos == NULL)
-				m->lastpos = (char **) malloc((g->nplus + 1) *
-											  sizeof(char *));
+				m->lastpos = (pg_wchar **) malloc((g->nplus + 1) *
+											  sizeof(pg_wchar *));
 			if (g->nplus > 0 && m->lastpos == NULL)
 			{
 				free(m->pmatch);
@@ -324,9 +347,9 @@ int			eflags;
 	}
 
 	if (m->pmatch != NULL)
-		free((char *) m->pmatch);
+		free((pg_wchar *) m->pmatch);
 	if (m->lastpos != NULL)
-		free((char *) m->lastpos);
+		free((pg_wchar *) m->lastpos);
 	STATETEARDOWN(m);
 	return (0);
 }
@@ -336,27 +359,27 @@ int			eflags;
  == static char *dissect(struct match *m, char *start, \
  ==		char *stop, sopno startst, sopno stopst);
  */
-static char *					/* == stop (success) always */
+static pg_wchar *				/* == stop (success) always */
 dissect(m, start, stop, startst, stopst)
 struct match *m;
-char	   *start;
-char	   *stop;
+pg_wchar	   *start;
+pg_wchar	   *stop;
 sopno		startst;
 sopno		stopst;
 {
 	int			i;
 	sopno		ss;				/* start sop of current subRE */
 	sopno		es;				/* end sop of current subRE */
-	char	   *sp;				/* start of string matched by it */
-	char	   *stp;			/* string matched by it cannot pass here */
-	char	   *rest;			/* start of rest of string */
-	char	   *tail;			/* string unmatched by rest of RE */
+	pg_wchar	   *sp;			/* start of string matched by it */
+	pg_wchar	   *stp;		/* string matched by it cannot pass here */
+	pg_wchar	   *rest;		/* start of rest of string */
+	pg_wchar	   *tail;		/* string unmatched by rest of RE */
 	sopno		ssub;			/* start sop of subsubRE */
 	sopno		esub;			/* end sop of subsubRE */
-	char	   *ssp;			/* start of string matched by subsubRE */
-	char	   *sep;			/* end of string matched by subsubRE */
-	char	   *oldssp;			/* previous ssp */
-	char	   *dp;
+	pg_wchar	   *ssp;		/* start of string matched by subsubRE */
+	pg_wchar	   *sep;		/* end of string matched by subsubRE */
+	pg_wchar	   *oldssp;		/* previous ssp */
+	pg_wchar	   *dp;
 
 	AT("diss", start, stop, startst, stopst);
 	sp = start;
@@ -536,22 +559,22 @@ sopno		stopst;
  == static char *backref(struct match *m, char *start, \
  ==		char *stop, sopno startst, sopno stopst, sopno lev);
  */
-static char *					/* == stop (success) or NULL (failure) */
+static pg_wchar *		/* == stop (success) or NULL (failure) */
 backref(m, start, stop, startst, stopst, lev)
 struct match *m;
-char	   *start;
-char	   *stop;
+pg_wchar	   *start;
+pg_wchar	   *stop;
 sopno		startst;
 sopno		stopst;
 sopno		lev;				/* PLUS nesting level */
 {
 	int			i;
 	sopno		ss;				/* start sop of current subRE */
-	char	   *sp;				/* start of string matched by it */
+	pg_wchar	   *sp;			/* start of string matched by it */
 	sopno		ssub;			/* start sop of subsubRE */
 	sopno		esub;			/* end sop of subsubRE */
-	char	   *ssp;			/* start of string matched by subsubRE */
-	char	   *dp;
+	pg_wchar	   *ssp;		/* start of string matched by subsubRE */
+	pg_wchar	   *dp;
 	size_t		len;
 	int			hard;
 	sop			s;
@@ -567,7 +590,7 @@ sopno		lev;				/* PLUS nesting level */
 		switch (OP(s = m->g->strip[ss]))
 		{
 			case OCHAR:
-				if (sp == stop || *sp++ != (char) OPND(s))
+				if (sp == stop || *sp++ != (pg_wchar) OPND(s))
 					return (NULL);
 				break;
 			case OANY:
@@ -750,23 +773,23 @@ sopno		lev;				/* PLUS nesting level */
  == static char *fast(struct match *m, char *start, \
  ==		char *stop, sopno startst, sopno stopst);
  */
-static char *					/* where tentative match ended, or NULL */
+static pg_wchar *		/* where tentative match ended, or NULL */
 fast(m, start, stop, startst, stopst)
 struct match *m;
-char	   *start;
-char	   *stop;
+pg_wchar	   *start;
+pg_wchar	   *stop;
 sopno		startst;
 sopno		stopst;
 {
 	states		st = m->st;
 	states		fresh = m->fresh;
 	states		tmp = m->tmp;
-	char	   *p = start;
+	pg_wchar	   *p = start;
 	int			c = (start == m->beginp) ? OUT : *(start - 1);
 	int			lastc;			/* previous c */
 	int			flagch;
 	int			i;
-	char	   *coldp;			/* last p after which no match was
+	pg_wchar	   *coldp;		/* last p after which no match was
 								 * underway */
 
 	CLEAR(st);
@@ -849,23 +872,23 @@ sopno		stopst;
  == static char *slow(struct match *m, char *start, \
  ==		char *stop, sopno startst, sopno stopst);
  */
-static char *					/* where it ended */
+static pg_wchar *					/* where it ended */
 slow(m, start, stop, startst, stopst)
 struct match *m;
-char	   *start;
-char	   *stop;
+pg_wchar	   *start;
+pg_wchar	   *stop;
 sopno		startst;
 sopno		stopst;
 {
 	states		st = m->st;
 	states		empty = m->empty;
 	states		tmp = m->tmp;
-	char	   *p = start;
+	pg_wchar	   *p = start;
 	int			c = (start == m->beginp) ? OUT : *(start - 1);
 	int			lastc;			/* previous c */
 	int			flagch;
 	int			i;
-	char	   *matchp;			/* last p at which a match ended */
+	pg_wchar	   *matchp;	/* last p at which a match ended */
 
 	AT("slow", start, stop, startst, stopst);
 	CLEAR(st);
@@ -978,8 +1001,8 @@ states		aft;				/* states already known reachable after */
 				break;
 			case OCHAR:
 				/* only characters can match */
-				assert(!NONCHAR(ch) || ch != (char) OPND(s));
-				if (ch == (char) OPND(s))
+				assert(!NONCHAR(ch) || ch != (pg_wchar) OPND(s));
+				if (ch == (pg_wchar) OPND(s))
 					FWD(aft, bef, 1);
 				break;
 			case OBOL:
@@ -1082,7 +1105,7 @@ states		aft;				/* states already known reachable after */
 static void
 print(m, caption, st, ch, d)
 struct match *m;
-char	   *caption;
+pg_wchar	   *caption;
 states		st;
 int			ch;
 FILE	   *d;
@@ -1109,16 +1132,16 @@ FILE	   *d;
 /*
  - at - print current situation
  == #ifdef REDEBUG
- == static void at(struct match *m, char *title, char *start, char *stop, \
+ == static void at(struct match *m, pg_wchar *title, pg_wchar *start, pg_wchar *stop, \
  ==												sopno startst, sopno stopst);
  == #endif
  */
 static void
 at(m, title, start, stop, startst, stopst)
 struct match *m;
-char	   *title;
-char	   *start;
-char	   *stop;
+pg_wchar	   *title;
+pg_wchar	   *start;
+pg_wchar	   *stop;
 sopno		startst;
 sopno		stopst;
 {
@@ -1143,13 +1166,24 @@ sopno		stopst;
  * a matching debug.o, and this is convenient.	It all disappears in
  * the non-debug compilation anyway, so it doesn't matter much.
  */
-static char *					/* -> representation */
+
+
+static int pg_isprint(int c)
+{
+#ifndef MB
+  return(isprint(c));
+#else	/* MB: codes outside 0..UCHAR_MAX are never reported printable */
+  return(!(c < 0 || c > UCHAR_MAX) && isprint(c));
+#endif
+}
+
+static pg_wchar *					/* -> representation */
 pchar(ch)
 int			ch;
 {
-	static char pbuf[10];
+	static pg_wchar pbuf[10];
 
-	if (isprint(ch) || ch == ' ')
+	if (pg_isprint(ch) || ch == ' ')
 		sprintf(pbuf, "%c", ch);
 	else
 		sprintf(pbuf, "\\%o", ch);
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index e31f8654049..6b7c472f1b9 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -62,8 +62,8 @@ static char sccsid[] = "@(#)regcomp.c	8.5 (Berkeley) 3/20/94";
  */
 struct parse
 {
-	char	   *next;			/* next character in RE */
-	char	   *end;			/* end of string (-> NUL normally) */
+	pg_wchar   *next;			/* next character in RE */
+	pg_wchar   *end;			/* end of string (-> NUL normally) */
 	int			error;			/* has an error been seen? */
 	sop		   *strip;			/* malloced strip */
 	sopno		ssize;			/* malloced strip size (allocated) */
@@ -93,7 +93,7 @@ extern		"C"
 	static void p_b_term(struct parse * p, cset *cs);
 	static void p_b_cclass(struct parse * p, cset *cs);
 	static void p_b_eclass(struct parse * p, cset *cs);
-	static char p_b_symbol(struct parse * p);
+	static pg_wchar p_b_symbol(struct parse * p);
 	static char p_b_coll_elem(struct parse * p, int endc);
 	static char othercase(int ch);
 	static void bothcases(struct parse * p, int ch);
@@ -120,6 +120,10 @@ extern		"C"
 	static void stripsnug(struct parse * p, struct re_guts * g);
 	static void findmust(struct parse * p, struct re_guts * g);
 	static sopno pluscount(struct parse * p, struct re_guts * g);
+	static int pg_isdigit(int c);
+	static int pg_isalpha(int c);
+	static int pg_isupper(int c);
+	static int pg_islower(int c);
 
 #ifdef __cplusplus
 }
@@ -127,7 +131,7 @@ extern		"C"
 #endif
 /* ========= end header generated by ./mkh ========= */
 
-static char nuls[10];			/* place to point scanner in event of
+static pg_wchar nuls[10];			/* place to point scanner in event of
 								 * error */
 
 /*
@@ -190,6 +194,9 @@ int			cflags;
 	struct parse *p = &pa;
 	int			i;
 	size_t		len;
+#ifdef MB
+ 	pg_wchar *wcp;
+#endif
 
 #ifdef REDEBUG
 #define  GOODFLAGS(f)	 (f)
@@ -203,12 +210,31 @@ int			cflags;
 
 	if (cflags & REG_PEND)
 	{
+#ifdef MB
+	        wcp = preg->patsave;
+		if (preg->re_endp < wcp)
+			return (REG_INVARG);
+		len = preg->re_endp - wcp;
+#else
 		if (preg->re_endp < pattern)
 			return (REG_INVARG);
 		len = preg->re_endp - pattern;
+#endif
+	}
+	else {
+#ifdef MB
+	  wcp = (pg_wchar *)malloc((strlen(pattern)+1) * sizeof(pg_wchar));
+	  if (wcp == NULL) {
+	    return (REG_ESPACE);
+	  }
+	  preg->patsave = wcp;
+	  (void)pg_mb2wchar((unsigned char *)pattern,wcp);
+	  len = pg_wchar_strlen(wcp);
+#else
+
+	  len = strlen((char *) pattern);
+#endif
 	}
-	else
-		len = strlen((char *) pattern);
 
 	/* do the mallocs early so failure handling is easy */
 	g = (struct re_guts *) malloc(sizeof(struct re_guts) +
@@ -227,7 +253,11 @@ int			cflags;
 
 	/* set things up */
 	p->g = g;
-	p->next = (char *) pattern; /* convenience; we do not modify it */
+#ifdef MB
+	p->next = wcp;
+#else
+	p->next = pattern; /* convenience; we do not modify it */
+#endif
 	p->end = p->next + len;
 	p->error = 0;
 	p->ncsalloc = 0;
@@ -342,7 +372,7 @@ static void
 p_ere_exp(p)
 struct parse *p;
 {
-	char		c;
+	pg_wchar	c;
 	sopno		pos;
 	int			count;
 	int			count2;
@@ -420,7 +450,7 @@ struct parse *p;
 			break;
 		case '{':				/* okay as ordinary except if digit
 								 * follows */
-			REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
+			REQUIRE(!MORE() || !pg_isdigit(PEEK()), REG_BADRPT);
 			/* FALLTHROUGH */
 		default:
 			ordinary(p, c);
@@ -432,7 +462,7 @@ struct parse *p;
 	c = PEEK();
 	/* we call { a repetition if followed by a digit */
 	if (!(c == '*' || c == '+' || c == '?' ||
-		  (c == '{' && MORE2() && isdigit(PEEK2()))))
+		  (c == '{' && MORE2() && pg_isdigit(PEEK2()))))
 		return;					/* no repetition, we're done */
 	NEXT();
 
@@ -463,7 +493,7 @@ struct parse *p;
 			count = p_count(p);
 			if (EAT(','))
 			{
-				if (isdigit(PEEK()))
+				if (pg_isdigit(PEEK()))
 				{
 					count2 = p_count(p);
 					REQUIRE(count <= count2, REG_BADBR);
@@ -490,7 +520,7 @@ struct parse *p;
 		return;
 	c = PEEK();
 	if (!(c == '*' || c == '+' || c == '?' ||
-		  (c == '{' && MORE2() && isdigit(PEEK2()))))
+		  (c == '{' && MORE2() && pg_isdigit(PEEK2()))))
 		return;
 	SETERROR(REG_BADRPT);
 }
@@ -568,7 +598,7 @@ int			starordinary;		/* is a leading * an ordinary character? */
 	int			i;
 	sopno		subno;
 
-#define  BACKSL  (1<<CHAR_BIT)
+#define  BACKSL  (1<<24)
 
 	pos = HERE();				/* repetion op, if any, covers from here */
 
@@ -577,7 +607,11 @@ int			starordinary;		/* is a leading * an ordinary character? */
 	if (c == '\\')
 	{
 		REQUIRE(MORE(), REG_EESCAPE);
+#ifdef MB
+		c = BACKSL | (pg_wchar) GETNEXT();
+#else
 		c = BACKSL | (unsigned char) GETNEXT();
+#endif
 	}
 	switch (c)
 	{
@@ -660,7 +694,7 @@ int			starordinary;		/* is a leading * an ordinary character? */
 		count = p_count(p);
 		if (EAT(','))
 		{
-			if (MORE() && isdigit(PEEK()))
+			if (MORE() && pg_isdigit(PEEK()))
 			{
 				count2 = p_count(p);
 				REQUIRE(count <= count2, REG_BADBR);
@@ -698,7 +732,7 @@ struct parse *p;
 	int			count = 0;
 	int			ndigits = 0;
 
-	while (MORE() && isdigit(PEEK()) && count <= DUPMAX)
+	while (MORE() && pg_isdigit(PEEK()) && count <= DUPMAX)
 	{
 		count = count * 10 + (GETNEXT() - '0');
 		ndigits++;
@@ -721,15 +755,27 @@ struct parse *p;
 {
 	cset	   *cs = allocset(p);
 	int			invert = 0;
+#ifdef MB
+	pg_wchar sp1[] = {'[', ':', '<', ':', ']', ']'};
+	pg_wchar sp2[] = {'[', ':', '>', ':', ']', ']'};
+#endif
 
 	/* Dept of Truly Sickening Special-Case Kludges */
+#ifdef MB
+ 	if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp1, 6) == 0)
+#else
 	if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0)
+#endif
 	{
 		EMIT(OBOW, 0);
 		NEXTn(6);
 		return;
 	}
+#ifdef MB
+ 	if (p->next + 5 < p->end && pg_wchar_strncmp(p->next, sp2, 6) == 0)
+#else
 	if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0)
+#endif
 	{
 		EMIT(OEOW, 0);
 		NEXTn(6);
@@ -757,7 +803,7 @@ struct parse *p;
 		int			ci;
 
 		for (i = p->g->csetsize - 1; i >= 0; i--)
-			if (CHIN(cs, i) && isalpha(i))
+			if (CHIN(cs, i) && pg_isalpha(i))
 			{
 				ci = othercase(i);
 				if (ci != i)
@@ -801,8 +847,8 @@ p_b_term(p, cs)
 struct parse *p;
 cset	   *cs;
 {
-	char		c;
-	char		start,
+	pg_wchar		c;
+	pg_wchar		start,
 				finish;
 	int			i;
 
@@ -857,6 +903,11 @@ cset	   *cs;
 				finish = start;
 /* xxx what about signed chars here... */
 			REQUIRE(start <= finish, REG_ERANGE);
+#ifdef MB
+		  if (CHlc(start) != CHlc(finish)) {
+		    SETERROR(REG_ERANGE);
+		  }
+#endif
 			for (i = start; i <= finish; i++)
 				CHadd(cs, i);
 			break;
@@ -872,17 +923,21 @@ p_b_cclass(p, cs)
 struct parse *p;
 cset	   *cs;
 {
-	char	   *sp = p->next;
+	pg_wchar   *sp = p->next;
 	struct cclass *cp;
 	size_t		len;
 	char	   *u;
 	char		c;
 
-	while (MORE() && isalpha(PEEK()))
+	while (MORE() && pg_isalpha(PEEK()))
 		NEXT();
 	len = p->next - sp;
 	for (cp = cclasses; cp->name != NULL; cp++)
+#ifdef MB
+		if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+#else
 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+#endif
 			break;
 	if (cp->name == NULL)
 	{
@@ -919,11 +974,11 @@ cset	   *cs;
  - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
  == static char p_b_symbol(struct parse *p);
  */
-static char						/* value of symbol */
+static pg_wchar						/* value of symbol */
 p_b_symbol(p)
 struct parse *p;
 {
-	char		value;
+	pg_wchar		value;
 
 	REQUIRE(MORE(), REG_EBRACK);
 	if (!EATTWO('[', '.'))
@@ -944,7 +999,7 @@ p_b_coll_elem(p, endc)
 struct parse *p;
 int			endc;				/* name ended by endc,']' */
 {
-	char	   *sp = p->next;
+	pg_wchar	   *sp = p->next;
 	struct cname *cp;
 	int			len;
 
@@ -957,7 +1012,11 @@ int			endc;				/* name ended by endc,']' */
 	}
 	len = p->next - sp;
 	for (cp = cnames; cp->name != NULL; cp++)
+#ifdef MB
+		if (pg_char_and_wchar_strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+#else
 		if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+#endif
 			return (cp->code);	/* known name */
 	if (len == 1)
 		return (*sp);			/* single character */
@@ -973,10 +1032,10 @@ static char						/* if no counterpart, return ch */
 othercase(ch)
 int			ch;
 {
-	assert(isalpha(ch));
-	if (isupper(ch))
+	assert(pg_isalpha(ch));
+	if (pg_isupper(ch))
 		return (tolower(ch));
-	else if (islower(ch))
+	else if (pg_islower(ch))
 		return (toupper(ch));
 	else
 /* peculiar, but could happen */
@@ -994,9 +1053,9 @@ bothcases(p, ch)
 struct parse *p;
 int			ch;
 {
-	char	   *oldnext = p->next;
-	char	   *oldend = p->end;
-	char		bracket[3];
+	pg_wchar	   *oldnext = p->next;
+	pg_wchar	   *oldend = p->end;
+	pg_wchar		bracket[3];
 
 	assert(othercase(ch) != ch);/* p_bracket() would recurse */
 	p->next = bracket;
@@ -1021,12 +1080,16 @@ int			ch;
 {
 	cat_t	   *cap = p->g->categories;
 
-	if ((p->g->cflags & REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
+	if ((p->g->cflags & REG_ICASE) && pg_isalpha(ch) && othercase(ch) != ch)
 		bothcases(p, ch);
 	else
 	{
+#ifdef MB
+		EMIT(OCHAR, (pg_wchar) ch);
+#else
 		EMIT(OCHAR, (unsigned char) ch);
-		if (cap[ch] == 0)
+#endif
+		if (ch >= CHAR_MIN && ch <= CHAR_MAX && cap[ch] == 0)
 			cap[ch] = p->g->ncategories++;
 	}
 }
@@ -1041,9 +1104,9 @@ static void
 nonnewline(p)
 struct parse *p;
 {
-	char	   *oldnext = p->next;
-	char	   *oldend = p->end;
-	char		bracket[4];
+	pg_wchar	   *oldnext = p->next;
+	pg_wchar	   *oldend = p->end;
+	pg_wchar		bracket[4];
 
 	p->next = bracket;
 	p->end = bracket + 3;
@@ -1674,7 +1737,7 @@ struct re_guts *g;
 	sop		   *newstart = 0;
 	sopno		newlen;
 	sop			s;
-	char	   *cp;
+	pg_wchar	   *cp;
 	sopno		i;
 
 	/* avoid making error situations worse */
@@ -1729,7 +1792,11 @@ struct re_guts *g;
 		return;
 
 	/* turn it into a character string */
+#ifdef MB
+	g->must = (pg_wchar *)malloc((size_t) (g->mlen + 1)*sizeof(pg_wchar));
+#else
 	g->must = malloc((size_t) g->mlen + 1);
+#endif
 	if (g->must == NULL)
 	{							/* argh; just forget it */
 		g->mlen = 0;
@@ -1742,7 +1809,7 @@ struct re_guts *g;
 		while (OP(s = *scan++) != OCHAR)
 			continue;
 		assert(cp < g->must + g->mlen);
-		*cp++ = (char) OPND(s);
+		*cp++ = (pg_wchar) OPND(s);
 	}
 	assert(cp == g->must + g->mlen);
 	*cp++ = '\0';				/* just on general principles */
@@ -1785,3 +1852,42 @@ struct re_guts *g;
 		g->iflags |= BAD;
 	return (maxnest);
 }
+
+/*
+ * ctype wrappers: when MB is defined, codes outside 0..UCHAR_MAX never match
+ */
+static int pg_isdigit(int c)
+{
+#ifndef MB
+  return(isdigit(c));
+#else
+  return(!(c < 0 || c > UCHAR_MAX) && isdigit(c));
+#endif
+}
+
+static int pg_isalpha(int c)
+{
+#ifndef MB
+  return(isalpha(c));
+#else	/* MB: wide codes above UCHAR_MAX are never alphabetic here */
+  return(!(c < 0 || c > UCHAR_MAX) && isalpha(c));
+#endif
+}
+
+static int pg_isupper(int c)
+{
+#ifndef MB
+  return(isupper(c));
+#else	/* MB: wide codes above UCHAR_MAX are never upper case here */
+  return(!(c < 0 || c > UCHAR_MAX) && isupper(c));
+#endif
+}
+
+static int pg_islower(int c)
+{
+#ifndef MB
+  return(islower(c));
+#else	/* MB: wide codes above UCHAR_MAX are never lower case here */
+  return(!(c < 0 || c > UCHAR_MAX) && islower(c));
+#endif
+}
diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c
index a8ba2443c5f..abdc314a94b 100644
--- a/src/backend/regex/regerror.c
+++ b/src/backend/regex/regerror.c
@@ -52,6 +52,7 @@ static char sccsid[] = "@(#)regerror.c	8.4 (Berkeley) 3/20/94";
 
 #include <regex/regex.h>
 #include <regex/utils.h>
+#include <regex/regex2.h>
 
 /* ========= begin header generated by ./mkh ========= */
 #ifdef __cplusplus
@@ -214,7 +215,11 @@ char	   *localbuf;
 	struct rerr *r;
 
 	for (r = rerrs; r->code != 0; r++)
+#ifdef MB
+		if (pg_char_and_wchar_strcmp(r->name, preg->re_endp) == 0)
+#else
 		if (strcmp(r->name, preg->re_endp) == 0)
+#endif
 			break;
 	if (r->code == 0)
 		return ("0");
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index e3ac5cd0644..8a51810a203 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -164,6 +164,11 @@ int			eflags;
 {
 	struct re_guts *g = preg->re_g;
 
+#ifdef MB
+	pg_wchar *str;
+	int sts;
+#endif
+
 #ifdef REDEBUG
 #define  GOODFLAGS(f)	 (f)
 #else
@@ -177,8 +182,24 @@ int			eflags;
 		return (REG_BADPAT);
 	eflags = GOODFLAGS(eflags);
 
+#ifdef MB
+	str = (pg_wchar *)malloc((strlen(string)+1) * sizeof(pg_wchar));
+	if (!str) {
+	  return(REG_ESPACE);
+	}
+	(void)pg_mb2wchar((unsigned char *)string,str);
 	if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & REG_LARGE))
-		return (smatcher(g, (char *) string, nmatch, pmatch, eflags));
+	  sts = smatcher(g, str, nmatch, pmatch, eflags);
 	else
-		return (lmatcher(g, (char *) string, nmatch, pmatch, eflags));
+	  sts = lmatcher(g, str, nmatch, pmatch, eflags);
+	free((char *)str);
+	return(sts);
+
+#  else
+
+	if (g->nstates <= CHAR_BIT * sizeof(states1) && !(eflags & REG_LARGE))
+		return (smatcher(g, (pg_wchar *) string, nmatch, pmatch, eflags));
+	else
+		return (lmatcher(g, (pg_wchar *) string, nmatch, pmatch, eflags));
+#endif
 }
diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c
index e53fe54e86a..b169c840412 100644
--- a/src/backend/regex/regfree.c
+++ b/src/backend/regex/regfree.c
@@ -68,7 +68,11 @@ regex_t    *preg;
 		return;
 	preg->re_magic = 0;			/* mark it invalid */
 	g->magic = 0;				/* mark it invalid */
-
+#ifdef MB
+	if (preg->patsave != NULL) {
+	  free((char *)preg->patsave);
+	}
+#endif
 	if (g->strip != NULL)
 		free((char *) g->strip);
 	if (g->sets != NULL)
diff --git a/src/backend/regex/utftest.c b/src/backend/regex/utftest.c
new file mode 100644
index 00000000000..28baf7255ef
--- /dev/null
+++ b/src/backend/regex/utftest.c
@@ -0,0 +1,33 @@
+/*
+ * testing of utf2wchar()
+ * $Id: utftest.c,v 1.1 1998/03/15 07:38:37 scrappy Exp $
+ */
+#include <stdio.h>
+#include <regex/regex.h>
+#include <regex/utils.h>
+#include <regex/regex2.h>
+#include <regex/pg_wchar.h>
+
+int main(void)	/* decode the RFC 2044 sample strings and print each resulting code in hex */
+{
+  /* Example 1 from RFC2044 */
+  unsigned char utf1[] = {0x41,0xe2,0x89,0xa2,0xce,0x91,0x2e,0};
+  /* Example 2 from RFC2044 */
+  unsigned char utf2[] = {0x48,0x69,0x20,0x4d,0x6f,0x6d,0x20,0xe2,0x98,0xba,0x21,0};
+  /* Example 3 from RFC2044 */
+  unsigned char utf3[] = {0xe6,0x97,0xa5,0xe6,0x9c,0xac,0xe8,0xaa,0x9e,0};
+  unsigned char *utf[] = {utf1,utf2,utf3};
+  pg_wchar ucs[128];
+  pg_wchar *p;
+  size_t i;
+
+  for (i = 0; i < sizeof(utf) / sizeof(utf[0]); i++) {
+    pg_utf2wchar(utf[i], ucs);
+    /* pg_wchar's exact type is not visible here; the cast makes the argument match %x */
+    for (p = ucs; *p; p++)
+      printf("%04x ", (unsigned int) *p);
+    printf("\n");
+  }
+
+  return 0;
+}
diff --git a/src/backend/regex/utils.c b/src/backend/regex/utils.c
new file mode 100644
index 00000000000..1f904e338ce
--- /dev/null
+++ b/src/backend/regex/utils.c
@@ -0,0 +1,348 @@
+/*
+ * misc conversion functions between pg_wchar and other encodings.
+ * Tatsuo Ishii
+ * $Id: utils.c,v 1.1 1998/03/15 07:38:39 scrappy Exp $
+ */
+#include <regex/pg_wchar.h>
+/*
+ * convert NUL-terminated EUC to pg_wchar (EUC process code)
+ * caller should allocate enough space for "to" (input byte count + 1 entries suffice)
+ * a sequence cut short by the NUL is never read past: its bytes fall to the shorter cases
+ */
+static void pg_euc2wchar(const unsigned char *from, pg_wchar *to)
+{
+  while (*from) {
+    if (*from == SS2 && from[1]) {			/* SS2 + 1 byte */
+      *to = from[1];
+      from += 2;
+    } else if (*from == SS3 && from[1] && from[2]) {	/* SS3 + 2 bytes */
+      *to = (from[1] << 8) | (0x3f & from[2]);
+      from += 3;
+    } else if ((*from & 0x80) && from[1]) {		/* 2-byte code */
+      *to = (from[0] << 8) | from[1];
+      from += 2;
+    } else {						/* ASCII, or a lone lead byte */
+      *to = *from++;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+static void pg_eucjp2wchar(const unsigned char *from, pg_wchar *to)
+{
+  pg_euc2wchar(from, to);	/* no encoding-specific handling: delegate to the shared EUC decoder */
+}
+
+static void pg_euckr2wchar(const unsigned char *from, pg_wchar *to)
+{
+  pg_euc2wchar(from, to);	/* no encoding-specific handling: delegate to the shared EUC decoder */
+}
+
+/* EUC variant where SS2 introduces a 2-byte code: (0x3f00 & (b1 << 8)) | b2, both bytes kept.
+ * A sequence cut short by the NUL is never read past: its bytes fall to the shorter cases. */
+static void pg_eucch2wchar(const unsigned char *from, pg_wchar *to)
+{
+  while (*from) {
+    if (*from == SS2 && from[1] && from[2]) {
+      *to = (0x3f00 & (from[1] << 8)) | from[2];
+      from += 3;
+    } else if (*from == SS3 && from[1] && from[2]) {
+      *to = (from[1] << 8) | (0x3f & from[2]);
+      from += 3;
+    } else if ((*from & 0x80) && from[1]) {
+      *to = (from[0] << 8) | from[1];
+      from += 2;
+    } else {
+      *to = *from++;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/* EUC variant where SS2 introduces a 3-byte code, packed as (b1 << 16) | (b2 << 8) | b3.
+ * A sequence cut short by the NUL is never read past: its bytes fall through to the
+ * shorter cases.  "to" needs at most one entry per input byte, plus the terminator. */
+static void pg_euccn2wchar(const unsigned char *from, pg_wchar *to)
+{
+  while (*from) {
+    if (*from == SS2 && from[1] && from[2] && from[3]) {
+      *to = (from[1] << 16) | (from[2] << 8) | from[3];
+      from += 4;
+    } else if (*from == SS3 && from[1] && from[2]) {
+      *to = (from[1] << 8) | (0x3f & from[2]);
+      from += 3;
+    } else if ((*from & 0x80) && from[1]) {
+      *to = (from[0] << 8) | from[1];
+      from += 2;
+    } else {
+      *to = *from++;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/*
+ * convert NUL-terminated UTF-8 to pg_wchar (UCS-2); only 1- to 3-byte forms are decoded
+ * caller should allocate enough space for "to" (input byte count + 1 entries suffice)
+ * any other byte -- stray continuation byte, sequence cut short by the NUL -- is stored
+ * as is, so every iteration consumes input and the scan always stops at the terminator
+ */
+static void pg_utf2wchar(const unsigned char *from, pg_wchar *to)
+{
+  while (*from) {
+    if ((*from & 0x80) == 0) {
+      *to = *from++;
+    } else if ((*from & 0xe0) == 0xc0 && from[1]) {
+      /* 110xxxxx 10xxxxxx */
+      *to = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
+      from += 2;
+    } else if ((*from & 0xe0) == 0xe0 && from[1] && from[2]) {
+      /* note: this mask test also accepts lead bytes of 0xf0 and above */
+      *to = ((from[0] & 0x0f) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
+      from += 3;
+    } else {
+      /* unrecognised or truncated: pass the byte through so the loop advances */
+      *to = *from++;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/*
+ * convert mule internal code to pg_wchar.
+ * in this case pg_wchar consists of following 4 bytes:
+ *
+ * 0x00(unused)
+ * 0x00(ASCII)|leading character (one of LC1, LC12, LC2 or LC22)
+ * 0x00(ASCII,1 byte code)|other than 0x00(2 byte code)
+ * the lowest byte of the code
+ *
+ * note that Type N (variable length byte encoding) cannot be represented by
+ * this schema. sorry.
+ * caller should allocate enough space for "to" (input byte count + 1 entries
+ * suffice).
+ * "from" must be NUL terminated; a sequence is decoded only when all of its
+ * bytes precede the terminator, so the scan never reads past the NUL.  Bytes
+ * of a truncated sequence fall through to the remaining cases.
+ */
+static void pg_mule2wchar(const unsigned char *from, pg_wchar *to)
+{
+  while (*from) {
+    if (IS_LC1(*from) && from[1]) {
+      *to = (from[0] << 16) | from[1];
+      from += 2;
+    } else if (IS_LCPRV1(*from) && from[1] && from[2]) {
+      *to = (from[1] << 16) | from[2];
+      from += 3;
+    } else if (IS_LC2(*from) && from[1] && from[2]) {
+      *to = (from[0] << 16) | (from[1] << 8) | from[2];
+      from += 3;
+    } else if (IS_LCPRV2(*from) && from[1] && from[2] && from[3]) {
+      *to = (from[1] << 16) | (from[2] << 8) | from[3];
+      from += 4;
+    } else {	/* assume ASCII (also reached for a truncated sequence) */
+      *to = *from++;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/*
+ * convert EUC to pg_wchar (EUC process code)
+ * caller should allocate enough space for "to" (len + 1 entries suffice)
+ * len: length of from.
+ * "from" not necessarily null terminated: len is tested before *from is read,
+ * and a multi-byte sequence is decoded only when all of its bytes lie within
+ * len (otherwise they fall to the shorter cases).  Also stops at a NUL byte.
+ */
+static void pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  while (len > 0 && *from) {
+    if (*from == SS2 && len >= 2) {
+      *to = from[1];
+      from += 2;
+      len -= 2;
+    } else if (*from == SS3 && len >= 3) {
+      *to = (from[1] << 8) | (0x3f & from[2]);
+      from += 3;
+      len -= 3;
+    } else if ((*from & 0x80) && len >= 2) {
+      *to = (from[0] << 8) | from[1];
+      from += 2;
+      len -= 2;
+    } else {
+      *to = *from++;
+      len--;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/* thin wrapper: delegates to the shared length-bounded EUC decoder */
+static void pg_eucjp2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  pg_euc2wchar_with_len(from, to, len);
+}
+
+/* thin wrapper: delegates to the shared length-bounded EUC decoder */
+static void pg_euckr2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  pg_euc2wchar_with_len(from, to, len);
+}
+
+/* Length-bounded EUC variant where SS2 introduces a 2-byte code, packed as
+ * (0x3f00 & (b1 << 8)) | b2 with both bytes kept.  len is tested before *from is read,
+ * and a sequence is decoded only when all of its bytes lie within len; otherwise its
+ * bytes fall to the shorter cases.  Also stops at a NUL byte. */
+static void pg_eucch2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  while (len > 0 && *from) {
+    if (*from == SS2 && len >= 3) {
+      *to = (0x3f00 & (from[1] << 8)) | from[2];
+      from += 3;
+      len -= 3;
+    } else if (*from == SS3 && len >= 3) {
+      *to = (from[1] << 8) | (0x3f & from[2]);
+      from += 3;
+      len -= 3;
+    } else if ((*from & 0x80) && len >= 2) {
+      *to = (from[0] << 8) | from[1];
+      from += 2;
+      len -= 2;
+    } else {
+      *to = *from++;
+      len--;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/* Length-bounded EUC variant where SS2 introduces a 3-byte code, packed as
+ * (b1 << 16) | (b2 << 8) | b3.  len is tested before *from is read, and a sequence
+ * is decoded only when all of its bytes lie within len; otherwise its bytes fall to
+ * the shorter cases.  Also stops at a NUL byte.  "to" needs at most len + 1
+ * entries. */
+static void pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  while (len > 0 && *from) {
+    if (*from == SS2 && len >= 4) {
+      *to = (from[1] << 16) | (from[2] << 8) | from[3];
+      from += 4;
+      len -= 4;
+    } else if (*from == SS3 && len >= 3) {
+      *to = (from[1] << 8) | (0x3f & from[2]);
+      from += 3;
+      len -= 3;
+    } else if ((*from & 0x80) && len >= 2) {
+      *to = (from[0] << 8) | from[1];
+      from += 2;
+      len -= 2;
+    } else {
+      *to = *from++;
+      len--;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/*
+ * convert UTF-8 to pg_wchar (UCS-2); only 1- to 3-byte forms are decoded
+ * caller should allocate enough space for "to" (len + 1 entries suffice)
+ * len: length of from.
+ * "from" not necessarily null terminated: len is tested before *from is read.
+ * Conversion also stops at a NUL byte.
+ */
+static void pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  while (len > 0 && *from) {
+    if ((*from & 0x80) == 0) {
+      *to = *from++;
+      len--;
+    } else if ((*from & 0xe0) == 0xc0 && len >= 2) {
+      /* 110xxxxx 10xxxxxx */
+      *to = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
+      from += 2;
+      len -= 2;
+    } else if ((*from & 0xe0) == 0xe0 && len >= 3) {
+      /* note: this mask test also accepts lead bytes of 0xf0 and above */
+      *to = ((from[0] & 0x0f) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
+      from += 3;
+      len -= 3;
+    } else {
+      /* unrecognised byte, or sequence extending past len: pass one byte through */
+      *to = *from++;
+      len--;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+/*
+ * convert mule internal code to pg_wchar
+ * caller should allocate enough space for "to" (len + 1 entries suffice)
+ * len: length of from.
+ * "from" not necessarily null terminated: len is tested before *from is
+ * read, and a multi-byte sequence is decoded only when all of its bytes lie
+ * within len.  Bytes of a sequence cut short by len fall through to the
+ * remaining cases, ending with a plain single-byte copy.
+ * Conversion also stops at a NUL byte.
+ */
+static void pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  while (len > 0 && *from) {
+    if (IS_LC1(*from) && len >= 2) {
+      *to = (from[0] << 16) | from[1];
+      from += 2;
+      len -= 2;
+    } else if (IS_LCPRV1(*from) && len >= 3) {
+      *to = (from[1] << 16) | from[2];
+      from += 3;
+      len -= 3;
+    } else if (IS_LC2(*from) && len >= 3) {
+      *to = (from[0] << 16) | (from[1] << 8) | from[2];
+      from += 3;
+      len -= 3;
+    } else if (IS_LCPRV2(*from) && len >= 4) {
+      *to = (from[1] << 16) | (from[2] << 8) | from[3];
+      from += 4;
+      len -= 4;
+    } else {	/* assume ASCII (also reached for a sequence extending past len) */
+      *to = *from++;
+      len--;
+    }
+    to++;
+  }
+  *to = 0;
+}
+
+typedef struct {	/* one decoder pair per encoding; prototyped so calls through it are type-checked */
+  void	(*mb2wchar)(const unsigned char *from, pg_wchar *to);
+  void	(*mb2wchar_with_len)(const unsigned char *from, pg_wchar *to, int len);
+} pg_wchar_tbl;
+/* indexed by MB -- NOTE(review): row order presumably mirrors the encoding numbering in pg_wchar.h; confirm */
+static pg_wchar_tbl pg_wchar_table[] = {
+  {pg_eucjp2wchar, pg_eucjp2wchar_with_len},
+  {pg_eucch2wchar, pg_eucch2wchar_with_len},
+  {pg_euckr2wchar, pg_euckr2wchar_with_len},
+  {pg_euccn2wchar, pg_euccn2wchar_with_len},
+  {pg_utf2wchar, pg_utf2wchar_with_len},
+  {pg_mule2wchar, pg_mule2wchar_with_len}};
+
+void pg_mb2wchar(const unsigned char *from, pg_wchar *to)
+{
+  pg_wchar_table[MB].mb2wchar(from, to);	/* decoder chosen by the MB macro */
+}
+
+void pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  pg_wchar_table[MB].mb2wchar_with_len(from, to, len);	/* decoder chosen by the MB macro */
+}
diff --git a/src/backend/regex/wstrcmp.c b/src/backend/regex/wstrcmp.c
new file mode 100644
index 00000000000..b562f103150
--- /dev/null
+++ b/src/backend/regex/wstrcmp.c
@@ -0,0 +1,48 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <regex/pg_wchar.h>
+
+int
+pg_char_and_wchar_strcmp(s1, s2)
+	register const char *s1;	/* 0-terminated string of single bytes */
+	register const pg_wchar *s2;	/* 0-terminated wide string */
+{	/* strcmp-style: 0 if equal, else difference of the first mismatching pair */
+	while ((pg_wchar)(unsigned char)*s1 == *s2++)	/* widen the byte without sign extension */
+		if (*s1++ == 0)
+			return (0);
+	return (*(const unsigned char *)s1 - *(const pg_wchar *)(s2 - 1));
+}
diff --git a/src/backend/regex/wstrncmp.c b/src/backend/regex/wstrncmp.c
new file mode 100644
index 00000000000..e7ce52ed7b2
--- /dev/null
+++ b/src/backend/regex/wstrncmp.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from FreeBSD 2.2.1-RELEASE software.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <regex/pg_wchar.h>
+
+int
+pg_wchar_strncmp(s1, s2, n)
+	register const pg_wchar *s1, *s2;
+	register size_t n;
+{
+	/*
+	 * Compare at most n wide characters; a 0 terminator ends the scan early.
+	 * Returns the difference of the first mismatching pair, else 0.
+	 */
+	for (; n != 0; n--, s1++, s2++)
+	{
+		if (*s1 != *s2)
+			return (*s1 - *s2);
+		if (*s1 == 0)
+			break;
+	}
+	return (0);
+}
+
+int
+pg_char_and_wchar_strncmp(s1, s2, n)
+	register const char *s1;	/* string of single bytes */
+	register const pg_wchar *s2;	/* wide string */
+	register size_t n;	/* maximum number of characters to compare */
+{
+	/* strncmp-style: 0 if equal within n, else difference of the first mismatch */
+	if (n == 0)
+		return (0);
+	do {
+		if ((pg_wchar)(unsigned char)*s1 != *s2++)	/* widen the byte without sign extension */
+			return (*(const unsigned char *)s1 -	/* s1 holds bytes: read one byte, not a pg_wchar */
+				*(const pg_wchar *)(s2 - 1));
+		if (*s1++ == 0)
+			break;
+	} while (--n != 0);
+	return (0);
+}
+
+size_t
+pg_wchar_strlen(str)
+	const pg_wchar *str;
+{
+	size_t		count = 0;	/* wide chars seen before the 0 terminator */
+	while (str[count] != 0)
+		count++;
+	return(count);
+}
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index fe7b5dd1d36..75cc755f7cc 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -4,7 +4,7 @@
 #    Makefile for utils/adt
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/utils/adt/Makefile,v 1.10 1997/12/20 00:28:21 scrappy Exp $
+#    $Header: /cvsroot/pgsql/src/backend/utils/adt/Makefile,v 1.11 1998/03/15 07:38:42 scrappy Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -14,6 +14,9 @@ include ../../../Makefile.global
 INCLUDE_OPT = -I../.. 
 
 CFLAGS+=$(INCLUDE_OPT)
+ifdef MB
+CFLAGS+=-DMB=$(MB)
+endif
 
 OBJS = acl.o arrayfuncs.o arrayutils.o bool.o cash.o char.o chunk.o date.o \
 	datum.o dt.o filename.o float.o geo_ops.o geo_selfuncs.o int.o \
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7d4681262d5..27d6ffc0140 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -21,8 +21,9 @@
 #include "postgres.h"			/* postgres system include file */
 #include "utils/palloc.h"
 #include "utils/builtins.h"		/* where the function declarations go */
+#include "regex/pg_wchar.h"
 
-static int	like(char *text, char *p);
+static int	like(pg_wchar *text, pg_wchar *p);
 
 /*
  *	interface routines called by the function manager
@@ -39,16 +40,22 @@ static int	like(char *text, char *p);
 static bool
 fixedlen_like(char *s, struct varlena * p, int charlen)
 {
-	char	   *sterm,
+	pg_wchar	   *sterm,
 			   *pterm;
 	int			result;
+	int	len;
 
 	if (!s || !p)
 		return FALSE;
 
 	/* be sure sterm is null-terminated */
+#ifdef MB
+	sterm = (pg_wchar *) palloc((charlen + 1)*sizeof(pg_wchar));
+	(void)pg_mb2wchar_with_len((unsigned char *)s,sterm,charlen);
+#else
 	sterm = (char *) palloc(charlen + 1);
 	StrNCpy(sterm, s, charlen + 1);
+#endif
 
 	/*
 	 * p is a text = varlena, not a string so we have to make a string
@@ -56,9 +63,15 @@ fixedlen_like(char *s, struct varlena * p, int charlen)
 	 */
 
 	/* palloc the length of the text + the null character */
-	pterm = (char *) palloc(VARSIZE(p) - VARHDRSZ + 1);
-	memmove(pterm, VARDATA(p), VARSIZE(p) - VARHDRSZ);
-	*(pterm + VARSIZE(p) - VARHDRSZ) = (char) NULL;
+	len = VARSIZE(p) - VARHDRSZ;
+#ifdef MB
+	pterm = (pg_wchar *) palloc((len + 1)*sizeof(pg_wchar));
+	(void)pg_mb2wchar_with_len((unsigned char *)VARDATA(p),pterm,len);
+#else
+	pterm = (char *) palloc(len + 1);
+	memmove(pterm, VARDATA(p), len);
+	*(pterm + len) = (char) NULL;
+#endif
 
 	/* do the regexp matching */
 	result = like(sterm, pterm);
@@ -150,7 +163,7 @@ textnlike(struct varlena * s, struct varlena * p)
 }
 
 
-/*	$Revision: 1.12 $
+/*	$Revision: 1.13 $
 **	"like.c" A first attempt at a LIKE operator for Postgres95.
 **
 **	Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
@@ -185,7 +198,7 @@ textnlike(struct varlena * s, struct varlena * p)
 **	Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
 */
 static int
-DoMatch(char *text, char *p)
+DoMatch(pg_wchar *text, pg_wchar *p)
 {
 	int			matched;
 
@@ -228,7 +241,7 @@ DoMatch(char *text, char *p)
 **	User-level routine.  Returns TRUE or FALSE.
 */
 static int
-like(char *text, char *p)
+like(pg_wchar *text, pg_wchar *p)
 {
 	if (p[0] == '%' && p[1] == '\0')
 		return TRUE;
author	Marc G. Fournier	1998-03-15 07:39:04 +0000
committer	Marc G. Fournier	1998-03-15 07:39:04 +0000
commit	661ecf3c48e16a9add216287eb969d7615e47968 (patch)
tree	91b54d5905aa2e22bd0ae9ea8c6b0f3cab75d3f4 /src/backend
parent	31a925c4d07675bc098a742ee9ca642ec79a40ee (diff)