wchar: compat mbsnrtowcs(), tests
author Marko Kreen <[email protected]>
Sun, 6 Jan 2013 10:48:38 +0000 (12:48 +0200)
committer Marko Kreen <[email protected]>
Sun, 6 Jan 2013 10:48:38 +0000 (12:48 +0200)
test/Makefile
test/force_compat.sed
test/test_common.c
test/test_common.h
test/test_wchar.c [new file with mode: 0644]
usual/wchar.c
usual/wchar.h

index dc8d33902893e0a7332a0b3a481cac8a7d718b66..af633391003862bba4303c5d797315198929f114 100644 (file)
@@ -14,6 +14,7 @@ regtest_system_SOURCES = \
        test_cfparser.c test_endian.c test_hashtab.c test_mdict.c \
        test_shlist.c test_time.c test_hashing.c test_fileutil.c \
        test_socket.c test_getopt.c test_ctype.c test_fnmatch.c \
+       test_wchar.c \
        test_common.h tinytest.h tinytest_macros.h
 
 # build regtest_system against actual library
index 33430dacd5c3dca6a7853e134764d999b72b0dcd..dc3033e7347f9e4b8f8bf105736f3abf8c3eedab 100644 (file)
@@ -11,3 +11,4 @@
 /^#define.*GETOPT/s,.*,/* & */,
 /^#define.*CTYPE_ON_CHAR/s,.*,/* & */,
 /^#define.*FNMATCH/s,.*,/* & */,
+/^#define.*MBSNRTOWCS/s,.*,/* & */,
index 5c407cce7ef965f1f3dbc1ed7dea059aeb8a7851..dca15200678f76d48fd09f81a56618bbbdf2deb9 100644 (file)
@@ -12,6 +12,7 @@ struct testgroup_t groups[] = {
        { "hashing/", hashing_tests },
        { "endian/", endian_tests },
        { "string/", string_tests },
+       { "wchar/", wchar_tests },
        { "fnmatch/", fnmatch_tests },
        { "ctype/", ctype_tests },
        { "heap/", heap_tests },
index 375fa7134c5f84c42990978ab5fe6c0b542cac0a..a9074b8cb847feb8be052a82241071089fa22dc3 100644 (file)
@@ -34,3 +34,4 @@ extern struct testcase_t socket_tests[];
 extern struct testcase_t getopt_tests[];
 extern struct testcase_t ctype_tests[];
 extern struct testcase_t fnmatch_tests[];
+extern struct testcase_t wchar_tests[];
diff --git a/test/test_wchar.c b/test/test_wchar.c
new file mode 100644 (file)
index 0000000..df87b45
--- /dev/null
@@ -0,0 +1,139 @@
+
+#include <usual/wchar.h>
+#include <string.h>
+
+#include "test_common.h"
+
+
+/*
+ * mbstr_decode()
+ */
+
+/*
+ * Decode the first inbuf bytes of s with mbstr_decode() and render the
+ * outcome as a short ASCII string that str_check() can compare.
+ *
+ * The wide buffer is pre-filled with '~', so the first '~' in the result
+ * marks the first slot the decoder left untouched.  Other markers:
+ *   'Z'     - the terminating 0 found at res[reslen]
+ *   '#'     - any other 0 wide char
+ *   'A'-'Z' - a wide char above 127, folded as 'A' + value % 26
+ *
+ * Failures are reported as fixed strings: "EILSEQ", "ENOMEM" or "NULL??"
+ * (NULL result, selected by errno), "EBUF" (result is not the supplied
+ * buffer) and "reslen fail?" (no terminator at res[reslen]).
+ *
+ * Returns a pointer to a static buffer or to a string literal.
+ */
+static const char *decode(const char *s, int inbuf)
+{
+       static char out[128];
+       wchar_t tmp[128];
+       wchar_t *res;
+       int reslen = 4;
+       unsigned i;
+
+       for (i = 0; i < 128; i++)
+               tmp[i] = '~';
+
+       /*
+        * NOTE(review): sizeof(tmp) is a size in bytes; confirm that
+        * mbstr_decode() does not expect a wchar_t count here.
+        */
+       res = mbstr_decode(s, inbuf, &reslen, tmp, sizeof(tmp), true);
+       if (res == NULL) {
+               if (errno == EILSEQ) return "EILSEQ";
+               if (errno == ENOMEM) return "ENOMEM";
+               return "NULL??";
+       }
+       if (res != tmp)
+               return "EBUF";
+       if (res[reslen] == 0)
+               res[reslen] = 'Z';
+       else
+               return "reslen fail?";
+
+       /*
+        * Render into out[].  The loop stops one short of the buffer end
+        * so the terminator always fits, and markers are chosen from the
+        * wide value so a wide char whose low byte happens to be '~' or 0
+        * is not misread after truncation to char.
+        */
+       for (i = 0; i < sizeof(out) - 1; i++) {
+               wchar_t wc = tmp[i];
+
+               if (wc == '~') {
+                       out[i++] = '~';
+                       break;
+               }
+               if (wc == 0)
+                       out[i] = '#';
+               else if (wc > 127)
+                       out[i] = 'A' + wc % 26;
+               else
+                       out[i] = wc;
+       }
+       out[i] = 0;
+       return out;
+}
+
+static void test_mbstr_decode(void *p)
+{      /* Checks decode() renderings: 'Z' is the terminator slot, '~' the first untouched slot; p is unused. */
+       str_check(decode("", 0), "Z~");
+       str_check(decode("", 1), "Z~"); /* a length that covers the NUL gives the same result */
+       str_check(decode("a", 0), "Z~");
+
+       str_check(decode("abc", 0), "Z~");
+       str_check(decode("abc", 1), "aZ~");
+       str_check(decode("abc", 2), "abZ~");
+       str_check(decode("abc", 3), "abcZ~");
+       str_check(decode("abc", 4), "abcZ~"); /* lengths past the NUL are expected to stop at it */
+       str_check(decode("abc", 5), "abcZ~");
+
+       if (MB_CUR_MAX > 1) { /* invalid-byte cases run only when the locale is multibyte */
+               str_check(decode("aa\200cc", 5), "aaYccZ~"); /* 'Y' is how decode() renders 128: 'A' + 128 % 26 */
+               str_check(decode("a\200cc", 5), "aYccZ~");
+               str_check(decode("aa\200c", 5), "aaYcZ~");
+       }
+end:; /* presumably the label str_check jumps to on failure; confirm in tinytest_macros.h */
+}
+
+/*
+ * mbsnrtowcs()
+ */
+
+
+/*
+ * Call mbsnrtowcs() on str with a source limit of inbuf bytes and a
+ * destination limit of outbuf wide chars, and render the result as an
+ * ASCII string that str_check() can compare.
+ *
+ * The wide buffer is pre-filled with '~', so the first '~' in the result
+ * marks the first slot the call left untouched.  If the slot at the
+ * returned count holds 0 it is shown as 'Z' when the source pointer came
+ * back NULL and as 'z' when it did not.
+ *
+ * A negative result with errno == EILSEQ is rendered as "EILSEQ(n)",
+ * where n is the offset at which the source pointer was left; any other
+ * negative result gives "unknown error".
+ *
+ * Returns a pointer to a static buffer or to a string literal.
+ */
+static const char *mbsnr(const char *str, int inbuf, int outbuf)
+{
+       static char out[128];
+       wchar_t tmp[128];
+       int res;
+       unsigned i;
+       const char *s = str;
+       mbstate_t ps;
+
+       for (i = 0; i < 128; i++)
+               tmp[i] = '~';
+
+       memset(&ps, 0, sizeof(ps));
+       res = mbsnrtowcs(tmp, &s, inbuf, outbuf, &ps);
+       if (res < 0) {
+               if (errno == EILSEQ) {
+                       snprintf(out, sizeof(out), "EILSEQ(%d)", (int)(s - str));
+                       return out;
+               }
+               return "unknown error";
+       }
+       if (tmp[res] == 0)
+               tmp[res] = s ? 'z' : 'Z';
+
+       /*
+        * Render into out[].  The loop stops one short of the buffer end
+        * so the terminator always fits, and the sentinel is recognised on
+        * the wide value rather than on its truncation to char.
+        */
+       for (i = 0; i < sizeof(out) - 1; i++) {
+               out[i] = tmp[i];
+               if (tmp[i] == '~') {
+                       i++;
+                       break;
+               }
+       }
+       out[i] = 0;
+       return out;
+}
+
+static void test_mbsnrtowcs(void *p)
+{      /* Arguments to mbsnr() are (string, source byte limit, destination limit); p is unused. */
+       str_check(mbsnr("", 1, 1), "Z~"); /* 'Z': terminator stored and source pointer came back NULL */
+       str_check(mbsnr("", 0, 0), "~");  /* '~' alone: nothing was stored */
+       str_check(mbsnr("", 0, 1), "~"); /* XXX */
+       str_check(mbsnr("", 1, 0), "~");
+
+       str_check(mbsnr("x", 1, 1), "x~");
+       str_check(mbsnr("x", 0, 0), "~");
+       str_check(mbsnr("x", 0, 1), "~"); /* XXX */
+       str_check(mbsnr("x", 1, 0), "~");
+
+       str_check(mbsnr("abc", 3, 3), "abc~");
+       str_check(mbsnr("abc", 3, 4), "abc~"); /* XXX */
+
+       str_check(mbsnr("abc", 4, 3), "abc~");  /* NUL is within the source limit but the destination is full */
+       str_check(mbsnr("abc", 4, 4), "abcZ~"); /* both limits leave room for the terminator */
+end:; /* presumably the label str_check jumps to on failure; confirm in tinytest_macros.h */
+}
+
+/*
+ * Describe
+ */
+
+struct testcase_t wchar_tests[] = { /* test table registered under the "wchar/" group in groups[] */
+       { "mbsnrtowcs", test_mbsnrtowcs },
+       { "mbstr_decode", test_mbstr_decode },
+       END_OF_TESTCASES /* table terminator; presumably supplied by tinytest.h */
+};
+
index fbed966d224bf9752bf2cb62d6f42d536f10ca12..640ce76fc2ffddc31d1e24f182f59cafca8fee21 100644 (file)
@@ -44,17 +44,18 @@ wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p,
                        return NULL;
        }
 
-#ifdef HAVE_MBSNRTOWCS
        /* try full decode at once */
        s = str;
        memset(&ps, 0, sizeof(ps));
        clen = mbsnrtowcs(dst, &s, str_len, wmax, &ps);
-       if (clen > 0 && s == NULL) {
+       if (clen >= 0) {
                if (wlen_p)
                        *wlen_p = clen;
+               dst[clen] = 0;
                return dst;
        }
-#endif
+       if (!allow_invalid)
+               goto fail;
 
        /* full decode failed, decode chars one-by-one */
        s = str;
@@ -64,29 +65,34 @@ wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p,
        while (s < str_end && w < wend) {
                clen = mbrtowc(w, s, str_end - s, &ps);
                if (clen > 0) {
+                       /* single char */
                        w++;
                        s += clen;
+               } else if (clen == 0) {
+                       /* string end */
+                       break;
                } else if (allow_invalid) {
                        /* allow invalid encoding */
                        memset(&ps, 0, sizeof(ps));
                        *w++ = (unsigned char)*s++;
                } else {
+                       /* invalid encoding */
                        goto fail;
                }
        }
 
-       if (s != str_end)
+       /* make sure we got string end */
+       if (s < str_end && *s != '\0')
                goto fail;
 
        *w = 0;
-       if (wlen_p != NULL)
+       if (wlen_p)
                *wlen_p = w - dst;
        return dst;
 
 fail:
        if (dst != wbuf)
                free(dst);
-       errno = EILSEQ;
        return NULL;
 }
 
@@ -107,3 +113,52 @@ wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen)
        return wctype(buf);
 }
 
+#ifndef HAVE_MBSNRTOWCS
+
+/*
+ * Fallback mbsnrtowcs() built on mbrtowc().
+ *
+ * Converts at most srclen bytes starting at *src_p into wide chars.
+ * When dst is non-NULL at most dstlen wide chars are stored; when dst
+ * is NULL nothing is stored, dstlen is ignored and the characters are
+ * only counted.
+ *
+ * If ps is NULL a zeroed temporary conversion state is used for this
+ * call.
+ *
+ * Returns the number of wide chars converted, not counting a
+ * terminating 0.  When dst is non-NULL, *src_p is set to NULL if the
+ * terminating 0 was reached and stored, otherwise to the first byte
+ * that was not converted.  When dst is NULL, *src_p is left unchanged.
+ *
+ * On an invalid sequence, or one cut short by srclen, returns
+ * (size_t)-1 with errno set to EILSEQ.
+ */
+size_t mbsnrtowcs(wchar_t *dst, const char **src_p, size_t srclen, size_t dstlen, mbstate_t *ps)
+{
+       size_t clen;
+       const char *s, *s_end;
+       wchar_t *w;
+       mbstate_t pstmp;
+       size_t count = 0;
+
+       if (!ps) {
+               memset(&pstmp, 0, sizeof(pstmp));
+               ps = &pstmp;
+       }
+
+       s = *src_p;
+       s_end = s + srclen;
+       w = dst;
+       while (s < s_end) {
+               if (w && count >= dstlen) {
+                       /* dst is full */
+                       break;
+               }
+               clen = mbrtowc(w, s, s_end - s, ps);
+               if (clen == (size_t)(-1) || clen == (size_t)(-2)) {
+                       /*
+                        * Invalid or truncated sequence.  mbrtowc() sets
+                        * errno only for the invalid case, so set it
+                        * here to cover both.
+                        */
+                       errno = EILSEQ;
+                       if (dst)
+                               *src_p = s;
+                       return (size_t)(-1);
+               }
+               if (clen == 0) {
+                       /* end of string; mbrtowc() already stored the 0 */
+                       if (w)
+                               *w = 0;
+                       if (dst)
+                               *src_p = NULL;
+                       return count;
+               }
+               /* proper character */
+               if (w)
+                       w++;
+               count++;
+               s += clen;
+       }
+       /* stopped by srclen or by a full dst */
+       if (dst)
+               *src_p = s;
+       return count;
+}
+
+#endif
+
index 1d06a846ea9cf15d928e4fcf2fa72a61392e3815..38c98f51fa4f1eb87f2c00158ff9213fa0aebc16 100644 (file)
@@ -28,4 +28,9 @@ wchar_t *mbstr_decode(const char *str, int str_len, int *wlen_p, wchar_t *wbuf,
 
 wctype_t wctype_wcsn(const wchar_t *name, unsigned int namelen);
 
+#ifndef HAVE_MBSNRTOWCS /* compat declaration, used only when HAVE_MBSNRTOWCS is not defined */
+#define mbsnrtowcs(a,b,c,d,e) usual_mbsnrtowcs(a,b,c,d,e) /* route calls to the library's own implementation */
+size_t mbsnrtowcs(wchar_t *dst, const char **src_p, size_t srclen, size_t dstlen, mbstate_t *ps); /* the macro above expands here as well, so this declares usual_mbsnrtowcs() */
+#endif
+
 #endif