diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-09-06 12:33:45 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-09-06 12:33:45 +0000 |
commit | edd7c787adc53f70a9d2790076e4c6d77a1f5324 (patch) | |
tree | 0b19582f07ef27790bde97603bd271efb88bcb35 | |
parent | 629b1e4324ecfbdb9e953f2a0da74a833786e1e9 (diff) |
* array.c (rb_ary_cycle): typo in rdoc. a patch from Yugui
<[email protected]>. [ruby-dev:31748]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13348 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | array.c | 2 | ||||
-rw-r--r-- | encoding.c | 8 | ||||
-rw-r--r-- | euc_jp.c | 10 | ||||
-rw-r--r-- | ext/strscan/strscan.c | 2 | ||||
-rw-r--r-- | include/ruby/encoding.h | 2 | ||||
-rw-r--r-- | include/ruby/oniguruma.h | 8 | ||||
-rw-r--r-- | include/ruby/regex.h | 4 | ||||
-rw-r--r-- | parse.y | 29 | ||||
-rw-r--r-- | re.c | 26 | ||||
-rw-r--r-- | regcomp.c | 18 | ||||
-rw-r--r-- | regenc.c | 24 | ||||
-rw-r--r-- | regerror.c | 8 | ||||
-rw-r--r-- | regexec.c | 68 | ||||
-rw-r--r-- | regparse.c | 28 | ||||
-rw-r--r-- | sjis.c | 8 | ||||
-rw-r--r-- | string.c | 15 | ||||
-rw-r--r-- | unicode.c | 10 | ||||
-rw-r--r-- | utf8.c | 4 |
19 files changed, 146 insertions, 133 deletions
@@ -1,3 +1,8 @@ +Thu Sep 6 21:31:49 2007 Yukihiro Matsumoto <[email protected]> + + * array.c (rb_ary_cycle): typo in rdoc. a patch from Yugui + <[email protected]>. [ruby-dev:31748] + Thu Sep 6 12:42:10 2007 Nobuyoshi Nakada <[email protected]> * string.c (rb_str_succ, rb_str_chop_bang, rb_str_chop): m17n support. @@ -2929,7 +2929,7 @@ rb_ary_choice(VALUE ary) * Calls <i>block</i> repeatedly forever. * * a = ["a", "b", "c"] - * a.each {|x| puts x } # print, a, b, c, a, b, c,.. forever. + * a.cycle {|x| puts x } # print, a, b, c, a, b, c,.. forever. * */ diff --git a/encoding.c b/encoding.c index fc0aa3b945..8b674d13ea 100644 --- a/encoding.c +++ b/encoding.c @@ -189,7 +189,7 @@ rb_enc_nth(const char *p, const char *e, int nth, rb_encoding *enc) } else { for (c=0; p<e && nth--; c++) { - int n = rb_enc_mbclen(p, enc); + int n = rb_enc_mbclen(p, e, enc); if (n == 0) return 0; p += n; @@ -208,7 +208,7 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) } for (c=0; p<e; c++) { - int n = rb_enc_mbclen(p, enc); + int n = rb_enc_mbclen(p, e, enc); if (n == 0) return -1; p += n; @@ -217,9 +217,9 @@ rb_enc_strlen(const char *p, const char *e, rb_encoding *enc) } int -rb_enc_mbclen(const char *p, rb_encoding *enc) +rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) { - int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p); + int n = ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e); if (n == 0) { rb_raise(rb_eArgError, "invalid mbstring sequence"); } @@ -51,7 +51,7 @@ static const int EncLen_EUCJP[] = { }; static int -mbc_enc_len(const UChar* p) +mbc_enc_len(const UChar* p, const UChar* e) { return EncLen_EUCJP[*p]; } @@ -62,7 +62,7 @@ mbc_to_code(const UChar* p, const UChar* end) int c, i, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_EUC_JP, p); + len = enc_len(ONIG_ENCODING_EUC_JP, p, end); n = (OnigCodePoint )*p++; if (len == 1) return n; @@ -113,7 +113,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf)) + if (enc_len(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf)) return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; #endif return p - buf; @@ -134,7 +134,7 @@ mbc_case_fold(OnigCaseFoldType flag, else { int i; - len = enc_len(ONIG_ENCODING_EUC_JP, p); + len = enc_len(ONIG_ENCODING_EUC_JP, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; } @@ -156,7 +156,7 @@ left_adjust_char_head(const UChar* start, const UChar* s) p = s; while (!eucjp_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_JP, p); + len = enc_len(ONIG_ENCODING_EUC_JP, p, s); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index 44cd0f4131..ac07b39789 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -661,7 +661,7 @@ strscan_getch(VALUE self) if (EOS_P(p)) return Qnil; - len = rb_enc_mbclen(CURPTR(p), enc); + len = rb_enc_mbclen(CURPTR(p), S_PEND(p), enc); if (p->curr + len > S_LEN(p)) { len = S_LEN(p) - p->curr; } diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index e807e8a547..f43160a682 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -50,7 +50,7 @@ rb_encoding * rb_enc_find(const char *name); #define rb_enc_mbmaxlen(enc) (enc)->max_enc_len /* ptr,encoding -> mbclen */ -int rb_enc_mbclen(const char*, rb_encoding*); +int rb_enc_mbclen(const char*, const char *, rb_encoding*); /* code,encoding -> codelen */ int rb_enc_codelen(int, rb_encoding*); diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index d0d04782e4..a75f811ff8 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -144,7 +144,7 @@ typedef struct { typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); typedef struct OnigEncodingTypeST { - int (*mbc_enc_len)(const OnigUChar* p); + int (*mbc_enc_len)(const OnigUChar* p,const OnigUChar* e); const char* name; int max_enc_len; int min_enc_len; @@ -255,11 +255,11 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII -#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc, p) +#define enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) -#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) +#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1) #define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) #define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) #define ONIGENC_IS_MBC_WORD(enc,s,end) \ @@ -281,7 +281,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #define ONIGENC_STEP_BACK(enc,start,s,n) \ onigenc_step_back((enc),(start),(s),(n)) -#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) +#define ONIGENC_MBC_ENC_LEN(enc,p,e) (enc)->mbc_enc_len(p,e) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) #define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) diff --git a/include/ruby/regex.h b/include/ruby/regex.h index ad736775fe..b214c63d3e 100644 --- a/include/ruby/regex.h +++ b/include/ruby/regex.h @@ -29,8 +29,8 @@ extern "C" { ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -#define ismbchar(p, enc) (mbclen((p),(enc)) != 1) -#define mbclen(p,enc) rb_enc_mbclen((p), (enc)) +#define ismbchar(p, e, enc) (mbclen((p),(e),(enc)) != 1) +#define mbclen(p,e,enc) rb_enc_mbclen((p),(e),(enc)) #endif /* ifndef ONIG_RUBY_M17N */ @@ -4558,10 +4558,10 @@ ripper_dispatch_delayed_token(struct parser_params *parser, int t) # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128) #endif -#define parser_mbclen() mbclen((lex_p-1),parser->enc) -#define is_identchar(p, enc) (rb_enc_isalnum(*p, enc) || (*p) == '_' || ismbchar(p, enc)) -#define parser_ismbchar() ismbchar((lex_p-1), parser->enc) -#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),parser->enc)) +#define parser_mbclen() mbclen((lex_p-1),lex_pend,parser->enc) +#define is_identchar(p,e,enc) (rb_enc_isalnum(*p,enc) || (*p) == '_' || ismbchar(p,e,enc)) +#define parser_ismbchar() ismbchar((lex_p-1), lex_pend, parser->enc) +#define parser_is_identchar() (!parser->eofp && is_identchar((lex_p-1),lex_pend,parser->enc)) static int parser_yyerror(struct parser_params *parser, const char *msg) @@ -5995,7 +5995,7 @@ parser_yylex(struct parser_params *parser) } } else if ((rb_enc_isalnum(c, parser->enc) || c == '_') && - lex_p < lex_pend && is_identchar(lex_p, parser->enc)) { + lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser->enc)) { goto ternary; } else if (c == '\\') { @@ -8328,7 +8328,7 @@ internal_id_gen(struct parser_params *parser) } static int -is_special_global_name(const char *m, rb_encoding *enc) +is_special_global_name(const char *m, const char *e, rb_encoding *enc) { switch (*m) { case '~': case '*': case '$': case '?': case '!': case '@': @@ -8340,7 +8340,7 @@ is_special_global_name(const char *m, rb_encoding *enc) break; case '-': ++m; - if (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc); + if (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); break; default: if (!rb_enc_isdigit(*m, enc)) return 0; @@ -8353,6 +8353,7 @@ int rb_symname_p(const char *name) { const char *m = name; + const char *e = m + strlen(m); int localid = Qfalse; rb_encoding *enc = rb_enc_from_index(0); @@ -8362,7 +8363,7 @@ rb_symname_p(const char *name) return Qfalse; case '$': - if (is_special_global_name(++m, enc)) return Qtrue; + if (is_special_global_name(++m, e, enc)) return Qtrue; goto id; case '@': @@ -8411,8 +8412,9 @@ rb_symname_p(const char *name) default: localid = !rb_enc_isupper(*m, enc); id: - if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, enc)) return Qfalse; - while (is_identchar(m, enc)) m += rb_enc_mbclen(m, enc); + if (*m != '_' && !rb_enc_isalpha(*m, enc) && !ismbchar(m, e, enc)) + return Qfalse; + while (is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc); if (localid) { switch (*m) { case '!': case '?': case '=': ++m; @@ -8427,6 +8429,7 @@ ID rb_intern3(const char *name, long len, rb_encoding *enc) { const char *m = name; + const char *e = m + len; VALUE str; ID id; int last; @@ -8445,7 +8448,7 @@ rb_intern3(const char *name, long len, rb_encoding *enc) switch (*m) { case '$': id |= ID_GLOBAL; - if (is_special_global_name(++m, enc)) goto new_id; + if (is_special_global_name(++m, e, enc)) goto new_id; break; case '@': if (m[1] == '@') { @@ -8490,8 +8493,8 @@ rb_intern3(const char *name, long len, rb_encoding *enc) break; } if (!rb_enc_isdigit(*m, enc)) { - while (m <= name + last && is_identchar(m, enc)) { - m += rb_enc_mbclen(m, enc); + while (m <= name + last && is_identchar(m, e, enc)) { + m += rb_enc_mbclen(m, e, enc); } } if (m - name < len) id = ID_JUNK; @@ -393,11 +393,11 @@ rb_reg_expr_str(VALUE str, const char *s, long len) p = s; pend = p + len; while (p<pend) { - if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, enc))) { + if (*p == '/' || (!rb_enc_isprint(*p, enc) && !ismbchar(p, pend, enc))) { need_escape = 1; break; } - p += mbclen(p, enc); + p += mbclen(p, pend, enc); } if (!need_escape) { rb_str_buf_cat(str, s, len); @@ -406,7 +406,7 @@ rb_reg_expr_str(VALUE str, const char *s, long len) p = s; while (p<pend) { if (*p == '\\') { - int n = mbclen(p+1, enc) + 1; + int n = mbclen(p+1, pend, enc) + 1; rb_str_buf_cat(str, p, n); p += n; continue; @@ -416,9 +416,9 @@ rb_reg_expr_str(VALUE str, const char *s, long len) rb_str_buf_cat(str, &c, 1); rb_str_buf_cat(str, p, 1); } - else if (ismbchar(p, enc)) { - rb_str_buf_cat(str, p, mbclen(p, enc)); - p += mbclen(p, enc); + else if (ismbchar(p, pend, enc)) { + rb_str_buf_cat(str, p, mbclen(p, pend, enc)); + p += mbclen(p, pend, enc); continue; } else if (rb_enc_isprint(*p, enc)) { @@ -1906,8 +1906,8 @@ rb_reg_quote(VALUE str) send = s + RSTRING_LEN(str); for (; s < send; s++) { c = *s; - if (ismbchar(s, enc)) { - int n = mbclen(s, enc); + if (ismbchar(s, send, enc)) { + int n = mbclen(s, send, enc); while (n-- && s < send) s++; @@ -1935,8 +1935,8 @@ rb_reg_quote(VALUE str) for (; s < send; s++) { c = *s; - if (ismbchar(s, enc)) { - int n = mbclen(s, enc); + if (ismbchar(s, send, enc)) { + int n = mbclen(s, send, enc); while (n-- && s < send) *t++ = *s++; @@ -2180,8 +2180,8 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp) while (s < e) { char *ss = s++; - if (ismbchar(ss, enc)) { - s += mbclen(ss, enc) - 1; + if (ismbchar(ss, e, enc)) { + s += mbclen(ss, e, enc) - 1; continue; } if (*ss != '\\' || s == e) continue; @@ -2214,7 +2214,7 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp) name_end = name = s + 1; while (name_end < e) { if (*name_end == '>') break; - name_end += mbclen(name_end, enc); + name_end += mbclen(name_end, e, enc); } if (name_end < e) { no = name_to_backref_number(regs, regexp, name, name_end); @@ -469,13 +469,13 @@ compile_length_string_node(Node* node, regex_t* reg) ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p); + prev_len = enc_len(enc, p, sn->end); p += prev_len; slen = 1; rlen = 0; for (; p < sn->end; ) { - len = enc_len(enc, p); + len = enc_len(enc, p, sn->end); if (len == prev_len) { slen++; } @@ -518,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg) ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p); + prev_len = enc_len(enc, p, end); p += prev_len; slen = 1; for (; p < end; ) { - len = enc_len(enc, p); + len = enc_len(enc, p, end); if (len == prev_len) { slen++; } @@ -2312,7 +2312,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) StrNode* sn = NSTR(node); UChar *s = sn->s; while (s < sn->end) { - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, sn->end); (*len)++; } } @@ -3389,7 +3389,7 @@ expand_case_fold_string(Node* node, regex_t* reg) goto err; } - len = enc_len(reg->enc, p); + len = enc_len(reg->enc, p, end); if (n == 0) { if (IS_NULL(snode)) { @@ -4212,7 +4212,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) p = add->s; end = p + add->len; for (i = to->len; p < end; ) { - len = enc_len(enc, p); + len = enc_len(enc, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4234,7 +4234,7 @@ concat_opt_exact_info_str(OptExactInfo* to, UChar *p; for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enc_len(enc, p); + len = enc_len(enc, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4260,7 +4260,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) for (i = 0; i < to->len && i < add->len; ) { if (to->s[i] != add->s[i]) break; - len = enc_len(env->enc, to->s + i); + len = enc_len(env->enc, to->s + i, to->s + to->len); for (j = 1; j < len; j++) { if (to->s[i+j] != add->s[i+j]) break; @@ -55,7 +55,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U { UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); if (p < s) { - p += enc_len(enc, p); + p += enc_len(enc, p, s); } return p; } @@ -68,7 +68,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, if (p < s) { if (prev) *prev = (const UChar* )p; - p += enc_len(enc, p); + p += enc_len(enc, p, s); } else { if (prev) *prev = (const UChar* )NULL; /* Sorry */ @@ -102,7 +102,7 @@ onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n) { UChar* q = (UChar* )p; while (n-- > 0) { - q += ONIGENC_MBC_ENC_LEN(enc, q); + q += ONIGENC_MBC_ENC_LEN(enc, q, end); } return (q <= end ? q : NULL); } @@ -114,7 +114,7 @@ onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end) UChar* q = (UChar* )p; while (q < end) { - q += ONIGENC_MBC_ENC_LEN(enc, q); + q += ONIGENC_MBC_ENC_LEN(enc, q, end); n++; } return n; @@ -125,6 +125,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s) { int n = 0; UChar* p = (UChar* )s; + UChar* e = p + strlen(s); while (1) { if (*p == '\0') { @@ -140,7 +141,7 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s) } if (len == 1) return n; } - p += ONIGENC_MBC_ENC_LEN(enc, p); + p += ONIGENC_MBC_ENC_LEN(enc, p, e); n++; } } @@ -150,6 +151,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) { UChar* start = (UChar* )s; UChar* p = (UChar* )s; + UChar* e = p + strlen(s); while (1) { if (*p == '\0') { @@ -165,7 +167,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) } if (len == 1) return (int )(p - start); } - p += ONIGENC_MBC_ENC_LEN(enc, p); + p += ONIGENC_MBC_ENC_LEN(enc, p, e); } } @@ -638,7 +640,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) int c, i, len; OnigCodePoint n; - len = enc_len(enc, p); + len = enc_len(enc, p, end); n = (OnigCodePoint )(*p++); if (len == 1) return n; @@ -665,7 +667,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, else { int i; - len = enc_len(enc, p); + len = enc_len(enc, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; } @@ -718,7 +720,7 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf) != (p - buf)) + if (enc_len(enc, buf, p) != (p - buf)) return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; #endif return p - buf; @@ -741,7 +743,7 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf) != (p - buf)) + if (enc_len(enc, buf, p) != (p - buf)) return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; #endif return p - buf; @@ -825,7 +827,7 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, if (x) return x; sascii++; - p += enc_len(enc, p); + p += enc_len(enc, p, end); } return 0; } diff --git a/regerror.c b/regerror.c index b0cc71d18d..61187d4a61 100644 --- a/regerror.c +++ b/regerror.c @@ -209,7 +209,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, buf[len++] = (UChar )code; } - p += enc_len(enc, p); + p += enc_len(enc, p, end); if (len >= buf_size) break; } @@ -330,15 +330,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) while (p < pat_end) { if (*p == '\\') { *s++ = *p++; - len = enc_len(enc, p); + len = enc_len(enc, p, pat_end); while (len-- > 0) *s++ = *p++; } else if (*p == '/') { *s++ = (unsigned char )'\\'; *s++ = *p++; } - else if (ONIGENC_IS_MBC_HEAD(enc, p)) { - len = enc_len(enc, p); + else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) { + len = enc_len(enc, p, pat_end); if (ONIGENC_MBC_MINLEN(enc) == 1) { while (len-- > 0) *s++ = *p++; } @@ -1642,12 +1642,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ + s += enc_len(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ MOP_OUT; break; case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); - if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail; cclass_mb: GET_LENGTH_INC(tlen, p); @@ -1657,7 +1657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int mb_len; DATA_ENSURE(1); - mb_len = enc_len(encode, s); + mb_len = enc_len(encode, s, end); DATA_ENSURE(mb_len); ss = s; s += mb_len; @@ -1677,7 +1677,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s)) { + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { p += SIZE_BITSET; goto cclass_mb; } @@ -1697,13 +1697,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s); + s += enc_len(encode, s, end); MOP_OUT; break; case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_HEAD(encode, s)) { + if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) { s++; GET_LENGTH_INC(tlen, p); p += tlen; @@ -1715,7 +1715,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, { OnigCodePoint code; UChar *ss; - int mb_len = enc_len(encode, s); + int mb_len = enc_len(encode, s, end); if (! DATA_ENSURE_CHECK(mb_len)) { DATA_ENSURE(1); @@ -1744,7 +1744,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); - if (ONIGENC_IS_MBC_HEAD(encode, s)) { + if (ONIGENC_IS_MBC_HEAD(encode, s, end)) { p += SIZE_BITSET; goto cclass_mb_not; } @@ -1769,7 +1769,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(1); GET_POINTER_INC(node, p); - mb_len = enc_len(encode, s); + mb_len = enc_len(encode, s, end); ss = s; s += mb_len; DATA_ENSURE(0); @@ -1781,7 +1781,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); - n = enc_len(encode, s); + n = enc_len(encode, s, end); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; @@ -1790,7 +1790,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); - n = enc_len(encode, s); + n = enc_len(encode, s, end); DATA_ENSURE(n); s += n; MOP_OUT; @@ -1799,7 +1799,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s); + n = enc_len(encode, s, end); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1811,7 +1811,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s); + n = enc_len(encode, s, end); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -1830,7 +1830,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s); + n = enc_len(encode, s, end); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1845,7 +1845,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s); + n = enc_len(encode, s, end); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -1906,7 +1906,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s); + s += enc_len(encode, s, end); MOP_OUT; break; @@ -1915,7 +1915,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s); + s += enc_len(encode, s, end); MOP_OUT; break; @@ -2043,7 +2043,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enc_len(encode, s))) { + ON_STR_END(s + enc_len(encode, s, end))) { MOP_OUT; continue; } @@ -2157,7 +2157,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enc_len(encode, sprev, end)) < s) sprev += len; MOP_OUT; @@ -2189,7 +2189,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(n); sprev = s; STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enc_len(encode, sprev, end)) < s) sprev += len; MOP_OUT; @@ -2224,7 +2224,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enc_len(encode, sprev, end)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -2263,7 +2263,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enc_len(encode, sprev, end)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -2289,7 +2289,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; if (backref_match_at_nested_level(reg, stk, stk_base, ic , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enc_len(encode, sprev)) < s) + while (sprev + (len = enc_len(encode, sprev, end)) < s) sprev += len; p += (SIZE_MEMNUM * tlen); @@ -2760,7 +2760,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s += enc_len(enc, s); + s += enc_len(enc, s, end); } return (UChar* )NULL; @@ -2805,7 +2805,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, s, text_end)) return s; - s += enc_len(enc, s); + s += enc_len(enc, s, text_end); } return (UChar* )NULL; @@ -2903,7 +2903,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->map[*se]; t = s; do { - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, end); } while ((s - t) < skip && s < end); } } @@ -2919,7 +2919,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->int_map[*se]; t = s; do { - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, end); } while ((s - t) < skip && s < end); } } @@ -3024,7 +3024,7 @@ map_search(OnigEncoding enc, UChar map[], while (s < text_range) { if (map[*s]) return (UChar* )s; - s += enc_len(enc, s); + s += enc_len(enc, s, text_range); } return (UChar* )NULL; } @@ -3127,7 +3127,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; - while (p < q) p += enc_len(reg->enc, p); + while (p < q) p += enc_len(reg->enc, p, end); } } @@ -3158,7 +3158,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (p - reg->dmin < s) { retry_gate: pprev = p; - p += enc_len(reg->enc, p); + p += enc_len(reg->enc, p, end); goto retry; } @@ -3604,7 +3604,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, while (s <= high) { MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, end); } } while (s < range); goto mismatch; @@ -3617,11 +3617,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, end); while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { prev = s; - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, end); } } while (s < range); goto mismatch; @@ -3632,7 +3632,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s); + s += enc_len(reg->enc, s, end); } while (s < range); if (s == range) { /* because empty match with /$/. */ diff --git a/regparse.c b/regparse.c index df92266ae1..092ba614c3 100644 --- a/regparse.c +++ b/regparse.c @@ -246,12 +246,12 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) #define PUNFETCH p = pfetch_prev #define PINC do { \ pfetch_prev = p; \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ + p += ONIGENC_MBC_ENC_LEN(enc, p, end); \ } while (0) #define PFETCH(c) do { \ c = ONIGENC_MBC_TO_CODE(enc, p, end); \ pfetch_prev = p; \ - p += ONIGENC_MBC_ENC_LEN(enc, p); \ + p += ONIGENC_MBC_ENC_LEN(enc, p, end); \ } while (0) #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE) @@ -1539,7 +1539,7 @@ static int str_node_can_be_split(StrNode* sn, OnigEncoding enc) { if (sn->end > sn->s) { - return ((enc_len(enc, sn->s) < sn->end - sn->s) ? 1 : 0); + return ((enc_len(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0); } return 0; } @@ -2733,12 +2733,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, while (p < to) { x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enc_len(enc, p); + q = p + enc_len(enc, p, to); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += enc_len(enc, q); + q += enc_len(enc, q, to); } if (i >= n) { if (IS_NOT_NULL(next)) @@ -2764,19 +2764,19 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, while (p < to) { if (in_esc) { in_esc = 0; - p += enc_len(enc, p); + p += enc_len(enc, p, to); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enc_len(enc, p); + q = p + enc_len(enc, p, to); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += enc_len(enc, q); + q += enc_len(enc, q, to); } if (i >= n) return 1; - p += enc_len(enc, p); + p += enc_len(enc, p, to); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); @@ -2904,7 +2904,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) { + if (p > prev + enc_len(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { PINC; tok->type = TK_CODE_POINT; tok->base = 16; @@ -3244,7 +3244,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) { + if ((p > prev + enc_len(enc, prev, end)) && !PEND && PPEEK_IS('}')) { PINC; tok->type = TK_CODE_POINT; tok->u.code = (OnigCodePoint )num; @@ -3443,7 +3443,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.code = (OnigCodePoint )num; } else { /* string */ - p = tok->backp + enc_len(enc, tok->backp); + p = tok->backp + enc_len(enc, tok->backp, end); } break; } @@ -4120,7 +4120,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, goto err; } - len = enc_len(env->enc, buf); + len = enc_len(env->enc, buf, buf+i); if (i < len) { r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; goto err; @@ -4927,7 +4927,7 @@ parse_exp(Node** np, OnigToken* tok, int term, len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enc_len(env->enc, NSTR(*np)->s)) { + if (len == enc_len(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { r = fetch_token(tok, src, end, env); NSTRING_CLEAR_RAW(*np); goto string_end; @@ -71,7 +71,7 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { #define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] static int -mbc_enc_len(const UChar* p) +mbc_enc_len(const UChar* p, const UChar* e) { return EncLen_SJIS[*p]; } @@ -98,7 +98,7 @@ mbc_to_code(const UChar* p, const UChar* end) int c, i, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_SJIS, p); + len = enc_len(ONIG_ENCODING_SJIS, p, end); c = *p++; n = c; if (len == 1) return n; @@ -139,7 +139,7 @@ mbc_case_fold(OnigCaseFoldType flag, } else { int i; - int len = enc_len(ONIG_ENCODING_SJIS, p); + int len = enc_len(ONIG_ENCODING_SJIS, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; @@ -192,7 +192,7 @@ left_adjust_char_head(const UChar* start, const UChar* s) } } } - len = enc_len(ONIG_ENCODING_SJIS, p); + len = enc_len(ONIG_ENCODING_SJIS, p, s); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); @@ -707,7 +707,7 @@ str_sublen(VALUE str, long pos, rb_encoding *enc) i = 0; while (p < e) { - p += rb_enc_mbclen(p, enc); + p += rb_enc_mbclen(p, e, enc); i++; } return i; @@ -2375,7 +2375,7 @@ str_gsub(int argc, VALUE *argv, VALUE str, int bang) * in order to prevent infinite loops. */ if (RSTRING_LEN(str) <= END(0)) break; - len = rb_enc_mbclen(RSTRING_PTR(str)+END(0), enc); + len = rb_enc_mbclen(RSTRING_PTR(str)+END(0), RSTRING_END(str), enc); memcpy(bp, RSTRING_PTR(str)+END(0), len); bp += len; offset = END(0) + len; @@ -2595,7 +2595,7 @@ rb_str_reverse(VALUE str) } else { while (s < e) { - int clen = rb_enc_mbclen(s, enc); + int clen = rb_enc_mbclen(s, e, enc); if (clen == 0) { rb_raise(rb_eArgError, "invalid mbstring sequence"); @@ -3861,11 +3861,13 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) } else if (last_null == 1) { rb_ary_push(result, rb_str_subseq(str, beg, - rb_enc_mbclen(RSTRING_PTR(str)+beg,enc))); + rb_enc_mbclen(RSTRING_PTR(str)+beg, + RSTRING_END(str), + enc))); beg = start; } else { - start += rb_enc_mbclen(RSTRING_PTR(str)+start,enc); + start += rb_enc_mbclen(RSTRING_PTR(str)+start,RSTRING_END(str),enc); last_null = 1; continue; } @@ -4455,7 +4457,8 @@ scan_once(VALUE str, VALUE pat, long *start) * Always consume at least one character of the input string */ if (RSTRING_LEN(str) > END(0)) - *start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0),enc); + *start = END(0)+rb_enc_mbclen(RSTRING_PTR(str)+END(0), + RSTRING_END(str), enc); else *start = END(0)+1; } @@ -10832,7 +10832,7 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end if (len >= PROPERTY_NAME_MAX_SIZE) return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - p += enc_len(enc, p); + p += enc_len(enc, p, end); } buf[len] = 0; @@ -10963,7 +10963,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc, if (CaseFoldInited == 0) init_case_fold_table(); code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enc_len(enc, p); + len = enc_len(enc, p, end); *pp += len; #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI @@ -11155,7 +11155,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, n = 0; code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enc_len(enc, p); + len = enc_len(enc, p, end); #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { @@ -11304,7 +11304,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, else codes[1] = code; - clen = enc_len(enc, p); + clen = enc_len(enc, p, end); len += clen; if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { for (i = 0; i < z2->n; i++) { @@ -11325,7 +11325,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, else codes[2] = code; - clen = enc_len(enc, p); + clen = enc_len(enc, p, end); len += clen; if (onig_st_lookup(Unfold3Table, (st_data_t )codes, (void* )&z2) != 0) { @@ -60,7 +60,7 @@ static const int EncLen_UTF8[] = { }; static int -utf8_mbc_enc_len(const UChar* p) +utf8_mbc_enc_len(const UChar* p, const UChar* e) { return EncLen_UTF8[*p]; } @@ -96,7 +96,7 @@ utf8_mbc_to_code(const UChar* p, const UChar* end) int c, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_UTF8, p); + len = enc_len(ONIG_ENCODING_UTF8, p, end); c = *p++; if (len > 1) { len--; |