diff options
Diffstat (limited to 'regparse.c')
-rw-r--r-- | regparse.c | 734 |
1 files changed, 688 insertions, 46 deletions
diff --git a/regparse.c b/regparse.c index 2924601bc2..fba0a34c42 100644 --- a/regparse.c +++ b/regparse.c @@ -5785,60 +5785,702 @@ node_linebreak(Node** np, ScanEnv* env) } static int +propname2ctype(ScanEnv* env, const char* propname) +{ + UChar* name = (UChar*)propname; + int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII, + name, name + strlen(propname)); + return ctype; +} + +static int node_extended_grapheme_cluster(Node** np, ScanEnv* env) { - /* same as (?>\P{M}\p{M}*) */ Node* np1 = NULL; - Node* np2 = NULL; - Node* qn = NULL; - Node* list1 = NULL; + Node* list = NULL; Node* list2 = NULL; + Node* alt = NULL; + Node* alt2 = NULL; int r = 0; #ifdef USE_UNICODE_PROPERTIES if (ONIGENC_IS_UNICODE(env->enc)) { /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ - CClassNode* cc1; - CClassNode* cc2; - UChar* propname = (UChar* )"M"; - int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII, - propname, propname + 1); - if (ctype >= 0) { - /* \P{M} */ - np1 = node_new_cclass(); - if (IS_NULL(np1)) goto err; - cc1 = NCCLASS(np1); - r = add_ctype_to_cc(cc1, ctype, 0, 0, env); - if (r != 0) goto err; - NCCLASS_SET_NOT(cc1); + Node* tmp = NULL; + int num1, num2; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; + CClassNode* cc; + OnigOptionType option; + int extend = propname2ctype(env, "Grapheme_Cluster_Break=Extend"); - /* \p{M}* */ - np2 = node_new_cclass(); - if (IS_NULL(np2)) goto err; - cc2 = NCCLASS(np2); - r = add_ctype_to_cc(cc2, ctype, 0, 0, env); - if (r != 0) goto err; + /* Prepend* + * ( RI-sequence | Hangul-Syllable | !Control ) + * ( Grapheme_Extend | SpacingMark )* */ - qn = node_new_quantifier(0, REPEAT_INFINITE, 0); - if (IS_NULL(qn)) goto err; - NQTFR(qn)->target = np2; - np2 = NULL; - - /* \P{M}\p{M}* */ - list2 = node_new_list(qn, NULL_NODE); - if (IS_NULL(list2)) goto err; - qn = NULL; - list1 = node_new_list(np1, list2); - if (IS_NULL(list1)) goto err; - np1 = NULL; - list2 = NULL; - - /* (?>...) */ - *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); - if (IS_NULL(*np)) goto err; - NENCLOSE(*np)->target = list1; - return ONIG_NORMAL; - } + /* ( Grapheme_Extend | SpacingMark )* */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, extend, 0, 0, env); + if (r != 0) goto err; + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=SpacingMark"), 0, 0, env); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list = tmp; + np1 = NULL; + + /* ( RI-sequence | Hangul-Syllable | !Control ) */ + /* !Control */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Control"), 1, 0, env); + if (r != 0) goto err; + BITSET_CLEAR_BIT(cc->bs, 0x0a); + BITSET_CLEAR_BIT(cc->bs, 0x0d); + + tmp = onig_node_new_alt(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + alt = tmp; + np1 = NULL; + + /* Hangul-Syllable + * := L* V+ T* + * | L* LV V* T* + * | L* LVT T* + * | L+ + * | T+ */ + + /* T+ */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = onig_node_new_alt(np1, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + np1 = NULL; + + /* L+ */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = onig_node_new_alt(np1, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + np1 = NULL; + + /* L* LVT T* */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=LVT"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + /* L* LV V* T* */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=V"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=LV"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + /* L* V+ T* */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=V"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + /* Emoji sequence := (E_Base | EBG) Extend* E_Modifier? + * (ZWJ (Glue_After_Zwj | EBG Extend* E_Modifier?) )* */ + + /* ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?) */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, 1, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, extend, 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, NULL_NODE); + if (IS_NULL(tmp)) goto err; + alt2 = tmp; + list2 = NULL; + + /* Glue_After_Zwj */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, extend, 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x1F308, 0x1F308); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F33E, 0x1F33E); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F373, 0x1F373); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F393, 0x1F393); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3A4, 0x1F3A4); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3A8, 0x1F3A8); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3EB, 0x1F3EB); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3ED, 0x1F3ED); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F4BB, 0x1F4BC); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F527, 0x1F527); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F52C, 0x1F52C); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F680, 0x1F680); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F692, 0x1F692); + if (r != 0) goto err; + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt2); + if (IS_NULL(tmp)) goto err; + alt2 = tmp; + list2 = NULL; + + /* Emoji variation sequence + * http://unicode.org/Public/emoji/4.0/emoji-zwj-sequences.txt + */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0xfe0f, 0xfe0f); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, 1, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x2640, 0x2640); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x2642, 0x2642); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x2695, 0x2696); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x2708, 0x2708); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt2); + if (IS_NULL(tmp)) goto err; + alt2 = tmp; + list2 = NULL; + + tmp = node_new_list(alt2, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + alt2 = NULL; + + /* ZWJ */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = list2; + np1 = tmp; + list2 = NULL; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + /* E_Modifier? */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, 1, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + /* Extend* */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, extend, 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + /* (E_Base | EBG) */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x1F3C2, 0x1F3C2); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3C7, 0x1F3C7); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3CC, 0x1F3CC); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F3F3, 0x1F3F3); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F441, 0x1F441); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F46F, 0x1F46F); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F574, 0x1F574); + if (r != 0) goto err; + r = add_code_range(&(cc->mbuf), env, 0x1F6CC, 0x1F6CC); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base"), 0, 0, env); + if (r != 0) goto err; + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + /* ZWJ (E_Base_GAZ | Glue_After_Zwj) E_Modifier? */ + /* a sequence starting with ZWJ seems artificial, but GraphemeBreakTest + * has such examples. + * http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.html + */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, 1, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env); + if (r != 0) goto err; + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D); + if (r != 0) goto err; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + /* RI-Sequence := Regional_Indicator{2} */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x1F1E6, 0x1F1FF); + if (r != 0) goto err; + + tmp = node_new_quantifier(2, 2, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + tmp = node_new_list(alt, list); + if (IS_NULL(tmp)) goto err; + list = tmp; + alt = NULL; + + /* Prepend* */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Prepend"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list); + if (IS_NULL(tmp)) goto err; + list = tmp; + np1 = NULL; + + /* PerlSyntax: (?s:.), RubySyntax: (?m:.) */ + np1 = node_new_anychar(); + if (IS_NULL(np1)) goto err; + + option = env->option; + ONOFF(option, ONIG_OPTION_MULTILINE, 0); + tmp = node_new_option(option); + if (IS_NULL(tmp)) goto err; + NENCLOSE(tmp)->target = np1; + np1 = tmp; + + tmp = onig_node_new_alt(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + alt = tmp; + np1 = NULL; + + /* Prepend+ */ + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D); + if (r != 0) goto err; + + tmp = node_new_quantifier(0, 1, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, NULL_NODE); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + np1 = node_new_cclass(); + if (IS_NULL(np1)) goto err; + cc = NCCLASS(np1); + r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Prepend"), 0, 0, env); + if (r != 0) goto err; + + tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); + if (IS_NULL(tmp)) goto err; + NQTFR(tmp)->target = np1; + np1 = tmp; + + tmp = node_new_list(np1, list2); + if (IS_NULL(tmp)) goto err; + list2 = tmp; + np1 = NULL; + + tmp = onig_node_new_alt(list2, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list2 = NULL; + + tmp = onig_node_new_alt(list, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + list = NULL; + + /* \x0D\x0A */ + num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf); + if (num1 < 0) return num1; + num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1); + if (num2 < 0) return num2; + np1 = node_new_str_raw(buf, buf + num1 + num2); + if (IS_NULL(np1)) goto err; + + tmp = onig_node_new_alt(np1, alt); + if (IS_NULL(tmp)) goto err; + alt = tmp; + np1 = NULL; + + /* (?>...) */ + *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + if (IS_NULL(*np)) goto err; + NENCLOSE(*np)->target = alt; + return ONIG_NORMAL; } #endif /* USE_UNICODE_PROPERTIES */ if (IS_NULL(*np)) { @@ -5857,10 +6499,10 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) err: onig_node_free(np1); - onig_node_free(np2); - onig_node_free(qn); - onig_node_free(list1); + onig_node_free(list); onig_node_free(list2); + onig_node_free(alt); + onig_node_free(alt2); return (r == 0) ? ONIGERR_MEMORY : r; } |