diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-03-14 09:39:54 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-03-14 09:39:54 +0000 |
commit | 4b15b54d680bbb6e97673e67085ed4f70b612ccb (patch) | |
tree | 2ce22b0f45a8a3de7db95f9e120d49df1acc2720 | |
parent | ea5e885a958967c999e15512c6e72defab232de4 (diff) |
* include/ruby/oniguruma.h, enc/unicode.c: Adjusting flag assignments
and macros to work with unified CaseMappingSpecials array.
(with Kimihito Matsui)
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54101 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | enc/unicode.c | 7 | ||||
-rw-r--r-- | include/ruby/oniguruma.h | 38 |
3 files changed, 31 insertions, 20 deletions
```diff
@@ -1,3 +1,9 @@
+Mon Mar 14 18:39:53 2016 Martin Duerst <[email protected]>
+
+ * include/ruby/oniguruma.h, enc/unicode.c: Adjusting flag assignments
+ and macros to work with unified CaseMappingSpecials array.
+ (with Kimihito Matsui)
+
 Mon Mar 14 16:53:37 2016 Nobuyoshi Nakada <[email protected]>
 * compile.c (compile_named_capture_assign): optimize named capture
diff --git a/enc/unicode.c b/enc/unicode.c
index d4b2c2c427..f4487e40d5 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -137,6 +137,7 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
 return 1;
 }
+/* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
 #define U ONIGENC_CASE_UPCASE
 #define D ONIGENC_CASE_DOWNCASE
 #define F ONIGENC_CASE_FOLD
@@ -157,6 +158,12 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
 #undef I
 #undef L
+/* macros related to ONIGENC_CASE flags */
+/* defined here because not used in other files */
+#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexWidth)
+#define OnigSpecialIndexEncode(n) (((n)<<OnigSpecialIndexShift)&OnigSpecialIndexMask)
+#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
+
 #include "enc/unicode/name2ctype.h"
 #define CODE_RANGES_NUM numberof(CodeRanges)
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index 2411e0f507..b087ef829f 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -121,39 +121,37 @@ typedef ptrdiff_t OnigPosition;
  *
  * Subfields (starting with 0 at LSB):
  * 0-2: Code point count in casefold.h
- * 3-9: Index into TitleCase array in casefold.h
- * 10-15, 18-20: Case mapping flags
+ * 3-12: Index into SpecialCaseMapping array in casefold.h
+ * 13-22: Case folding/mapping flags
  */
 typedef unsigned int OnigCaseFoldType; /* case fold flag */
 ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
 /* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
-#define OnigCodePointMask (0x7)
+#define OnigCodePointMaskWidth 3
+#define OnigCodePointMask ((1<<OnigCodePointMaskWidth)-1)
 #define OnigCodePointCount(n) ((n)&OnigCodePointMask)
 #define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
 /* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
 /* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
 /* bits for index into table with separate titlecase mappings */
-/* 7 bits provide 128 values; as of Unicode 8.0.0, 89 values are used */
-#define OnigTitlecaseShift (3)
-#define OnigTitlecaseWidth (7)
-#define OnigTitlecaseMask (((1<<OnigTitlecaseWidth)-1)<<OnigTitlecaseShift)
-#define OnigTitlecaseEncode(n) (((n)<<OnigTitlecaseShift)&OnigTitlecaseMask)
-#define OnigTitlecaseDecode(n) (((n)&OnigTitlecaseMask)>>OnigTitlecaseShift)
-
-#define OnigTitlecaseIndex
-
-#define ONIGENC_CASE_TITLECASE (1<<10) /* has/needs titlecase mapping */
-#define ONIGENC_CASE_UPCASE (1<<11) /* has/needs uppercase mapping */
-#define ONIGENC_CASE_DOWNCASE (1<<12) /* has/needs lowercase mapping */
-#define ONIGENC_CASE_FOLD (1<<13) /* has/needs case folding */
-#define ONIGENC_CASE_SPECIAL (1<<14) /* has/needs special mapping from separate table */
-#define ONIGENC_CASE_MODIFIED (1<<15) /* data has been modified */
-#define ONIGENC_CASE_ASCII_ONLY (1<<18) /* only modify ASCII range */
-#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<19) /* needs Lithuanian-specific mapping */
+/* 10 bits provide 1024 values */
+#define OnigSpecialIndexShift 3
+#define OnigSpecialIndexWidth 10
+
+#define ONIGENC_CASE_UPCASE (1<<13) /* has/needs uppercase mapping */
+#define ONIGENC_CASE_DOWNCASE (1<<14) /* has/needs lowercase mapping */
+#define ONIGENC_CASE_TITLECASE (1<<15) /* has/needs (special) titlecase mapping */
+#define ONIGENC_CASE_SPECIAL_OFFSET 3 /* offset in bytes from ONIGENC_CASE to ONIGENC_CASE_SPECIAL */
+#define ONIGENC_CASE_UP_SPECIAL (1<<16) /* has special upcase mapping */
+#define ONIGENC_CASE_DOWN_SPECIAL (1<<17) /* has special downcase mapping */
+#define ONIGENC_CASE_MODIFIED (1<<18) /* data has been modified */
+#define ONIGENC_CASE_FOLD (1<<19) /* has/needs case folding */
 #define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
+#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
+#define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
 #define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
 #define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
```