diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-02-24 13:32:01 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-02-24 13:32:01 +0000 |
commit | f1f48e610311d812eaf408d0947e8a3686ed3364 (patch) | |
tree | 81d3fe56ff75679c57bc42b629ca5f86fd973c39 | |
parent | 169b867fbf039ccb7f990449b898b2f9870a5269 (diff) |
* include/ruby/oniguruma.h: Rearranging flag assignments and making
space for titlecase indices; adding additional macros to add or
extract titlecase index; adding comments for better documentation.
* enc/unicode.c: Moving some macros to include/ruby/oniguruma.h;
activating use of titlecase indices.
(with Kimihito Matsui)
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53915 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | enc/unicode.c | 6 | ||||
-rw-r--r-- | include/ruby/oniguruma.h | 47 |
3 files changed, 46 insertions, 18 deletions
@@ -1,6 +1,15 @@ +Wed Feb 24 22:31:13 2016 Martin Duerst <[email protected]> + + * include/ruby/oniguruma.h: Rearranging flag assignments and making + space for titlecase indices; adding additional macros to add or + extract titlecase index; adding comments for better documentation. + * enc/unicode.c: Moving some macros to include/ruby/oniguruma.h; + activating use of titlecase indices. + (with Kimihito Matsui) + Wed Feb 24 21:03:04 2016 Tanaka Akira <[email protected]> - * random.c (limited_rand): Add a specialized path for the limit fits + * random.c (limited_rand): Add a specialized path for when the limit fits in 32 bit. Tue Feb 23 21:52:24 2016 Martin Duerst <[email protected]> diff --git a/enc/unicode.c b/enc/unicode.c index d26497fc9c..e9c2803cab 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -71,10 +71,6 @@ static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; -/* use bottom bytes for actual code point count; 3 bits is more than enough */ -#define OnigCodePointCount(n) ((n)&0x7) -#define OnigCaseFoldFlags(n) ((n)&~0x7) - typedef struct { int n; OnigCodePoint code[3]; @@ -144,7 +140,7 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) #define U ONIGENC_CASE_UPCASE #define D ONIGENC_CASE_DOWNCASE #define F ONIGENC_CASE_FOLD -#define T(n) ONIGENC_CASE_TITLECASE +#define T(n) (ONIGENC_CASE_TITLECASE|OnigTitlecaseEncode(n)) #include "enc/unicode/casefold.h" diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 0d46f306f5..7f1d66c460 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -116,22 +116,45 @@ typedef ptrdiff_t OnigPosition; #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) +/* + * Onig casefold/case mapping flags and related definitions + * + * Subfields (starting with 0 at LSB): + * 0-2: Code point count in casefold.h + * 3-9: Index into TitleCase array in casefold.h + * 10-15, 18-20: Case mapping flags + */ typedef unsigned int OnigCaseFoldType; /* case fold flag */ ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; -/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ -/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ -#define ONIGENC_CASE_TITLECASE (1<<10) -#define ONIGENC_CASE_UPCASE (1<<11) -#define ONIGENC_CASE_DOWNCASE (1<<12) -#define ONIGENC_CASE_FOLD (1<<13) -#define ONIGENC_CASE_ONCEONLY (1<<14) -#define ONIGENC_CASE_MODIFIED (1<<15) -#define ONIGENC_CASE_ASCII_ONLY (1<<19) -#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) -#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) -#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) +/* bits for actual code point count; 3 bits is more than enough, currently only 2 used */ +#define OnigCodePointMask (0x7) +#define OnigCodePointCount(n) ((n)&OnigCodePointMask) +#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask) +/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */ +/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */ + +/* bits for index into table with separate titlecase mappings */ +/* 7 bits provide 128 values; as of Unicode 8.0.0, 89 values are used */ +#define OnigTitlecaseShift (3) +#define OnigTitlecaseWidth (7) +#define OnigTitlecaseMask (((1<<OnigTitlecaseWidth)-1)<<OnigTitlecaseShift) +#define OnigTitlecaseEncode(n) (((n)<<OnigTitlecaseShift)&OnigTitlecaseMask) +#define OnigTitlecaseDecode(n) (((n)&OnigTitlecaseMask)>>OnigTitlecaseShift) + +#define OnigTitlecaseIndex + +#define ONIGENC_CASE_TITLECASE (1<<10) /* has/needs titlecase mapping */ +#define ONIGENC_CASE_UPCASE (1<<11) /* has/needs uppercase mapping */ +#define ONIGENC_CASE_DOWNCASE (1<<12) /* has/needs lowercase mapping */ +#define ONIGENC_CASE_FOLD (1<<13) /* has/needs case folding */ +#define ONIGENC_CASE_SPECIAL (1<<14) /* has/needs special mapping from separate table */ +#define ONIGENC_CASE_MODIFIED (1<<15) /* data has been modified */ +#define ONIGENC_CASE_ASCII_ONLY (1<<18) /* only modify ASCII range */ +#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<19) /* needs Lithuanian-specific mapping */ +#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */ +#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */ #define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR #define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag |