diff options
author | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-03-05 15:31:51 +0000 |
---|---|---|
committer | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-03-05 15:31:51 +0000 |
commit | 5770336f8be4ac6dbdff43587fda2b508d3786de (patch) | |
tree | d75456ec841c8a8d8eba9c3a4d25b39243c56ef6 /oniguruma.h | |
parent | 33a06e4acac0cb7f080436507e06c82de45f7b9b (diff) |
Initial revision
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'oniguruma.h')
-rw-r--r-- | oniguruma.h | 715 |
1 files changed, 715 insertions, 0 deletions
/**********************************************************************

  oniguruma.h - Oniguruma (regular expression library)

  Copyright (C) 2002-2004  K.Kosako ([email protected])

**********************************************************************/
#ifndef ONIGURUMA_H
#define ONIGURUMA_H

#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR   2
#define ONIGURUMA_VERSION_MINOR   2
#define ONIGURUMA_VERSION_TEENY   4

/*
 * P_(args): wraps a parameter list so that declarations degrade to
 * old-style "()" declarations when neither __STDC__ nor _WIN32 is defined.
 * Always invoked with doubled parentheses: P_((int a, int b)).
 */
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
#else
# define P_(args) ()
#endif
#endif

/* PV_(args): same idea as P_, for variadic functions; the real parameter
   list is kept only when HAVE_STDARG_PROTOTYPES is defined. */
#ifndef PV_
#ifdef HAVE_STDARG_PROTOTYPES
# define PV_(args) args
#else
# define PV_(args) ()
#endif
#endif

/* ONIG_EXTERN: linkage specifier for exported symbols.  On native Windows
   (not Cygwin) it selects dllexport when EXPORT or RUBY_EXPORT is defined
   and dllimport otherwise; everywhere else it is plain "extern". */
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__CYGWIN__)
#if defined(EXPORT) || defined(RUBY_EXPORT)
#define ONIG_EXTERN   extern __declspec(dllexport)
#else
#define ONIG_EXTERN   extern __declspec(dllimport)
#endif
#endif
#endif

#ifndef ONIG_EXTERN
#define ONIG_EXTERN   extern
#endif

/* PART: character encoding */

typedef unsigned char  UChar;
typedef unsigned long  OnigCodePoint;
typedef unsigned int   OnigDistance;

/* All bits set: the largest OnigDistance value.  The expansion is fully
   parenthesized so it stays a single operand wherever it is used. */
#define ONIG_INFINITE_DISTANCE  (~((OnigDistance )0))

/* A pair of code points, used by get_ctype_code_range(). */
typedef struct {
  OnigCodePoint from;
  OnigCodePoint to;
} OnigCodePointRange;

#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE    16
/* Result record filled in by get_fold_match_info(); the two arrays have
   ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE slots each. */
typedef struct {
  int    target_num;
  int    target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
  UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE];
} OnigEncFoldMatchInfo;


#if defined(RUBY_PLATFORM) && defined(M17N_H)

/* Ruby M17N build: the encoding type comes from the m17n header. */
#define ONIG_RUBY_M17N
typedef m17n_encoding*  OnigEncoding;

#else

/*
 * Encoding descriptor: a 256-entry table indexed by byte value (read by
 * ONIGENC_MBC_LEN_BY_HEAD), a few properties, and a table of function
 * pointers that the ONIGENC_* macros below dispatch through.
 */
typedef struct {
  const char   len_table[256];
  const char*  name;
  int          max_enc_len;
  int          is_fold_match;
  int          ctype_support_level;  /* sb-only/full */
  int          is_continuous_sb_mb;  /* code point is continuous from sb to mb */
  OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end);
  int    (*code_to_mbclen)(OnigCodePoint code);
  int    (*code_to_mbc)(OnigCodePoint code, UChar *buf);
  int    (*mbc_to_lower)(UChar* p, UChar* lower);
  int    (*mbc_is_case_ambig)(UChar* p);
  int    (*code_is_ctype)(OnigCodePoint code, unsigned int ctype);
  int    (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
  UChar* (*left_adjust_char_head)(UChar* start, UChar* s);
  int    (*is_allowed_reverse_match)(UChar* p, UChar* e);
  int    (*get_all_fold_match_code)(OnigCodePoint** codes);
  int    (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info);
} OnigEncodingType;

typedef OnigEncodingType* OnigEncoding;

/* Encoding descriptor objects (defined elsewhere in the library). */
ONIG_EXTERN OnigEncodingType OnigEncodingASCII;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_1;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_2;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_3;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_4;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_5;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_6;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_7;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_8;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_9;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_10;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_11;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_13;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15;
ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_16;
ONIG_EXTERN OnigEncodingType OnigEncodingUTF8;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR;
ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;

/* OnigEncoding handles: the address of each descriptor above. */
#define ONIG_ENCODING_ASCII        (&OnigEncodingASCII)
#define ONIG_ENCODING_ISO_8859_1   (&OnigEncodingISO_8859_1)
#define ONIG_ENCODING_ISO_8859_2   (&OnigEncodingISO_8859_2)
#define ONIG_ENCODING_ISO_8859_3   (&OnigEncodingISO_8859_3)
#define ONIG_ENCODING_ISO_8859_4   (&OnigEncodingISO_8859_4)
#define ONIG_ENCODING_ISO_8859_5   (&OnigEncodingISO_8859_5)
#define ONIG_ENCODING_ISO_8859_6   (&OnigEncodingISO_8859_6)
#define ONIG_ENCODING_ISO_8859_7   (&OnigEncodingISO_8859_7)
#define ONIG_ENCODING_ISO_8859_8   (&OnigEncodingISO_8859_8)
#define ONIG_ENCODING_ISO_8859_9   (&OnigEncodingISO_8859_9)
#define ONIG_ENCODING_ISO_8859_10  (&OnigEncodingISO_8859_10)
#define ONIG_ENCODING_ISO_8859_11  (&OnigEncodingISO_8859_11)
#define ONIG_ENCODING_ISO_8859_13  (&OnigEncodingISO_8859_13)
#define ONIG_ENCODING_ISO_8859_14  (&OnigEncodingISO_8859_14)
#define ONIG_ENCODING_ISO_8859_15  (&OnigEncodingISO_8859_15)
#define ONIG_ENCODING_ISO_8859_16  (&OnigEncodingISO_8859_16)
#define ONIG_ENCODING_UTF8         (&OnigEncodingUTF8)
#define ONIG_ENCODING_EUC_JP       (&OnigEncodingEUC_JP)
#define ONIG_ENCODING_EUC_TW       (&OnigEncodingEUC_TW)
#define ONIG_ENCODING_EUC_KR       (&OnigEncodingEUC_KR)
#define ONIG_ENCODING_EUC_CN       (&OnigEncodingEUC_CN)
#define ONIG_ENCODING_SJIS         (&OnigEncodingSJIS)
#define ONIG_ENCODING_KOI8         (&OnigEncodingKOI8)
#define ONIG_ENCODING_KOI8_R       (&OnigEncodingKOI8_R)
#define ONIG_ENCODING_BIG5         (&OnigEncodingBIG5)

#endif /* else RUBY && M17N */

/* Null encoding handle; tested by ONIGENC_IS_UNDEF(). */
#define ONIG_ENCODING_UNDEF    ((OnigEncoding )0)


/* work size */
#define ONIGENC_CODE_TO_MBC_MAXLEN      7
#define ONIGENC_MBC_TO_LOWER_MAXLEN     ONIGENC_CODE_TO_MBC_MAXLEN

/* character types */
#define ONIGENC_CTYPE_ALPHA    (1<< 0)
#define ONIGENC_CTYPE_BLANK    (1<< 1)
#define ONIGENC_CTYPE_CNTRL    (1<< 2)
#define ONIGENC_CTYPE_DIGIT    (1<< 3)
#define ONIGENC_CTYPE_GRAPH    (1<< 4)
#define ONIGENC_CTYPE_LOWER    (1<< 5)
#define ONIGENC_CTYPE_PRINT    (1<< 6)
#define ONIGENC_CTYPE_PUNCT    (1<< 7)
#define ONIGENC_CTYPE_SPACE    (1<< 8)
#define ONIGENC_CTYPE_UPPER    (1<< 9)
#define ONIGENC_CTYPE_XDIGIT   (1<<10)
#define ONIGENC_CTYPE_WORD     (1<<11)
#define ONIGENC_CTYPE_ASCII    (1<<12)
#define ONIGENC_CTYPE_ALNUM    (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)

/* ctype support level */
#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB      0
#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL    1


/* Short alias for ONIGENC_MBC_LEN_BY_HEAD (defined per build flavour below). */
#define enc_len(enc,byte)          ONIGENC_MBC_LEN_BY_HEAD(enc,byte)

/* Predicates shared by both build flavours. */
#define ONIGENC_IS_UNDEF(enc)          ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc)     (ONIGENC_MBC_MAXLEN(enc) == 1)
#define ONIGENC_IS_MBC_HEAD(enc,byte)  (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1)
#define ONIGENC_IS_MBC_ASCII(p)           (*(p)   < 128)
#define ONIGENC_IS_CODE_ASCII(code)       ((code) < 128)
#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
  (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
#define ONIGENC_IS_MBC_WORD(enc,s,end) \
   ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))


#ifdef ONIG_RUBY_M17N

/* ---- Ruby M17N flavour: macros forward to m17n_* / onigenc_* functions ---- */

#include <ctype.h> /* for isblank(), isgraph() */

#define ONIGENC_MBC_TO_LOWER(enc,p,buf)   onigenc_mbc_to_lower(enc,p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p)  onigenc_mbc_is_case_ambig(enc,p)

#define ONIGENC_IS_FOLD_MATCH(enc)      FALSE
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc)    FALSE
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc)    ONIGENC_CTYPE_SUPPORT_LEVEL_SB
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
        onigenc_is_allowed_reverse_match(enc, s, end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
        onigenc_get_left_adjust_char_head(enc, start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes)    0
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info)   ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
        ONIG_NO_SUPPORT_CONFIG
/* macro argument parenthesized before the cast so an expression argument
   is converted as a whole */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,b)   m17n_mbclen(enc,(int )(b))
#define ONIGENC_MBC_MAXLEN(enc)          m17n_mbmaxlen(enc)
#define ONIGENC_MBC_MAXLEN_DIST(enc) \
    (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
                                 : ONIG_INFINITE_DISTANCE)
#define ONIGENC_MBC_TO_CODE(enc,p,e)     m17n_codepoint((enc),(p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code))
#define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf)

#if 0
#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */
#endif

#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
        onigenc_is_code_ctype(enc,code,ctype)

/* isblank()/isgraph() are used only when <ctype.h> provides them as macros;
   otherwise fall back to an explicit test. */
#ifdef isblank
# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )(code))
#else
# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
#endif
#ifdef isgraph
# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )(code))
#else
# define ONIGENC_IS_CODE_GRAPH(enc,code) \
  (isprint((int )(code)) && !isspace((int )(code)))
#endif

#define ONIGENC_IS_CODE_PRINT(enc,code)     m17n_isprint(enc,code)
#define ONIGENC_IS_CODE_ALNUM(enc,code)     m17n_isalnum(enc,code)
#define ONIGENC_IS_CODE_ALPHA(enc,code)     m17n_isalpha(enc,code)
#define ONIGENC_IS_CODE_LOWER(enc,code)     m17n_islower(enc,code)
#define ONIGENC_IS_CODE_UPPER(enc,code)     m17n_isupper(enc,code)
#define ONIGENC_IS_CODE_CNTRL(enc,code)     m17n_iscntrl(enc,code)
#define ONIGENC_IS_CODE_PUNCT(enc,code)     m17n_ispunct(enc,code)
#define ONIGENC_IS_CODE_SPACE(enc,code)     m17n_isspace(enc,code)
#define ONIGENC_IS_CODE_DIGIT(enc,code)     m17n_isdigit(enc,code)
#define ONIGENC_IS_CODE_XDIGIT(enc,code)    m17n_isxdigit(enc,code)
#define ONIGENC_IS_CODE_WORD(enc,code)      m17n_iswchar(enc,code)

ONIG_EXTERN
int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
ONIG_EXTERN
int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
ONIG_EXTERN
int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf));
ONIG_EXTERN
int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p));
ONIG_EXTERN
int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end));

#else  /* ONIG_RUBY_M17N */

/* ---- Native flavour: macros read fields of, or call through, the
        OnigEncodingType descriptor that `enc` points to ---- */

#define ONIGENC_NAME(enc)                      ((enc)->name)

#define ONIGENC_MBC_TO_LOWER(enc,p,buf)        (enc)->mbc_to_lower(p,buf)
#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p)       (enc)->mbc_is_case_ambig(p)

#define ONIGENC_IS_FOLD_MATCH(enc)             ((enc)->is_fold_match)
#define ONIGENC_IS_CONTINUOUS_SB_MB(enc)       ((enc)->is_continuous_sb_mb)
#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc)       ((enc)->ctype_support_level)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
        (enc)->is_allowed_reverse_match(s,end)
#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
        (enc)->left_adjust_char_head(start, s)
#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \
        (enc)->get_all_fold_match_code(codes)
#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \
        (enc)->get_fold_match_info(p,end,info)
#define ONIGENC_STEP_BACK(enc,start,s,n) \
        onigenc_step_back((enc),(start),(s),(n))

/* Index through unsigned char: a plain char argument with the high bit set
   is negative where char is signed, and would otherwise index before the
   start of len_table.  mbclen() below uses the same cast. */
#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) \
        ((enc)->len_table[(unsigned char )(byte)])
#define ONIGENC_MBC_MAXLEN(enc)                ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc)           ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_TO_CODE(enc,p,e)           (enc)->mbc_to_code((p),(e))
#define ONIGENC_CODE_TO_MBCLEN(enc,code)       (enc)->code_to_mbclen(code)
#define ONIGENC_CODE_TO_MBC(enc,code,buf)      (enc)->code_to_mbc(code,buf)

#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype)  (enc)->code_is_ctype(code,ctype)

/* One predicate per ONIGENC_CTYPE_* flag, all routed via code_is_ctype(). */
#define ONIGENC_IS_CODE_GRAPH(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
#define ONIGENC_IS_CODE_PRINT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
#define ONIGENC_IS_CODE_ALNUM(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
#define ONIGENC_IS_CODE_ALPHA(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
#define ONIGENC_IS_CODE_LOWER(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
#define ONIGENC_IS_CODE_UPPER(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
#define ONIGENC_IS_CODE_CNTRL(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
#define ONIGENC_IS_CODE_PUNCT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
#define ONIGENC_IS_CODE_SPACE(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
#define ONIGENC_IS_CODE_BLANK(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
#define ONIGENC_IS_CODE_DIGIT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
#define ONIGENC_IS_CODE_WORD(enc,code) \
        ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)

#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
        (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)

ONIG_EXTERN
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));

#endif /* is not ONIG_RUBY_M17N */


/* encoding API */
/* The two parameterless functions are declared with (void), like
   onig_init() below, so that they are real prototypes. */
ONIG_EXTERN
int onigenc_init P_((void));
ONIG_EXTERN
int onigenc_set_default_encoding P_((OnigEncoding enc));
ONIG_EXTERN
OnigEncoding onigenc_get_default_encoding P_((void));
ONIG_EXTERN
void  onigenc_set_default_caseconv_table P_((UChar* table));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, UChar* start, UChar* s, UChar** prev));
ONIG_EXTERN
UChar* onigenc_get_prev_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
ONIG_EXTERN
UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));
ONIG_EXTERN
UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s));



/* PART: regular expression */

/* config parameters */
#define ONIG_NREGION                          10
#define ONIG_MAX_BACKREF_NUM                1000
#define ONIG_MAX_REPEAT_NUM               100000
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM      1000
/* constants */
#define ONIG_MAX_ERROR_MESSAGE_LEN            90

/* Ruby (non-M17N) build only: redefine ismbchar()/mbclen() in terms of the
   default encoding's len_table. */
#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N)
ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;
#undef ismbchar
#define ismbchar(c) (mbclen((c)) != 1)
#define mbclen(c)   (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)])
#endif

typedef unsigned int        OnigOptionType;

#define ONIG_OPTION_DEFAULT            ONIG_OPTION_NONE

/* options */
/* Each option is the previous one shifted left by one bit. */
#define ONIG_OPTION_NONE               0
#define ONIG_OPTION_IGNORECASE         1L
#define ONIG_OPTION_EXTEND             (ONIG_OPTION_IGNORECASE         << 1)
#define ONIG_OPTION_MULTILINE          (ONIG_OPTION_EXTEND             << 1)
#define ONIG_OPTION_SINGLELINE         (ONIG_OPTION_MULTILINE          << 1)
#define ONIG_OPTION_FIND_LONGEST       (ONIG_OPTION_SINGLELINE         << 1)
#define ONIG_OPTION_FIND_NOT_EMPTY     (ONIG_OPTION_FIND_LONGEST       << 1)
#define ONIG_OPTION_NEGATE_SINGLELINE  (ONIG_OPTION_FIND_NOT_EMPTY     << 1)
#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE  << 1)
#define ONIG_OPTION_CAPTURE_GROUP      (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
/* options (search time) */
#define ONIG_OPTION_NOTBOL             (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL             (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION       (ONIG_OPTION_NOTEOL << 1)

/* Set, clear and test option bits.  ON/OFF assign to `options`, so it must
   be a modifiable lvalue. */
#define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
#define ONIG_IS_OPTION_ON(options,option)   ((options) & (option))

/* syntax */
typedef struct {
  unsigned int    op;
  unsigned int    op2;
  unsigned int    behavior;
  OnigOptionType  options;    /* default option */
} OnigSyntaxType;

ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;

/* predefined syntaxes (see regparse.c) */
#define ONIG_SYNTAX_POSIX_BASIC        (&OnigSyntaxPosixBasic)
#define ONIG_SYNTAX_POSIX_EXTENDED     (&OnigSyntaxPosixExtended)
#define ONIG_SYNTAX_EMACS              (&OnigSyntaxEmacs)
#define ONIG_SYNTAX_GREP               (&OnigSyntaxGrep)
#define ONIG_SYNTAX_GNU_REGEX          (&OnigSyntaxGnuRegex)
#define ONIG_SYNTAX_JAVA               (&OnigSyntaxJava)
#define ONIG_SYNTAX_PERL               (&OnigSyntaxPerl)
#define ONIG_SYNTAX_RUBY               (&OnigSyntaxRuby)

/* default syntax */
ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
#define ONIG_SYNTAX_DEFAULT   OnigDefaultSyntax

/* syntax (operators) */
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS    (1<<0)
#define ONIG_SYN_OP_DOT_ANYCHAR                 (1<<1)   /* . */
#define ONIG_SYN_OP_ASTERISK_ZERO_INF           (1<<2)   /* * */
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF       (1<<3)
#define ONIG_SYN_OP_PLUS_ONE_INF                (1<<4)   /* + */
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF            (1<<5)
#define ONIG_SYN_OP_QMARK_ZERO_ONE              (1<<6)   /* ? */
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE          (1<<7)
#define ONIG_SYN_OP_BRACE_INTERVAL              (1<<8)   /* {lower,upper} */
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL          (1<<9)   /* \{lower,upper\} */
#define ONIG_SYN_OP_VBAR_ALT                    (1<<10)  /* | */
#define ONIG_SYN_OP_ESC_VBAR_ALT                (1<<11)  /* \| */
#define ONIG_SYN_OP_LPAREN_SUBEXP               (1<<12)  /* (...)   */
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP           (1<<13)  /* \(...\) */
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR           (1<<14)  /* \A, \Z, \z */
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR  (1<<15)  /* \G     */
#define ONIG_SYN_OP_DECIMAL_BACKREF             (1<<16)  /* \num   */
#define ONIG_SYN_OP_BRACKET_CC                  (1<<17)  /* [...]  */
#define ONIG_SYN_OP_ESC_W_WORD                  (1<<18)  /* \w, \W */
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END     (1<<19)  /* \<. \> */
#define ONIG_SYN_OP_ESC_B_WORD_BOUND            (1<<20)  /* \b, \B */
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE           (1<<21)  /* \s, \S */
#define ONIG_SYN_OP_ESC_D_DIGIT                 (1<<22)  /* \d, \D */
#define ONIG_SYN_OP_LINE_ANCHOR                 (1<<23)  /* ^, $   */
#define ONIG_SYN_OP_POSIX_BRACKET               (1<<24)  /* [:xxxx:] */
#define ONIG_SYN_OP_QMARK_NON_GREEDY            (1<<25)  /* ??,*?,+?,{n,m}? */
#define ONIG_SYN_OP_ESC_CONTROL_CHARS           (1<<26)  /* \n,\r,\t,\a ... */
#define ONIG_SYN_OP_ESC_C_CONTROL               (1<<27)  /* \cx  */
#define ONIG_SYN_OP_ESC_OCTAL3                  (1<<28)  /* \OOO */
#define ONIG_SYN_OP_ESC_X_HEX2                  (1<<29)  /* \xHH */
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8            (1<<30)  /* \x{7HHHHHHH} */

#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1<<0)  /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1<<1)  /* (?...) */
#define ONIG_SYN_OP2_OPTION_PERL                (1<<2)  /* (?imsx),(?-imsx) */
#define ONIG_SYN_OP2_OPTION_RUBY                (1<<3)  /* (?imx), (?-imx)  */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT     (1<<4)  /* ?+,*+,++ */
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL   (1<<5)  /* {n,m}+   */
#define ONIG_SYN_OP2_CCLASS_SET_OP              (1<<6)  /* [...&&..[..]..] */
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       (1<<7)  /* (?<name>...) */
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        (1<<8)  /* \k<name> */
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL          (1<<9)  /* \g<name>, \g<n> */
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY     (1<<10) /* (?@..),(?@<x>..) */
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL  (1<<11) /* \C-x */
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META     (1<<12) /* \M-x */
#define ONIG_SYN_OP2_ESC_V_VTAB                 (1<<13) /* \v as VTAB */
#define ONIG_SYN_OP2_ESC_U_HEX4                 (1<<14) /* \uHHHH */
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1<<15) /* \`, \' */
#define ONIG_SYN_OP2_ESC_P_CHAR_PROPERTY        (1<<16) /* \p{...}, \P{...} */

/* syntax (behavior) */
/* 1U, not 1: shifting a signed int 1 into bit 31 is undefined behavior
   where int is 32 bits wide. */
#define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS        (1<<0)  /* ?, *, +, {n,m} */
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS      (1<<1)  /* error or ignore */
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP    (1<<2)  /* ...)... */
#define ONIG_SYN_ALLOW_INVALID_INTERVAL          (1<<3)  /* {??? */
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV       (1<<4)  /* {,n} => {0,n} */
#define ONIG_SYN_STRICT_CHECK_BACKREF            (1<<5)  /* /(\1)/,/\1()/ ..*/
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND   (1<<6)  /* (?<=a|bc) */
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP        (1<<7)  /* see doc/RE */
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8)  /* (?<x>)(?<x>) */

/* syntax (behavior) in char class [...] */
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC      (1<<20) /* [^...] */
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC          (1<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC         (1<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC     (1<<23) /* [0-9-a]=[0-9\-a] */
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED          (1<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT    (1<<25) /* (?:a*)+ */

/* meta character specifiers (onig_set_meta_char()) */
#define ONIG_META_CHAR_ESCAPE               0
#define ONIG_META_CHAR_ANYCHAR              1
#define ONIG_META_CHAR_ANYTIME              2
#define ONIG_META_CHAR_ZERO_OR_ONE_TIME     3
#define ONIG_META_CHAR_ONE_OR_MORE_TIME     4
#define ONIG_META_CHAR_ANYCHAR_ANYTIME      5

#define ONIG_INEFFECTIVE_META_CHAR          0

/* error codes */
/* True for codes in [-999, -100], i.e. the "syntax error" and
   "values error" groups below. */
#define ONIG_IS_PATTERN_ERROR(ecode)   ((ecode) <= -100 && (ecode) > -1000)
/* normal return */
#define ONIG_NORMAL                                            0
#define ONIG_MISMATCH                                         -1
#define ONIG_NO_SUPPORT_CONFIG                                -2
/* internal error */
#define ONIGERR_PARSER_BUG                                   -11
#define ONIGERR_STACK_BUG                                    -12
#define ONIGERR_UNDEFINED_BYTECODE                           -13
#define ONIGERR_UNEXPECTED_BYTECODE                          -14
#define ONIGERR_MATCH_STACK_LIMIT_OVER                       -15
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED               -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
/* general error */
#define ONIGERR_INVALID_ARGUMENT                             -30
/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE                   -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET                 -101
#define ONIGERR_EMPTY_CHAR_CLASS                            -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS                 -103
#define ONIGERR_END_PATTERN_AT_BACKSLASH                    -104
#define ONIGERR_END_PATTERN_AT_META                         -105
#define ONIGERR_END_PATTERN_AT_CONTROL                      -106
#define ONIGERR_META_CODE_SYNTAX                            -108
#define ONIGERR_CONTROL_CODE_SYNTAX                         -109
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE            -110
#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE          -111
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS     -112
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED     -113
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID           -114
#define ONIGERR_NESTED_REPEAT_OPERATOR                      -115
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS                 -116
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS      -117
#define ONIGERR_END_PATTERN_IN_GROUP                        -118
#define ONIGERR_UNDEFINED_GROUP_OPTION                      -119
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE                  -121
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN                 -122
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN                -123
/* values error (syntax error) */
#define ONIGERR_TOO_BIG_NUMBER                              -200
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE             -201
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE    -202
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS                   -203
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE         -204
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES                  -205
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING                 -206
#define ONIGERR_TOO_BIG_BACKREF_NUMBER                      -207
#define ONIGERR_INVALID_BACKREF                             -208
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED        -209
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE                    -212
#define ONIGERR_EMPTY_GROUP_NAME                            -214
#define ONIGERR_INVALID_GROUP_NAME                          -215
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME                  -216
#define ONIGERR_UNDEFINED_NAME_REFERENCE                    -217
#define ONIGERR_UNDEFINED_GROUP_REFERENCE                   -218
#define ONIGERR_MULTIPLEX_DEFINED_NAME                      -219
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL              -220
#define ONIGERR_NEVER_ENDING_RECURSION                      -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY       -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME                  -223
/* errors related to thread */
#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT               -1001


/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP   31
/* Non-zero when region `r` has a capture-history list with a non-null entry
   at index `i`.  Only the upper bound of `i` is checked here. */
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
  ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])

/* match result region type */
struct re_registers {
  int  allocated;
  int  num_regs;
  int* beg;
  int* end;
  /* extended */
  struct re_registers** list; /* capture history. list[1]-list[31] */
};

#define ONIG_REGION_NOTPOS            -1

typedef struct re_registers   OnigRegion;

/* Pair of pointers handed to onig_new()/onig_recompile() as `einfo`. */
typedef struct {
  UChar* par;
  UChar* par_end;
} OnigErrorInfo;

typedef struct {
  int lower;
  int upper;
} OnigRepeatRange;

/* Warning callback type and the callback exposed as ONIG_NULL_WARN. */
typedef void (*OnigWarnFunc) P_((char* s));
extern void onig_null_warn P_((char* s));
#define ONIG_NULL_WARN       onig_null_warn

#define ONIG_CHAR_TABLE_SIZE   256

/* regex_t state */
#define ONIG_STATE_NORMAL              0
#define ONIG_STATE_SEARCHING           1
#define ONIG_STATE_COMPILING          -1
#define ONIG_STATE_MODIFY             -2

/* Any positive state value is reported as ONIG_STATE_SEARCHING; zero and
   negative values are returned unchanged. */
#define ONIG_STATE(reg) \
  ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)

typedef struct re_pattern_buffer {
  /* common members of BBuf(bytes-buffer) */
  unsigned char* p;         /* compiled pattern */
  unsigned int used;        /* used space for p */
  unsigned int alloc;       /* allocated space for p */

  int state;                     /* normal, searching, compiling */
  int num_mem;                   /* used memory(...) num counted from 1 */
  int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */
  int num_null_check;            /* OP_NULL_CHECK_START/END id counter */
  int num_call;                  /* number of subexp call */
  unsigned int capture_history;  /* (?@...) flag (1-31) */
  unsigned int bt_mem_start;     /* need backtrack flag */
  unsigned int bt_mem_end;       /* need backtrack flag */
  int stack_pop_level;
  int repeat_range_alloc;
  OnigRepeatRange* repeat_range;

  OnigEncoding    enc;
  OnigOptionType  options;
  OnigSyntaxType* syntax;
  void*           name_table;

  /* optimization info (string search, char-map and anchors) */
  int            optimize;          /* optimize flag */
  int            threshold_len;     /* search str-length for apply optimize */
  int            anchor;            /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
  OnigDistance   anchor_dmin;       /* (SEMI_)END_BUF anchor distance */
  OnigDistance   anchor_dmax;       /* (SEMI_)END_BUF anchor distance */
  int            sub_anchor;        /* start-anchor for exact or map */
  unsigned char *exact;
  unsigned char *exact_end;
  unsigned char  map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
  int           *int_map;                   /* BM skip for exact_len > 255 */
  int           *int_map_backward;          /* BM skip for backward search */
  OnigDistance   dmin;                      /* min-distance of exact or map */
  OnigDistance   dmax;                      /* max-distance of exact or map */

  /* regex_t link chain */
  struct re_pattern_buffer* chain;  /* escape compile-conflict */
} regex_t;


/* Oniguruma Native API */
ONIG_EXTERN
int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((UChar* s, int err_code, ...));
ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
void onig_free P_((regex_t*));
ONIG_EXTERN
int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
ONIG_EXTERN
int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
void onig_region_free P_((OnigRegion* region, int free_self));
ONIG_EXTERN
void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
ONIG_EXTERN
void onig_region_clear P_((OnigRegion* region));
ONIG_EXTERN
int onig_region_resize P_((OnigRegion* region, int n));
ONIG_EXTERN
int onig_name_to_group_numbers P_((regex_t* reg, UChar* name, UChar* name_end,
                                   int** nums));
ONIG_EXTERN
int onig_name_to_backref_number P_((regex_t* reg, UChar* name, UChar* name_end, OnigRegion *region));
ONIG_EXTERN
int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex_t*,void*), void* arg));
ONIG_EXTERN
int onig_number_of_names P_((regex_t* reg));
ONIG_EXTERN
OnigEncoding onig_get_encoding P_((regex_t* reg));
ONIG_EXTERN
OnigOptionType onig_get_options P_((regex_t* reg));
ONIG_EXTERN
OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
ONIG_EXTERN
int onig_set_default_syntax P_((OnigSyntaxType* syntax));
ONIG_EXTERN
void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from));
ONIG_EXTERN
void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
ONIG_EXTERN
void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
ONIG_EXTERN
void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
ONIG_EXTERN
void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
ONIG_EXTERN
int onig_set_meta_char P_((unsigned int what, unsigned int c));
ONIG_EXTERN
int onig_end P_((void));
ONIG_EXTERN
const char* onig_version P_((void));

#endif /* ONIGURUMA_H */