diff options
author | Kevin Newton <[email protected]> | 2023-09-27 12:22:36 -0400 |
---|---|---|
committer | Kevin Newton <[email protected]> | 2023-09-27 13:57:38 -0400 |
commit | 8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch) | |
tree | 46ef2bd5c51d5b7f923eda6a60edefc7a08200db /prism/util/pm_strpbrk.c | |
parent | 7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff) |
Rename YARP filepaths to prism filepaths
Diffstat (limited to 'prism/util/pm_strpbrk.c')
-rw-r--r-- | prism/util/pm_strpbrk.c | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c new file mode 100644 index 0000000000..7c0015d289 --- /dev/null +++ b/prism/util/pm_strpbrk.c @@ -0,0 +1,66 @@ +#include "yarp/util/yp_strpbrk.h" + +// This is the slow path that does care about the encoding. +static inline const uint8_t * +yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) { + size_t index = 0; + + while (index < maximum) { + if (strchr((const char *) charset, source[index]) != NULL) { + return source + index; + } + + size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index)); + if (width == 0) { + return NULL; + } + + index += width; + } + + return NULL; +} + +// This is the fast path that does not care about the encoding. +static inline const uint8_t * +yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) { + size_t index = 0; + + while (index < maximum) { + if (strchr((const char *) charset, source[index]) != NULL) { + return source + index; + } + + index++; + } + + return NULL; +} + +// Here we have rolled our own version of strpbrk. The standard library strpbrk +// has undefined behavior when the source string is not null-terminated. We want +// to support strings that are not null-terminated because yp_parse does not +// have the contract that the string is null-terminated. (This is desirable +// because it means the extension can call yp_parse with the result of a call to +// mmap). +// +// The standard library strpbrk also does not support passing a maximum length +// to search. We want to support this for the reason mentioned above, but we +// also don't want it to stop on null bytes. Ruby actually allows null bytes +// within strings, comments, regular expressions, etc. So we need to be able to +// skip past them. +// +// Finally, we want to support encodings wherein the charset could contain +// characters that are trailing bytes of multi-byte characters. For example, in +// Shift-JIS, the backslash character can be a trailing byte. In that case we +// need to take a slower path and iterate one multi-byte character at a time. +const uint8_t * +yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) { + if (length <= 0) { + return NULL; + } else if (parser->encoding_changed && parser->encoding.multibyte) { + return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length); + } else { + return yp_strpbrk_single_byte(source, charset, (size_t) length); + } +} |