diff options
author | Kevin Newton <[email protected]> | 2023-09-27 12:22:36 -0400 |
---|---|---|
committer | Kevin Newton <[email protected]> | 2023-09-27 13:57:38 -0400 |
commit | 8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch) | |
tree | 46ef2bd5c51d5b7f923eda6a60edefc7a08200db /prism/util | |
parent | 7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff) |
Rename YARP filepaths to prism filepaths
Diffstat (limited to 'prism/util')
-rw-r--r-- | prism/util/pm_buffer.c | 101 | ||||
-rw-r--r-- | prism/util/pm_buffer.h | 51 | ||||
-rw-r--r-- | prism/util/pm_char.c | 272 | ||||
-rw-r--r-- | prism/util/pm_char.h | 91 | ||||
-rw-r--r-- | prism/util/pm_constant_pool.c | 230 | ||||
-rw-r--r-- | prism/util/pm_constant_pool.h | 74 | ||||
-rw-r--r-- | prism/util/pm_list.c | 41 | ||||
-rw-r--r-- | prism/util/pm_list.h | 67 | ||||
-rw-r--r-- | prism/util/pm_memchr.c | 31 | ||||
-rw-r--r-- | prism/util/pm_memchr.h | 14 | ||||
-rw-r--r-- | prism/util/pm_newline_list.c | 134 | ||||
-rw-r--r-- | prism/util/pm_newline_list.h | 61 | ||||
-rw-r--r-- | prism/util/pm_state_stack.c | 19 | ||||
-rw-r--r-- | prism/util/pm_state_stack.h | 24 | ||||
-rw-r--r-- | prism/util/pm_string.c | 200 | ||||
-rw-r--r-- | prism/util/pm_string.h | 61 | ||||
-rw-r--r-- | prism/util/pm_string_list.c | 29 | ||||
-rw-r--r-- | prism/util/pm_string_list.h | 25 | ||||
-rw-r--r-- | prism/util/pm_strncasecmp.c | 17 | ||||
-rw-r--r-- | prism/util/pm_strpbrk.c | 66 | ||||
-rw-r--r-- | prism/util/pm_strpbrk.h | 29 |
21 files changed, 1637 insertions, 0 deletions
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c new file mode 100644 index 0000000000..15cdef74f8 --- /dev/null +++ b/prism/util/pm_buffer.c @@ -0,0 +1,101 @@ +#include "yarp/util/yp_buffer.h" + +#define YP_BUFFER_INITIAL_SIZE 1024 + +// Return the size of the yp_buffer_t struct. +size_t +yp_buffer_sizeof(void) { + return sizeof(yp_buffer_t); +} + +// Initialize a yp_buffer_t with its default values. +bool +yp_buffer_init(yp_buffer_t *buffer) { + buffer->length = 0; + buffer->capacity = YP_BUFFER_INITIAL_SIZE; + + buffer->value = (char *) malloc(YP_BUFFER_INITIAL_SIZE); + return buffer->value != NULL; +} + +// Return the value of the buffer. +char * +yp_buffer_value(yp_buffer_t *buffer) { + return buffer->value; +} + +// Return the length of the buffer. +size_t +yp_buffer_length(yp_buffer_t *buffer) { + return buffer->length; +} + +// Append the given amount of space to the buffer. +static inline void +yp_buffer_append_length(yp_buffer_t *buffer, size_t length) { + size_t next_length = buffer->length + length; + + if (next_length > buffer->capacity) { + do { + buffer->capacity *= 2; + } while (next_length > buffer->capacity); + + buffer->value = realloc(buffer->value, buffer->capacity); + } + + buffer->length = next_length; +} + +// Append a generic pointer to memory to the buffer. +static inline void +yp_buffer_append(yp_buffer_t *buffer, const void *source, size_t length) { + yp_buffer_append_length(buffer, length); + memcpy(buffer->value + (buffer->length - length), source, length); +} + +// Append the given amount of space as zeroes to the buffer. +void +yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length) { + yp_buffer_append_length(buffer, length); + memset(buffer->value + (buffer->length - length), 0, length); +} + +// Append a string to the buffer. +void +yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length) { + yp_buffer_append(buffer, value, length); +} + +// Append a list of bytes to the buffer. +void +yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length) { + yp_buffer_append(buffer, (const char *) value, length); +} + +// Append a single byte to the buffer. +void +yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value) { + const void *source = &value; + yp_buffer_append(buffer, source, sizeof(uint8_t)); +} + +// Append a 32-bit unsigned integer to the buffer. +void +yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value) { + if (value < 128) { + yp_buffer_append_u8(buffer, (uint8_t) value); + } else { + uint32_t n = value; + while (n >= 128) { + yp_buffer_append_u8(buffer, (uint8_t) (n | 128)); + n >>= 7; + } + yp_buffer_append_u8(buffer, (uint8_t) n); + } +} + +// Free the memory associated with the buffer. +void +yp_buffer_free(yp_buffer_t *buffer) { + free(buffer->value); +} diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h new file mode 100644 index 0000000000..c388e8d5ce --- /dev/null +++ b/prism/util/pm_buffer.h @@ -0,0 +1,51 @@ +#ifndef YARP_BUFFER_H +#define YARP_BUFFER_H + +#include "yarp/defines.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +// A yp_buffer_t is a simple memory buffer that stores data in a contiguous +// block of memory. It is used to store the serialized representation of a +// YARP tree. +typedef struct { + char *value; + size_t length; + size_t capacity; +} yp_buffer_t; + +// Return the size of the yp_buffer_t struct. +YP_EXPORTED_FUNCTION size_t yp_buffer_sizeof(void); + +// Initialize a yp_buffer_t with its default values. +YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer); + +// Return the value of the buffer. +YP_EXPORTED_FUNCTION char * yp_buffer_value(yp_buffer_t *buffer); + +// Return the length of the buffer. +YP_EXPORTED_FUNCTION size_t yp_buffer_length(yp_buffer_t *buffer); + +// Append the given amount of space as zeroes to the buffer. +void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length); + +// Append a string to the buffer. +void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length); + +// Append a list of bytes to the buffer. +void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length); + +// Append a single byte to the buffer. +void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value); + +// Append a 32-bit unsigned integer to the buffer. +void yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value); + +// Free the memory associated with the buffer. +YP_EXPORTED_FUNCTION void yp_buffer_free(yp_buffer_t *buffer); + +#endif diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c new file mode 100644 index 0000000000..42c3896626 --- /dev/null +++ b/prism/util/pm_char.c @@ -0,0 +1,272 @@ +#include "yarp/util/yp_char.h" + +#define YP_CHAR_BIT_WHITESPACE (1 << 0) +#define YP_CHAR_BIT_INLINE_WHITESPACE (1 << 1) +#define YP_CHAR_BIT_REGEXP_OPTION (1 << 2) + +#define YP_NUMBER_BIT_BINARY_DIGIT (1 << 0) +#define YP_NUMBER_BIT_BINARY_NUMBER (1 << 1) +#define YP_NUMBER_BIT_OCTAL_DIGIT (1 << 2) +#define YP_NUMBER_BIT_OCTAL_NUMBER (1 << 3) +#define YP_NUMBER_BIT_DECIMAL_DIGIT (1 << 4) +#define YP_NUMBER_BIT_DECIMAL_NUMBER (1 << 5) +#define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6) +#define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7) + +static const uint8_t yp_byte_table[256] = { +// 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5x + 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 4, 4, // 6x + 0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx +}; + +static const uint8_t yp_number_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x + 0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x + 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x + 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx +}; + +static inline size_t +yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { + if (length <= 0) return 0; + + size_t size = 0; + size_t maximum = (size_t) length; + + while (size < maximum && (yp_byte_table[string[size]] & kind)) size++; + return size; +} + +// Returns the number of characters at the start of the string that are +// whitespace. Disallows searching past the given maximum number of characters. +size_t +yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) { + return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE); +} + +// Returns the number of characters at the start of the string that are +// whitespace while also tracking the location of each newline. Disallows +// searching past the given maximum number of characters. +size_t +yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) { + if (length <= 0) return 0; + + size_t size = 0; + size_t maximum = (size_t) length; + + while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) { + if (string[size] == '\n') { + yp_newline_list_append(newline_list, string + size); + } + + size++; + } + + return size; +} + +// Returns the number of characters at the start of the string that are inline +// whitespace. Disallows searching past the given maximum number of characters. +size_t +yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) { + return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE); +} + +// Returns the number of characters at the start of the string that are regexp +// options. Disallows searching past the given maximum number of characters. +size_t +yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) { + return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION); +} + +static inline bool +yp_char_is_char_kind(const uint8_t b, uint8_t kind) { + return (yp_byte_table[b] & kind) != 0; +} + +// Returns true if the given character is a whitespace character. +bool +yp_char_is_whitespace(const uint8_t b) { + return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE); +} + +// Returns true if the given character is an inline whitespace character. +bool +yp_char_is_inline_whitespace(const uint8_t b) { + return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE); +} + +// Scan through the string and return the number of characters at the start of +// the string that match the given kind. Disallows searching past the given +// maximum number of characters. +static inline size_t +yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) { + if (length <= 0) return 0; + + size_t size = 0; + size_t maximum = (size_t) length; + + while (size < maximum && (yp_number_table[string[size]] & kind)) size++; + return size; +} + +// Scan through the string and return the number of characters at the start of +// the string that match the given kind. Disallows searching past the given +// maximum number of characters. +// +// Additionally, report the location of the last invalid underscore character +// found in the string through the out invalid parameter. +static inline size_t +yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) { + if (length <= 0) return 0; + + size_t size = 0; + size_t maximum = (size_t) length; + + bool underscore = false; + while (size < maximum && (yp_number_table[string[size]] & kind)) { + if (string[size] == '_') { + if (underscore) *invalid = string + size; + underscore = true; + } else { + underscore = false; + } + + size++; + } + + if (string[size - 1] == '_') *invalid = string + size - 1; + return size; +} + +// Returns the number of characters at the start of the string that are binary +// digits or underscores. Disallows searching past the given maximum number of +// characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t +yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER); +} + +// Returns the number of characters at the start of the string that are octal +// digits or underscores. Disallows searching past the given maximum number of +// characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t +yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER); +} + +// Returns the number of characters at the start of the string that are decimal +// digits. Disallows searching past the given maximum number of characters. +size_t +yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) { + return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT); +} + +// Returns the number of characters at the start of the string that are decimal +// digits or underscores. Disallows searching past the given maximum number of +// characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t +yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER); +} + +// Returns the number of characters at the start of the string that are +// hexadecimal digits. Disallows searching past the given maximum number of +// characters. +size_t +yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) { + return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT); +} + +// Returns the number of characters at the start of the string that are +// hexadecimal digits or underscores. Disallows searching past the given maximum +// number of characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t +yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) { + return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER); +} + +static inline bool +yp_char_is_number_kind(const uint8_t b, uint8_t kind) { + return (yp_number_table[b] & kind) != 0; +} + +// Returns true if the given character is a binary digit. +bool +yp_char_is_binary_digit(const uint8_t b) { + return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT); +} + +// Returns true if the given character is an octal digit. +bool +yp_char_is_octal_digit(const uint8_t b) { + return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT); +} + +// Returns true if the given character is a decimal digit. +bool +yp_char_is_decimal_digit(const uint8_t b) { + return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT); +} + +// Returns true if the given character is a hexadecimal digit. +bool +yp_char_is_hexadecimal_digit(const uint8_t b) { + return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT); +} + +#undef YP_CHAR_BIT_WHITESPACE +#undef YP_CHAR_BIT_INLINE_WHITESPACE +#undef YP_CHAR_BIT_REGEXP_OPTION + +#undef YP_NUMBER_BIT_BINARY_DIGIT +#undef YP_NUMBER_BIT_BINARY_NUMBER +#undef YP_NUMBER_BIT_OCTAL_DIGIT +#undef YP_NUMBER_BIT_OCTAL_NUMBER +#undef YP_NUMBER_BIT_DECIMAL_DIGIT +#undef YP_NUMBER_BIT_DECIMAL_NUMBER +#undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER +#undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h new file mode 100644 index 0000000000..f08d6a8c9d --- /dev/null +++ b/prism/util/pm_char.h @@ -0,0 +1,91 @@ +#ifndef YP_CHAR_H +#define YP_CHAR_H + +#include "yarp/defines.h" +#include "yarp/util/yp_newline_list.h" + +#include <stdbool.h> +#include <stddef.h> + +// Returns the number of characters at the start of the string that are +// whitespace. Disallows searching past the given maximum number of characters. +size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length); + +// Returns the number of characters at the start of the string that are +// whitespace while also tracking the location of each newline. Disallows +// searching past the given maximum number of characters. +size_t +yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list); + +// Returns the number of characters at the start of the string that are inline +// whitespace. Disallows searching past the given maximum number of characters. +size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length); + +// Returns the number of characters at the start of the string that are decimal +// digits. Disallows searching past the given maximum number of characters. +size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length); + +// Returns the number of characters at the start of the string that are +// hexadecimal digits. Disallows searching past the given maximum number of +// characters. +size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length); + +// Returns the number of characters at the start of the string that are octal +// digits or underscores. Disallows searching past the given maximum number of +// characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +// Returns the number of characters at the start of the string that are decimal +// digits or underscores. Disallows searching past the given maximum number of +// characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +// Returns the number of characters at the start of the string that are +// hexadecimal digits or underscores. Disallows searching past the given maximum +// number of characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +// Returns the number of characters at the start of the string that are regexp +// options. Disallows searching past the given maximum number of characters. +size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length); + +// Returns the number of characters at the start of the string that are binary +// digits or underscores. Disallows searching past the given maximum number of +// characters. +// +// If multiple underscores are found in a row or if an underscore is +// found at the end of the number, then the invalid pointer is set to the index +// of the first invalid underscore. +size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid); + +// Returns true if the given character is a whitespace character. +bool yp_char_is_whitespace(const uint8_t b); + +// Returns true if the given character is an inline whitespace character. +bool yp_char_is_inline_whitespace(const uint8_t b); + +// Returns true if the given character is a binary digit. +bool yp_char_is_binary_digit(const uint8_t b); + +// Returns true if the given character is an octal digit. +bool yp_char_is_octal_digit(const uint8_t b); + +// Returns true if the given character is a decimal digit. +bool yp_char_is_decimal_digit(const uint8_t b); + +// Returns true if the given character is a hexadecimal digit. +bool yp_char_is_hexadecimal_digit(const uint8_t b); + +#endif diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c new file mode 100644 index 0000000000..18376dce82 --- /dev/null +++ b/prism/util/pm_constant_pool.c @@ -0,0 +1,230 @@ +#include "yarp/util/yp_constant_pool.h" + +// Initialize a list of constant ids. +void +yp_constant_id_list_init(yp_constant_id_list_t *list) { + list->ids = NULL; + list->size = 0; + list->capacity = 0; +} + +// Append a constant id to a list of constant ids. Returns false if any +// potential reallocations fail. +bool +yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) { + if (list->size >= list->capacity) { + list->capacity = list->capacity == 0 ? 8 : list->capacity * 2; + list->ids = (yp_constant_id_t *) realloc(list->ids, sizeof(yp_constant_id_t) * list->capacity); + if (list->ids == NULL) return false; + } + + list->ids[list->size++] = id; + return true; +} + +// Checks if the current constant id list includes the given constant id. +bool +yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) { + for (size_t index = 0; index < list->size; index++) { + if (list->ids[index] == id) return true; + } + return false; +} + +// Get the memory size of a list of constant ids. +size_t +yp_constant_id_list_memsize(yp_constant_id_list_t *list) { + return sizeof(yp_constant_id_list_t) + (list->capacity * sizeof(yp_constant_id_t)); +} + +// Free the memory associated with a list of constant ids. +void +yp_constant_id_list_free(yp_constant_id_list_t *list) { + if (list->ids != NULL) { + free(list->ids); + } +} + +// A relatively simple hash function (djb2) that is used to hash strings. We are +// optimizing here for simplicity and speed. +static inline uint32_t +yp_constant_pool_hash(const uint8_t *start, size_t length) { + // This is a prime number used as the initial value for the hash function. + uint32_t value = 5381; + + for (size_t index = 0; index < length; index++) { + value = ((value << 5) + value) + start[index]; + } + + return value; +} + +// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 +static uint32_t +next_power_of_two(uint32_t v) { + // Avoid underflow in subtraction on next line. + if (v == 0) { + // 1 is the nearest power of 2 to 0 (2^0) + return 1; + } + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; +} + +#ifndef NDEBUG +static bool +is_power_of_two(uint32_t size) { + return (size & (size - 1)) == 0; +} +#endif + +// Resize a constant pool to a given capacity. +static inline bool +yp_constant_pool_resize(yp_constant_pool_t *pool) { + assert(is_power_of_two(pool->capacity)); + + uint32_t next_capacity = pool->capacity * 2; + if (next_capacity < pool->capacity) return false; + + const uint32_t mask = next_capacity - 1; + yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t)); + if (next_constants == NULL) return false; + + // For each constant in the current constant pool, rehash the content, find + // the index in the next constant pool, and insert it. + for (uint32_t index = 0; index < pool->capacity; index++) { + yp_constant_t *constant = &pool->constants[index]; + + // If an id is set on this constant, then we know we have content here. + // In this case we need to insert it into the next constant pool. + if (constant->id != 0) { + uint32_t next_index = constant->hash & mask; + + // This implements linear scanning to find the next available slot + // in case this index is already taken. We don't need to bother + // comparing the values since we know that the hash is unique. + while (next_constants[next_index].id != 0) { + next_index = (next_index + 1) & mask; + } + + // Here we copy over the entire constant, which includes the id so + // that they are consistent between resizes. + next_constants[next_index] = *constant; + } + } + + free(pool->constants); + pool->constants = next_constants; + pool->capacity = next_capacity; + return true; +} + +// Initialize a new constant pool with a given capacity. +bool +yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity) { + const uint32_t maximum = (~((uint32_t) 0)); + if (capacity >= ((maximum / 2) + 1)) return false; + + capacity = next_power_of_two(capacity); + pool->constants = calloc(capacity, sizeof(yp_constant_t)); + if (pool->constants == NULL) return false; + + pool->size = 0; + pool->capacity = capacity; + return true; +} + +// Insert a constant into a constant pool and return its index in the pool. +static inline yp_constant_id_t +yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) { + if (pool->size >= (pool->capacity / 4 * 3)) { + if (!yp_constant_pool_resize(pool)) return 0; + } + + assert(is_power_of_two(pool->capacity)); + const uint32_t mask = pool->capacity - 1; + + uint32_t hash = yp_constant_pool_hash(start, length); + uint32_t index = hash & mask; + yp_constant_t *constant; + + while (constant = &pool->constants[index], constant->id != 0) { + // If there is a collision, then we need to check if the content is the + // same as the content we are trying to insert. If it is, then we can + // return the id of the existing constant. + if ((constant->length == length) && memcmp(constant->start, start, length) == 0) { + // Since we have found a match, we need to check if this is + // attempting to insert a shared or an owned constant. We want to + // prefer shared constants since they don't require allocations. + if (owned) { + // If we're attempting to insert an owned constant and we have + // an existing constant, then either way we don't want the given + // memory. Either it's duplicated with the existing constant or + // it's not necessary because we have a shared version. + free((void *) start); + } else if (constant->owned) { + // If we're attempting to insert a shared constant and the + // existing constant is owned, then we can free the owned + // constant and replace it with the shared constant. + free((void *) constant->start); + constant->start = start; + constant->owned = false; + } + + return constant->id; + } + + index = (index + 1) & mask; + } + + pool->size++; + assert(pool->size < ((uint32_t) (1 << 31))); + + *constant = (yp_constant_t) { + .id = (unsigned int) (pool->size & 0x7FFFFFFF), + .owned = owned, + .start = start, + .length = length, + .hash = hash + }; + + return constant->id; +} + +// Insert a constant into a constant pool. Returns the id of the constant, or 0 +// if any potential calls to resize fail. +yp_constant_id_t +yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) { + return yp_constant_pool_insert(pool, start, length, false); +} + +// Insert a constant into a constant pool from memory that is now owned by the +// constant pool. Returns the id of the constant, or 0 if any potential calls to +// resize fail. +yp_constant_id_t +yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) { + return yp_constant_pool_insert(pool, start, length, true); +} + +// Free the memory associated with a constant pool. +void +yp_constant_pool_free(yp_constant_pool_t *pool) { + // For each constant in the current constant pool, free the contents if the + // contents are owned. + for (uint32_t index = 0; index < pool->capacity; index++) { + yp_constant_t *constant = &pool->constants[index]; + + // If an id is set on this constant, then we know we have content here. + if (constant->id != 0 && constant->owned) { + free((void *) constant->start); + } + } + + free(pool->constants); +} diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h new file mode 100644 index 0000000000..c07cd0b7d8 --- /dev/null +++ b/prism/util/pm_constant_pool.h @@ -0,0 +1,74 @@ +// The constant pool is a data structure that stores a set of strings. Each +// string is assigned a unique id, which can be used to compare strings for +// equality. This comparison ends up being much faster than strcmp, since it +// only requires a single integer comparison. + +#ifndef YP_CONSTANT_POOL_H +#define YP_CONSTANT_POOL_H + +#include "yarp/defines.h" + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +typedef uint32_t yp_constant_id_t; + +typedef struct { + yp_constant_id_t *ids; + size_t size; + size_t capacity; +} yp_constant_id_list_t; + +// Initialize a list of constant ids. +void yp_constant_id_list_init(yp_constant_id_list_t *list); + +// Append a constant id to a list of constant ids. Returns false if any +// potential reallocations fail. +bool yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id); + +// Checks if the current constant id list includes the given constant id. +bool +yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id); + +// Get the memory size of a list of constant ids. +size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list); + +// Free the memory associated with a list of constant ids. +void yp_constant_id_list_free(yp_constant_id_list_t *list); + +typedef struct { + unsigned int id: 31; + bool owned: 1; + const uint8_t *start; + size_t length; + uint32_t hash; +} yp_constant_t; + +typedef struct { + yp_constant_t *constants; + uint32_t size; + uint32_t capacity; +} yp_constant_pool_t; + +// Define an empty constant pool. +#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 }) + +// Initialize a new constant pool with a given capacity. +bool yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity); + +// Insert a constant into a constant pool that is a slice of a source string. +// Returns the id of the constant, or 0 if any potential calls to resize fail. +yp_constant_id_t yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length); + +// Insert a constant into a constant pool from memory that is now owned by the +// constant pool. Returns the id of the constant, or 0 if any potential calls to +// resize fail. +yp_constant_id_t yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length); + +// Free the memory associated with a constant pool. +void yp_constant_pool_free(yp_constant_pool_t *pool); + +#endif diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c new file mode 100644 index 0000000000..e03dd79d8d --- /dev/null +++ b/prism/util/pm_list.c @@ -0,0 +1,41 @@ +#include "yarp/util/yp_list.h" + +// Returns true if the given list is empty. +YP_EXPORTED_FUNCTION bool +yp_list_empty_p(yp_list_t *list) { + return list->head == NULL; +} + +// Returns the size of the list. +YP_EXPORTED_FUNCTION size_t +yp_list_size(yp_list_t *list) { + return list->size; +} + +// Append a node to the given list. +void +yp_list_append(yp_list_t *list, yp_list_node_t *node) { + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; + list->size++; +} + +// Deallocate the internal state of the given list. +YP_EXPORTED_FUNCTION void +yp_list_free(yp_list_t *list) { + yp_list_node_t *node = list->head; + yp_list_node_t *next; + + while (node != NULL) { + next = node->next; + free(node); + node = next; + } + + list->size = 0; +} diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h new file mode 100644 index 0000000000..205a5a7eab --- /dev/null +++ b/prism/util/pm_list.h @@ -0,0 +1,67 @@ +// This struct represents an abstract linked list that provides common +// functionality. It is meant to be used any time a linked list is necessary to +// store data. +// +// The linked list itself operates off a set of pointers. Because the pointers +// are not necessarily sequential, they can be of any size. We use this fact to +// allow the consumer of this linked list to extend the node struct to include +// any data they want. This is done by using the yp_list_node_t as the first +// member of the struct. +// +// For example, if we want to store a list of integers, we can do the following: +// +// typedef struct { +// yp_list_node_t node; +// int value; +// } yp_int_node_t; +// +// yp_list_t list = YP_LIST_EMPTY; +// yp_int_node_t *node = malloc(sizeof(yp_int_node_t)); +// node->value = 5; +// +// yp_list_append(&list, &node->node); +// +// The yp_list_t struct is used to represent the overall linked list. It +// contains a pointer to the head and tail of the list. This allows for easy +// iteration and appending of new nodes. + +#ifndef YARP_LIST_H +#define YARP_LIST_H + +#include "yarp/defines.h" + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +// This represents a node in the linked list. +typedef struct yp_list_node { + struct yp_list_node *next; +} yp_list_node_t; + +// This represents the overall linked list. It keeps a pointer to the head and +// tail so that iteration is easy and pushing new nodes is easy. +typedef struct { + size_t size; + yp_list_node_t *head; + yp_list_node_t *tail; +} yp_list_t; + +// This represents an empty list. It's used to initialize a stack-allocated list +// as opposed to a method call. +#define YP_LIST_EMPTY ((yp_list_t) { .size = 0, .head = NULL, .tail = NULL }) + +// Returns true if the given list is empty. +YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list); + +// Returns the size of the list. +YP_EXPORTED_FUNCTION size_t yp_list_size(yp_list_t *list); + +// Append a node to the given list. +void yp_list_append(yp_list_t *list, yp_list_node_t *node); + +// Deallocate the internal state of the given list. +YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list); + +#endif diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c new file mode 100644 index 0000000000..af9c14397e --- /dev/null +++ b/prism/util/pm_memchr.c @@ -0,0 +1,31 @@ +#include "yarp/util/yp_memchr.h" + +#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40 + +// We need to roll our own memchr to handle cases where the encoding changes and +// we need to search for a character in a buffer that could be the trailing byte +// of a multibyte character. +void * +yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) { + if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) { + const uint8_t *source = (const uint8_t *) memory; + size_t index = 0; + + while (index < number) { + if (source[index] == character) { + return (void *) (source + index); + } + + size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index)); + if (width == 0) { + return NULL; + } + + index += width; + } + + return NULL; + } else { + return memchr(memory, character, number); + } +} diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h new file mode 100644 index 0000000000..97f4b15a83 --- /dev/null +++ b/prism/util/pm_memchr.h @@ -0,0 +1,14 @@ +#ifndef YP_MEMCHR_H +#define YP_MEMCHR_H + +#include "yarp/defines.h" +#include "yarp/enc/yp_encoding.h" + +#include <stddef.h> + +// We need to roll our own memchr to handle cases where the encoding changes and +// we need to search for a character in a buffer that could be the trailing byte +// of a multibyte character. +void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding); + +#endif diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c new file mode 100644 index 0000000000..969b884560 --- /dev/null +++ b/prism/util/pm_newline_list.c @@ -0,0 +1,134 @@ +#include "yarp/util/yp_newline_list.h" + +// Initialize a new newline list with the given capacity. Returns true if the +// allocation of the offsets succeeds, otherwise returns false. +bool +yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) { + list->offsets = (size_t *) calloc(capacity, sizeof(size_t)); + if (list->offsets == NULL) return false; + + list->start = start; + + // This is 1 instead of 0 because we want to include the first line of the + // file as having offset 0, which is set because of calloc. + list->size = 1; + list->capacity = capacity; + + list->last_index = 0; + list->last_offset = 0; + + return true; +} + +// Append a new offset to the newline list. Returns true if the reallocation of +// the offsets succeeds (if one was necessary), otherwise returns false. +bool +yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) { + if (list->size == list->capacity) { + size_t *original_offsets = list->offsets; + + list->capacity = (list->capacity * 3) / 2; + list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t)); + memcpy(list->offsets, original_offsets, list->size * sizeof(size_t)); + free(original_offsets); + if (list->offsets == NULL) return false; + } + + assert(*cursor == '\n'); + assert(cursor >= list->start); + size_t newline_offset = (size_t) (cursor - list->start + 1); + + assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]); + list->offsets[list->size++] = newline_offset; + + return true; +} + +// Conditionally append a new offset to the newline list, if the value passed in is a newline. +bool +yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) { + if (*cursor != '\n') { + return true; + } + return yp_newline_list_append(list, cursor); +} + +// Returns the line and column of the given offset, assuming we don't have any +// information about the previous index that we found. +static yp_line_column_t +yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) { + size_t left = 0; + size_t right = list->size - 1; + + while (left <= right) { + size_t mid = left + (right - left) / 2; + + if (list->offsets[mid] == offset) { + return ((yp_line_column_t) { mid, 0 }); + } + + if (list->offsets[mid] < offset) { + left = mid + 1; + } else { + right = mid - 1; + } + } + + return ((yp_line_column_t) { left - 1, offset - list->offsets[left - 1] }); +} + +// Returns the line and column of the given offset, assuming we know the last +// index that we found. +static yp_line_column_t +yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) { + if (offset > list->last_offset) { + size_t index = list->last_index; + while (index < list->size && list->offsets[index] < offset) { + index++; + } + + if (index == list->size) { + return ((yp_line_column_t) { index - 1, offset - list->offsets[index - 1] }); + } + + return ((yp_line_column_t) { index, 0 }); + } else { + size_t index = list->last_index; + while (index > 0 && list->offsets[index] > offset) { + index--; + } + + if (index == 0) { + return ((yp_line_column_t) { 0, offset }); + } + + return ((yp_line_column_t) { index, offset - list->offsets[index - 1] }); + } +} + +// Returns the line and column of the given offset. If the offset is not in the +// list, the line and column of the closest offset less than the given offset +// are returned. +yp_line_column_t +yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) { + assert(cursor >= list->start); + size_t offset = (size_t) (cursor - list->start); + yp_line_column_t result; + + if (list->last_offset == 0) { + result = yp_newline_list_line_column_search(list, offset); + } else { + result = yp_newline_list_line_column_scan(list, offset); + } + + list->last_index = result.line; + list->last_offset = offset; + + return result; +} + +// Free the internal memory allocated for the newline list. +void +yp_newline_list_free(yp_newline_list_t *list) { + free(list->offsets); +} diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h new file mode 100644 index 0000000000..9231305008 --- /dev/null +++ b/prism/util/pm_newline_list.h @@ -0,0 +1,61 @@ +// When compiling the syntax tree, it's necessary to know the line and column +// of many nodes. This is necessary to support things like error messages, +// tracepoints, etc. +// +// It's possible that we could store the start line, start column, end line, and +// end column on every node in addition to the offsets that we already store, +// but that would be quite a lot of memory overhead. + +#ifndef YP_NEWLINE_LIST_H +#define YP_NEWLINE_LIST_H + +#include "yarp/defines.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> + +// A list of offsets of newlines in a string. The offsets are assumed to be +// sorted/inserted in ascending order. +typedef struct { + const uint8_t *start; + + size_t *offsets; + size_t size; + size_t capacity; + + size_t last_offset; + size_t last_index; +} yp_newline_list_t; + +// A line and column in a string. +typedef struct { + size_t line; + size_t column; +} yp_line_column_t; + +#define YP_NEWLINE_LIST_EMPTY ((yp_newline_list_t) { \ + .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \ +}) + +// Initialize a new newline list with the given capacity. Returns true if the +// allocation of the offsets succeeds, otherwise returns false. +bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity); + +// Append a new offset to the newline list. Returns true if the reallocation of +// the offsets succeeds (if one was necessary), otherwise returns false. +bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor); + +// Conditionally append a new offset to the newline list, if the value passed in is a newline. +bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor); + +// Returns the line and column of the given offset. If the offset is not in the +// list, the line and column of the closest offset less than the given offset +// are returned. +yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor); + +// Free the internal memory allocated for the newline list. +void yp_newline_list_free(yp_newline_list_t *list); + +#endif diff --git a/prism/util/pm_state_stack.c b/prism/util/pm_state_stack.c new file mode 100644 index 0000000000..7ff95bd611 --- /dev/null +++ b/prism/util/pm_state_stack.c @@ -0,0 +1,19 @@ +#include "yarp/util/yp_state_stack.h" + +// Pushes a value onto the stack. +void +yp_state_stack_push(yp_state_stack_t *stack, bool value) { + *stack = (*stack << 1) | (value & 1); +} + +// Pops a value off the stack. +void +yp_state_stack_pop(yp_state_stack_t *stack) { + *stack >>= 1; +} + +// Returns the value at the top of the stack. +bool +yp_state_stack_p(yp_state_stack_t *stack) { + return *stack & 1; +} diff --git a/prism/util/pm_state_stack.h b/prism/util/pm_state_stack.h new file mode 100644 index 0000000000..69d8d7d54b --- /dev/null +++ b/prism/util/pm_state_stack.h @@ -0,0 +1,24 @@ +#ifndef YP_STATE_STACK_H +#define YP_STATE_STACK_H + +#include "yarp/defines.h" + +#include <stdbool.h> +#include <stdint.h> + +// A struct that represents a stack of bools. +typedef uint32_t yp_state_stack_t; + +// Initializes the state stack to an empty stack. +#define YP_STATE_STACK_EMPTY ((yp_state_stack_t) 0) + +// Pushes a value onto the stack. +void yp_state_stack_push(yp_state_stack_t *stack, bool value); + +// Pops a value off the stack. +void yp_state_stack_pop(yp_state_stack_t *stack); + +// Returns the value at the top of the stack. +bool yp_state_stack_p(yp_state_stack_t *stack); + +#endif diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c new file mode 100644 index 0000000000..9ee25155a3 --- /dev/null +++ b/prism/util/pm_string.c @@ -0,0 +1,200 @@ +#include "yarp/util/yp_string.h" + +// The following headers are necessary to read files using demand paging. +#ifdef _WIN32 +#include <windows.h> +#else +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#endif + +// Initialize a shared string that is based on initial input. +void +yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) { + assert(start <= end); + + *string = (yp_string_t) { + .type = YP_STRING_SHARED, + .source = start, + .length = (size_t) (end - start) + }; +} + +// Initialize an owned string that is responsible for freeing allocated memory. +void +yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) { + *string = (yp_string_t) { + .type = YP_STRING_OWNED, + .source = source, + .length = length + }; +} + +// Initialize a constant string that doesn't own its memory source. +void +yp_string_constant_init(yp_string_t *string, const char *source, size_t length) { + *string = (yp_string_t) { + .type = YP_STRING_CONSTANT, + .source = (const uint8_t *) source, + .length = length + }; +} + +static void +yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) { + *string = (yp_string_t) { + .type = YP_STRING_MAPPED, + .source = source, + .length = length + }; +} + +// Returns the memory size associated with the string. +size_t +yp_string_memsize(const yp_string_t *string) { + size_t size = sizeof(yp_string_t); + if (string->type == YP_STRING_OWNED) { + size += string->length; + } + return size; +} + +// Ensure the string is owned. If it is not, then reinitialize it as owned and +// copy over the previous source. +void +yp_string_ensure_owned(yp_string_t *string) { + if (string->type == YP_STRING_OWNED) return; + + size_t length = yp_string_length(string); + const uint8_t *source = yp_string_source(string); + + uint8_t *memory = malloc(length); + if (!memory) return; + + yp_string_owned_init(string, memory, length); + memcpy((void *) string->source, source, length); +} + +// Returns the length associated with the string. +YP_EXPORTED_FUNCTION size_t +yp_string_length(const yp_string_t *string) { + return string->length; +} + +// Returns the start pointer associated with the string. +YP_EXPORTED_FUNCTION const uint8_t * +yp_string_source(const yp_string_t *string) { + return string->source; +} + +// Free the associated memory of the given string. +YP_EXPORTED_FUNCTION void +yp_string_free(yp_string_t *string) { + void *memory = (void *) string->source; + + if (string->type == YP_STRING_OWNED) { + free(memory); + } else if (string->type == YP_STRING_MAPPED && string->length) { +#if defined(_WIN32) + UnmapViewOfFile(memory); +#else + munmap(memory, string->length); +#endif + } +} + +bool +yp_string_mapped_init(yp_string_t *string, const char *filepath) { +#ifdef _WIN32 + // Open the file for reading. + HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + + if (file == INVALID_HANDLE_VALUE) { + perror("CreateFile failed"); + return false; + } + + // Get the file size. + DWORD file_size = GetFileSize(file, NULL); + if (file_size == INVALID_FILE_SIZE) { + CloseHandle(file); + perror("GetFileSize failed"); + return false; + } + + // If the file is empty, then we don't need to do anything else, we'll set + // the source to a constant empty string and return. + if (file_size == 0) { + CloseHandle(file); + uint8_t empty[] = ""; + yp_string_mapped_init_internal(string, empty, 0); + return true; + } + + // Create a mapping of the file. + HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); + if (mapping == NULL) { + CloseHandle(file); + perror("CreateFileMapping failed"); + return false; + } + + // Map the file into memory. + uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); + CloseHandle(mapping); + CloseHandle(file); + + if (source == NULL) { + perror("MapViewOfFile failed"); + return false; + } + + yp_string_mapped_init_internal(string, source, (size_t) file_size); + return true; +#else + // Open the file for reading + int fd = open(filepath, O_RDONLY); + if (fd == -1) { + perror("open"); + return false; + } + + // Stat the file to get the file size + struct stat sb; + if (fstat(fd, &sb) == -1) { + close(fd); + perror("fstat"); + return false; + } + + // mmap the file descriptor to virtually get the contents + size_t size = (size_t) sb.st_size; + uint8_t *source = NULL; + + if (size == 0) { + close(fd); + uint8_t empty[] = ""; + yp_string_mapped_init_internal(string, empty, 0); + return true; + } + + source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (source == MAP_FAILED) { + perror("Map failed"); + return false; + } + + close(fd); + yp_string_mapped_init_internal(string, source, size); + return true; +#endif +} + +// Returns the size of the yp_string_t struct. This is necessary to allocate the +// correct amount of memory in the FFI backend. +YP_EXPORTED_FUNCTION size_t +yp_string_sizeof(void) { + return sizeof(yp_string_t); +} diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h new file mode 100644 index 0000000000..bcdf8b66d9 --- /dev/null +++ b/prism/util/pm_string.h @@ -0,0 +1,61 @@ +#ifndef YARP_STRING_H +#define YARP_STRING_H + +#include "yarp/defines.h" + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +// This struct represents a string value. +typedef struct { + enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type; + const uint8_t *source; + size_t length; +} yp_string_t; + +#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 }) + +// Initialize a shared string that is based on initial input. +void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end); + +// Initialize an owned string that is responsible for freeing allocated memory. +void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length); + +// Initialize a constant string that doesn't own its memory source. +void yp_string_constant_init(yp_string_t *string, const char *source, size_t length); + +// Read the file indicated by the filepath parameter into source and load its +// contents and size into the given yp_string_t. +// The given yp_string_t should be freed using yp_string_free() when it is no longer used. +// +// We want to use demand paging as much as possible in order to avoid having to +// read the entire file into memory (which could be detrimental to performance +// for large files). This means that if we're on windows we'll use +// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use +// `mmap`, and on other POSIX systems we'll use `read`. +YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath); + +// Returns the memory size associated with the string. +size_t yp_string_memsize(const yp_string_t *string); + +// Ensure the string is owned. If it is not, then reinitialize it as owned and +// copy over the previous source. +void yp_string_ensure_owned(yp_string_t *string); + +// Returns the length associated with the string. +YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string); + +// Returns the start pointer associated with the string. +YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string); + +// Free the associated memory of the given string. +YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string); + +// Returns the size of the yp_string_t struct. This is necessary to allocate the +// correct amount of memory in the FFI backend. +YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void); + +#endif // YARP_STRING_H diff --git a/prism/util/pm_string_list.c b/prism/util/pm_string_list.c new file mode 100644 index 0000000000..3b2b4381c5 --- /dev/null +++ b/prism/util/pm_string_list.c @@ -0,0 +1,29 @@ +#include "yarp/util/yp_string_list.h" + +// Initialize a yp_string_list_t with its default values. +void +yp_string_list_init(yp_string_list_t *string_list) { + string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t)); + string_list->length = 0; + string_list->capacity = 1; +} + +// Append a yp_string_t to the given string list. +void +yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) { + if (string_list->length + 1 > string_list->capacity) { + yp_string_t *original_string = string_list->strings; + string_list->capacity *= 2; + string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t)); + memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t)); + free(original_string); + } + + string_list->strings[string_list->length++] = *string; +} + +// Free the memory associated with the string list. +void +yp_string_list_free(yp_string_list_t *string_list) { + free(string_list->strings); +} diff --git a/prism/util/pm_string_list.h b/prism/util/pm_string_list.h new file mode 100644 index 0000000000..0009a27a60 --- /dev/null +++ b/prism/util/pm_string_list.h @@ -0,0 +1,25 @@ +#ifndef YARP_STRING_LIST_H +#define YARP_STRING_LIST_H + +#include "yarp/defines.h" +#include "yarp/util/yp_string.h" + +#include <stddef.h> +#include <stdlib.h> + +typedef struct { + yp_string_t *strings; + size_t length; + size_t capacity; +} yp_string_list_t; + +// Initialize a yp_string_list_t with its default values. +YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list); + +// Append a yp_string_t to the given string list. +void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string); + +// Free the memory associated with the string list. +YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list); + +#endif diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c new file mode 100644 index 0000000000..1cbaf904f4 --- /dev/null +++ b/prism/util/pm_strncasecmp.c @@ -0,0 +1,17 @@ +#include <ctype.h> +#include <stddef.h> +#include <stdint.h> + +int +yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) { + size_t offset = 0; + int difference = 0; + + while (offset < length && string1[offset] != '\0') { + if (string2[offset] == '\0') return string1[offset]; + if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference; + offset++; + } + + return difference; +} diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c new file mode 100644 index 0000000000..7c0015d289 --- /dev/null +++ b/prism/util/pm_strpbrk.c @@ -0,0 +1,66 @@ +#include "yarp/util/yp_strpbrk.h" + +// This is the slow path that does care about the encoding. +static inline const uint8_t * +yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) { + size_t index = 0; + + while (index < maximum) { + if (strchr((const char *) charset, source[index]) != NULL) { + return source + index; + } + + size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index)); + if (width == 0) { + return NULL; + } + + index += width; + } + + return NULL; +} + +// This is the fast path that does not care about the encoding. +static inline const uint8_t * +yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) { + size_t index = 0; + + while (index < maximum) { + if (strchr((const char *) charset, source[index]) != NULL) { + return source + index; + } + + index++; + } + + return NULL; +} + +// Here we have rolled our own version of strpbrk. The standard library strpbrk +// has undefined behavior when the source string is not null-terminated. We want +// to support strings that are not null-terminated because yp_parse does not +// have the contract that the string is null-terminated. (This is desirable +// because it means the extension can call yp_parse with the result of a call to +// mmap). +// +// The standard library strpbrk also does not support passing a maximum length +// to search. We want to support this for the reason mentioned above, but we +// also don't want it to stop on null bytes. Ruby actually allows null bytes +// within strings, comments, regular expressions, etc. So we need to be able to +// skip past them. +// +// Finally, we want to support encodings wherein the charset could contain +// characters that are trailing bytes of multi-byte characters. For example, in +// Shift-JIS, the backslash character can be a trailing byte. In that case we +// need to take a slower path and iterate one multi-byte character at a time. +const uint8_t * +yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) { + if (length <= 0) { + return NULL; + } else if (parser->encoding_changed && parser->encoding.multibyte) { + return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length); + } else { + return yp_strpbrk_single_byte(source, charset, (size_t) length); + } +} diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h new file mode 100644 index 0000000000..d0bdd5bec0 --- /dev/null +++ b/prism/util/pm_strpbrk.h @@ -0,0 +1,29 @@ +#ifndef YP_STRPBRK_H +#define YP_STRPBRK_H + +#include "yarp/defines.h" +#include "yarp/parser.h" + +#include <stddef.h> +#include <string.h> + +// Here we have rolled our own version of strpbrk. The standard library strpbrk +// has undefined behavior when the source string is not null-terminated. We want +// to support strings that are not null-terminated because yp_parse does not +// have the contract that the string is null-terminated. (This is desirable +// because it means the extension can call yp_parse with the result of a call to +// mmap). +// +// The standard library strpbrk also does not support passing a maximum length +// to search. We want to support this for the reason mentioned above, but we +// also don't want it to stop on null bytes. Ruby actually allows null bytes +// within strings, comments, regular expressions, etc. So we need to be able to +// skip past them. +// +// Finally, we want to support encodings wherein the charset could contain +// characters that are trailing bytes of multi-byte characters. For example, in +// Shift-JIS, the backslash character can be a trailing byte. In that case we +// need to take a slower path and iterate one multi-byte character at a time. +const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length); + +#endif |