summaryrefslogtreecommitdiff
path: root/prism/util
diff options
context:
space:
mode:
authorKevin Newton <[email protected]>2023-09-27 12:22:36 -0400
committerKevin Newton <[email protected]>2023-09-27 13:57:38 -0400
commit8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch)
tree46ef2bd5c51d5b7f923eda6a60edefc7a08200db /prism/util
parent7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff)
Rename YARP filepaths to prism filepaths
Diffstat (limited to 'prism/util')
-rw-r--r--prism/util/pm_buffer.c101
-rw-r--r--prism/util/pm_buffer.h51
-rw-r--r--prism/util/pm_char.c272
-rw-r--r--prism/util/pm_char.h91
-rw-r--r--prism/util/pm_constant_pool.c230
-rw-r--r--prism/util/pm_constant_pool.h74
-rw-r--r--prism/util/pm_list.c41
-rw-r--r--prism/util/pm_list.h67
-rw-r--r--prism/util/pm_memchr.c31
-rw-r--r--prism/util/pm_memchr.h14
-rw-r--r--prism/util/pm_newline_list.c134
-rw-r--r--prism/util/pm_newline_list.h61
-rw-r--r--prism/util/pm_state_stack.c19
-rw-r--r--prism/util/pm_state_stack.h24
-rw-r--r--prism/util/pm_string.c200
-rw-r--r--prism/util/pm_string.h61
-rw-r--r--prism/util/pm_string_list.c29
-rw-r--r--prism/util/pm_string_list.h25
-rw-r--r--prism/util/pm_strncasecmp.c17
-rw-r--r--prism/util/pm_strpbrk.c66
-rw-r--r--prism/util/pm_strpbrk.h29
21 files changed, 1637 insertions, 0 deletions
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
new file mode 100644
index 0000000000..15cdef74f8
--- /dev/null
+++ b/prism/util/pm_buffer.c
@@ -0,0 +1,101 @@
+#include "yarp/util/yp_buffer.h"
+
+#define YP_BUFFER_INITIAL_SIZE 1024
+
+// Return the size of the yp_buffer_t struct.
+size_t
+yp_buffer_sizeof(void) {
+ return sizeof(yp_buffer_t);
+}
+
+// Initialize a yp_buffer_t with its default values.
+bool
+yp_buffer_init(yp_buffer_t *buffer) {
+ buffer->length = 0;
+ buffer->capacity = YP_BUFFER_INITIAL_SIZE;
+
+ buffer->value = (char *) malloc(YP_BUFFER_INITIAL_SIZE);
+ return buffer->value != NULL;
+}
+
+// Return the value of the buffer.
+char *
+yp_buffer_value(yp_buffer_t *buffer) {
+ return buffer->value;
+}
+
+// Return the length of the buffer.
+size_t
+yp_buffer_length(yp_buffer_t *buffer) {
+ return buffer->length;
+}
+
+// Append the given amount of space to the buffer.
+static inline void
+yp_buffer_append_length(yp_buffer_t *buffer, size_t length) {
+ size_t next_length = buffer->length + length;
+
+ if (next_length > buffer->capacity) {
+ do {
+ buffer->capacity *= 2;
+ } while (next_length > buffer->capacity);
+
+ buffer->value = realloc(buffer->value, buffer->capacity);
+ }
+
+ buffer->length = next_length;
+}
+
+// Append a generic pointer to memory to the buffer.
+static inline void
+yp_buffer_append(yp_buffer_t *buffer, const void *source, size_t length) {
+ yp_buffer_append_length(buffer, length);
+ memcpy(buffer->value + (buffer->length - length), source, length);
+}
+
+// Append the given amount of space as zeroes to the buffer.
+void
+yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length) {
+ yp_buffer_append_length(buffer, length);
+ memset(buffer->value + (buffer->length - length), 0, length);
+}
+
+// Append a string to the buffer.
+void
+yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length) {
+ yp_buffer_append(buffer, value, length);
+}
+
+// Append a list of bytes to the buffer.
+void
+yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length) {
+ yp_buffer_append(buffer, (const char *) value, length);
+}
+
+// Append a single byte to the buffer.
+void
+yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value) {
+ const void *source = &value;
+ yp_buffer_append(buffer, source, sizeof(uint8_t));
+}
+
+// Append a 32-bit unsigned integer to the buffer.
+void
+yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value) {
+ if (value < 128) {
+ yp_buffer_append_u8(buffer, (uint8_t) value);
+ } else {
+ uint32_t n = value;
+ while (n >= 128) {
+ yp_buffer_append_u8(buffer, (uint8_t) (n | 128));
+ n >>= 7;
+ }
+ yp_buffer_append_u8(buffer, (uint8_t) n);
+ }
+}
+
+// Free the memory associated with the buffer.
+void
+yp_buffer_free(yp_buffer_t *buffer) {
+ free(buffer->value);
+}
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
new file mode 100644
index 0000000000..c388e8d5ce
--- /dev/null
+++ b/prism/util/pm_buffer.h
@@ -0,0 +1,51 @@
+#ifndef YARP_BUFFER_H
+#define YARP_BUFFER_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// A yp_buffer_t is a simple memory buffer that stores data in a contiguous
+// block of memory. It is used to store the serialized representation of a
+// YARP tree.
+typedef struct {
+ char *value;
+ size_t length;
+ size_t capacity;
+} yp_buffer_t;
+
+// Return the size of the yp_buffer_t struct.
+YP_EXPORTED_FUNCTION size_t yp_buffer_sizeof(void);
+
+// Initialize a yp_buffer_t with its default values.
+YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer);
+
+// Return the value of the buffer.
+YP_EXPORTED_FUNCTION char * yp_buffer_value(yp_buffer_t *buffer);
+
+// Return the length of the buffer.
+YP_EXPORTED_FUNCTION size_t yp_buffer_length(yp_buffer_t *buffer);
+
+// Append the given amount of space as zeroes to the buffer.
+void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
+
+// Append a string to the buffer.
+void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);
+
+// Append a list of bytes to the buffer.
+void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length);
+
+// Append a single byte to the buffer.
+void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);
+
+// Append a 32-bit unsigned integer to the buffer.
+void yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value);
+
+// Free the memory associated with the buffer.
+YP_EXPORTED_FUNCTION void yp_buffer_free(yp_buffer_t *buffer);
+
+#endif
diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c
new file mode 100644
index 0000000000..42c3896626
--- /dev/null
+++ b/prism/util/pm_char.c
@@ -0,0 +1,272 @@
+#include "yarp/util/yp_char.h"
+
+#define YP_CHAR_BIT_WHITESPACE (1 << 0)
+#define YP_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
+#define YP_CHAR_BIT_REGEXP_OPTION (1 << 2)
+
+#define YP_NUMBER_BIT_BINARY_DIGIT (1 << 0)
+#define YP_NUMBER_BIT_BINARY_NUMBER (1 << 1)
+#define YP_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
+#define YP_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
+#define YP_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
+#define YP_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
+#define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
+#define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
+
+static const uint8_t yp_byte_table[256] = {
+// 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+ 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5x
+ 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 4, 4, // 6x
+ 0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, // 7x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+static const uint8_t yp_number_table[256] = {
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
+ 0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
+ 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
+ 0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
+};
+
+static inline size_t
+yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+ if (length <= 0) return 0;
+
+ size_t size = 0;
+ size_t maximum = (size_t) length;
+
+ while (size < maximum && (yp_byte_table[string[size]] & kind)) size++;
+ return size;
+}
+
+// Returns the number of characters at the start of the string that are
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
+ return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
+}
+
+// Returns the number of characters at the start of the string that are
+// whitespace while also tracking the location of each newline. Disallows
+// searching past the given maximum number of characters.
+size_t
+yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
+ if (length <= 0) return 0;
+
+ size_t size = 0;
+ size_t maximum = (size_t) length;
+
+ while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
+ if (string[size] == '\n') {
+ yp_newline_list_append(newline_list, string + size);
+ }
+
+ size++;
+ }
+
+ return size;
+}
+
+// Returns the number of characters at the start of the string that are inline
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+ return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+// Returns the number of characters at the start of the string that are regexp
+// options. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
+ return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
+}
+
+static inline bool
+yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
+ return (yp_byte_table[b] & kind) != 0;
+}
+
+// Returns true if the given character is a whitespace character.
+bool
+yp_char_is_whitespace(const uint8_t b) {
+ return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
+}
+
+// Returns true if the given character is an inline whitespace character.
+bool
+yp_char_is_inline_whitespace(const uint8_t b) {
+ return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+// Scan through the string and return the number of characters at the start of
+// the string that match the given kind. Disallows searching past the given
+// maximum number of characters.
+static inline size_t
+yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+ if (length <= 0) return 0;
+
+ size_t size = 0;
+ size_t maximum = (size_t) length;
+
+ while (size < maximum && (yp_number_table[string[size]] & kind)) size++;
+ return size;
+}
+
+// Scan through the string and return the number of characters at the start of
+// the string that match the given kind. Disallows searching past the given
+// maximum number of characters.
+//
+// Additionally, report the location of the last invalid underscore character
+// found in the string through the out invalid parameter.
+static inline size_t
+yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
+ if (length <= 0) return 0;
+
+ size_t size = 0;
+ size_t maximum = (size_t) length;
+
+ bool underscore = false;
+ while (size < maximum && (yp_number_table[string[size]] & kind)) {
+ if (string[size] == '_') {
+ if (underscore) *invalid = string + size;
+ underscore = true;
+ } else {
+ underscore = false;
+ }
+
+ size++;
+ }
+
+ if (string[size - 1] == '_') *invalid = string + size - 1;
+ return size;
+}
+
+// Returns the number of characters at the start of the string that are binary
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
+}
+
+// Returns the number of characters at the start of the string that are octal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
+}
+
+// Returns the number of characters at the start of the string that are decimal
+// digits. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+// Returns the number of characters at the start of the string that are decimal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
+}
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits. Disallows searching past the given maximum number of
+// characters.
+size_t
+yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
+ return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits or underscores. Disallows searching past the given maximum
+// number of characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+ return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
+}
+
+static inline bool
+yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
+ return (yp_number_table[b] & kind) != 0;
+}
+
+// Returns true if the given character is a binary digit.
+bool
+yp_char_is_binary_digit(const uint8_t b) {
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
+}
+
+// Returns true if the given character is an octal digit.
+bool
+yp_char_is_octal_digit(const uint8_t b) {
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
+}
+
+// Returns true if the given character is a decimal digit.
+bool
+yp_char_is_decimal_digit(const uint8_t b) {
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+// Returns true if the given character is a hexadecimal digit.
+bool
+yp_char_is_hexadecimal_digit(const uint8_t b) {
+ return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+#undef YP_CHAR_BIT_WHITESPACE
+#undef YP_CHAR_BIT_INLINE_WHITESPACE
+#undef YP_CHAR_BIT_REGEXP_OPTION
+
+#undef YP_NUMBER_BIT_BINARY_DIGIT
+#undef YP_NUMBER_BIT_BINARY_NUMBER
+#undef YP_NUMBER_BIT_OCTAL_DIGIT
+#undef YP_NUMBER_BIT_OCTAL_NUMBER
+#undef YP_NUMBER_BIT_DECIMAL_DIGIT
+#undef YP_NUMBER_BIT_DECIMAL_NUMBER
+#undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER
+#undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
new file mode 100644
index 0000000000..f08d6a8c9d
--- /dev/null
+++ b/prism/util/pm_char.h
@@ -0,0 +1,91 @@
+#ifndef YP_CHAR_H
+#define YP_CHAR_H
+
+#include "yarp/defines.h"
+#include "yarp/util/yp_newline_list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+// Returns the number of characters at the start of the string that are
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are
+// whitespace while also tracking the location of each newline. Disallows
+// searching past the given maximum number of characters.
+size_t
+yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list);
+
+// Returns the number of characters at the start of the string that are inline
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are decimal
+// digits. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits. Disallows searching past the given maximum number of
+// characters.
+size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are octal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns the number of characters at the start of the string that are decimal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits or underscores. Disallows searching past the given maximum
+// number of characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns the number of characters at the start of the string that are regexp
+// options. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are binary
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns true if the given character is a whitespace character.
+bool yp_char_is_whitespace(const uint8_t b);
+
+// Returns true if the given character is an inline whitespace character.
+bool yp_char_is_inline_whitespace(const uint8_t b);
+
+// Returns true if the given character is a binary digit.
+bool yp_char_is_binary_digit(const uint8_t b);
+
+// Returns true if the given character is an octal digit.
+bool yp_char_is_octal_digit(const uint8_t b);
+
+// Returns true if the given character is a decimal digit.
+bool yp_char_is_decimal_digit(const uint8_t b);
+
+// Returns true if the given character is a hexadecimal digit.
+bool yp_char_is_hexadecimal_digit(const uint8_t b);
+
+#endif
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
new file mode 100644
index 0000000000..18376dce82
--- /dev/null
+++ b/prism/util/pm_constant_pool.c
@@ -0,0 +1,230 @@
+#include "yarp/util/yp_constant_pool.h"
+
+// Initialize a list of constant ids.
+void
+yp_constant_id_list_init(yp_constant_id_list_t *list) {
+ list->ids = NULL;
+ list->size = 0;
+ list->capacity = 0;
+}
+
+// Append a constant id to a list of constant ids. Returns false if any
+// potential reallocations fail.
+bool
+yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) {
+ if (list->size >= list->capacity) {
+ list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
+ list->ids = (yp_constant_id_t *) realloc(list->ids, sizeof(yp_constant_id_t) * list->capacity);
+ if (list->ids == NULL) return false;
+ }
+
+ list->ids[list->size++] = id;
+ return true;
+}
+
+// Checks if the current constant id list includes the given constant id.
+bool
+yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) {
+ for (size_t index = 0; index < list->size; index++) {
+ if (list->ids[index] == id) return true;
+ }
+ return false;
+}
+
+// Get the memory size of a list of constant ids.
+size_t
+yp_constant_id_list_memsize(yp_constant_id_list_t *list) {
+ return sizeof(yp_constant_id_list_t) + (list->capacity * sizeof(yp_constant_id_t));
+}
+
+// Free the memory associated with a list of constant ids.
+void
+yp_constant_id_list_free(yp_constant_id_list_t *list) {
+ if (list->ids != NULL) {
+ free(list->ids);
+ }
+}
+
+// A relatively simple hash function (djb2) that is used to hash strings. We are
+// optimizing here for simplicity and speed.
+static inline uint32_t
+yp_constant_pool_hash(const uint8_t *start, size_t length) {
+ // This is a prime number used as the initial value for the hash function.
+ uint32_t value = 5381;
+
+ for (size_t index = 0; index < length; index++) {
+ value = ((value << 5) + value) + start[index];
+ }
+
+ return value;
+}
+
+// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+static uint32_t
+next_power_of_two(uint32_t v) {
+ // Avoid underflow in subtraction on next line.
+ if (v == 0) {
+ // 1 is the nearest power of 2 to 0 (2^0)
+ return 1;
+ }
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v++;
+ return v;
+}
+
+#ifndef NDEBUG
+static bool
+is_power_of_two(uint32_t size) {
+ return (size & (size - 1)) == 0;
+}
+#endif
+
+// Resize a constant pool to a given capacity.
+static inline bool
+yp_constant_pool_resize(yp_constant_pool_t *pool) {
+ assert(is_power_of_two(pool->capacity));
+
+ uint32_t next_capacity = pool->capacity * 2;
+ if (next_capacity < pool->capacity) return false;
+
+ const uint32_t mask = next_capacity - 1;
+ yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t));
+ if (next_constants == NULL) return false;
+
+ // For each constant in the current constant pool, rehash the content, find
+ // the index in the next constant pool, and insert it.
+ for (uint32_t index = 0; index < pool->capacity; index++) {
+ yp_constant_t *constant = &pool->constants[index];
+
+ // If an id is set on this constant, then we know we have content here.
+ // In this case we need to insert it into the next constant pool.
+ if (constant->id != 0) {
+ uint32_t next_index = constant->hash & mask;
+
+ // This implements linear scanning to find the next available slot
+ // in case this index is already taken. We don't need to bother
+ // comparing the values since we know that the hash is unique.
+ while (next_constants[next_index].id != 0) {
+ next_index = (next_index + 1) & mask;
+ }
+
+ // Here we copy over the entire constant, which includes the id so
+ // that they are consistent between resizes.
+ next_constants[next_index] = *constant;
+ }
+ }
+
+ free(pool->constants);
+ pool->constants = next_constants;
+ pool->capacity = next_capacity;
+ return true;
+}
+
+// Initialize a new constant pool with a given capacity.
+bool
+yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity) {
+ const uint32_t maximum = (~((uint32_t) 0));
+ if (capacity >= ((maximum / 2) + 1)) return false;
+
+ capacity = next_power_of_two(capacity);
+ pool->constants = calloc(capacity, sizeof(yp_constant_t));
+ if (pool->constants == NULL) return false;
+
+ pool->size = 0;
+ pool->capacity = capacity;
+ return true;
+}
+
+// Insert a constant into a constant pool and return its index in the pool.
+static inline yp_constant_id_t
+yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
+ if (pool->size >= (pool->capacity / 4 * 3)) {
+ if (!yp_constant_pool_resize(pool)) return 0;
+ }
+
+ assert(is_power_of_two(pool->capacity));
+ const uint32_t mask = pool->capacity - 1;
+
+ uint32_t hash = yp_constant_pool_hash(start, length);
+ uint32_t index = hash & mask;
+ yp_constant_t *constant;
+
+ while (constant = &pool->constants[index], constant->id != 0) {
+ // If there is a collision, then we need to check if the content is the
+ // same as the content we are trying to insert. If it is, then we can
+ // return the id of the existing constant.
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+ // Since we have found a match, we need to check if this is
+ // attempting to insert a shared or an owned constant. We want to
+ // prefer shared constants since they don't require allocations.
+ if (owned) {
+ // If we're attempting to insert an owned constant and we have
+ // an existing constant, then either way we don't want the given
+ // memory. Either it's duplicated with the existing constant or
+ // it's not necessary because we have a shared version.
+ free((void *) start);
+ } else if (constant->owned) {
+ // If we're attempting to insert a shared constant and the
+ // existing constant is owned, then we can free the owned
+ // constant and replace it with the shared constant.
+ free((void *) constant->start);
+ constant->start = start;
+ constant->owned = false;
+ }
+
+ return constant->id;
+ }
+
+ index = (index + 1) & mask;
+ }
+
+ pool->size++;
+ assert(pool->size < ((uint32_t) (1 << 31)));
+
+ *constant = (yp_constant_t) {
+ .id = (unsigned int) (pool->size & 0x7FFFFFFF),
+ .owned = owned,
+ .start = start,
+ .length = length,
+ .hash = hash
+ };
+
+ return constant->id;
+}
+
+// Insert a constant into a constant pool. Returns the id of the constant, or 0
+// if any potential calls to resize fail.
+yp_constant_id_t
+yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
+ return yp_constant_pool_insert(pool, start, length, false);
+}
+
+// Insert a constant into a constant pool from memory that is now owned by the
+// constant pool. Returns the id of the constant, or 0 if any potential calls to
+// resize fail.
+yp_constant_id_t
+yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
+ return yp_constant_pool_insert(pool, start, length, true);
+}
+
+// Free the memory associated with a constant pool.
+void
+yp_constant_pool_free(yp_constant_pool_t *pool) {
+ // For each constant in the current constant pool, free the contents if the
+ // contents are owned.
+ for (uint32_t index = 0; index < pool->capacity; index++) {
+ yp_constant_t *constant = &pool->constants[index];
+
+ // If an id is set on this constant, then we know we have content here.
+ if (constant->id != 0 && constant->owned) {
+ free((void *) constant->start);
+ }
+ }
+
+ free(pool->constants);
+}
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
new file mode 100644
index 0000000000..c07cd0b7d8
--- /dev/null
+++ b/prism/util/pm_constant_pool.h
@@ -0,0 +1,74 @@
+// The constant pool is a data structure that stores a set of strings. Each
+// string is assigned a unique id, which can be used to compare strings for
+// equality. This comparison ends up being much faster than strcmp, since it
+// only requires a single integer comparison.
+
+#ifndef YP_CONSTANT_POOL_H
+#define YP_CONSTANT_POOL_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint32_t yp_constant_id_t;
+
+typedef struct {
+ yp_constant_id_t *ids;
+ size_t size;
+ size_t capacity;
+} yp_constant_id_list_t;
+
+// Initialize a list of constant ids.
+void yp_constant_id_list_init(yp_constant_id_list_t *list);
+
+// Append a constant id to a list of constant ids. Returns false if any
+// potential reallocations fail.
+bool yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id);
+
+// Checks if the current constant id list includes the given constant id.
+bool
+yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id);
+
+// Get the memory size of a list of constant ids.
+size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list);
+
+// Free the memory associated with a list of constant ids.
+void yp_constant_id_list_free(yp_constant_id_list_t *list);
+
+typedef struct {
+ unsigned int id: 31;
+ bool owned: 1;
+ const uint8_t *start;
+ size_t length;
+ uint32_t hash;
+} yp_constant_t;
+
+typedef struct {
+ yp_constant_t *constants;
+ uint32_t size;
+ uint32_t capacity;
+} yp_constant_pool_t;
+
+// Define an empty constant pool.
+#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
+
+// Initialize a new constant pool with a given capacity.
+bool yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity);
+
+// Insert a constant into a constant pool that is a slice of a source string.
+// Returns the id of the constant, or 0 if any potential calls to resize fail.
+yp_constant_id_t yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+// Insert a constant into a constant pool from memory that is now owned by the
+// constant pool. Returns the id of the constant, or 0 if any potential calls to
+// resize fail.
+yp_constant_id_t yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+// Free the memory associated with a constant pool.
+void yp_constant_pool_free(yp_constant_pool_t *pool);
+
+#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
new file mode 100644
index 0000000000..e03dd79d8d
--- /dev/null
+++ b/prism/util/pm_list.c
@@ -0,0 +1,41 @@
+#include "yarp/util/yp_list.h"
+
+// Returns true if the given list is empty.
+YP_EXPORTED_FUNCTION bool
+yp_list_empty_p(yp_list_t *list) {
+ return list->head == NULL;
+}
+
+// Returns the size of the list.
+YP_EXPORTED_FUNCTION size_t
+yp_list_size(yp_list_t *list) {
+ return list->size;
+}
+
+// Append a node to the given list.
+void
+yp_list_append(yp_list_t *list, yp_list_node_t *node) {
+ if (list->head == NULL) {
+ list->head = node;
+ } else {
+ list->tail->next = node;
+ }
+
+ list->tail = node;
+ list->size++;
+}
+
+// Deallocate the internal state of the given list.
+YP_EXPORTED_FUNCTION void
+yp_list_free(yp_list_t *list) {
+ yp_list_node_t *node = list->head;
+ yp_list_node_t *next;
+
+ while (node != NULL) {
+ next = node->next;
+ free(node);
+ node = next;
+ }
+
+ list->size = 0;
+}
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
new file mode 100644
index 0000000000..205a5a7eab
--- /dev/null
+++ b/prism/util/pm_list.h
@@ -0,0 +1,67 @@
+// This struct represents an abstract linked list that provides common
+// functionality. It is meant to be used any time a linked list is necessary to
+// store data.
+//
+// The linked list itself operates off a set of pointers. Because the pointers
+// are not necessarily sequential, they can be of any size. We use this fact to
+// allow the consumer of this linked list to extend the node struct to include
+// any data they want. This is done by using the yp_list_node_t as the first
+// member of the struct.
+//
+// For example, if we want to store a list of integers, we can do the following:
+//
+// typedef struct {
+// yp_list_node_t node;
+// int value;
+// } yp_int_node_t;
+//
+// yp_list_t list = YP_LIST_EMPTY;
+// yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
+// node->value = 5;
+//
+// yp_list_append(&list, &node->node);
+//
+// The yp_list_t struct is used to represent the overall linked list. It
+// contains a pointer to the head and tail of the list. This allows for easy
+// iteration and appending of new nodes.
+
+#ifndef YARP_LIST_H
+#define YARP_LIST_H
+
+#include "yarp/defines.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// This represents a node in the linked list.
+typedef struct yp_list_node {
+ struct yp_list_node *next;
+} yp_list_node_t;
+
+// This represents the overall linked list. It keeps a pointer to the head and
+// tail so that iteration is easy and pushing new nodes is easy.
+typedef struct {
+ size_t size;
+ yp_list_node_t *head;
+ yp_list_node_t *tail;
+} yp_list_t;
+
+// This represents an empty list. It's used to initialize a stack-allocated list
+// as opposed to a method call.
+#define YP_LIST_EMPTY ((yp_list_t) { .size = 0, .head = NULL, .tail = NULL })
+
+// Returns true if the given list is empty.
+YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
+
+// Returns the size of the list.
+YP_EXPORTED_FUNCTION size_t yp_list_size(yp_list_t *list);
+
+// Append a node to the given list.
+void yp_list_append(yp_list_t *list, yp_list_node_t *node);
+
+// Deallocate the internal state of the given list.
+YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list);
+
+#endif
diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c
new file mode 100644
index 0000000000..af9c14397e
--- /dev/null
+++ b/prism/util/pm_memchr.c
@@ -0,0 +1,31 @@
+#include "yarp/util/yp_memchr.h"
+
+#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+
+// We need to roll our own memchr to handle cases where the encoding changes and
+// we need to search for a character in a buffer that could be the trailing byte
+// of a multibyte character.
+void *
+yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
+ if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
+ const uint8_t *source = (const uint8_t *) memory;
+ size_t index = 0;
+
+ while (index < number) {
+ if (source[index] == character) {
+ return (void *) (source + index);
+ }
+
+ size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
+ if (width == 0) {
+ return NULL;
+ }
+
+ index += width;
+ }
+
+ return NULL;
+ } else {
+ return memchr(memory, character, number);
+ }
+}
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
new file mode 100644
index 0000000000..97f4b15a83
--- /dev/null
+++ b/prism/util/pm_memchr.h
@@ -0,0 +1,14 @@
+#ifndef YP_MEMCHR_H
+#define YP_MEMCHR_H
+
+#include "yarp/defines.h"
+#include "yarp/enc/yp_encoding.h"
+
+#include <stddef.h>
+
+// We need to roll our own memchr to handle cases where the encoding changes and
+// we need to search for a character in a buffer that could be the trailing byte
+// of a multibyte character.
+void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding);
+
+#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
new file mode 100644
index 0000000000..969b884560
--- /dev/null
+++ b/prism/util/pm_newline_list.c
@@ -0,0 +1,134 @@
+#include "yarp/util/yp_newline_list.h"
+
+// Initialize a new newline list with the given capacity. Returns true if the
+// allocation of the offsets succeeds, otherwise returns false.
+bool
+yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) {
+ list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
+ if (list->offsets == NULL) return false;
+
+ list->start = start;
+
+ // This is 1 instead of 0 because we want to include the first line of the
+ // file as having offset 0, which is set because of calloc.
+ list->size = 1;
+ list->capacity = capacity;
+
+ list->last_index = 0;
+ list->last_offset = 0;
+
+ return true;
+}
+
+// Append a new offset to the newline list. Returns true if the reallocation of
+// the offsets succeeds (if one was necessary), otherwise returns false.
+bool
+yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
+ if (list->size == list->capacity) {
+ size_t *original_offsets = list->offsets;
+
+ list->capacity = (list->capacity * 3) / 2;
+ list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
+ memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
+ free(original_offsets);
+ if (list->offsets == NULL) return false;
+ }
+
+ assert(*cursor == '\n');
+ assert(cursor >= list->start);
+ size_t newline_offset = (size_t) (cursor - list->start + 1);
+
+ assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
+ list->offsets[list->size++] = newline_offset;
+
+ return true;
+}
+
+// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+bool
+yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) {
+ if (*cursor != '\n') {
+ return true;
+ }
+ return yp_newline_list_append(list, cursor);
+}
+
+// Returns the line and column of the given offset, assuming we don't have any
+// information about the previous index that we found.
+static yp_line_column_t
+yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
+ size_t left = 0;
+ size_t right = list->size - 1;
+
+ while (left <= right) {
+ size_t mid = left + (right - left) / 2;
+
+ if (list->offsets[mid] == offset) {
+ return ((yp_line_column_t) { mid, 0 });
+ }
+
+ if (list->offsets[mid] < offset) {
+ left = mid + 1;
+ } else {
+ right = mid - 1;
+ }
+ }
+
+ return ((yp_line_column_t) { left - 1, offset - list->offsets[left - 1] });
+}
+
+// Returns the line and column of the given offset, assuming we know the last
+// index that we found.
+static yp_line_column_t
+yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
+ if (offset > list->last_offset) {
+ size_t index = list->last_index;
+ while (index < list->size && list->offsets[index] < offset) {
+ index++;
+ }
+
+ if (index == list->size) {
+ return ((yp_line_column_t) { index - 1, offset - list->offsets[index - 1] });
+ }
+
+ return ((yp_line_column_t) { index, 0 });
+ } else {
+ size_t index = list->last_index;
+ while (index > 0 && list->offsets[index] > offset) {
+ index--;
+ }
+
+ if (index == 0) {
+ return ((yp_line_column_t) { 0, offset });
+ }
+
+ return ((yp_line_column_t) { index, offset - list->offsets[index - 1] });
+ }
+}
+
+// Returns the line and column of the given offset. If the offset is not in the
+// list, the line and column of the closest offset less than the given offset
+// are returned.
+yp_line_column_t
+yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
+ assert(cursor >= list->start);
+ size_t offset = (size_t) (cursor - list->start);
+ yp_line_column_t result;
+
+ if (list->last_offset == 0) {
+ result = yp_newline_list_line_column_search(list, offset);
+ } else {
+ result = yp_newline_list_line_column_scan(list, offset);
+ }
+
+ list->last_index = result.line;
+ list->last_offset = offset;
+
+ return result;
+}
+
+// Free the internal memory allocated for the newline list.
+void
+yp_newline_list_free(yp_newline_list_t *list) {
+ free(list->offsets);
+}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
new file mode 100644
index 0000000000..9231305008
--- /dev/null
+++ b/prism/util/pm_newline_list.h
@@ -0,0 +1,61 @@
+// When compiling the syntax tree, it's necessary to know the line and column
+// of many nodes. This is necessary to support things like error messages,
+// tracepoints, etc.
+//
+// It's possible that we could store the start line, start column, end line, and
+// end column on every node in addition to the offsets that we already store,
+// but that would be quite a lot of memory overhead.
+
+#ifndef YP_NEWLINE_LIST_H
+#define YP_NEWLINE_LIST_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+// A list of offsets of newlines in a string. The offsets are assumed to be
+// sorted/inserted in ascending order.
+typedef struct {
+ const uint8_t *start;
+
+ size_t *offsets;
+ size_t size;
+ size_t capacity;
+
+ size_t last_offset;
+ size_t last_index;
+} yp_newline_list_t;
+
+// A line and column in a string.
+typedef struct {
+ size_t line;
+ size_t column;
+} yp_line_column_t;
+
+#define YP_NEWLINE_LIST_EMPTY ((yp_newline_list_t) { \
+ .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
+})
+
+// Initialize a new newline list with the given capacity. Returns true if the
+// allocation of the offsets succeeds, otherwise returns false.
+bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity);
+
+// Append a new offset to the newline list. Returns true if the reallocation of
+// the offsets succeeds (if one was necessary), otherwise returns false.
+bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor);
+
+// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor);
+
+// Returns the line and column of the given offset. If the offset is not in the
+// list, the line and column of the closest offset less than the given offset
+// are returned.
+yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor);
+
+// Free the internal memory allocated for the newline list.
+void yp_newline_list_free(yp_newline_list_t *list);
+
+#endif
diff --git a/prism/util/pm_state_stack.c b/prism/util/pm_state_stack.c
new file mode 100644
index 0000000000..7ff95bd611
--- /dev/null
+++ b/prism/util/pm_state_stack.c
@@ -0,0 +1,19 @@
+#include "yarp/util/yp_state_stack.h"
+
+// Pushes a value onto the stack.
+void
+yp_state_stack_push(yp_state_stack_t *stack, bool value) {
+ *stack = (*stack << 1) | (value & 1);
+}
+
+// Pops a value off the stack.
+void
+yp_state_stack_pop(yp_state_stack_t *stack) {
+ *stack >>= 1;
+}
+
+// Returns the value at the top of the stack.
+bool
+yp_state_stack_p(yp_state_stack_t *stack) {
+ return *stack & 1;
+}
diff --git a/prism/util/pm_state_stack.h b/prism/util/pm_state_stack.h
new file mode 100644
index 0000000000..69d8d7d54b
--- /dev/null
+++ b/prism/util/pm_state_stack.h
@@ -0,0 +1,24 @@
+#ifndef YP_STATE_STACK_H
+#define YP_STATE_STACK_H
+
+#include "yarp/defines.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// A struct that represents a stack of bools.
+typedef uint32_t yp_state_stack_t;
+
+// Initializes the state stack to an empty stack.
+#define YP_STATE_STACK_EMPTY ((yp_state_stack_t) 0)
+
+// Pushes a value onto the stack.
+void yp_state_stack_push(yp_state_stack_t *stack, bool value);
+
+// Pops a value off the stack.
+void yp_state_stack_pop(yp_state_stack_t *stack);
+
+// Returns the value at the top of the stack.
+bool yp_state_stack_p(yp_state_stack_t *stack);
+
+#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
new file mode 100644
index 0000000000..9ee25155a3
--- /dev/null
+++ b/prism/util/pm_string.c
@@ -0,0 +1,200 @@
+#include "yarp/util/yp_string.h"
+
+// The following headers are necessary to read files using demand paging.
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+// Initialize a shared string that is based on initial input.
+void
+yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) {
+ assert(start <= end);
+
+ *string = (yp_string_t) {
+ .type = YP_STRING_SHARED,
+ .source = start,
+ .length = (size_t) (end - start)
+ };
+}
+
+// Initialize an owned string that is responsible for freeing allocated memory.
+void
+yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
+ *string = (yp_string_t) {
+ .type = YP_STRING_OWNED,
+ .source = source,
+ .length = length
+ };
+}
+
+// Initialize a constant string that doesn't own its memory source.
+void
+yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
+ *string = (yp_string_t) {
+ .type = YP_STRING_CONSTANT,
+ .source = (const uint8_t *) source,
+ .length = length
+ };
+}
+
+static void
+yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) {
+ *string = (yp_string_t) {
+ .type = YP_STRING_MAPPED,
+ .source = source,
+ .length = length
+ };
+}
+
+// Returns the memory size associated with the string.
+size_t
+yp_string_memsize(const yp_string_t *string) {
+ size_t size = sizeof(yp_string_t);
+ if (string->type == YP_STRING_OWNED) {
+ size += string->length;
+ }
+ return size;
+}
+
+// Ensure the string is owned. If it is not, then reinitialize it as owned and
+// copy over the previous source.
+void
+yp_string_ensure_owned(yp_string_t *string) {
+ if (string->type == YP_STRING_OWNED) return;
+
+ size_t length = yp_string_length(string);
+ const uint8_t *source = yp_string_source(string);
+
+ uint8_t *memory = malloc(length);
+ if (!memory) return;
+
+ yp_string_owned_init(string, memory, length);
+ memcpy((void *) string->source, source, length);
+}
+
+// Returns the length associated with the string.
+YP_EXPORTED_FUNCTION size_t
+yp_string_length(const yp_string_t *string) {
+ return string->length;
+}
+
+// Returns the start pointer associated with the string.
+YP_EXPORTED_FUNCTION const uint8_t *
+yp_string_source(const yp_string_t *string) {
+ return string->source;
+}
+
+// Free the associated memory of the given string.
+YP_EXPORTED_FUNCTION void
+yp_string_free(yp_string_t *string) {
+ void *memory = (void *) string->source;
+
+ if (string->type == YP_STRING_OWNED) {
+ free(memory);
+ } else if (string->type == YP_STRING_MAPPED && string->length) {
+#if defined(_WIN32)
+ UnmapViewOfFile(memory);
+#else
+ munmap(memory, string->length);
+#endif
+ }
+}
+
+bool
+yp_string_mapped_init(yp_string_t *string, const char *filepath) {
+#ifdef _WIN32
+ // Open the file for reading.
+ HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+ if (file == INVALID_HANDLE_VALUE) {
+ perror("CreateFile failed");
+ return false;
+ }
+
+ // Get the file size.
+ DWORD file_size = GetFileSize(file, NULL);
+ if (file_size == INVALID_FILE_SIZE) {
+ CloseHandle(file);
+ perror("GetFileSize failed");
+ return false;
+ }
+
+ // If the file is empty, then we don't need to do anything else, we'll set
+ // the source to a constant empty string and return.
+ if (file_size == 0) {
+ CloseHandle(file);
+ uint8_t empty[] = "";
+ yp_string_mapped_init_internal(string, empty, 0);
+ return true;
+ }
+
+ // Create a mapping of the file.
+ HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (mapping == NULL) {
+ CloseHandle(file);
+ perror("CreateFileMapping failed");
+ return false;
+ }
+
+ // Map the file into memory.
+ uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+ CloseHandle(mapping);
+ CloseHandle(file);
+
+ if (source == NULL) {
+ perror("MapViewOfFile failed");
+ return false;
+ }
+
+ yp_string_mapped_init_internal(string, source, (size_t) file_size);
+ return true;
+#else
+ // Open the file for reading
+ int fd = open(filepath, O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ return false;
+ }
+
+ // Stat the file to get the file size
+ struct stat sb;
+ if (fstat(fd, &sb) == -1) {
+ close(fd);
+ perror("fstat");
+ return false;
+ }
+
+ // mmap the file descriptor to virtually get the contents
+ size_t size = (size_t) sb.st_size;
+ uint8_t *source = NULL;
+
+ if (size == 0) {
+ close(fd);
+ uint8_t empty[] = "";
+ yp_string_mapped_init_internal(string, empty, 0);
+ return true;
+ }
+
+ source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (source == MAP_FAILED) {
+ perror("Map failed");
+ return false;
+ }
+
+ close(fd);
+ yp_string_mapped_init_internal(string, source, size);
+ return true;
+#endif
+}
+
+// Returns the size of the yp_string_t struct. This is necessary to allocate the
+// correct amount of memory in the FFI backend.
+YP_EXPORTED_FUNCTION size_t
+yp_string_sizeof(void) {
+ return sizeof(yp_string_t);
+}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
new file mode 100644
index 0000000000..bcdf8b66d9
--- /dev/null
+++ b/prism/util/pm_string.h
@@ -0,0 +1,61 @@
+#ifndef YARP_STRING_H
+#define YARP_STRING_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// This struct represents a string value.
+typedef struct {
+ enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
+ const uint8_t *source;
+ size_t length;
+} yp_string_t;
+
+#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
+
+// Initialize a shared string that is based on initial input.
+void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end);
+
+// Initialize an owned string that is responsible for freeing allocated memory.
+void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length);
+
+// Initialize a constant string that doesn't own its memory source.
+void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
+
+// Read the file indicated by the filepath parameter into source and load its
+// contents and size into the given yp_string_t.
+// The given yp_string_t should be freed using yp_string_free() when it is no longer used.
+//
+// We want to use demand paging as much as possible in order to avoid having to
+// read the entire file into memory (which could be detrimental to performance
+// for large files). This means that if we're on windows we'll use
+// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+// `mmap`, and on other POSIX systems we'll use `read`.
+YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
+
+// Returns the memory size associated with the string.
+size_t yp_string_memsize(const yp_string_t *string);
+
+// Ensure the string is owned. If it is not, then reinitialize it as owned and
+// copy over the previous source.
+void yp_string_ensure_owned(yp_string_t *string);
+
+// Returns the length associated with the string.
+YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
+
+// Returns the start pointer associated with the string.
+YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string);
+
+// Free the associated memory of the given string.
+YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
+
+// Returns the size of the yp_string_t struct. This is necessary to allocate the
+// correct amount of memory in the FFI backend.
+YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
+
+#endif // YARP_STRING_H
diff --git a/prism/util/pm_string_list.c b/prism/util/pm_string_list.c
new file mode 100644
index 0000000000..3b2b4381c5
--- /dev/null
+++ b/prism/util/pm_string_list.c
@@ -0,0 +1,29 @@
+#include "yarp/util/yp_string_list.h"
+
+// Initialize a yp_string_list_t with its default values.
+void
+yp_string_list_init(yp_string_list_t *string_list) {
+ string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
+ string_list->length = 0;
+ string_list->capacity = 1;
+}
+
+// Append a yp_string_t to the given string list.
+void
+yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
+ if (string_list->length + 1 > string_list->capacity) {
+ yp_string_t *original_string = string_list->strings;
+ string_list->capacity *= 2;
+ string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t));
+ memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t));
+ free(original_string);
+ }
+
+ string_list->strings[string_list->length++] = *string;
+}
+
+// Free the memory associated with the string list.
+void
+yp_string_list_free(yp_string_list_t *string_list) {
+ free(string_list->strings);
+}
diff --git a/prism/util/pm_string_list.h b/prism/util/pm_string_list.h
new file mode 100644
index 0000000000..0009a27a60
--- /dev/null
+++ b/prism/util/pm_string_list.h
@@ -0,0 +1,25 @@
+#ifndef YARP_STRING_LIST_H
+#define YARP_STRING_LIST_H
+
+#include "yarp/defines.h"
+#include "yarp/util/yp_string.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+typedef struct {
+ yp_string_t *strings;
+ size_t length;
+ size_t capacity;
+} yp_string_list_t;
+
+// Initialize a yp_string_list_t with its default values.
+YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
+
+// Append a yp_string_t to the given string list.
+void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string);
+
+// Free the memory associated with the string list.
+YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c
new file mode 100644
index 0000000000..1cbaf904f4
--- /dev/null
+++ b/prism/util/pm_strncasecmp.c
@@ -0,0 +1,17 @@
+#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
+
+int
+yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
+ size_t offset = 0;
+ int difference = 0;
+
+ while (offset < length && string1[offset] != '\0') {
+ if (string2[offset] == '\0') return string1[offset];
+ if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
+ offset++;
+ }
+
+ return difference;
+}
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
new file mode 100644
index 0000000000..7c0015d289
--- /dev/null
+++ b/prism/util/pm_strpbrk.c
@@ -0,0 +1,66 @@
+#include "yarp/util/yp_strpbrk.h"
+
+// This is the slow path that does care about the encoding.
+static inline const uint8_t *
+yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
+ size_t index = 0;
+
+ while (index < maximum) {
+ if (strchr((const char *) charset, source[index]) != NULL) {
+ return source + index;
+ }
+
+ size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
+ if (width == 0) {
+ return NULL;
+ }
+
+ index += width;
+ }
+
+ return NULL;
+}
+
+// This is the fast path that does not care about the encoding.
+static inline const uint8_t *
+yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
+ size_t index = 0;
+
+ while (index < maximum) {
+ if (strchr((const char *) charset, source[index]) != NULL) {
+ return source + index;
+ }
+
+ index++;
+ }
+
+ return NULL;
+}
+
+// Here we have rolled our own version of strpbrk. The standard library strpbrk
+// has undefined behavior when the source string is not null-terminated. We want
+// to support strings that are not null-terminated because yp_parse does not
+// have the contract that the string is null-terminated. (This is desirable
+// because it means the extension can call yp_parse with the result of a call to
+// mmap).
+//
+// The standard library strpbrk also does not support passing a maximum length
+// to search. We want to support this for the reason mentioned above, but we
+// also don't want it to stop on null bytes. Ruby actually allows null bytes
+// within strings, comments, regular expressions, etc. So we need to be able to
+// skip past them.
+//
+// Finally, we want to support encodings wherein the charset could contain
+// characters that are trailing bytes of multi-byte characters. For example, in
+// Shift-JIS, the backslash character can be a trailing byte. In that case we
+// need to take a slower path and iterate one multi-byte character at a time.
+const uint8_t *
+yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
+ if (length <= 0) {
+ return NULL;
+ } else if (parser->encoding_changed && parser->encoding.multibyte) {
+ return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
+ } else {
+ return yp_strpbrk_single_byte(source, charset, (size_t) length);
+ }
+}
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
new file mode 100644
index 0000000000..d0bdd5bec0
--- /dev/null
+++ b/prism/util/pm_strpbrk.h
@@ -0,0 +1,29 @@
+#ifndef YP_STRPBRK_H
+#define YP_STRPBRK_H
+
+#include "yarp/defines.h"
+#include "yarp/parser.h"
+
+#include <stddef.h>
+#include <string.h>
+
+// Here we have rolled our own version of strpbrk. The standard library strpbrk
+// has undefined behavior when the source string is not null-terminated. We want
+// to support strings that are not null-terminated because yp_parse does not
+// have the contract that the string is null-terminated. (This is desirable
+// because it means the extension can call yp_parse with the result of a call to
+// mmap).
+//
+// The standard library strpbrk also does not support passing a maximum length
+// to search. We want to support this for the reason mentioned above, but we
+// also don't want it to stop on null bytes. Ruby actually allows null bytes
+// within strings, comments, regular expressions, etc. So we need to be able to
+// skip past them.
+//
+// Finally, we want to support encodings wherein the charset could contain
+// characters that are trailing bytes of multi-byte characters. For example, in
+// Shift-JIS, the backslash character can be a trailing byte. In that case we
+// need to take a slower path and iterate one multi-byte character at a time.
+const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
+
+#endif