Rename YARP filepaths to prism filepaths

author: Kevin Newton <[email protected]> 2023-09-27 12:22:36 -0400
committer: Kevin Newton <[email protected]> 2023-09-27 13:57:38 -0400
commit: 8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch)
tree: 46ef2bd5c51d5b7f923eda6a60edefc7a08200db /prism/util
parent: 7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff)
21 files changed, 1637 insertions, 0 deletions
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
new file mode 100644
index 0000000000..15cdef74f8
--- /dev/null
+++ b/prism/util/pm_buffer.c
@@ -0,0 +1,101 @@
+#include "yarp/util/yp_buffer.h"
+
+#define YP_BUFFER_INITIAL_SIZE 1024
+
+// Return the size of the yp_buffer_t struct.
+size_t
+yp_buffer_sizeof(void) {
+    return sizeof(yp_buffer_t);
+}
+
+// Initialize a yp_buffer_t with its default values.
+bool
+yp_buffer_init(yp_buffer_t *buffer) {
+    buffer->length = 0;
+    buffer->capacity = YP_BUFFER_INITIAL_SIZE;
+
+    buffer->value = (char *) malloc(YP_BUFFER_INITIAL_SIZE);
+    return buffer->value != NULL;
+}
+
+// Return the value of the buffer.
+char *
+yp_buffer_value(yp_buffer_t *buffer) {
+    return buffer->value;
+}
+
+// Return the length of the buffer.
+size_t
+yp_buffer_length(yp_buffer_t *buffer) {
+    return buffer->length;
+}
+
+// Append the given amount of space to the buffer.
+static inline void
+yp_buffer_append_length(yp_buffer_t *buffer, size_t length) {
+    size_t next_length = buffer->length + length;
+
+    if (next_length > buffer->capacity) {
+        do {
+            buffer->capacity *= 2;
+        } while (next_length > buffer->capacity);
+
+        buffer->value = realloc(buffer->value, buffer->capacity);
+    }
+
+    buffer->length = next_length;
+}
+
+// Append a generic pointer to memory to the buffer.
+static inline void
+yp_buffer_append(yp_buffer_t *buffer, const void *source, size_t length) {
+    yp_buffer_append_length(buffer, length);
+    memcpy(buffer->value + (buffer->length - length), source, length);
+}
+
+// Append the given amount of space as zeroes to the buffer.
+void
+yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length) {
+    yp_buffer_append_length(buffer, length);
+    memset(buffer->value + (buffer->length - length), 0, length);
+}
+
+// Append a string to the buffer.
+void
+yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length) {
+    yp_buffer_append(buffer, value, length);
+}
+
+// Append a list of bytes to the buffer.
+void
+yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length) {
+    yp_buffer_append(buffer, (const char *) value, length);
+}
+
+// Append a single byte to the buffer.
+void
+yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value) {
+    const void *source = &value;
+    yp_buffer_append(buffer, source, sizeof(uint8_t));
+}
+
+// Append a 32-bit unsigned integer to the buffer.
+void
+yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value) {
+    if (value < 128) {
+        yp_buffer_append_u8(buffer, (uint8_t) value);
+    } else {
+        uint32_t n = value;
+        while (n >= 128) {
+            yp_buffer_append_u8(buffer, (uint8_t) (n | 128));
+            n >>= 7;
+        }
+        yp_buffer_append_u8(buffer, (uint8_t) n);
+    }
+}
+
+// Free the memory associated with the buffer.
+void
+yp_buffer_free(yp_buffer_t *buffer) {
+    free(buffer->value);
+}
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
new file mode 100644
index 0000000000..c388e8d5ce
--- /dev/null
+++ b/prism/util/pm_buffer.h
@@ -0,0 +1,51 @@
+#ifndef YARP_BUFFER_H
+#define YARP_BUFFER_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// A yp_buffer_t is a simple memory buffer that stores data in a contiguous
+// block of memory. It is used to store the serialized representation of a
+// YARP tree.
+typedef struct {
+    char *value;
+    size_t length;
+    size_t capacity;
+} yp_buffer_t;
+
+// Return the size of the yp_buffer_t struct.
+YP_EXPORTED_FUNCTION size_t yp_buffer_sizeof(void);
+
+// Initialize a yp_buffer_t with its default values.
+YP_EXPORTED_FUNCTION bool yp_buffer_init(yp_buffer_t *buffer);
+
+// Return the value of the buffer.
+YP_EXPORTED_FUNCTION char * yp_buffer_value(yp_buffer_t *buffer);
+
+// Return the length of the buffer.
+YP_EXPORTED_FUNCTION size_t yp_buffer_length(yp_buffer_t *buffer);
+
+// Append the given amount of space as zeroes to the buffer.
+void yp_buffer_append_zeroes(yp_buffer_t *buffer, size_t length);
+
+// Append a string to the buffer.
+void yp_buffer_append_str(yp_buffer_t *buffer, const char *value, size_t length);
+
+// Append a list of bytes to the buffer.
+void yp_buffer_append_bytes(yp_buffer_t *buffer, const uint8_t *value, size_t length);
+
+// Append a single byte to the buffer.
+void yp_buffer_append_u8(yp_buffer_t *buffer, uint8_t value);
+
+// Append a 32-bit unsigned integer to the buffer.
+void yp_buffer_append_u32(yp_buffer_t *buffer, uint32_t value);
+
+// Free the memory associated with the buffer.
+YP_EXPORTED_FUNCTION void yp_buffer_free(yp_buffer_t *buffer);
+
+#endif
diff --git a/prism/util/pm_char.c b/prism/util/pm_char.c
new file mode 100644
index 0000000000..42c3896626
--- /dev/null
+++ b/prism/util/pm_char.c
@@ -0,0 +1,272 @@
+#include "yarp/util/yp_char.h"
+
+#define YP_CHAR_BIT_WHITESPACE (1 << 0)
+#define YP_CHAR_BIT_INLINE_WHITESPACE (1 << 1)
+#define YP_CHAR_BIT_REGEXP_OPTION (1 << 2)
+
+#define YP_NUMBER_BIT_BINARY_DIGIT (1 << 0)
+#define YP_NUMBER_BIT_BINARY_NUMBER (1 << 1)
+#define YP_NUMBER_BIT_OCTAL_DIGIT (1 << 2)
+#define YP_NUMBER_BIT_OCTAL_NUMBER (1 << 3)
+#define YP_NUMBER_BIT_DECIMAL_DIGIT (1 << 4)
+#define YP_NUMBER_BIT_DECIMAL_NUMBER (1 << 5)
+#define YP_NUMBER_BIT_HEXADECIMAL_DIGIT (1 << 6)
+#define YP_NUMBER_BIT_HEXADECIMAL_NUMBER (1 << 7)
+
+static const uint8_t yp_byte_table[256] = {
+//  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, 0, 0, // 0x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
+    3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5x
+    0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 4, 4, // 6x
+    0, 0, 0, 4, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, // 7x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ax
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Bx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Cx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Dx
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Ex
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // Fx
+};
+
+static const uint8_t yp_number_table[256] = {
+    // 0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 1x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 2x
+    0xff, 0xff, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xf0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 3x
+    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 4x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, // 5x
+    0x00, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 6x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 7x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 8x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9x
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ax
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Bx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Cx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Dx
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Ex
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Fx
+};
+
+static inline size_t
+yp_strspn_char_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    while (size < maximum && (yp_byte_table[string[size]] & kind)) size++;
+    return size;
+}
+
+// Returns the number of characters at the start of the string that are
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length) {
+    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_WHITESPACE);
+}
+
+// Returns the number of characters at the start of the string that are
+// whitespace while also tracking the location of each newline. Disallows
+// searching past the given maximum number of characters.
+size_t
+yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    while (size < maximum && (yp_byte_table[string[size]] & YP_CHAR_BIT_WHITESPACE)) {
+        if (string[size] == '\n') {
+            yp_newline_list_append(newline_list, string + size);
+        }
+
+        size++;
+    }
+
+    return size;
+}
+
+// Returns the number of characters at the start of the string that are inline
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length) {
+    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+// Returns the number of characters at the start of the string that are regexp
+// options. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length) {
+    return yp_strspn_char_kind(string, length, YP_CHAR_BIT_REGEXP_OPTION);
+}
+
+static inline bool
+yp_char_is_char_kind(const uint8_t b, uint8_t kind) {
+    return (yp_byte_table[b] & kind) != 0;
+}
+
+// Returns true if the given character is a whitespace character.
+bool
+yp_char_is_whitespace(const uint8_t b) {
+    return yp_char_is_char_kind(b, YP_CHAR_BIT_WHITESPACE);
+}
+
+// Returns true if the given character is an inline whitespace character.
+bool
+yp_char_is_inline_whitespace(const uint8_t b) {
+    return yp_char_is_char_kind(b, YP_CHAR_BIT_INLINE_WHITESPACE);
+}
+
+// Scan through the string and return the number of characters at the start of
+// the string that match the given kind. Disallows searching past the given
+// maximum number of characters.
+static inline size_t
+yp_strspn_number_kind(const uint8_t *string, ptrdiff_t length, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    while (size < maximum && (yp_number_table[string[size]] & kind)) size++;
+    return size;
+}
+
+// Scan through the string and return the number of characters at the start of
+// the string that match the given kind. Disallows searching past the given
+// maximum number of characters.
+//
+// Additionally, report the location of the last invalid underscore character
+// found in the string through the out invalid parameter.
+static inline size_t
+yp_strspn_number_kind_underscores(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid, uint8_t kind) {
+    if (length <= 0) return 0;
+
+    size_t size = 0;
+    size_t maximum = (size_t) length;
+
+    bool underscore = false;
+    while (size < maximum && (yp_number_table[string[size]] & kind)) {
+        if (string[size] == '_') {
+            if (underscore) *invalid = string + size;
+            underscore = true;
+        } else {
+            underscore = false;
+        }
+
+        size++;
+    }
+
+    if (string[size - 1] == '_') *invalid = string + size - 1;
+    return size;
+}
+
+// Returns the number of characters at the start of the string that are binary
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_BINARY_NUMBER);
+}
+
+// Returns the number of characters at the start of the string that are octal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_OCTAL_NUMBER);
+}
+
+// Returns the number of characters at the start of the string that are decimal
+// digits. Disallows searching past the given maximum number of characters.
+size_t
+yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length) {
+    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+// Returns the number of characters at the start of the string that are decimal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_DECIMAL_NUMBER);
+}
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits. Disallows searching past the given maximum number of
+// characters.
+size_t
+yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length) {
+    return yp_strspn_number_kind(string, length, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits or underscores. Disallows searching past the given maximum
+// number of characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t
+yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid) {
+    return yp_strspn_number_kind_underscores(string, length, invalid, YP_NUMBER_BIT_HEXADECIMAL_NUMBER);
+}
+
+static inline bool
+yp_char_is_number_kind(const uint8_t b, uint8_t kind) {
+    return (yp_number_table[b] & kind) != 0;
+}
+
+// Returns true if the given character is a binary digit.
+bool
+yp_char_is_binary_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_BINARY_DIGIT);
+}
+
+// Returns true if the given character is an octal digit.
+bool
+yp_char_is_octal_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_OCTAL_DIGIT);
+}
+
+// Returns true if the given character is a decimal digit.
+bool
+yp_char_is_decimal_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_DECIMAL_DIGIT);
+}
+
+// Returns true if the given character is a hexadecimal digit.
+bool
+yp_char_is_hexadecimal_digit(const uint8_t b) {
+    return yp_char_is_number_kind(b, YP_NUMBER_BIT_HEXADECIMAL_DIGIT);
+}
+
+#undef YP_CHAR_BIT_WHITESPACE
+#undef YP_CHAR_BIT_INLINE_WHITESPACE
+#undef YP_CHAR_BIT_REGEXP_OPTION
+
+#undef YP_NUMBER_BIT_BINARY_DIGIT
+#undef YP_NUMBER_BIT_BINARY_NUMBER
+#undef YP_NUMBER_BIT_OCTAL_DIGIT
+#undef YP_NUMBER_BIT_OCTAL_NUMBER
+#undef YP_NUMBER_BIT_DECIMAL_DIGIT
+#undef YP_NUMBER_BIT_DECIMAL_NUMBER
+#undef YP_NUMBER_BIT_HEXADECIMAL_NUMBER
+#undef YP_NUMBER_BIT_HEXADECIMAL_DIGIT
diff --git a/prism/util/pm_char.h b/prism/util/pm_char.h
new file mode 100644
index 0000000000..f08d6a8c9d
--- /dev/null
+++ b/prism/util/pm_char.h
@@ -0,0 +1,91 @@
+#ifndef YP_CHAR_H
+#define YP_CHAR_H
+
+#include "yarp/defines.h"
+#include "yarp/util/yp_newline_list.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+
+// Returns the number of characters at the start of the string that are
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are
+// whitespace while also tracking the location of each newline. Disallows
+// searching past the given maximum number of characters.
+size_t
+yp_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, yp_newline_list_t *newline_list);
+
+// Returns the number of characters at the start of the string that are inline
+// whitespace. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are decimal
+// digits. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits. Disallows searching past the given maximum number of
+// characters.
+size_t yp_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are octal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns the number of characters at the start of the string that are decimal
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns the number of characters at the start of the string that are
+// hexadecimal digits or underscores. Disallows searching past the given maximum
+// number of characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns the number of characters at the start of the string that are regexp
+// options. Disallows searching past the given maximum number of characters.
+size_t yp_strspn_regexp_option(const uint8_t *string, ptrdiff_t length);
+
+// Returns the number of characters at the start of the string that are binary
+// digits or underscores. Disallows searching past the given maximum number of
+// characters.
+//
+// If multiple underscores are found in a row or if an underscore is
+// found at the end of the number, then the invalid pointer is set to the index
+// of the first invalid underscore.
+size_t yp_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid);
+
+// Returns true if the given character is a whitespace character.
+bool yp_char_is_whitespace(const uint8_t b);
+
+// Returns true if the given character is an inline whitespace character.
+bool yp_char_is_inline_whitespace(const uint8_t b);
+
+// Returns true if the given character is a binary digit.
+bool yp_char_is_binary_digit(const uint8_t b);
+
+// Returns true if the given character is an octal digit.
+bool yp_char_is_octal_digit(const uint8_t b);
+
+// Returns true if the given character is a decimal digit.
+bool yp_char_is_decimal_digit(const uint8_t b);
+
+// Returns true if the given character is a hexadecimal digit.
+bool yp_char_is_hexadecimal_digit(const uint8_t b);
+
+#endif
diff --git a/prism/util/pm_constant_pool.c b/prism/util/pm_constant_pool.c
new file mode 100644
index 0000000000..18376dce82
--- /dev/null
+++ b/prism/util/pm_constant_pool.c
@@ -0,0 +1,230 @@
+#include "yarp/util/yp_constant_pool.h"
+
+// Initialize a list of constant ids.
+void
+yp_constant_id_list_init(yp_constant_id_list_t *list) {
+    list->ids = NULL;
+    list->size = 0;
+    list->capacity = 0;
+}
+
+// Append a constant id to a list of constant ids. Returns false if any
+// potential reallocations fail.
+bool
+yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id) {
+    if (list->size >= list->capacity) {
+        list->capacity = list->capacity == 0 ? 8 : list->capacity * 2;
+        list->ids = (yp_constant_id_t *) realloc(list->ids, sizeof(yp_constant_id_t) * list->capacity);
+        if (list->ids == NULL) return false;
+    }
+
+    list->ids[list->size++] = id;
+    return true;
+}
+
+// Checks if the current constant id list includes the given constant id.
+bool
+yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id) {
+    for (size_t index = 0; index < list->size; index++) {
+        if (list->ids[index] == id) return true;
+    }
+    return false;
+}
+
+// Get the memory size of a list of constant ids.
+size_t
+yp_constant_id_list_memsize(yp_constant_id_list_t *list) {
+    return sizeof(yp_constant_id_list_t) + (list->capacity * sizeof(yp_constant_id_t));
+}
+
+// Free the memory associated with a list of constant ids.
+void
+yp_constant_id_list_free(yp_constant_id_list_t *list) {
+    if (list->ids != NULL) {
+        free(list->ids);
+    }
+}
+
+// A relatively simple hash function (djb2) that is used to hash strings. We are
+// optimizing here for simplicity and speed.
+static inline uint32_t
+yp_constant_pool_hash(const uint8_t *start, size_t length) {
+    // This is a prime number used as the initial value for the hash function.
+    uint32_t value = 5381;
+
+    for (size_t index = 0; index < length; index++) {
+        value = ((value << 5) + value) + start[index];
+    }
+
+    return value;
+}
+
+// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+static uint32_t
+next_power_of_two(uint32_t v) {
+    // Avoid underflow in subtraction on next line.
+    if (v == 0) {
+        // 1 is the nearest power of 2 to 0 (2^0)
+        return 1;
+    }
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+}
+
+#ifndef NDEBUG
+static bool
+is_power_of_two(uint32_t size) {
+    return (size & (size - 1)) == 0;
+}
+#endif
+
+// Resize a constant pool to a given capacity.
+static inline bool
+yp_constant_pool_resize(yp_constant_pool_t *pool) {
+    assert(is_power_of_two(pool->capacity));
+
+    uint32_t next_capacity = pool->capacity * 2;
+    if (next_capacity < pool->capacity) return false;
+
+    const uint32_t mask = next_capacity - 1;
+    yp_constant_t *next_constants = calloc(next_capacity, sizeof(yp_constant_t));
+    if (next_constants == NULL) return false;
+
+    // For each constant in the current constant pool, rehash the content, find
+    // the index in the next constant pool, and insert it.
+    for (uint32_t index = 0; index < pool->capacity; index++) {
+        yp_constant_t *constant = &pool->constants[index];
+
+        // If an id is set on this constant, then we know we have content here.
+        // In this case we need to insert it into the next constant pool.
+        if (constant->id != 0) {
+            uint32_t next_index = constant->hash & mask;
+
+            // This implements linear scanning to find the next available slot
+            // in case this index is already taken. We don't need to bother
+            // comparing the values since we know that the hash is unique.
+            while (next_constants[next_index].id != 0) {
+                next_index = (next_index + 1) & mask;
+            }
+
+            // Here we copy over the entire constant, which includes the id so
+            // that they are consistent between resizes.
+            next_constants[next_index] = *constant;
+        }
+    }
+
+    free(pool->constants);
+    pool->constants = next_constants;
+    pool->capacity = next_capacity;
+    return true;
+}
+
+// Initialize a new constant pool with a given capacity.
+bool
+yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity) {
+    const uint32_t maximum = (~((uint32_t) 0));
+    if (capacity >= ((maximum / 2) + 1)) return false;
+
+    capacity = next_power_of_two(capacity);
+    pool->constants = calloc(capacity, sizeof(yp_constant_t));
+    if (pool->constants == NULL) return false;
+
+    pool->size = 0;
+    pool->capacity = capacity;
+    return true;
+}
+
+// Insert a constant into a constant pool and return its index in the pool.
+static inline yp_constant_id_t
+yp_constant_pool_insert(yp_constant_pool_t *pool, const uint8_t *start, size_t length, bool owned) {
+    if (pool->size >= (pool->capacity / 4 * 3)) {
+        if (!yp_constant_pool_resize(pool)) return 0;
+    }
+
+    assert(is_power_of_two(pool->capacity));
+    const uint32_t mask = pool->capacity - 1;
+
+    uint32_t hash = yp_constant_pool_hash(start, length);
+    uint32_t index = hash & mask;
+    yp_constant_t *constant;
+
+    while (constant = &pool->constants[index], constant->id != 0) {
+        // If there is a collision, then we need to check if the content is the
+        // same as the content we are trying to insert. If it is, then we can
+        // return the id of the existing constant.
+        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+            // Since we have found a match, we need to check if this is
+            // attempting to insert a shared or an owned constant. We want to
+            // prefer shared constants since they don't require allocations.
+            if (owned) {
+                // If we're attempting to insert an owned constant and we have
+                // an existing constant, then either way we don't want the given
+                // memory. Either it's duplicated with the existing constant or
+                // it's not necessary because we have a shared version.
+                free((void *) start);
+            } else if (constant->owned) {
+                // If we're attempting to insert a shared constant and the
+                // existing constant is owned, then we can free the owned
+                // constant and replace it with the shared constant.
+                free((void *) constant->start);
+                constant->start = start;
+                constant->owned = false;
+            }
+
+            return constant->id;
+        }
+
+        index = (index + 1) & mask;
+    }
+
+    pool->size++;
+    assert(pool->size < ((uint32_t) (1 << 31)));
+
+    *constant = (yp_constant_t) {
+        .id = (unsigned int) (pool->size & 0x7FFFFFFF),
+        .owned = owned,
+        .start = start,
+        .length = length,
+        .hash = hash
+    };
+
+    return constant->id;
+}
+
+// Insert a constant into a constant pool. Returns the id of the constant, or 0
+// if any potential calls to resize fail.
+yp_constant_id_t
+yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return yp_constant_pool_insert(pool, start, length, false);
+}
+
+// Insert a constant into a constant pool from memory that is now owned by the
+// constant pool. Returns the id of the constant, or 0 if any potential calls to
+// resize fail.
+yp_constant_id_t
+yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    return yp_constant_pool_insert(pool, start, length, true);
+}
+
+// Free the memory associated with a constant pool.
+void
+yp_constant_pool_free(yp_constant_pool_t *pool) {
+    // For each constant in the current constant pool, free the contents if the
+    // contents are owned.
+    for (uint32_t index = 0; index < pool->capacity; index++) {
+        yp_constant_t *constant = &pool->constants[index];
+
+        // If an id is set on this constant, then we know we have content here.
+        if (constant->id != 0 && constant->owned) {
+            free((void *) constant->start);
+        }
+    }
+
+    free(pool->constants);
+}
diff --git a/prism/util/pm_constant_pool.h b/prism/util/pm_constant_pool.h
new file mode 100644
index 0000000000..c07cd0b7d8
--- /dev/null
+++ b/prism/util/pm_constant_pool.h
@@ -0,0 +1,74 @@
+// The constant pool is a data structure that stores a set of strings. Each
+// string is assigned a unique id, which can be used to compare strings for
+// equality. This comparison ends up being much faster than strcmp, since it
+// only requires a single integer comparison.
+
+#ifndef YP_CONSTANT_POOL_H
+#define YP_CONSTANT_POOL_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint32_t yp_constant_id_t;
+
+typedef struct {
+    yp_constant_id_t *ids;
+    size_t size;
+    size_t capacity;
+} yp_constant_id_list_t;
+
+// Initialize a list of constant ids.
+void yp_constant_id_list_init(yp_constant_id_list_t *list);
+
+// Append a constant id to a list of constant ids. Returns false if any
+// potential reallocations fail.
+bool yp_constant_id_list_append(yp_constant_id_list_t *list, yp_constant_id_t id);
+
+// Checks if the current constant id list includes the given constant id.
+bool
+yp_constant_id_list_includes(yp_constant_id_list_t *list, yp_constant_id_t id);
+
+// Get the memory size of a list of constant ids.
+size_t yp_constant_id_list_memsize(yp_constant_id_list_t *list);
+
+// Free the memory associated with a list of constant ids.
+void yp_constant_id_list_free(yp_constant_id_list_t *list);
+
+typedef struct {
+    unsigned int id: 31;
+    bool owned: 1;
+    const uint8_t *start;
+    size_t length;
+    uint32_t hash;
+} yp_constant_t;
+
+typedef struct {
+    yp_constant_t *constants;
+    uint32_t size;
+    uint32_t capacity;
+} yp_constant_pool_t;
+
+// Define an empty constant pool.
+#define YP_CONSTANT_POOL_EMPTY ((yp_constant_pool_t) { .constants = NULL, .size = 0, .capacity = 0 })
+
+// Initialize a new constant pool with a given capacity.
+bool yp_constant_pool_init(yp_constant_pool_t *pool, uint32_t capacity);
+
+// Insert a constant into a constant pool that is a slice of a source string.
+// Returns the id of the constant, or 0 if any potential calls to resize fail.
+yp_constant_id_t yp_constant_pool_insert_shared(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+// Insert a constant into a constant pool from memory that is now owned by the
+// constant pool. Returns the id of the constant, or 0 if any potential calls to
+// resize fail.
+yp_constant_id_t yp_constant_pool_insert_owned(yp_constant_pool_t *pool, const uint8_t *start, size_t length);
+
+// Free the memory associated with a constant pool.
+void yp_constant_pool_free(yp_constant_pool_t *pool);
+
+#endif
diff --git a/prism/util/pm_list.c b/prism/util/pm_list.c
new file mode 100644
index 0000000000..e03dd79d8d
--- /dev/null
+++ b/prism/util/pm_list.c
@@ -0,0 +1,41 @@
+#include "yarp/util/yp_list.h"
+
+// Returns true if the given list is empty.
+YP_EXPORTED_FUNCTION bool
+yp_list_empty_p(yp_list_t *list) {
+    return list->head == NULL;
+}
+
+// Returns the size of the list.
+YP_EXPORTED_FUNCTION size_t
+yp_list_size(yp_list_t *list) {
+    return list->size;
+}
+
+// Append a node to the given list.
+void
+yp_list_append(yp_list_t *list, yp_list_node_t *node) {
+    if (list->head == NULL) {
+        list->head = node;
+    } else {
+        list->tail->next = node;
+    }
+
+    list->tail = node;
+    list->size++;
+}
+
+// Deallocate the internal state of the given list.
+YP_EXPORTED_FUNCTION void
+yp_list_free(yp_list_t *list) {
+    yp_list_node_t *node = list->head;
+    yp_list_node_t *next;
+
+    while (node != NULL) {
+        next = node->next;
+        free(node);
+        node = next;
+    }
+
+    list->size = 0;
+}
diff --git a/prism/util/pm_list.h b/prism/util/pm_list.h
new file mode 100644
index 0000000000..205a5a7eab
--- /dev/null
+++ b/prism/util/pm_list.h
@@ -0,0 +1,67 @@
+// This struct represents an abstract linked list that provides common
+// functionality. It is meant to be used any time a linked list is necessary to
+// store data.
+//
+// The linked list itself operates off a set of pointers. Because the pointers
+// are not necessarily sequential, they can be of any size. We use this fact to
+// allow the consumer of this linked list to extend the node struct to include
+// any data they want. This is done by using the yp_list_node_t as the first
+// member of the struct.
+//
+// For example, if we want to store a list of integers, we can do the following:
+//
+//     typedef struct {
+//       yp_list_node_t node;
+//       int value;
+//     } yp_int_node_t;
+//
+//     yp_list_t list = YP_LIST_EMPTY;
+//     yp_int_node_t *node = malloc(sizeof(yp_int_node_t));
+//     node->value = 5;
+//
+//     yp_list_append(&list, &node->node);
+//
+// The yp_list_t struct is used to represent the overall linked list. It
+// contains a pointer to the head and tail of the list. This allows for easy
+// iteration and appending of new nodes.
+
+#ifndef YARP_LIST_H
+#define YARP_LIST_H
+
+#include "yarp/defines.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// This represents a node in the linked list.
+typedef struct yp_list_node {
+    struct yp_list_node *next;
+} yp_list_node_t;
+
+// This represents the overall linked list. It keeps a pointer to the head and
+// tail so that iteration is easy and pushing new nodes is easy.
+typedef struct {
+    size_t size;
+    yp_list_node_t *head;
+    yp_list_node_t *tail;
+} yp_list_t;
+
+// This represents an empty list. It's used to initialize a stack-allocated list
+// as opposed to a method call.
+#define YP_LIST_EMPTY ((yp_list_t) { .size = 0, .head = NULL, .tail = NULL })
+
+// Returns true if the given list is empty.
+YP_EXPORTED_FUNCTION bool yp_list_empty_p(yp_list_t *list);
+
+// Returns the size of the list.
+YP_EXPORTED_FUNCTION size_t yp_list_size(yp_list_t *list);
+
+// Append a node to the given list.
+void yp_list_append(yp_list_t *list, yp_list_node_t *node);
+
+// Deallocate the internal state of the given list.
+YP_EXPORTED_FUNCTION void yp_list_free(yp_list_t *list);
+
+#endif
diff --git a/prism/util/pm_memchr.c b/prism/util/pm_memchr.c
new file mode 100644
index 0000000000..af9c14397e
--- /dev/null
+++ b/prism/util/pm_memchr.c
@@ -0,0 +1,31 @@
+#include "yarp/util/yp_memchr.h"
+
+#define YP_MEMCHR_TRAILING_BYTE_MINIMUM 0x40
+
+// We need to roll our own memchr to handle cases where the encoding changes and
+// we need to search for a character in a buffer that could be the trailing byte
+// of a multibyte character.
+void *
+yp_memchr(const void *memory, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding) {
+    if (encoding_changed && encoding->multibyte && character >= YP_MEMCHR_TRAILING_BYTE_MINIMUM) {
+        const uint8_t *source = (const uint8_t *) memory;
+        size_t index = 0;
+
+        while (index < number) {
+            if (source[index] == character) {
+                return (void *) (source + index);
+            }
+
+            size_t width = encoding->char_width(source + index, (ptrdiff_t) (number - index));
+            if (width == 0) {
+                return NULL;
+            }
+
+            index += width;
+        }
+
+        return NULL;
+    } else {
+        return memchr(memory, character, number);
+    }
+}
diff --git a/prism/util/pm_memchr.h b/prism/util/pm_memchr.h
new file mode 100644
index 0000000000..97f4b15a83
--- /dev/null
+++ b/prism/util/pm_memchr.h
@@ -0,0 +1,14 @@
+#ifndef YP_MEMCHR_H
+#define YP_MEMCHR_H
+
+#include "yarp/defines.h"
+#include "yarp/enc/yp_encoding.h"
+
+#include <stddef.h>
+
+// We need to roll our own memchr to handle cases where the encoding changes and
+// we need to search for a character in a buffer that could be the trailing byte
+// of a multibyte character.
+void * yp_memchr(const void *source, int character, size_t number, bool encoding_changed, yp_encoding_t *encoding);
+
+#endif
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
new file mode 100644
index 0000000000..969b884560
--- /dev/null
+++ b/prism/util/pm_newline_list.c
@@ -0,0 +1,134 @@
+#include "yarp/util/yp_newline_list.h"
+
+// Initialize a new newline list with the given capacity. Returns true if the
+// allocation of the offsets succeeds, otherwise returns false.
+bool
+yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity) {
+    list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
+    if (list->offsets == NULL) return false;
+
+    list->start = start;
+
+    // This is 1 instead of 0 because we want to include the first line of the
+    // file as having offset 0, which is set because of calloc.
+    list->size = 1;
+    list->capacity = capacity;
+
+    list->last_index = 0;
+    list->last_offset = 0;
+
+    return true;
+}
+
+// Append a new offset to the newline list. Returns true if the reallocation of
+// the offsets succeeds (if one was necessary), otherwise returns false.
+bool
+yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor) {
+    if (list->size == list->capacity) {
+        size_t *original_offsets = list->offsets;
+
+        list->capacity = (list->capacity * 3) / 2;
+        list->offsets = (size_t *) calloc(list->capacity, sizeof(size_t));
+        memcpy(list->offsets, original_offsets, list->size * sizeof(size_t));
+        free(original_offsets);
+        if (list->offsets == NULL) return false;
+    }
+
+    assert(*cursor == '\n');
+    assert(cursor >= list->start);
+    size_t newline_offset = (size_t) (cursor - list->start + 1);
+
+    assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
+    list->offsets[list->size++] = newline_offset;
+
+    return true;
+}
+
+// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+bool
+yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor) {
+    if (*cursor != '\n') {
+        return true;
+    }
+    return yp_newline_list_append(list, cursor);
+}
+
+// Returns the line and column of the given offset, assuming we don't have any
+// information about the previous index that we found.
+static yp_line_column_t
+yp_newline_list_line_column_search(yp_newline_list_t *list, size_t offset) {
+    size_t left = 0;
+    size_t right = list->size - 1;
+
+    while (left <= right) {
+        size_t mid = left + (right - left) / 2;
+
+        if (list->offsets[mid] == offset) {
+            return ((yp_line_column_t) { mid, 0 });
+        }
+
+        if (list->offsets[mid] < offset) {
+            left = mid + 1;
+        } else {
+            right = mid - 1;
+        }
+    }
+
+    return ((yp_line_column_t) { left - 1, offset - list->offsets[left - 1] });
+}
+
+// Returns the line and column of the given offset, assuming we know the last
+// index that we found.
+static yp_line_column_t
+yp_newline_list_line_column_scan(yp_newline_list_t *list, size_t offset) {
+    if (offset > list->last_offset) {
+        size_t index = list->last_index;
+        while (index < list->size && list->offsets[index] < offset) {
+            index++;
+        }
+
+        if (index == list->size) {
+            return ((yp_line_column_t) { index - 1, offset - list->offsets[index - 1] });
+        }
+
+        return ((yp_line_column_t) { index, 0 });
+    } else {
+        size_t index = list->last_index;
+        while (index > 0 && list->offsets[index] > offset) {
+            index--;
+        }
+
+        if (index == 0) {
+            return ((yp_line_column_t) { 0, offset });
+        }
+
+        return ((yp_line_column_t) { index, offset - list->offsets[index - 1] });
+    }
+}
+
+// Returns the line and column of the given offset. If the offset is not in the
+// list, the line and column of the closest offset less than the given offset
+// are returned.
+yp_line_column_t
+yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor) {
+    assert(cursor >= list->start);
+    size_t offset = (size_t) (cursor - list->start);
+    yp_line_column_t result;
+
+    if (list->last_offset == 0) {
+        result = yp_newline_list_line_column_search(list, offset);
+    } else {
+        result = yp_newline_list_line_column_scan(list, offset);
+    }
+
+    list->last_index = result.line;
+    list->last_offset = offset;
+
+    return result;
+}
+
+// Free the internal memory allocated for the newline list.
+void
+yp_newline_list_free(yp_newline_list_t *list) {
+    free(list->offsets);
+}
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
new file mode 100644
index 0000000000..9231305008
--- /dev/null
+++ b/prism/util/pm_newline_list.h
@@ -0,0 +1,61 @@
+// When compiling the syntax tree, it's necessary to know the line and column
+// of many nodes. This is necessary to support things like error messages,
+// tracepoints, etc.
+//
+// It's possible that we could store the start line, start column, end line, and
+// end column on every node in addition to the offsets that we already store,
+// but that would be quite a lot of memory overhead.
+
+#ifndef YP_NEWLINE_LIST_H
+#define YP_NEWLINE_LIST_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+// A list of offsets of newlines in a string. The offsets are assumed to be
+// sorted/inserted in ascending order.
+typedef struct {
+    const uint8_t *start;
+
+    size_t *offsets;
+    size_t size;
+    size_t capacity;
+
+    size_t last_offset;
+    size_t last_index;
+} yp_newline_list_t;
+
+// A line and column in a string.
+typedef struct {
+    size_t line;
+    size_t column;
+} yp_line_column_t;
+
+#define YP_NEWLINE_LIST_EMPTY ((yp_newline_list_t) { \
+    .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
+})
+
+// Initialize a new newline list with the given capacity. Returns true if the
+// allocation of the offsets succeeds, otherwise returns false.
+bool yp_newline_list_init(yp_newline_list_t *list, const uint8_t *start, size_t capacity);
+
+// Append a new offset to the newline list. Returns true if the reallocation of
+// the offsets succeeds (if one was necessary), otherwise returns false.
+bool yp_newline_list_append(yp_newline_list_t *list, const uint8_t *cursor);
+
+// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+bool yp_newline_list_check_append(yp_newline_list_t *list, const uint8_t *cursor);
+
+// Returns the line and column of the given offset. If the offset is not in the
+// list, the line and column of the closest offset less than the given offset
+// are returned.
+yp_line_column_t yp_newline_list_line_column(yp_newline_list_t *list, const uint8_t *cursor);
+
+// Free the internal memory allocated for the newline list.
+void yp_newline_list_free(yp_newline_list_t *list);
+
+#endif
diff --git a/prism/util/pm_state_stack.c b/prism/util/pm_state_stack.c
new file mode 100644
index 0000000000..7ff95bd611
--- /dev/null
+++ b/prism/util/pm_state_stack.c
@@ -0,0 +1,19 @@
+#include "yarp/util/yp_state_stack.h"
+
+// Pushes a value onto the stack.
+void
+yp_state_stack_push(yp_state_stack_t *stack, bool value) {
+    *stack = (*stack << 1) | (value & 1);
+}
+
+// Pops a value off the stack.
+void
+yp_state_stack_pop(yp_state_stack_t *stack) {
+    *stack >>= 1;
+}
+
+// Returns the value at the top of the stack.
+bool
+yp_state_stack_p(yp_state_stack_t *stack) {
+    return *stack & 1;
+}
diff --git a/prism/util/pm_state_stack.h b/prism/util/pm_state_stack.h
new file mode 100644
index 0000000000..69d8d7d54b
--- /dev/null
+++ b/prism/util/pm_state_stack.h
@@ -0,0 +1,24 @@
+#ifndef YP_STATE_STACK_H
+#define YP_STATE_STACK_H
+
+#include "yarp/defines.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// A struct that represents a stack of bools.
+typedef uint32_t yp_state_stack_t;
+
+// Initializes the state stack to an empty stack.
+#define YP_STATE_STACK_EMPTY ((yp_state_stack_t) 0)
+
+// Pushes a value onto the stack.
+void yp_state_stack_push(yp_state_stack_t *stack, bool value);
+
+// Pops a value off the stack.
+void yp_state_stack_pop(yp_state_stack_t *stack);
+
+// Returns the value at the top of the stack.
+bool yp_state_stack_p(yp_state_stack_t *stack);
+
+#endif
diff --git a/prism/util/pm_string.c b/prism/util/pm_string.c
new file mode 100644
index 0000000000..9ee25155a3
--- /dev/null
+++ b/prism/util/pm_string.c
@@ -0,0 +1,200 @@
+#include "yarp/util/yp_string.h"
+
+// The following headers are necessary to read files using demand paging.
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+// Initialize a shared string that is based on initial input.
+void
+yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end) {
+    assert(start <= end);
+
+    *string = (yp_string_t) {
+        .type = YP_STRING_SHARED,
+        .source = start,
+        .length = (size_t) (end - start)
+    };
+}
+
+// Initialize an owned string that is responsible for freeing allocated memory.
+void
+yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length) {
+    *string = (yp_string_t) {
+        .type = YP_STRING_OWNED,
+        .source = source,
+        .length = length
+    };
+}
+
+// Initialize a constant string that doesn't own its memory source.
+void
+yp_string_constant_init(yp_string_t *string, const char *source, size_t length) {
+    *string = (yp_string_t) {
+        .type = YP_STRING_CONSTANT,
+        .source = (const uint8_t *) source,
+        .length = length
+    };
+}
+
+static void
+yp_string_mapped_init_internal(yp_string_t *string, uint8_t *source, size_t length) {
+    *string = (yp_string_t) {
+        .type = YP_STRING_MAPPED,
+        .source = source,
+        .length = length
+    };
+}
+
+// Returns the memory size associated with the string.
+size_t
+yp_string_memsize(const yp_string_t *string) {
+    size_t size = sizeof(yp_string_t);
+    if (string->type == YP_STRING_OWNED) {
+        size += string->length;
+    }
+    return size;
+}
+
+// Ensure the string is owned. If it is not, then reinitialize it as owned and
+// copy over the previous source.
+void
+yp_string_ensure_owned(yp_string_t *string) {
+    if (string->type == YP_STRING_OWNED) return;
+
+    size_t length = yp_string_length(string);
+    const uint8_t *source = yp_string_source(string);
+
+    uint8_t *memory = malloc(length);
+    if (!memory) return;
+
+    yp_string_owned_init(string, memory, length);
+    memcpy((void *) string->source, source, length);
+}
+
+// Returns the length associated with the string.
+YP_EXPORTED_FUNCTION size_t
+yp_string_length(const yp_string_t *string) {
+    return string->length;
+}
+
+// Returns the start pointer associated with the string.
+YP_EXPORTED_FUNCTION const uint8_t *
+yp_string_source(const yp_string_t *string) {
+    return string->source;
+}
+
+// Free the associated memory of the given string.
+YP_EXPORTED_FUNCTION void
+yp_string_free(yp_string_t *string) {
+    void *memory = (void *) string->source;
+
+    if (string->type == YP_STRING_OWNED) {
+        free(memory);
+    } else if (string->type == YP_STRING_MAPPED && string->length) {
+#if defined(_WIN32)
+        UnmapViewOfFile(memory);
+#else
+        munmap(memory, string->length);
+#endif
+    }
+}
+
+bool
+yp_string_mapped_init(yp_string_t *string, const char *filepath) {
+#ifdef _WIN32
+    // Open the file for reading.
+    HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (file == INVALID_HANDLE_VALUE) {
+        perror("CreateFile failed");
+        return false;
+    }
+
+    // Get the file size.
+    DWORD file_size = GetFileSize(file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        CloseHandle(file);
+        perror("GetFileSize failed");
+        return false;
+    }
+
+    // If the file is empty, then we don't need to do anything else, we'll set
+    // the source to a constant empty string and return.
+    if (file_size == 0) {
+        CloseHandle(file);
+        uint8_t empty[] = "";
+        yp_string_mapped_init_internal(string, empty, 0);
+        return true;
+    }
+
+    // Create a mapping of the file.
+    HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (mapping == NULL) {
+        CloseHandle(file);
+        perror("CreateFileMapping failed");
+        return false;
+    }
+
+    // Map the file into memory.
+    uint8_t *source = (uint8_t *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
+    CloseHandle(mapping);
+    CloseHandle(file);
+
+    if (source == NULL) {
+        perror("MapViewOfFile failed");
+        return false;
+    }
+
+    yp_string_mapped_init_internal(string, source, (size_t) file_size);
+    return true;
+#else
+    // Open the file for reading
+    int fd = open(filepath, O_RDONLY);
+    if (fd == -1) {
+        perror("open");
+        return false;
+    }
+
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        close(fd);
+        perror("fstat");
+        return false;
+    }
+
+    // mmap the file descriptor to virtually get the contents
+    size_t size = (size_t) sb.st_size;
+    uint8_t *source = NULL;
+
+    if (size == 0) {
+        close(fd);
+        uint8_t empty[] = "";
+        yp_string_mapped_init_internal(string, empty, 0);
+        return true;
+    }
+
+    source = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (source == MAP_FAILED) {
+        perror("Map failed");
+        return false;
+    }
+
+    close(fd);
+    yp_string_mapped_init_internal(string, source, size);
+    return true;
+#endif
+}
+
+// Returns the size of the yp_string_t struct. This is necessary to allocate the
+// correct amount of memory in the FFI backend.
+YP_EXPORTED_FUNCTION size_t
+yp_string_sizeof(void) {
+    return sizeof(yp_string_t);
+}
diff --git a/prism/util/pm_string.h b/prism/util/pm_string.h
new file mode 100644
index 0000000000..bcdf8b66d9
--- /dev/null
+++ b/prism/util/pm_string.h
@@ -0,0 +1,61 @@
+#ifndef YARP_STRING_H
+#define YARP_STRING_H
+
+#include "yarp/defines.h"
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+// This struct represents a string value.
+typedef struct {
+    enum { YP_STRING_SHARED, YP_STRING_OWNED, YP_STRING_CONSTANT, YP_STRING_MAPPED } type;
+    const uint8_t *source;
+    size_t length;
+} yp_string_t;
+
+#define YP_EMPTY_STRING ((yp_string_t) { .type = YP_STRING_CONSTANT, .source = NULL, .length = 0 })
+
+// Initialize a shared string that is based on initial input.
+void yp_string_shared_init(yp_string_t *string, const uint8_t *start, const uint8_t *end);
+
+// Initialize an owned string that is responsible for freeing allocated memory.
+void yp_string_owned_init(yp_string_t *string, uint8_t *source, size_t length);
+
+// Initialize a constant string that doesn't own its memory source.
+void yp_string_constant_init(yp_string_t *string, const char *source, size_t length);
+
+// Read the file indicated by the filepath parameter into source and load its
+// contents and size into the given yp_string_t.
+// The given yp_string_t should be freed using yp_string_free() when it is no longer used.
+//
+// We want to use demand paging as much as possible in order to avoid having to
+// read the entire file into memory (which could be detrimental to performance
+// for large files). This means that if we're on windows we'll use
+// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
+// `mmap`, and on other POSIX systems we'll use `read`.
+YP_EXPORTED_FUNCTION bool yp_string_mapped_init(yp_string_t *string, const char *filepath);
+
+// Returns the memory size associated with the string.
+size_t yp_string_memsize(const yp_string_t *string);
+
+// Ensure the string is owned. If it is not, then reinitialize it as owned and
+// copy over the previous source.
+void yp_string_ensure_owned(yp_string_t *string);
+
+// Returns the length associated with the string.
+YP_EXPORTED_FUNCTION size_t yp_string_length(const yp_string_t *string);
+
+// Returns the start pointer associated with the string.
+YP_EXPORTED_FUNCTION const uint8_t * yp_string_source(const yp_string_t *string);
+
+// Free the associated memory of the given string.
+YP_EXPORTED_FUNCTION void yp_string_free(yp_string_t *string);
+
+// Returns the size of the yp_string_t struct. This is necessary to allocate the
+// correct amount of memory in the FFI backend.
+YP_EXPORTED_FUNCTION size_t yp_string_sizeof(void);
+
+#endif // YARP_STRING_H
diff --git a/prism/util/pm_string_list.c b/prism/util/pm_string_list.c
new file mode 100644
index 0000000000..3b2b4381c5
--- /dev/null
+++ b/prism/util/pm_string_list.c
@@ -0,0 +1,29 @@
+#include "yarp/util/yp_string_list.h"
+
+// Initialize a yp_string_list_t with its default values.
+void
+yp_string_list_init(yp_string_list_t *string_list) {
+    string_list->strings = (yp_string_t *) malloc(sizeof(yp_string_t));
+    string_list->length = 0;
+    string_list->capacity = 1;
+}
+
+// Append a yp_string_t to the given string list.
+void
+yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string) {
+    if (string_list->length + 1 > string_list->capacity) {
+        yp_string_t *original_string = string_list->strings;
+        string_list->capacity *= 2;
+        string_list->strings = (yp_string_t *) malloc(string_list->capacity * sizeof(yp_string_t));
+        memcpy(string_list->strings, original_string, (string_list->length) * sizeof(yp_string_t));
+        free(original_string);
+    }
+
+    string_list->strings[string_list->length++] = *string;
+}
+
+// Free the memory associated with the string list.
+void
+yp_string_list_free(yp_string_list_t *string_list) {
+    free(string_list->strings);
+}
diff --git a/prism/util/pm_string_list.h b/prism/util/pm_string_list.h
new file mode 100644
index 0000000000..0009a27a60
--- /dev/null
+++ b/prism/util/pm_string_list.h
@@ -0,0 +1,25 @@
+#ifndef YARP_STRING_LIST_H
+#define YARP_STRING_LIST_H
+
+#include "yarp/defines.h"
+#include "yarp/util/yp_string.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+typedef struct {
+    yp_string_t *strings;
+    size_t length;
+    size_t capacity;
+} yp_string_list_t;
+
+// Initialize a yp_string_list_t with its default values.
+YP_EXPORTED_FUNCTION void yp_string_list_init(yp_string_list_t *string_list);
+
+// Append a yp_string_t to the given string list.
+void yp_string_list_append(yp_string_list_t *string_list, yp_string_t *string);
+
+// Free the memory associated with the string list.
+YP_EXPORTED_FUNCTION void yp_string_list_free(yp_string_list_t *string_list);
+
+#endif
diff --git a/prism/util/pm_strncasecmp.c b/prism/util/pm_strncasecmp.c
new file mode 100644
index 0000000000..1cbaf904f4
--- /dev/null
+++ b/prism/util/pm_strncasecmp.c
@@ -0,0 +1,17 @@
+#include <ctype.h>
+#include <stddef.h>
+#include <stdint.h>
+
+int
+yp_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
+    size_t offset = 0;
+    int difference = 0;
+
+    while (offset < length && string1[offset] != '\0') {
+        if (string2[offset] == '\0') return string1[offset];
+        if ((difference = tolower(string1[offset]) - tolower(string2[offset])) != 0) return difference;
+        offset++;
+    }
+
+    return difference;
+}
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
new file mode 100644
index 0000000000..7c0015d289
--- /dev/null
+++ b/prism/util/pm_strpbrk.c
@@ -0,0 +1,66 @@
+#include "yarp/util/yp_strpbrk.h"
+
+// This is the slow path that does care about the encoding.
+static inline const uint8_t *
+yp_strpbrk_multi_byte(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
+    size_t index = 0;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        size_t width = parser->encoding.char_width(source + index, (ptrdiff_t) (maximum - index));
+        if (width == 0) {
+            return NULL;
+        }
+
+        index += width;
+    }
+
+    return NULL;
+}
+
+// This is the fast path that does not care about the encoding.
+static inline const uint8_t *
+yp_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
+    size_t index = 0;
+
+    while (index < maximum) {
+        if (strchr((const char *) charset, source[index]) != NULL) {
+            return source + index;
+        }
+
+        index++;
+    }
+
+    return NULL;
+}
+
+// Here we have rolled our own version of strpbrk. The standard library strpbrk
+// has undefined behavior when the source string is not null-terminated. We want
+// to support strings that are not null-terminated because yp_parse does not
+// have the contract that the string is null-terminated. (This is desirable
+// because it means the extension can call yp_parse with the result of a call to
+// mmap).
+//
+// The standard library strpbrk also does not support passing a maximum length
+// to search. We want to support this for the reason mentioned above, but we
+// also don't want it to stop on null bytes. Ruby actually allows null bytes
+// within strings, comments, regular expressions, etc. So we need to be able to
+// skip past them.
+//
+// Finally, we want to support encodings wherein the charset could contain
+// characters that are trailing bytes of multi-byte characters. For example, in
+// Shift-JIS, the backslash character can be a trailing byte. In that case we
+// need to take a slower path and iterate one multi-byte character at a time.
+const uint8_t *
+yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
+    if (length <= 0) {
+        return NULL;
+    } else if (parser->encoding_changed && parser->encoding.multibyte) {
+        return yp_strpbrk_multi_byte(parser, source, charset, (size_t) length);
+    } else {
+        return yp_strpbrk_single_byte(source, charset, (size_t) length);
+    }
+}
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
new file mode 100644
index 0000000000..d0bdd5bec0
--- /dev/null
+++ b/prism/util/pm_strpbrk.h
@@ -0,0 +1,29 @@
+#ifndef YP_STRPBRK_H
+#define YP_STRPBRK_H
+
+#include "yarp/defines.h"
+#include "yarp/parser.h"
+
+#include <stddef.h>
+#include <string.h>
+
+// Here we have rolled our own version of strpbrk. The standard library strpbrk
+// has undefined behavior when the source string is not null-terminated. We want
+// to support strings that are not null-terminated because yp_parse does not
+// have the contract that the string is null-terminated. (This is desirable
+// because it means the extension can call yp_parse with the result of a call to
+// mmap).
+//
+// The standard library strpbrk also does not support passing a maximum length
+// to search. We want to support this for the reason mentioned above, but we
+// also don't want it to stop on null bytes. Ruby actually allows null bytes
+// within strings, comments, regular expressions, etc. So we need to be able to
+// skip past them.
+//
+// Finally, we want to support encodings wherein the charset could contain
+// characters that are trailing bytes of multi-byte characters. For example, in
+// Shift-JIS, the backslash character can be a trailing byte. In that case we
+// need to take a slower path and iterate one multi-byte character at a time.
+const uint8_t * yp_strpbrk(yp_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
+
+#endif
author	Kevin Newton <[email protected]>	2023-09-27 12:22:36 -0400
committer	Kevin Newton <[email protected]>	2023-09-27 13:57:38 -0400
commit	8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch)
tree	46ef2bd5c51d5b7f923eda6a60edefc7a08200db /prism/util
parent	7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff)