summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- prism/prism.c 2
-rw-r--r-- prism/util/pm_newline_list.c 32
-rw-r--r-- prism/util/pm_newline_list.h 76
-rw-r--r-- prism/util/pm_strpbrk.c 44
-rw-r--r-- prism/util/pm_strpbrk.h 43
5 files changed, 129 insertions, 68 deletions
diff --git a/prism/prism.c b/prism/prism.c
index b988b6b7fe..87479ab283 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -15701,7 +15701,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
.lex_callback = NULL,
.filepath_string = filepath_string,
.constant_pool = PM_CONSTANT_POOL_EMPTY,
- .newline_list = PM_NEWLINE_LIST_EMPTY,
+ .newline_list = { 0 },
.integer_base = 0,
.current_string = PM_STRING_EMPTY,
.command_start = true,
diff --git a/prism/util/pm_newline_list.c b/prism/util/pm_newline_list.c
index 20a1a221cb..f27bb75b63 100644
--- a/prism/util/pm_newline_list.c
+++ b/prism/util/pm_newline_list.c
@@ -1,7 +1,9 @@
#include "prism/util/pm_newline_list.h"
-// Initialize a new newline list with the given capacity. Returns true if the
-// allocation of the offsets succeeds, otherwise returns false.
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ */
bool
pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity) {
list->offsets = (size_t *) calloc(capacity, sizeof(size_t));
@@ -14,14 +16,13 @@ pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capac
list->size = 1;
list->capacity = capacity;
- list->last_index = 0;
- list->last_offset = 0;
-
return true;
}
-// Append a new offset to the newline list. Returns true if the reallocation of
-// the offsets succeeds (if one was necessary), otherwise returns false.
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ */
bool
pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
if (list->size == list->capacity) {
@@ -44,7 +45,10 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
return true;
}
-// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+/**
+ * Conditionally append a new offset to the newline list, if the value passed in
+ * is a newline.
+ */
bool
pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
if (*cursor != '\n') {
@@ -53,9 +57,11 @@ pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
return pm_newline_list_append(list, cursor);
}
-// Returns the line and column of the given offset. If the offset is not in the
-// list, the line and column of the closest offset less than the given offset
-// are returned.
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ */
pm_line_column_t
pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor) {
assert(cursor >= list->start);
@@ -81,7 +87,9 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
}
-// Free the internal memory allocated for the newline list.
+/**
+ * Free the internal memory allocated for the newline list.
+ */
void
pm_newline_list_free(pm_newline_list_t *list) {
free(list->offsets);
diff --git a/prism/util/pm_newline_list.h b/prism/util/pm_newline_list.h
index da6c565d6b..603a84c38c 100644
--- a/prism/util/pm_newline_list.h
+++ b/prism/util/pm_newline_list.h
@@ -16,46 +16,84 @@
#include <stddef.h>
#include <stdlib.h>
-// A list of offsets of newlines in a string. The offsets are assumed to be
-// sorted/inserted in ascending order.
+/**
+ * A list of offsets of newlines in a string. The offsets are assumed to be
+ * sorted/inserted in ascending order.
+ */
typedef struct {
+ /** A pointer to the start of the source string. */
const uint8_t *start;
- size_t *offsets;
+ /** The number of offsets in the list. */
size_t size;
+
+ /** The capacity of the list that has been allocated. */
size_t capacity;
- size_t last_offset;
- size_t last_index;
+ /** The list of offsets. */
+ size_t *offsets;
} pm_newline_list_t;
-// A line and column in a string.
+/**
+ * A line and column in a string.
+ */
typedef struct {
+ /** The line number. */
size_t line;
+
+ /** The column number. */
size_t column;
} pm_line_column_t;
-#define PM_NEWLINE_LIST_EMPTY ((pm_newline_list_t) { \
- .start = NULL, .offsets = NULL, .size = 0, .capacity = 0, .last_offset = 0, .last_index = 0 \
-})
-
-// Initialize a new newline list with the given capacity. Returns true if the
-// allocation of the offsets succeeds, otherwise returns false.
+/**
+ * Initialize a new newline list with the given capacity. Returns true if the
+ * allocation of the offsets succeeds, otherwise returns false.
+ *
+ * @param list The list to initialize.
+ * @param start A pointer to the start of the source string.
+ * @param capacity The initial capacity of the list.
+ * @return True if the allocation of the offsets succeeds, otherwise false.
+ */
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
-// Append a new offset to the newline list. Returns true if the reallocation of
-// the offsets succeeds (if one was necessary), otherwise returns false.
+/**
+ * Append a new offset to the newline list. Returns true if the reallocation of
+ * the offsets succeeds (if one was necessary), otherwise returns false.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the offset to append.
+ * @return True if the reallocation of the offsets succeeds (if one was
+ * necessary), otherwise false.
+ */
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
-// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+/**
+ * Conditionally append a new offset to the newline list, if the value passed in
+ * is a newline.
+ *
+ * @param list The list to append to.
+ * @param cursor A pointer to the offset to append.
+ * @return True if the reallocation of the offsets succeeds (if one was
+ * necessary), otherwise false.
+ */
bool pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor);
-// Returns the line and column of the given offset. If the offset is not in the
-// list, the line and column of the closest offset less than the given offset
-// are returned.
+/**
+ * Returns the line and column of the given offset. If the offset is not in the
+ * list, the line and column of the closest offset less than the given offset
+ * are returned.
+ *
+ * @param list The list to search.
+ * @param cursor A pointer to the offset to search for.
+ * @return The line and column of the given offset.
+ */
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor);
-// Free the internal memory allocated for the newline list.
+/**
+ * Free the internal memory allocated for the newline list.
+ *
+ * @param list The list to free.
+ */
void pm_newline_list_free(pm_newline_list_t *list);
#endif
diff --git a/prism/util/pm_strpbrk.c b/prism/util/pm_strpbrk.c
index 49bcd847b8..ce1f36910b 100644
--- a/prism/util/pm_strpbrk.c
+++ b/prism/util/pm_strpbrk.c
@@ -1,6 +1,8 @@
#include "prism/util/pm_strpbrk.h"
-// This is the slow path that does care about the encoding.
+/**
+ * This is the slow path that does care about the encoding.
+ */
static inline const uint8_t *
pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum) {
size_t index = 0;
@@ -21,7 +23,9 @@ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t
return NULL;
}
-// This is the fast path that does not care about the encoding.
+/**
+ * This is the fast path that does not care about the encoding.
+ */
static inline const uint8_t *
pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t maximum) {
size_t index = 0;
@@ -37,23 +41,25 @@ pm_strpbrk_single_byte(const uint8_t *source, const uint8_t *charset, size_t max
return NULL;
}
-// Here we have rolled our own version of strpbrk. The standard library strpbrk
-// has undefined behavior when the source string is not null-terminated. We want
-// to support strings that are not null-terminated because pm_parse does not
-// have the contract that the string is null-terminated. (This is desirable
-// because it means the extension can call pm_parse with the result of a call to
-// mmap).
-//
-// The standard library strpbrk also does not support passing a maximum length
-// to search. We want to support this for the reason mentioned above, but we
-// also don't want it to stop on null bytes. Ruby actually allows null bytes
-// within strings, comments, regular expressions, etc. So we need to be able to
-// skip past them.
-//
-// Finally, we want to support encodings wherein the charset could contain
-// characters that are trailing bytes of multi-byte characters. For example, in
-// Shift-JIS, the backslash character can be a trailing byte. In that case we
-// need to take a slower path and iterate one multi-byte character at a time.
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ */
const uint8_t *
pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length) {
if (length <= 0) {
diff --git a/prism/util/pm_strpbrk.h b/prism/util/pm_strpbrk.h
index c9ea6c945e..b589004abf 100644
--- a/prism/util/pm_strpbrk.h
+++ b/prism/util/pm_strpbrk.h
@@ -7,23 +7,32 @@
#include <stddef.h>
#include <string.h>
-// Here we have rolled our own version of strpbrk. The standard library strpbrk
-// has undefined behavior when the source string is not null-terminated. We want
-// to support strings that are not null-terminated because pm_parse does not
-// have the contract that the string is null-terminated. (This is desirable
-// because it means the extension can call pm_parse with the result of a call to
-// mmap).
-//
-// The standard library strpbrk also does not support passing a maximum length
-// to search. We want to support this for the reason mentioned above, but we
-// also don't want it to stop on null bytes. Ruby actually allows null bytes
-// within strings, comments, regular expressions, etc. So we need to be able to
-// skip past them.
-//
-// Finally, we want to support encodings wherein the charset could contain
-// characters that are trailing bytes of multi-byte characters. For example, in
-// Shift-JIS, the backslash character can be a trailing byte. In that case we
-// need to take a slower path and iterate one multi-byte character at a time.
+/**
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
+ * has undefined behavior when the source string is not null-terminated. We want
+ * to support strings that are not null-terminated because pm_parse does not
+ * have the contract that the string is null-terminated. (This is desirable
+ * because it means the extension can call pm_parse with the result of a call to
+ * mmap).
+ *
+ * The standard library strpbrk also does not support passing a maximum length
+ * to search. We want to support this for the reason mentioned above, but we
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
+ * within strings, comments, regular expressions, etc. So we need to be able to
+ * skip past them.
+ *
+ * Finally, we want to support encodings wherein the charset could contain
+ * characters that are trailing bytes of multi-byte characters. For example, in
+ * Shift-JIS, the backslash character can be a trailing byte. In that case we
+ * need to take a slower path and iterate one multi-byte character at a time.
+ *
+ * @param parser The parser.
+ * @param source The source string.
+ * @param charset The charset to search for.
+ * @param length The maximum length to search.
+ * @return A pointer to the first character in the source string that is in the
+ * charset, or NULL if no such character exists.
+ */
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length);
#endif