summaryrefslogtreecommitdiff
path: root/prism_compile.c
diff options
context:
space:
mode:
authorKevin Newton <[email protected]>2024-02-06 13:53:58 -0500
committerKevin Newton <[email protected]>2024-02-06 14:31:10 -0500
commitc1bc7147200b5e47816388f1d304454f25e50bcb (patch)
treec5505779accda38e3054388ced6194e69211fd85 /prism_compile.c
parentf5b368df0ceb1e705cd94e39ef8459dae07e6d52 (diff)
[PRISM] Do not show source snippets if non-UTF-8
Diffstat (limited to 'prism_compile.c')
-rw-r--r--prism_compile.c96
1 files changed, 67 insertions, 29 deletions
diff --git a/prism_compile.c b/prism_compile.c
index 2e1666524f..a2b9f7a1f0 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -7634,6 +7634,71 @@ pm_parse_result_free(pm_parse_result_t *result)
}
/**
+ * Check if the given source slice is valid UTF-8.
+ */
+static bool
+pm_parse_input_error_utf8_p(const uint8_t *start, const uint8_t *end)
+{
+ size_t width;
+
+ while (start < end) {
+ if ((width = pm_encoding_utf_8_char_width(start, end - start)) == 0) return false;
+ start += width;
+ }
+
+ return true;
+}
+
+/**
+ * Generate an error object from the given parser that contains as much
+ * information as possible about the errors that were encountered.
+ */
+static VALUE
+pm_parse_input_error(const pm_parse_result_t *result)
+{
+ const pm_diagnostic_t *head = (const pm_diagnostic_t *) result->parser.error_list.head;
+ bool valid_utf8 = true;
+
+ for (const pm_diagnostic_t *error = head; error != NULL; error = (const pm_diagnostic_t *) error->node.next) {
+ // Any errors with the level PM_ERROR_LEVEL_ARGUMENT effectively take
+ // over as the only argument that gets raised. This is to allow priority
+ // messages that should be handled before anything else.
+ if (error->level == PM_ERROR_LEVEL_ARGUMENT) {
+ return rb_exc_new(rb_eArgError, error->message, strlen(error->message));
+ }
+
+ // It is implicitly assumed that the error messages will be encodeable
+ // as UTF-8. Because of this, we can't include source examples that
+ // contain invalid byte sequences. So if any source examples include
+ // invalid UTF-8 byte sequences, we will skip showing source examples
+ // entirely.
+ if (valid_utf8 && !pm_parse_input_error_utf8_p(error->location.start, error->location.end)) {
+ valid_utf8 = false;
+ }
+ }
+
+ pm_buffer_t buffer = { 0 };
+ pm_buffer_append_string(&buffer, "syntax errors found\n", 20);
+
+ if (valid_utf8) {
+ pm_parser_errors_format(&result->parser, &buffer, rb_stderr_tty_p());
+ }
+ else {
+ const pm_string_t *filepath = &result->parser.filepath;
+
+ for (const pm_diagnostic_t *error = head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
+ if (error != head) pm_buffer_append_byte(&buffer, '\n');
+ pm_buffer_append_format(&buffer, "%.*s:%" PRIu32 ": %s", (int) pm_string_length(filepath), pm_string_source(filepath), (uint32_t) pm_newline_list_line_column(&result->parser.newline_list, error->location.start).line, error->message);
+ }
+ }
+
+ VALUE error = rb_exc_new(rb_eSyntaxError, pm_buffer_value(&buffer), pm_buffer_length(&buffer));
+ pm_buffer_free(&buffer);
+
+ return error;
+}
+
+/**
* Parse the parse result and raise a Ruby error if there are any syntax errors.
* It returns an error if one should be raised. It is assumed that the parse
* result object is zeroed out.
@@ -7648,38 +7713,11 @@ pm_parse_input(pm_parse_result_t *result, VALUE filepath)
// If there are errors, raise an appropriate error and free the result.
if (result->parser.error_list.size > 0) {
- // Determine the error to raise. Any errors with the level
- // PM_ERROR_LEVEL_ARGUMENT effectively take over as the only argument
- // that gets raised.
- const pm_diagnostic_t *error_argument = NULL;
- for (const pm_diagnostic_t *error = (pm_diagnostic_t *) result->parser.error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
- if (error->level == PM_ERROR_LEVEL_ARGUMENT) {
- error_argument = error;
- break;
- }
- }
-
- VALUE error_class;
- pm_buffer_t buffer = { 0 };
-
- // If we found an error argument, then we need to use that as the error
- // message. Otherwise, we will format all of the parser error together.
- if (error_argument == NULL) {
- error_class = rb_eSyntaxError;
- pm_buffer_append_string(&buffer, "syntax errors found\n", 20);
- pm_parser_errors_format(&result->parser, &buffer, rb_stderr_tty_p());
- }
- else {
- error_class = rb_eArgError;
- pm_buffer_append_string(&buffer, error_argument->message, strlen(error_argument->message));
- }
-
- VALUE error_object = rb_exc_new(error_class, pm_buffer_value(&buffer), pm_buffer_length(&buffer));
- pm_buffer_free(&buffer);
+ VALUE error = pm_parse_input_error(result);
// TODO: We need to set the backtrace.
// rb_funcallv(error, rb_intern("set_backtrace"), 1, &path);
- return error_object;
+ return error;
}
// Emit all of the various warnings from the parse.