summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2023-11-29 11:46:33 +0100
committergit <[email protected]>2023-11-29 13:56:19 +0000
commit2af82e23165180f20ca2af374aedb7a45dedcc20 (patch)
tree1bd829f6f15140c645496167a208d38736ac8d81
parent2653404840952d25bbdd7deaf599fbfb1f5287f0 (diff)
[ruby/prism] Convert start line to signed integers
Ruby allows a start line of 0 or a negative start line. This is often used with `eval`-style calls to get correct line numbers when the evaluated snippet is prefixed with extra lines, e.g.:

```ruby
caller = caller_locations(1, 1).first
class_eval <<~RUBY, caller.path, caller.line - 2
  # frozen_string_literal: true
  def some_method
    #{caller_provided_code_snippet}
  end
RUBY
```

https://github.com/ruby/prism/commit/0d14ed1452
-rw-r--r--prism/extension.c3
-rw-r--r--prism/options.c23
-rw-r--r--prism/options.h4
-rw-r--r--prism/parser.h2
-rw-r--r--prism/prism.c10
-rw-r--r--prism/templates/ext/prism/api_node.c.erb2
-rw-r--r--prism/templates/lib/prism/serialize.rb.erb7
-rw-r--r--prism/templates/src/serialize.c.erb8
-rw-r--r--prism/util/pm_buffer.c9
-rw-r--r--prism/util/pm_buffer.h8
-rw-r--r--test/prism/parse_test.rb16
11 files changed, 74 insertions, 18 deletions
diff --git a/prism/extension.c b/prism/extension.c
index 3637cc1617..c3ee58d15e 100644
--- a/prism/extension.c
+++ b/prism/extension.c
@@ -126,7 +126,7 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
} else if (key_id == rb_option_id_encoding) {
if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
} else if (key_id == rb_option_id_line) {
- if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
+ if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
} else if (key_id == rb_option_id_frozen_string_literal) {
if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, value == Qtrue);
} else if (key_id == rb_option_id_verbose) {
@@ -166,6 +166,7 @@ build_options(VALUE argument) {
*/
static void
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
+ options->line = 1; // default
if (!NIL_P(keywords)) {
struct build_options_data data = { .options = options, .keywords = keywords };
struct build_options_data *argument = &data;
diff --git a/prism/options.c b/prism/options.c
index 84c1fcbb39..85d04d6272 100644
--- a/prism/options.c
+++ b/prism/options.c
@@ -20,7 +20,7 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
* Set the line option on the given options struct.
*/
PRISM_EXPORTED_FUNCTION void
-pm_options_line_set(pm_options_t *options, uint32_t line) {
+pm_options_line_set(pm_options_t *options, int32_t line) {
options->line = line;
}
@@ -115,6 +115,22 @@ pm_options_read_u32(const char *data) {
}
/**
+ * Read a 32-bit signed integer from a pointer. This function is used to read
+ * the options that are passed into the parser from the Ruby implementation. It
+ * handles aligned and unaligned reads.
+ *
+ * The bytes are interpreted in the host's native byte order; no byte swapping
+ * is performed. When the address is a multiple of sizeof(int32_t) the value is
+ * loaded directly through a cast pointer, otherwise it is copied out with
+ * memcpy so that no misaligned load is issued.
+ *
+ * NOTE(review): the direct-load branch reads a char buffer through an int32_t
+ * pointer; an unconditional memcpy would sidestep any strict-aliasing concern.
+ * Confirm whether the separate fast path is still wanted.
+ *
+ * @param data Pointer to at least sizeof(int32_t) readable bytes.
+ * @return The signed 32-bit value stored at data.
+ */
+static int32_t
+pm_options_read_s32(const char *data) {
+ if (((uintptr_t) data) % sizeof(int32_t) == 0) {
+ return *((int32_t *) data);
+ } else {
+ int32_t value;
+ memcpy(&value, data, sizeof(int32_t));
+ return value;
+ }
+}
+
+/**
* Deserialize an options struct from the given binary string. This is used to
* pass options to the parser from an FFI call so that consumers of the library
* from an FFI perspective don't have to worry about the structure of our
@@ -123,6 +139,9 @@ pm_options_read_u32(const char *data) {
*/
void
pm_options_read(pm_options_t *options, const char *data) {
+ options->line = 1; // default
+ if (data == NULL) return;
+
uint32_t filepath_length = pm_options_read_u32(data);
data += 4;
@@ -131,7 +150,7 @@ pm_options_read(pm_options_t *options, const char *data) {
data += filepath_length;
}
- options->line = pm_options_read_u32(data);
+ options->line = pm_options_read_s32(data);
data += 4;
uint32_t encoding_length = pm_options_read_u32(data);
diff --git a/prism/options.h b/prism/options.h
index 2ea85c838c..8608838da8 100644
--- a/prism/options.h
+++ b/prism/options.h
@@ -35,7 +35,7 @@ typedef struct {
* The line number assigned to the first line of the source being parsed.
* Line numbering is 1-based by default; the value may be zero or negative.
*/
- uint32_t line;
+ int32_t line;
/**
* The name of the encoding that the source file is in. Note that this must
@@ -80,7 +80,7 @@ PRISM_EXPORTED_FUNCTION void pm_options_filepath_set(pm_options_t *options, cons
* @param options The options struct to set the line on.
* @param line The line to set.
*/
-PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, uint32_t line);
+PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t line);
/**
* Set the encoding option on the given options struct.
diff --git a/prism/parser.h b/prism/parser.h
index c1f9e0f663..dfc15e19b7 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -661,7 +661,7 @@ struct pm_parser {
* The line number at the start of the parse. This will be used to offset
* the line numbers of all of the locations.
*/
- uint32_t start_line;
+ int32_t start_line;
/** Whether or not we're at the beginning of a command. */
bool command_start;
diff --git a/prism/prism.c b/prism/prism.c
index aee9fc7b88..1a4d31c268 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -17067,9 +17067,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
parser->filepath_string = options->filepath;
// line option
- if (options->line > 0) {
- parser->start_line = options->line;
- }
+ parser->start_line = options->line;
// encoding option
size_t encoding_length = pm_string_length(&options->encoding);
@@ -17238,7 +17236,7 @@ pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
PRISM_EXPORTED_FUNCTION void
pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
- if (data != NULL) pm_options_read(&options, data);
+ pm_options_read(&options, data);
pm_parser_t parser;
pm_parser_init(&parser, source, size, &options);
@@ -17260,7 +17258,7 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
PRISM_EXPORTED_FUNCTION void
pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
- if (data != NULL) pm_options_read(&options, data);
+ pm_options_read(&options, data);
pm_parser_t parser;
pm_parser_init(&parser, source, size, &options);
@@ -17268,7 +17266,7 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
pm_node_t *node = pm_parse(&parser);
pm_serialize_header(buffer);
pm_serialize_encoding(&parser.encoding, buffer);
- pm_buffer_append_varuint(buffer, parser.start_line);
+ pm_buffer_append_varsint(buffer, parser.start_line);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
pm_node_destroy(&parser, node);
diff --git a/prism/templates/ext/prism/api_node.c.erb b/prism/templates/ext/prism/api_node.c.erb
index 7bc52c1120..5811cf2027 100644
--- a/prism/templates/ext/prism/api_node.c.erb
+++ b/prism/templates/ext/prism/api_node.c.erb
@@ -46,7 +46,7 @@ pm_source_new(pm_parser_t *parser, rb_encoding *encoding) {
rb_ary_push(offsets, INT2FIX(parser->newline_list.offsets[index]));
}
- VALUE source_argv[] = { source, ULONG2NUM(parser->start_line), offsets };
+ VALUE source_argv[] = { source, LONG2NUM(parser->start_line), offsets };
return rb_class_new_instance(3, source_argv, rb_cPrismSource);
}
diff --git a/prism/templates/lib/prism/serialize.rb.erb b/prism/templates/lib/prism/serialize.rb.erb
index a38c796971..350a502d6a 100644
--- a/prism/templates/lib/prism/serialize.rb.erb
+++ b/prism/templates/lib/prism/serialize.rb.erb
@@ -79,7 +79,7 @@ module Prism
end
def load_start_line
- source.start_line = load_varuint
+ source.start_line = load_varsint
end
def load_comments
@@ -161,6 +161,11 @@ module Prism
end
end
+ def load_varsint # reads a signed integer stored as a zigzag-encoded varuint
+ n = load_varuint
+ (n >> 1) ^ (-(n & 1)) # undo zigzag: even n -> n / 2, odd n -> -(n + 1) / 2
+ end
+
def load_serialized_length
io.read(4).unpack1("L")
end
diff --git a/prism/templates/src/serialize.c.erb b/prism/templates/src/serialize.c.erb
index 0f6b119d5b..e82a8703b2 100644
--- a/prism/templates/src/serialize.c.erb
+++ b/prism/templates/src/serialize.c.erb
@@ -219,7 +219,7 @@ pm_serialize_encoding(pm_encoding_t *encoding, pm_buffer_t *buffer) {
void
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
pm_serialize_encoding(&parser->encoding, buffer);
- pm_buffer_append_varuint(buffer, parser->start_line);
+ pm_buffer_append_varsint(buffer, parser->start_line);
<%- unless Prism::SERIALIZE_ONLY_SEMANTICS_FIELDS -%>
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
<%- end -%>
@@ -301,7 +301,7 @@ serialize_token(void *data, pm_parser_t *parser, pm_token_t *token) {
PRISM_EXPORTED_FUNCTION void
pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
- if (data != NULL) pm_options_read(&options, data);
+ pm_options_read(&options, data);
pm_parser_t parser;
pm_parser_init(&parser, source, size, &options);
@@ -318,7 +318,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
pm_buffer_append_byte(buffer, 0);
pm_serialize_encoding(&parser.encoding, buffer);
- pm_buffer_append_varuint(buffer, parser.start_line);
+ pm_buffer_append_varsint(buffer, parser.start_line);
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
pm_serialize_data_loc(&parser, buffer);
@@ -337,7 +337,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
PRISM_EXPORTED_FUNCTION void
pm_serialize_parse_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
pm_options_t options = { 0 };
- if (data != NULL) pm_options_read(&options, data);
+ pm_options_read(&options, data);
pm_parser_t parser;
pm_parser_init(&parser, source, size, &options);
diff --git a/prism/util/pm_buffer.c b/prism/util/pm_buffer.c
index dcdf1770bb..307b55d030 100644
--- a/prism/util/pm_buffer.c
+++ b/prism/util/pm_buffer.c
@@ -152,6 +152,15 @@ pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value) {
}
/**
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
+ *
+ * The value is first zigzag-encoded, i.e. mapped to an unsigned integer as
+ * (value << 1) ^ (value >> 31), which interleaves non-negative and negative
+ * values (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...). The result is then written
+ * with pm_buffer_append_varuint.
+ *
+ * NOTE(review): `value >> 31` relies on right shift of a negative int32_t being
+ * an arithmetic shift, which is implementation-defined in C. Confirm this holds
+ * for all supported compilers.
+ */
+void
+pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
+ uint32_t unsigned_int = ((uint32_t)(value) << 1) ^ ((uint32_t)(value >> 31));
+ pm_buffer_append_varuint(buffer, unsigned_int);
+}
+
+/**
* Concatenate one buffer onto another.
*/
void
diff --git a/prism/util/pm_buffer.h b/prism/util/pm_buffer.h
index a8596be476..ec11d05e9b 100644
--- a/prism/util/pm_buffer.h
+++ b/prism/util/pm_buffer.h
@@ -121,6 +121,14 @@ void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value);
void pm_buffer_append_varuint(pm_buffer_t *buffer, uint32_t value);
/**
+ * Append a 32-bit signed integer to the buffer as a variable-length integer.
+ *
+ * @param buffer The buffer to append to.
+ * @param value The integer to append.
+ */
+void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value);
+
+/**
* Concatenate one buffer onto another.
*
* @param destination The buffer to concatenate onto.
diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb
index 6bd7a5d2a1..2feb15b48b 100644
--- a/test/prism/parse_test.rb
+++ b/test/prism/parse_test.rb
@@ -46,6 +46,22 @@ module Prism
assert_equal filepath, find_source_file_node(result.value).filepath
end
+ def test_parse_takes_line # the line: option sets the number reported for the first source line
+ line = 4
+ result = Prism.parse("def foo\n __FILE__\nend", line: line)
+
+ assert_equal line, result.value.location.start_line
+ assert_equal line + 1, find_source_file_node(result.value).location.start_line # __FILE__ is on the second source line
+ end
+
+ def test_parse_takes_negative_lines # same check as above, with a negative start line
+ line = -2
+ result = Prism.parse("def foo\n __FILE__\nend", line: line)
+
+ assert_equal line, result.value.location.start_line
+ assert_equal line + 1, find_source_file_node(result.value).location.start_line # __FILE__ is on the second source line, so -1 here
+ end
+
def test_parse_lex
node, tokens = Prism.parse_lex("def foo; end").value