summaryrefslogtreecommitdiff
path: root/yarp/extension.c
diff options
context:
space:
mode:
author Kevin Newton <[email protected]> 2023-08-24 11:09:17 -0400
committer git <[email protected]> 2023-08-24 21:30:01 +0000
commit 0e3dc5a056abf51363070ad94de4a8097bc80197 (patch)
tree e2bf91984c5aaf0d5157863b9e5c196c5489c0da /yarp/extension.c
parent 90048241cad97573d830e86222ca4826a32da13e (diff)
[ruby/yarp] Fix lex compat with BOM
* BOM should not impact looking for the encoding string
* We should re-encode tokens when the encoding changes
* BOM should change the column of comments only

https://github.com/ruby/yarp/commit/119fc2d7b2
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- yarp/extension.c 14
1 file changed, 14 insertions, 0 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
index 455cdcadcc..8aef456c00 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -221,6 +221,20 @@ static void
lex_encoding_changed_callback(yp_parser_t *parser) {
lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
lex_data->encoding = rb_enc_find(parser->encoding.name);
+
+ // Since we got a new encoding, we need to go back and change the encoding
+ // of the tokens that we've already lexed. This should be a tiny amount
+ // since encoding magic comments need to be the first or second line of the
+ // file.
+ VALUE tokens = lex_data->tokens;
+ for (long index = 0; index < RARRAY_LEN(tokens); index++) {
+ VALUE yields = rb_ary_entry(tokens, index);
+ VALUE token = rb_ary_entry(yields, 0);
+
+ VALUE value = rb_ivar_get(token, rb_intern("@value"));
+ rb_enc_associate(value, lex_data->encoding);
+ ENC_CODERANGE_CLEAR(value);
+ }
}
// Return an array of tokens corresponding to the given source.