diff options
author | Kevin Newton <[email protected]> | 2023-08-24 11:09:17 -0400 |
---|---|---|
committer | git <[email protected]> | 2023-08-24 21:30:01 +0000 |
commit | 0e3dc5a056abf51363070ad94de4a8097bc80197 (patch) | |
tree | e2bf91984c5aaf0d5157863b9e5c196c5489c0da /yarp/extension.c | |
parent | 90048241cad97573d830e86222ca4826a32da13e (diff) |
[ruby/yarp] Fix lex compat with BOM
* BOM should not impact looking for the encoding string
* We should re-encode tokens when the encoding changes
* BOM should change the column of comments only
https://github.com/ruby/yarp/commit/119fc2d7b2
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- | yarp/extension.c | 14 |
1 files changed, 14 insertions, 0 deletions
```diff
diff --git a/yarp/extension.c b/yarp/extension.c
index 455cdcadcc..8aef456c00 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -221,6 +221,20 @@ static void
 lex_encoding_changed_callback(yp_parser_t *parser) {
     lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
     lex_data->encoding = rb_enc_find(parser->encoding.name);
+
+    // Since we got a new encoding, we need to go back and change the encoding
+    // of the tokens that we've already lexed. This should be a tiny amount
+    // since encoding magic comments need to be the first or second line of the
+    // file.
+    VALUE tokens = lex_data->tokens;
+    for (long index = 0; index < RARRAY_LEN(tokens); index++) {
+        VALUE yields = rb_ary_entry(tokens, index);
+        VALUE token = rb_ary_entry(yields, 0);
+
+        VALUE value = rb_ivar_get(token, rb_intern("@value"));
+        rb_enc_associate(value, lex_data->encoding);
+        ENC_CODERANGE_CLEAR(value);
+    }
 }
 
 // Return an array of tokens corresponding to the given source.
```