diff options
author | Kevin Newton <[email protected]> | 2023-09-27 12:22:36 -0400 |
---|---|---|
committer | Kevin Newton <[email protected]> | 2023-09-27 13:57:38 -0400 |
commit | 8ab56869a64fdccc094f4a83c6367fb23b72d38b (patch) | |
tree | 46ef2bd5c51d5b7f923eda6a60edefc7a08200db /prism/prism.c | |
parent | 7e0971eb5d679bb6219abb0ec238139aa6502c5a (diff) |
Rename YARP filepaths to prism filepaths
Diffstat (limited to 'prism/prism.c')
-rw-r--r-- | prism/prism.c | 14554 |
1 files changed, 14554 insertions, 0 deletions
diff --git a/prism/prism.c b/prism/prism.c new file mode 100644 index 0000000000..715708a4d3 --- /dev/null +++ b/prism/prism.c @@ -0,0 +1,14554 @@ +#include "yarp.h" + +// The YARP version and the serialization format. +const char * +yp_version(void) { + return YP_VERSION; +} + +// In heredocs, tabs automatically complete up to the next 8 spaces. This is +// defined in CRuby as TAB_WIDTH. +#define YP_TAB_WHITESPACE_SIZE 8 + +// Debugging logging will provide you will additional debugging functions as +// well as automatically replace some functions with their debugging +// counterparts. +#ifndef YP_DEBUG_LOGGING +#define YP_DEBUG_LOGGING 0 +#endif + +#if YP_DEBUG_LOGGING + +/******************************************************************************/ +/* Debugging */ +/******************************************************************************/ + +YP_ATTRIBUTE_UNUSED static const char * +debug_context(yp_context_t context) { + switch (context) { + case YP_CONTEXT_BEGIN: return "BEGIN"; + case YP_CONTEXT_CLASS: return "CLASS"; + case YP_CONTEXT_CASE_IN: return "CASE_IN"; + case YP_CONTEXT_CASE_WHEN: return "CASE_WHEN"; + case YP_CONTEXT_DEF: return "DEF"; + case YP_CONTEXT_DEF_PARAMS: return "DEF_PARAMS"; + case YP_CONTEXT_DEFAULT_PARAMS: return "DEFAULT_PARAMS"; + case YP_CONTEXT_ENSURE: return "ENSURE"; + case YP_CONTEXT_ELSE: return "ELSE"; + case YP_CONTEXT_ELSIF: return "ELSIF"; + case YP_CONTEXT_EMBEXPR: return "EMBEXPR"; + case YP_CONTEXT_BLOCK_BRACES: return "BLOCK_BRACES"; + case YP_CONTEXT_BLOCK_KEYWORDS: return "BLOCK_KEYWORDS"; + case YP_CONTEXT_FOR: return "FOR"; + case YP_CONTEXT_IF: return "IF"; + case YP_CONTEXT_MAIN: return "MAIN"; + case YP_CONTEXT_MODULE: return "MODULE"; + case YP_CONTEXT_PARENS: return "PARENS"; + case YP_CONTEXT_POSTEXE: return "POSTEXE"; + case YP_CONTEXT_PREDICATE: return "PREDICATE"; + case YP_CONTEXT_PREEXE: return "PREEXE"; + case YP_CONTEXT_RESCUE: return "RESCUE"; + case YP_CONTEXT_RESCUE_ELSE: return 
"RESCUE_ELSE"; + case YP_CONTEXT_SCLASS: return "SCLASS"; + case YP_CONTEXT_UNLESS: return "UNLESS"; + case YP_CONTEXT_UNTIL: return "UNTIL"; + case YP_CONTEXT_WHILE: return "WHILE"; + case YP_CONTEXT_LAMBDA_BRACES: return "LAMBDA_BRACES"; + case YP_CONTEXT_LAMBDA_DO_END: return "LAMBDA_DO_END"; + } + return NULL; +} + +YP_ATTRIBUTE_UNUSED static void +debug_contexts(yp_parser_t *parser) { + yp_context_node_t *context_node = parser->current_context; + fprintf(stderr, "CONTEXTS: "); + + if (context_node != NULL) { + while (context_node != NULL) { + fprintf(stderr, "%s", debug_context(context_node->context)); + context_node = context_node->prev; + if (context_node != NULL) { + fprintf(stderr, " <- "); + } + } + } else { + fprintf(stderr, "NONE"); + } + + fprintf(stderr, "\n"); +} + +YP_ATTRIBUTE_UNUSED static void +debug_node(const char *message, yp_parser_t *parser, yp_node_t *node) { + yp_buffer_t buffer; + if (!yp_buffer_init(&buffer)) return; + + yp_prettyprint(parser, node, &buffer); + + fprintf(stderr, "%s\n%.*s\n", message, (int) buffer.length, buffer.value); + yp_buffer_free(&buffer); +} + +YP_ATTRIBUTE_UNUSED static void +debug_lex_mode(yp_parser_t *parser) { + yp_lex_mode_t *lex_mode = parser->lex_modes.current; + bool first = true; + + while (lex_mode != NULL) { + if (first) { + first = false; + } else { + fprintf(stderr, " <- "); + } + + switch (lex_mode->mode) { + case YP_LEX_DEFAULT: fprintf(stderr, "DEFAULT"); break; + case YP_LEX_EMBEXPR: fprintf(stderr, "EMBEXPR"); break; + case YP_LEX_EMBVAR: fprintf(stderr, "EMBVAR"); break; + case YP_LEX_HEREDOC: fprintf(stderr, "HEREDOC"); break; + case YP_LEX_LIST: fprintf(stderr, "LIST (terminator=%c, interpolation=%d)", lex_mode->as.list.terminator, lex_mode->as.list.interpolation); break; + case YP_LEX_REGEXP: fprintf(stderr, "REGEXP (terminator=%c)", lex_mode->as.regexp.terminator); break; + case YP_LEX_STRING: fprintf(stderr, "STRING (terminator=%c, interpolation=%d)", lex_mode->as.string.terminator, 
lex_mode->as.string.interpolation); break; + } + + lex_mode = lex_mode->prev; + } + + fprintf(stderr, "\n"); +} + +YP_ATTRIBUTE_UNUSED static void +debug_state(yp_parser_t *parser) { + fprintf(stderr, "STATE: "); + bool first = true; + + if (parser->lex_state == YP_LEX_STATE_NONE) { + fprintf(stderr, "NONE\n"); + return; + } + +#define CHECK_STATE(state) \ + if (parser->lex_state & state) { \ + if (!first) fprintf(stderr, "|"); \ + fprintf(stderr, "%s", #state); \ + first = false; \ + } + + CHECK_STATE(YP_LEX_STATE_BEG) + CHECK_STATE(YP_LEX_STATE_END) + CHECK_STATE(YP_LEX_STATE_ENDARG) + CHECK_STATE(YP_LEX_STATE_ENDFN) + CHECK_STATE(YP_LEX_STATE_ARG) + CHECK_STATE(YP_LEX_STATE_CMDARG) + CHECK_STATE(YP_LEX_STATE_MID) + CHECK_STATE(YP_LEX_STATE_FNAME) + CHECK_STATE(YP_LEX_STATE_DOT) + CHECK_STATE(YP_LEX_STATE_CLASS) + CHECK_STATE(YP_LEX_STATE_LABEL) + CHECK_STATE(YP_LEX_STATE_LABELED) + CHECK_STATE(YP_LEX_STATE_FITEM) + +#undef CHECK_STATE + + fprintf(stderr, "\n"); +} + +YP_ATTRIBUTE_UNUSED static void +debug_token(yp_token_t * token) { + fprintf(stderr, "%s: \"%.*s\"\n", yp_token_type_to_str(token->type), (int) (token->end - token->start), token->start); +} + +#endif + +/* Macros for min/max. */ +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +/******************************************************************************/ +/* Lex mode manipulations */ +/******************************************************************************/ + +// Returns the incrementor character that should be used to increment the +// nesting count if one is possible. +static inline uint8_t +lex_mode_incrementor(const uint8_t start) { + switch (start) { + case '(': + case '[': + case '{': + case '<': + return start; + default: + return '\0'; + } +} + +// Returns the matching character that should be used to terminate a list +// beginning with the given character. 
+static inline uint8_t +lex_mode_terminator(const uint8_t start) { + switch (start) { + case '(': + return ')'; + case '[': + return ']'; + case '{': + return '}'; + case '<': + return '>'; + default: + return start; + } +} + +// Push a new lex state onto the stack. If we're still within the pre-allocated +// space of the lex state stack, then we'll just use a new slot. Otherwise we'll +// allocate a new pointer and use that. +static bool +lex_mode_push(yp_parser_t *parser, yp_lex_mode_t lex_mode) { + lex_mode.prev = parser->lex_modes.current; + parser->lex_modes.index++; + + if (parser->lex_modes.index > YP_LEX_STACK_SIZE - 1) { + parser->lex_modes.current = (yp_lex_mode_t *) malloc(sizeof(yp_lex_mode_t)); + if (parser->lex_modes.current == NULL) return false; + + *parser->lex_modes.current = lex_mode; + } else { + parser->lex_modes.stack[parser->lex_modes.index] = lex_mode; + parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index]; + } + + return true; +} + +// Push on a new list lex mode. +static inline bool +lex_mode_push_list(yp_parser_t *parser, bool interpolation, uint8_t delimiter) { + uint8_t incrementor = lex_mode_incrementor(delimiter); + uint8_t terminator = lex_mode_terminator(delimiter); + + yp_lex_mode_t lex_mode = { + .mode = YP_LEX_LIST, + .as.list = { + .nesting = 0, + .interpolation = interpolation, + .incrementor = incrementor, + .terminator = terminator + } + }; + + // These are the places where we need to split up the content of the list. + // We'll use strpbrk to find the first of these characters. + uint8_t *breakpoints = lex_mode.as.list.breakpoints; + memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints)); + + // Now we'll add the terminator to the list of breakpoints. + size_t index = 7; + breakpoints[index++] = terminator; + + // If interpolation is allowed, then we're going to check for the # + // character. Otherwise we'll only look for escapes and the terminator. 
+ if (interpolation) { + breakpoints[index++] = '#'; + } + + // If there is an incrementor, then we'll check for that as well. + if (incrementor != '\0') { + breakpoints[index++] = incrementor; + } + + return lex_mode_push(parser, lex_mode); +} + +// Push on a new regexp lex mode. +static inline bool +lex_mode_push_regexp(yp_parser_t *parser, uint8_t incrementor, uint8_t terminator) { + yp_lex_mode_t lex_mode = { + .mode = YP_LEX_REGEXP, + .as.regexp = { + .nesting = 0, + .incrementor = incrementor, + .terminator = terminator + } + }; + + // These are the places where we need to split up the content of the + // regular expression. We'll use strpbrk to find the first of these + // characters. + uint8_t *breakpoints = lex_mode.as.regexp.breakpoints; + memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); + + // First we'll add the terminator. + breakpoints[3] = terminator; + + // Next, if there is an incrementor, then we'll check for that as well. + if (incrementor != '\0') { + breakpoints[4] = incrementor; + } + + return lex_mode_push(parser, lex_mode); +} + +// Push on a new string lex mode. +static inline bool +lex_mode_push_string(yp_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) { + yp_lex_mode_t lex_mode = { + .mode = YP_LEX_STRING, + .as.string = { + .nesting = 0, + .interpolation = interpolation, + .label_allowed = label_allowed, + .incrementor = incrementor, + .terminator = terminator + } + }; + + // These are the places where we need to split up the content of the + // string. We'll use strpbrk to find the first of these characters. + uint8_t *breakpoints = lex_mode.as.string.breakpoints; + memcpy(breakpoints, "\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints)); + + // Now add in the terminator. + size_t index = 2; + breakpoints[index++] = terminator; + + // If interpolation is allowed, then we're going to check for the # + // character. 
Otherwise we'll only look for escapes and the terminator. + if (interpolation) { + breakpoints[index++] = '#'; + } + + // If we have an incrementor, then we'll add that in as a breakpoint as + // well. + if (incrementor != '\0') { + breakpoints[index++] = incrementor; + } + + return lex_mode_push(parser, lex_mode); +} + +// Pop the current lex state off the stack. If we're within the pre-allocated +// space of the lex state stack, then we'll just decrement the index. Otherwise +// we'll free the current pointer and use the previous pointer. +static void +lex_mode_pop(yp_parser_t *parser) { + if (parser->lex_modes.index == 0) { + parser->lex_modes.current->mode = YP_LEX_DEFAULT; + } else if (parser->lex_modes.index < YP_LEX_STACK_SIZE) { + parser->lex_modes.index--; + parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index]; + } else { + parser->lex_modes.index--; + yp_lex_mode_t *prev = parser->lex_modes.current->prev; + free(parser->lex_modes.current); + parser->lex_modes.current = prev; + } +} + +// This is the equivalent of IS_lex_state is CRuby. 
+static inline bool +lex_state_p(yp_parser_t *parser, yp_lex_state_t state) { + return parser->lex_state & state; +} + +typedef enum { + YP_IGNORED_NEWLINE_NONE = 0, + YP_IGNORED_NEWLINE_ALL, + YP_IGNORED_NEWLINE_PATTERN +} yp_ignored_newline_type_t; + +static inline yp_ignored_newline_type_t +lex_state_ignored_p(yp_parser_t *parser) { + bool ignored = lex_state_p(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_CLASS | YP_LEX_STATE_FNAME | YP_LEX_STATE_DOT) && !lex_state_p(parser, YP_LEX_STATE_LABELED); + + if (ignored) { + return YP_IGNORED_NEWLINE_ALL; + } else if ((parser->lex_state & ~((unsigned int) YP_LEX_STATE_LABEL)) == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)) { + return YP_IGNORED_NEWLINE_PATTERN; + } else { + return YP_IGNORED_NEWLINE_NONE; + } +} + +static inline bool +lex_state_beg_p(yp_parser_t *parser) { + return lex_state_p(parser, YP_LEX_STATE_BEG_ANY) || (parser->lex_state == (YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED)); +} + +static inline bool +lex_state_arg_p(yp_parser_t *parser) { + return lex_state_p(parser, YP_LEX_STATE_ARG_ANY); +} + +static inline bool +lex_state_spcarg_p(yp_parser_t *parser, bool space_seen) { + if (parser->current.end >= parser->end) { + return false; + } + return lex_state_arg_p(parser) && space_seen && !yp_char_is_whitespace(*parser->current.end); +} + +static inline bool +lex_state_end_p(yp_parser_t *parser) { + return lex_state_p(parser, YP_LEX_STATE_END_ANY); +} + +// This is the equivalent of IS_AFTER_OPERATOR in CRuby. +static inline bool +lex_state_operator_p(yp_parser_t *parser) { + return lex_state_p(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_DOT); +} + +// Set the state of the lexer. This is defined as a function to be able to put a breakpoint in it. 
+static inline void +lex_state_set(yp_parser_t *parser, yp_lex_state_t state) { + parser->lex_state = state; +} + +#if YP_DEBUG_LOGGING +static inline void +debug_lex_state_set(yp_parser_t *parser, yp_lex_state_t state, char const * caller_name, int line_number) { + fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number); + debug_state(parser); + lex_state_set(parser, state); + fprintf(stderr, "Now: "); + debug_state(parser); + fprintf(stderr, "\n"); +} + +#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__) +#endif + +/******************************************************************************/ +/* Node-related functions */ +/******************************************************************************/ + +// Retrieve the constant pool id for the given location. +static inline yp_constant_id_t +yp_parser_constant_id_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) { + return yp_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start)); +} + +// Retrieve the constant pool id for the given string. +static inline yp_constant_id_t +yp_parser_constant_id_owned(yp_parser_t *parser, const uint8_t *start, size_t length) { + return yp_constant_pool_insert_owned(&parser->constant_pool, start, length); +} + +// Retrieve the constant pool id for the given token. +static inline yp_constant_id_t +yp_parser_constant_id_token(yp_parser_t *parser, const yp_token_t *token) { + return yp_parser_constant_id_location(parser, token->start, token->end); +} + +// Retrieve the constant pool id for the given token. If the token is not +// provided, then return 0. +static inline yp_constant_id_t +yp_parser_optional_constant_id_token(yp_parser_t *parser, const yp_token_t *token) { + return token->type == YP_TOKEN_NOT_PROVIDED ? 
0 : yp_parser_constant_id_token(parser, token); +} + +// The predicate of conditional nodes can change what would otherwise be regular +// nodes into specialized nodes. For example: +// +// if foo .. bar => RangeNode becomes FlipFlopNode +// if foo and bar .. baz => RangeNode becomes FlipFlopNode +// if /foo/ => RegularExpressionNode becomes MatchLastLineNode +// if /foo #{bar}/ => InterpolatedRegularExpressionNode becomes InterpolatedMatchLastLineNode +// +static void +yp_conditional_predicate(yp_node_t *node) { + switch (YP_NODE_TYPE(node)) { + case YP_AND_NODE: { + yp_and_node_t *cast = (yp_and_node_t *) node; + yp_conditional_predicate(cast->left); + yp_conditional_predicate(cast->right); + break; + } + case YP_OR_NODE: { + yp_or_node_t *cast = (yp_or_node_t *) node; + yp_conditional_predicate(cast->left); + yp_conditional_predicate(cast->right); + break; + } + case YP_PARENTHESES_NODE: { + yp_parentheses_node_t *cast = (yp_parentheses_node_t *) node; + + if ((cast->body != NULL) && YP_NODE_TYPE_P(cast->body, YP_STATEMENTS_NODE)) { + yp_statements_node_t *statements = (yp_statements_node_t *) cast->body; + if (statements->body.size == 1) yp_conditional_predicate(statements->body.nodes[0]); + } + + break; + } + case YP_RANGE_NODE: { + yp_range_node_t *cast = (yp_range_node_t *) node; + if (cast->left) { + yp_conditional_predicate(cast->left); + } + if (cast->right) { + yp_conditional_predicate(cast->right); + } + + // Here we change the range node into a flip flop node. We can do + // this since the nodes are exactly the same except for the type. + // We're only asserting against the size when we should probably + // assert against the entire layout, but we'll assume tests will + // catch this. + assert(sizeof(yp_range_node_t) == sizeof(yp_flip_flop_node_t)); + node->type = YP_FLIP_FLOP_NODE; + + break; + } + case YP_REGULAR_EXPRESSION_NODE: + // Here we change the regular expression node into a match last line + // node. 
We can do this since the nodes are exactly the same except + // for the type. + assert(sizeof(yp_regular_expression_node_t) == sizeof(yp_match_last_line_node_t)); + node->type = YP_MATCH_LAST_LINE_NODE; + break; + case YP_INTERPOLATED_REGULAR_EXPRESSION_NODE: + // Here we change the interpolated regular expression node into an + // interpolated match last line node. We can do this since the nodes + // are exactly the same except for the type. + assert(sizeof(yp_interpolated_regular_expression_node_t) == sizeof(yp_interpolated_match_last_line_node_t)); + node->type = YP_INTERPOLATED_MATCH_LAST_LINE_NODE; + break; + default: + break; + } +} + +// In a lot of places in the tree you can have tokens that are not provided but +// that do not cause an error. For example, in a method call without +// parentheses. In these cases we set the token to the "not provided" type. For +// example: +// +// yp_token_t token; +// not_provided(&token, parser->previous.end); +// +static inline yp_token_t +not_provided(yp_parser_t *parser) { + return (yp_token_t) { .type = YP_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start }; +} + +#define YP_LOCATION_NULL_VALUE(parser) ((yp_location_t) { .start = parser->start, .end = parser->start }) +#define YP_LOCATION_TOKEN_VALUE(token) ((yp_location_t) { .start = (token)->start, .end = (token)->end }) +#define YP_LOCATION_NODE_VALUE(node) ((yp_location_t) { .start = (node)->location.start, .end = (node)->location.end }) +#define YP_LOCATION_NODE_BASE_VALUE(node) ((yp_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end }) +#define YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((yp_location_t) { .start = NULL, .end = NULL }) +#define YP_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == YP_TOKEN_NOT_PROVIDED ? 
YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : YP_LOCATION_TOKEN_VALUE(token)) + +// This is a special out parameter to the parse_arguments_list function that +// includes opening and closing parentheses in addition to the arguments since +// it's so common. It is handy to use when passing argument information to one +// of the call node creation functions. +typedef struct { + yp_location_t opening_loc; + yp_arguments_node_t *arguments; + yp_location_t closing_loc; + yp_node_t *block; +} yp_arguments_t; + +#define YP_EMPTY_ARGUMENTS ((yp_arguments_t) { \ + .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, \ + .arguments = NULL, \ + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, \ + .block = NULL, \ +}) + +// Check that we're not about to attempt to attach a brace block to a call that +// has arguments without parentheses. +static void +yp_arguments_validate_block(yp_parser_t *parser, yp_arguments_t *arguments, yp_block_node_t *block) { + // First, check that we have arguments and that we don't have a closing + // location for them. + if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) { + return; + } + + // Next, check that we don't have a single parentheses argument. This would + // look like: + // + // foo (1) {} + // + // In this case, it's actually okay for the block to be attached to the + // call, even though it looks like it's attached to the argument. + if (arguments->arguments->arguments.size == 1 && YP_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], YP_PARENTHESES_NODE)) { + return; + } + + // If we didn't hit a case before this check, then at this point we need to + // add a syntax error. 
+ yp_diagnostic_list_append( + &parser->error_list, + block->base.location.start, + block->base.location.end, + YP_ERR_ARGUMENT_UNEXPECTED_BLOCK + ); +} + +/******************************************************************************/ +/* Scope node functions */ +/******************************************************************************/ + +// Generate a scope node from the given node. +void +yp_scope_node_init(yp_node_t *node, yp_scope_node_t *scope) { + scope->base.type = YP_SCOPE_NODE; + scope->base.location.start = node->location.start; + scope->base.location.end = node->location.end; + + scope->parameters = NULL; + scope->body = NULL; + yp_constant_id_list_init(&scope->locals); + + switch (YP_NODE_TYPE(node)) { + case YP_BLOCK_NODE: { + yp_block_node_t *cast = (yp_block_node_t *) node; + if (cast->parameters) scope->parameters = cast->parameters->parameters; + scope->body = cast->body; + scope->locals = cast->locals; + break; + } + case YP_CLASS_NODE: { + yp_class_node_t *cast = (yp_class_node_t *) node; + scope->body = cast->body; + scope->locals = cast->locals; + break; + } + case YP_DEF_NODE: { + yp_def_node_t *cast = (yp_def_node_t *) node; + scope->parameters = cast->parameters; + scope->body = cast->body; + scope->locals = cast->locals; + break; + } + case YP_LAMBDA_NODE: { + yp_lambda_node_t *cast = (yp_lambda_node_t *) node; + if (cast->parameters) scope->parameters = cast->parameters->parameters; + scope->body = cast->body; + scope->locals = cast->locals; + break; + } + case YP_MODULE_NODE: { + yp_module_node_t *cast = (yp_module_node_t *) node; + scope->body = cast->body; + scope->locals = cast->locals; + break; + } + case YP_PROGRAM_NODE: { + yp_program_node_t *cast = (yp_program_node_t *) node; + scope->body = (yp_node_t *) cast->statements; + scope->locals = cast->locals; + break; + } + case YP_SINGLETON_CLASS_NODE: { + yp_singleton_class_node_t *cast = (yp_singleton_class_node_t *) node; + scope->body = cast->body; + scope->locals = 
cast->locals; + break; + } + default: + assert(false && "unreachable"); + break; + } +} + +/******************************************************************************/ +/* Node creation functions */ +/******************************************************************************/ + +// Parse the decimal number represented by the range of bytes. returns +// UINT32_MAX if the number fails to parse. This function assumes that the range +// of bytes has already been validated to contain only decimal digits. +static uint32_t +parse_decimal_number(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) { + ptrdiff_t diff = end - start; + assert(diff > 0 && ((unsigned long) diff < SIZE_MAX)); + size_t length = (size_t) diff; + + char *digits = calloc(length + 1, sizeof(char)); + memcpy(digits, start, length); + digits[length] = '\0'; + + char *endptr; + errno = 0; + unsigned long value = strtoul(digits, &endptr, 10); + + if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) { + yp_diagnostic_list_append(&parser->error_list, start, end, YP_ERR_INVALID_NUMBER_DECIMAL); + value = UINT32_MAX; + } + + free(digits); + + if (value > UINT32_MAX) { + yp_diagnostic_list_append(&parser->error_list, start, end, YP_ERR_INVALID_NUMBER_DECIMAL); + value = UINT32_MAX; + } + + return (uint32_t) value; +} + +// Parse out the options for a regular expression. 
+static inline yp_node_flags_t +yp_regular_expression_flags_create(const yp_token_t *closing) { + yp_node_flags_t flags = 0; + + if (closing->type == YP_TOKEN_REGEXP_END) { + for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) { + switch (*flag) { + case 'i': flags |= YP_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break; + case 'm': flags |= YP_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break; + case 'x': flags |= YP_REGULAR_EXPRESSION_FLAGS_EXTENDED; break; + case 'e': flags |= YP_REGULAR_EXPRESSION_FLAGS_EUC_JP; break; + case 'n': flags |= YP_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT; break; + case 's': flags |= YP_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J; break; + case 'u': flags |= YP_REGULAR_EXPRESSION_FLAGS_UTF_8; break; + case 'o': flags |= YP_REGULAR_EXPRESSION_FLAGS_ONCE; break; + default: assert(false && "unreachable"); + } + } + } + + return flags; +} + +// Allocate and initialize a new StatementsNode node. +static yp_statements_node_t * +yp_statements_node_create(yp_parser_t *parser); + +// Append a new node to the given StatementsNode node's body. +static void +yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement); + +// Get the length of the given StatementsNode node's body. +static size_t +yp_statements_node_body_length(yp_statements_node_t *node); + +// This function is here to allow us a place to extend in the future when we +// implement our own arena allocation. +static inline void * +yp_alloc_node(YP_ATTRIBUTE_UNUSED yp_parser_t *parser, size_t size) { + void *memory = calloc(1, size); + if (memory == NULL) { + fprintf(stderr, "Failed to allocate %zu bytes\n", size); + abort(); + } + return memory; +} + +#define YP_ALLOC_NODE(parser, type) (type *) yp_alloc_node(parser, sizeof(type)) + +// Allocate a new MissingNode node. 
+static yp_missing_node_t * +yp_missing_node_create(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) { + yp_missing_node_t *node = YP_ALLOC_NODE(parser, yp_missing_node_t); + *node = (yp_missing_node_t) {{ .type = YP_MISSING_NODE, .location = { .start = start, .end = end } }}; + return node; +} + +// Allocate and initialize a new AliasGlobalVariableNode node. +static yp_alias_global_variable_node_t * +yp_alias_global_variable_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *new_name, yp_node_t *old_name) { + assert(keyword->type == YP_TOKEN_KEYWORD_ALIAS); + yp_alias_global_variable_node_t *node = YP_ALLOC_NODE(parser, yp_alias_global_variable_node_t); + + *node = (yp_alias_global_variable_node_t) { + { + .type = YP_ALIAS_GLOBAL_VARIABLE_NODE, + .location = { + .start = keyword->start, + .end = old_name->location.end + }, + }, + .new_name = new_name, + .old_name = old_name, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword) + }; + + return node; +} + +// Allocate and initialize a new AliasMethodNode node. +static yp_alias_method_node_t * +yp_alias_method_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *new_name, yp_node_t *old_name) { + assert(keyword->type == YP_TOKEN_KEYWORD_ALIAS); + yp_alias_method_node_t *node = YP_ALLOC_NODE(parser, yp_alias_method_node_t); + + *node = (yp_alias_method_node_t) { + { + .type = YP_ALIAS_METHOD_NODE, + .location = { + .start = keyword->start, + .end = old_name->location.end + }, + }, + .new_name = new_name, + .old_name = old_name, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword) + }; + + return node; +} + +// Allocate a new AlternationPatternNode node. 
+static yp_alternation_pattern_node_t * +yp_alternation_pattern_node_create(yp_parser_t *parser, yp_node_t *left, yp_node_t *right, const yp_token_t *operator) { + yp_alternation_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_alternation_pattern_node_t); + + *node = (yp_alternation_pattern_node_t) { + { + .type = YP_ALTERNATION_PATTERN_NODE, + .location = { + .start = left->location.start, + .end = right->location.end + }, + }, + .left = left, + .right = right, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new and node. +static yp_and_node_t * +yp_and_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operator, yp_node_t *right) { + yp_and_node_t *node = YP_ALLOC_NODE(parser, yp_and_node_t); + + *node = (yp_and_node_t) { + { + .type = YP_AND_NODE, + .location = { + .start = left->location.start, + .end = right->location.end + }, + }, + .left = left, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .right = right + }; + + return node; +} + +// Allocate an initialize a new arguments node. +static yp_arguments_node_t * +yp_arguments_node_create(yp_parser_t *parser) { + yp_arguments_node_t *node = YP_ALLOC_NODE(parser, yp_arguments_node_t); + + *node = (yp_arguments_node_t) { + { + .type = YP_ARGUMENTS_NODE, + .location = YP_LOCATION_NULL_VALUE(parser) + }, + .arguments = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Return the size of the given arguments node. +static size_t +yp_arguments_node_size(yp_arguments_node_t *node) { + return node->arguments.size; +} + +// Append an argument to an arguments node. +static void +yp_arguments_node_arguments_append(yp_arguments_node_t *node, yp_node_t *argument) { + if (yp_arguments_node_size(node) == 0) { + node->base.location.start = argument->location.start; + } + + node->base.location.end = argument->location.end; + yp_node_list_append(&node->arguments, argument); +} + +// Allocate and initialize a new ArrayNode node. 
+static yp_array_node_t * +yp_array_node_create(yp_parser_t *parser, const yp_token_t *opening) { + yp_array_node_t *node = YP_ALLOC_NODE(parser, yp_array_node_t); + + *node = (yp_array_node_t) { + { + .type = YP_ARRAY_NODE, + .location = YP_LOCATION_TOKEN_VALUE(opening) + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .elements = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Return the size of the given array node. +static inline size_t +yp_array_node_size(yp_array_node_t *node) { + return node->elements.size; +} + +// Append an argument to an array node. +static inline void +yp_array_node_elements_append(yp_array_node_t *node, yp_node_t *element) { + if (!node->elements.size && !node->opening_loc.start) { + node->base.location.start = element->location.start; + } + yp_node_list_append(&node->elements, element); + node->base.location.end = element->location.end; +} + +// Set the closing token and end location of an array node. +static void +yp_array_node_close_set(yp_array_node_t *node, const yp_token_t *closing) { + assert(closing->type == YP_TOKEN_BRACKET_RIGHT || closing->type == YP_TOKEN_STRING_END || closing->type == YP_TOKEN_MISSING || closing->type == YP_TOKEN_NOT_PROVIDED); + node->base.location.end = closing->end; + node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing); +} + +// Allocate and initialize a new array pattern node. The node list given in the +// nodes parameter is guaranteed to have at least two nodes. 
+static yp_array_pattern_node_t * +yp_array_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *nodes) { + yp_array_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_array_pattern_node_t); + + *node = (yp_array_pattern_node_t) { + { + .type = YP_ARRAY_PATTERN_NODE, + .location = { + .start = nodes->nodes[0]->location.start, + .end = nodes->nodes[nodes->size - 1]->location.end + }, + }, + .constant = NULL, + .rest = NULL, + .requireds = YP_EMPTY_NODE_LIST, + .posts = YP_EMPTY_NODE_LIST, + .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + // For now we're going to just copy over each pointer manually. This could be + // much more efficient, as we could instead resize the node list. + bool found_rest = false; + for (size_t index = 0; index < nodes->size; index++) { + yp_node_t *child = nodes->nodes[index]; + + if (!found_rest && YP_NODE_TYPE_P(child, YP_SPLAT_NODE)) { + node->rest = child; + found_rest = true; + } else if (found_rest) { + yp_node_list_append(&node->posts, child); + } else { + yp_node_list_append(&node->requireds, child); + } + } + + return node; +} + +// Allocate and initialize a new array pattern node from a single rest node. +static yp_array_pattern_node_t * +yp_array_pattern_node_rest_create(yp_parser_t *parser, yp_node_t *rest) { + yp_array_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_array_pattern_node_t); + + *node = (yp_array_pattern_node_t) { + { + .type = YP_ARRAY_PATTERN_NODE, + .location = rest->location, + }, + .constant = NULL, + .rest = rest, + .requireds = YP_EMPTY_NODE_LIST, + .posts = YP_EMPTY_NODE_LIST, + .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return node; +} + +// Allocate and initialize a new array pattern node from a constant and opening +// and closing tokens. 
+static yp_array_pattern_node_t * +yp_array_pattern_node_constant_create(yp_parser_t *parser, yp_node_t *constant, const yp_token_t *opening, const yp_token_t *closing) { + yp_array_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_array_pattern_node_t); + + *node = (yp_array_pattern_node_t) { + { + .type = YP_ARRAY_PATTERN_NODE, + .location = { + .start = constant->location.start, + .end = closing->end + }, + }, + .constant = constant, + .rest = NULL, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing), + .requireds = YP_EMPTY_NODE_LIST, + .posts = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Allocate and initialize a new array pattern node from an opening and closing +// token. +static yp_array_pattern_node_t * +yp_array_pattern_node_empty_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) { + yp_array_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_array_pattern_node_t); + + *node = (yp_array_pattern_node_t) { + { + .type = YP_ARRAY_PATTERN_NODE, + .location = { + .start = opening->start, + .end = closing->end + }, + }, + .constant = NULL, + .rest = NULL, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing), + .requireds = YP_EMPTY_NODE_LIST, + .posts = YP_EMPTY_NODE_LIST + }; + + return node; +} + +static inline void +yp_array_pattern_node_requireds_append(yp_array_pattern_node_t *node, yp_node_t *inner) { + yp_node_list_append(&node->requireds, inner); +} + +// Allocate and initialize a new assoc node. 
+static yp_assoc_node_t * +yp_assoc_node_create(yp_parser_t *parser, yp_node_t *key, const yp_token_t *operator, yp_node_t *value) { + yp_assoc_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_node_t); + const uint8_t *end; + + if (value != NULL) { + end = value->location.end; + } else if (operator->type != YP_TOKEN_NOT_PROVIDED) { + end = operator->end; + } else { + end = key->location.end; + } + + *node = (yp_assoc_node_t) { + { + .type = YP_ASSOC_NODE, + .location = { + .start = key->location.start, + .end = end + }, + }, + .key = key, + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new assoc splat node. +static yp_assoc_splat_node_t * +yp_assoc_splat_node_create(yp_parser_t *parser, yp_node_t *value, const yp_token_t *operator) { + assert(operator->type == YP_TOKEN_USTAR_STAR); + yp_assoc_splat_node_t *node = YP_ALLOC_NODE(parser, yp_assoc_splat_node_t); + + *node = (yp_assoc_splat_node_t) { + { + .type = YP_ASSOC_SPLAT_NODE, + .location = { + .start = operator->start, + .end = value == NULL ? operator->end : value->location.end + }, + }, + .value = value, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate a new BackReferenceReadNode node. +static yp_back_reference_read_node_t * +yp_back_reference_read_node_create(yp_parser_t *parser, const yp_token_t *name) { + assert(name->type == YP_TOKEN_BACK_REFERENCE); + yp_back_reference_read_node_t *node = YP_ALLOC_NODE(parser, yp_back_reference_read_node_t); + + *node = (yp_back_reference_read_node_t) { + { + .type = YP_BACK_REFERENCE_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name), + } + }; + + return node; +} + +// Allocate and initialize new a begin node. 
+static yp_begin_node_t * +yp_begin_node_create(yp_parser_t *parser, const yp_token_t *begin_keyword, yp_statements_node_t *statements) { + yp_begin_node_t *node = YP_ALLOC_NODE(parser, yp_begin_node_t); + + *node = (yp_begin_node_t) { + { + .type = YP_BEGIN_NODE, + .location = { + .start = begin_keyword->start, + .end = statements == NULL ? begin_keyword->end : statements->base.location.end + }, + }, + .begin_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword), + .statements = statements, + .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return node; +} + +// Set the rescue clause, optionally start, and end location of a begin node. +static void +yp_begin_node_rescue_clause_set(yp_begin_node_t *node, yp_rescue_node_t *rescue_clause) { + // If the begin keyword doesn't exist, we set the start on the begin_node + if (!node->begin_keyword_loc.start) { + node->base.location.start = rescue_clause->base.location.start; + } + node->base.location.end = rescue_clause->base.location.end; + node->rescue_clause = rescue_clause; +} + +// Set the else clause and end location of a begin node. +static void +yp_begin_node_else_clause_set(yp_begin_node_t *node, yp_else_node_t *else_clause) { + node->base.location.end = else_clause->base.location.end; + node->else_clause = else_clause; +} + +// Set the ensure clause and end location of a begin node. +static void +yp_begin_node_ensure_clause_set(yp_begin_node_t *node, yp_ensure_node_t *ensure_clause) { + node->base.location.end = ensure_clause->base.location.end; + node->ensure_clause = ensure_clause; +} + +// Set the end keyword and end location of a begin node. 
+static void +yp_begin_node_end_keyword_set(yp_begin_node_t *node, const yp_token_t *end_keyword) { + assert(end_keyword->type == YP_TOKEN_KEYWORD_END || end_keyword->type == YP_TOKEN_MISSING); + + node->base.location.end = end_keyword->end; + node->end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword); +} + +// Allocate and initialize a new BlockArgumentNode node. +static yp_block_argument_node_t * +yp_block_argument_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *expression) { + yp_block_argument_node_t *node = YP_ALLOC_NODE(parser, yp_block_argument_node_t); + + *node = (yp_block_argument_node_t) { + { + .type = YP_BLOCK_ARGUMENT_NODE, + .location = { + .start = operator->start, + .end = expression == NULL ? operator->end : expression->location.end + }, + }, + .expression = expression, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new BlockNode node. +static yp_block_node_t * +yp_block_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *opening, yp_block_parameters_node_t *parameters, yp_node_t *body, const yp_token_t *closing) { + yp_block_node_t *node = YP_ALLOC_NODE(parser, yp_block_node_t); + + *node = (yp_block_node_t) { + { + .type = YP_BLOCK_NODE, + .location = { .start = opening->start, .end = closing->end }, + }, + .locals = *locals, + .parameters = parameters, + .body = body, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing) + }; + + return node; +} + +// Allocate and initialize a new BlockParameterNode node. 
+static yp_block_parameter_node_t * +yp_block_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, const yp_token_t *operator) { + assert(operator->type == YP_TOKEN_NOT_PROVIDED || operator->type == YP_TOKEN_UAMPERSAND || operator->type == YP_TOKEN_AMPERSAND); + yp_block_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameter_node_t); + + *node = (yp_block_parameter_node_t) { + { + .type = YP_BLOCK_PARAMETER_NODE, + .location = { + .start = operator->start, + .end = (name->type == YP_TOKEN_NOT_PROVIDED ? operator->end : name->end) + }, + }, + .name = yp_parser_optional_constant_id_token(parser, name), + .name_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(name), + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new BlockParametersNode node. +static yp_block_parameters_node_t * +yp_block_parameters_node_create(yp_parser_t *parser, yp_parameters_node_t *parameters, const yp_token_t *opening) { + yp_block_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_block_parameters_node_t); + + const uint8_t *start; + if (opening->type != YP_TOKEN_NOT_PROVIDED) { + start = opening->start; + } else if (parameters != NULL) { + start = parameters->base.location.start; + } else { + start = NULL; + } + + const uint8_t *end; + if (parameters != NULL) { + end = parameters->base.location.end; + } else if (opening->type != YP_TOKEN_NOT_PROVIDED) { + end = opening->end; + } else { + end = NULL; + } + + *node = (yp_block_parameters_node_t) { + { + .type = YP_BLOCK_PARAMETERS_NODE, + .location = { + .start = start, + .end = end + } + }, + .parameters = parameters, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .locals = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Set the closing location of a BlockParametersNode node. 
+static void +yp_block_parameters_node_closing_set(yp_block_parameters_node_t *node, const yp_token_t *closing) { + assert(closing->type == YP_TOKEN_PIPE || closing->type == YP_TOKEN_PARENTHESIS_RIGHT || closing->type == YP_TOKEN_MISSING); + + node->base.location.end = closing->end; + node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing); +} + +// Allocate and initialize a new BlockLocalVariableNode node. +static yp_block_local_variable_node_t * +yp_block_local_variable_node_create(yp_parser_t *parser, const yp_token_t *name) { + assert(name->type == YP_TOKEN_IDENTIFIER || name->type == YP_TOKEN_MISSING); + yp_block_local_variable_node_t *node = YP_ALLOC_NODE(parser, yp_block_local_variable_node_t); + + *node = (yp_block_local_variable_node_t) { + { + .type = YP_BLOCK_LOCAL_VARIABLE_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name), + }, + .name = yp_parser_constant_id_token(parser, name) + }; + + return node; +} + +// Append a new block-local variable to a BlockParametersNode node. +static void +yp_block_parameters_node_append_local(yp_block_parameters_node_t *node, const yp_block_local_variable_node_t *local) { + yp_node_list_append(&node->locals, (yp_node_t *) local); + + if (node->base.location.start == NULL) node->base.location.start = local->base.location.start; + node->base.location.end = local->base.location.end; +} + +// Allocate and initialize a new BreakNode node. +static yp_break_node_t * +yp_break_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_node_t *arguments) { + assert(keyword->type == YP_TOKEN_KEYWORD_BREAK); + yp_break_node_t *node = YP_ALLOC_NODE(parser, yp_break_node_t); + + *node = (yp_break_node_t) { + { + .type = YP_BREAK_NODE, + .location = { + .start = keyword->start, + .end = (arguments == NULL ? keyword->end : arguments->base.location.end) + }, + }, + .arguments = arguments, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword) + }; + + return node; +} + +// Allocate and initialize a new CallNode node. 
This sets everything to NULL or +// YP_TOKEN_NOT_PROVIDED as appropriate such that its values can be overridden +// in the various specializations of this function. +static yp_call_node_t * +yp_call_node_create(yp_parser_t *parser) { + yp_call_node_t *node = YP_ALLOC_NODE(parser, yp_call_node_t); + + *node = (yp_call_node_t) { + { + .type = YP_CALL_NODE, + .location = YP_LOCATION_NULL_VALUE(parser), + }, + .receiver = NULL, + .call_operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .message_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .arguments = NULL, + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .block = NULL + }; + + return node; +} + +// Allocate and initialize a new CallNode node from an aref or an aset +// expression. +static yp_call_node_t * +yp_call_node_aref_create(yp_parser_t *parser, yp_node_t *receiver, yp_arguments_t *arguments) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = receiver->location.start; + if (arguments->block != NULL) { + node->base.location.end = arguments->block->location.end; + } else { + node->base.location.end = arguments->closing_loc.end; + } + + node->receiver = receiver; + node->message_loc.start = arguments->opening_loc.start; + node->message_loc.end = arguments->closing_loc.end; + + node->opening_loc = arguments->opening_loc; + node->arguments = arguments->arguments; + node->closing_loc = arguments->closing_loc; + node->block = arguments->block; + + yp_string_constant_init(&node->name, "[]", 2); + return node; +} + +// Allocate and initialize a new CallNode node from a binary expression. 
+static yp_call_node_t * +yp_call_node_binary_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_node_t *argument) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = MIN(receiver->location.start, argument->location.start); + node->base.location.end = MAX(receiver->location.end, argument->location.end); + + node->receiver = receiver; + node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + + yp_arguments_node_t *arguments = yp_arguments_node_create(parser); + yp_arguments_node_arguments_append(arguments, argument); + node->arguments = arguments; + + yp_string_shared_init(&node->name, operator->start, operator->end); + return node; +} + +// Allocate and initialize a new CallNode node from a call expression. +static yp_call_node_t * +yp_call_node_call_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_token_t *message, yp_arguments_t *arguments) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = receiver->location.start; + if (arguments->block != NULL) { + node->base.location.end = arguments->block->location.end; + } else if (arguments->closing_loc.start != NULL) { + node->base.location.end = arguments->closing_loc.end; + } else if (arguments->arguments != NULL) { + node->base.location.end = arguments->arguments->base.location.end; + } else { + node->base.location.end = message->end; + } + + node->receiver = receiver; + node->call_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->opening_loc = arguments->opening_loc; + node->arguments = arguments->arguments; + node->closing_loc = arguments->closing_loc; + node->block = arguments->block; + + if (operator->type == YP_TOKEN_AMPERSAND_DOT) { + node->base.flags |= YP_CALL_NODE_FLAGS_SAFE_NAVIGATION; + } + + yp_string_shared_init(&node->name, message->start, message->end); + return node; +} + +// Allocate and 
initialize a new CallNode node from a call to a method name +// without a receiver that could not have been a local variable read. +static yp_call_node_t * +yp_call_node_fcall_create(yp_parser_t *parser, yp_token_t *message, yp_arguments_t *arguments) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = message->start; + if (arguments->block != NULL) { + node->base.location.end = arguments->block->location.end; + } else if (arguments->closing_loc.start != NULL) { + node->base.location.end = arguments->closing_loc.end; + } else if (arguments->arguments != NULL) { + node->base.location.end = arguments->arguments->base.location.end; + } else { + node->base.location.end = arguments->closing_loc.end; + } + + node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->opening_loc = arguments->opening_loc; + node->arguments = arguments->arguments; + node->closing_loc = arguments->closing_loc; + node->block = arguments->block; + + yp_string_shared_init(&node->name, message->start, message->end); + return node; +} + +// Allocate and initialize a new CallNode node from a not expression. +static yp_call_node_t * +yp_call_node_not_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *message, yp_arguments_t *arguments) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = message->start; + if (arguments->closing_loc.start != NULL) { + node->base.location.end = arguments->closing_loc.end; + } else { + node->base.location.end = receiver->location.end; + } + + node->receiver = receiver; + node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message); + node->opening_loc = arguments->opening_loc; + node->arguments = arguments->arguments; + node->closing_loc = arguments->closing_loc; + + yp_string_constant_init(&node->name, "!", 1); + return node; +} + +// Allocate and initialize a new CallNode node from a call shorthand expression. 
+static yp_call_node_t * +yp_call_node_shorthand_create(yp_parser_t *parser, yp_node_t *receiver, yp_token_t *operator, yp_arguments_t *arguments) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = receiver->location.start; + if (arguments->block != NULL) { + node->base.location.end = arguments->block->location.end; + } else { + node->base.location.end = arguments->closing_loc.end; + } + + node->receiver = receiver; + node->call_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + node->opening_loc = arguments->opening_loc; + node->arguments = arguments->arguments; + node->closing_loc = arguments->closing_loc; + node->block = arguments->block; + + if (operator->type == YP_TOKEN_AMPERSAND_DOT) { + node->base.flags |= YP_CALL_NODE_FLAGS_SAFE_NAVIGATION; + } + + yp_string_constant_init(&node->name, "call", 4); + return node; +} + +// Allocate and initialize a new CallNode node from a unary operator expression. +static yp_call_node_t * +yp_call_node_unary_create(yp_parser_t *parser, yp_token_t *operator, yp_node_t *receiver, const char *name) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location.start = operator->start; + node->base.location.end = receiver->location.end; + + node->receiver = receiver; + node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator); + + yp_string_constant_init(&node->name, name, strlen(name)); + return node; +} + +// Allocate and initialize a new CallNode node from a call to a method name +// without a receiver that could also have been a local variable read. 
+static yp_call_node_t * +yp_call_node_variable_call_create(yp_parser_t *parser, yp_token_t *message) { + yp_call_node_t *node = yp_call_node_create(parser); + + node->base.location = YP_LOCATION_TOKEN_VALUE(message); + node->message_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(message); + + yp_string_shared_init(&node->name, message->start, message->end); + return node; +} + +// Returns whether or not this call node is a "vcall" (a call to a method name +// without a receiver that could also have been a local variable read). +static inline bool +yp_call_node_variable_call_p(yp_call_node_t *node) { + return node->base.flags & YP_CALL_NODE_FLAGS_VARIABLE_CALL; +} + +// Initialize the read name by reading the write name and chopping off the '='. +static void +yp_call_write_read_name_init(yp_string_t *read_name, yp_string_t *write_name) { + if (write_name->length >= 1) { + size_t length = write_name->length - 1; + + void *memory = malloc(length); + memcpy(memory, write_name->source, length); + + yp_string_owned_init(read_name, (uint8_t *) memory, length); + } else { + // We can get here if the message was missing because of a syntax error. + yp_string_constant_init(read_name, "", 0); + } +} + +// Allocate and initialize a new CallAndWriteNode node. 
+static yp_call_and_write_node_t * +yp_call_and_write_node_create(yp_parser_t *parser, yp_call_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(target->block == NULL); + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_call_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_call_and_write_node_t); + + *node = (yp_call_and_write_node_t) { + { + .type = YP_CALL_AND_WRITE_NODE, + .flags = target->base.flags, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .receiver = target->receiver, + .call_operator_loc = target->call_operator_loc, + .message_loc = target->message_loc, + .opening_loc = target->opening_loc, + .arguments = target->arguments, + .closing_loc = target->closing_loc, + .read_name = YP_EMPTY_STRING, + .write_name = target->name, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + yp_call_write_read_name_init(&node->read_name, &node->write_name); + + // Here we're going to free the target, since it is no longer necessary. + // However, we don't want to call `yp_node_destroy` because we want to keep + // around all of its children since we just reused them. + free(target); + + return node; +} + +// Allocate a new CallOperatorWriteNode node. 
+static yp_call_operator_write_node_t * +yp_call_operator_write_node_create(yp_parser_t *parser, yp_call_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(target->block == NULL); + yp_call_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_call_operator_write_node_t); + + *node = (yp_call_operator_write_node_t) { + { + .type = YP_CALL_OPERATOR_WRITE_NODE, + .flags = target->base.flags, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .receiver = target->receiver, + .call_operator_loc = target->call_operator_loc, + .message_loc = target->message_loc, + .opening_loc = target->opening_loc, + .arguments = target->arguments, + .closing_loc = target->closing_loc, + .read_name = YP_EMPTY_STRING, + .write_name = target->name, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1), + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + yp_call_write_read_name_init(&node->read_name, &node->write_name); + + // Here we're going to free the target, since it is no longer necessary. + // However, we don't want to call `yp_node_destroy` because we want to keep + // around all of its children since we just reused them. + free(target); + + return node; +} + +// Allocate and initialize a new CallOperatorOrWriteNode node. 
+static yp_call_or_write_node_t * +yp_call_or_write_node_create(yp_parser_t *parser, yp_call_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(target->block == NULL); + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_call_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_call_or_write_node_t); + + *node = (yp_call_or_write_node_t) { + { + .type = YP_CALL_OR_WRITE_NODE, + .flags = target->base.flags, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .receiver = target->receiver, + .call_operator_loc = target->call_operator_loc, + .message_loc = target->message_loc, + .opening_loc = target->opening_loc, + .arguments = target->arguments, + .closing_loc = target->closing_loc, + .read_name = YP_EMPTY_STRING, + .write_name = target->name, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + yp_call_write_read_name_init(&node->read_name, &node->write_name); + + // Here we're going to free the target, since it is no longer necessary. + // However, we don't want to call `yp_node_destroy` because we want to keep + // around all of its children since we just reused them. + free(target); + + return node; +} + +// Allocate and initialize a new CapturePatternNode node. +static yp_capture_pattern_node_t * +yp_capture_pattern_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *target, const yp_token_t *operator) { + yp_capture_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_capture_pattern_node_t); + + *node = (yp_capture_pattern_node_t) { + { + .type = YP_CAPTURE_PATTERN_NODE, + .location = { + .start = value->location.start, + .end = target->location.end + }, + }, + .value = value, + .target = target, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new CaseNode node. 
+static yp_case_node_t * +yp_case_node_create(yp_parser_t *parser, const yp_token_t *case_keyword, yp_node_t *predicate, yp_else_node_t *consequent, const yp_token_t *end_keyword) { + yp_case_node_t *node = YP_ALLOC_NODE(parser, yp_case_node_t); + + *node = (yp_case_node_t) { + { + .type = YP_CASE_NODE, + .location = { + .start = case_keyword->start, + .end = end_keyword->end + }, + }, + .predicate = predicate, + .consequent = consequent, + .case_keyword_loc = YP_LOCATION_TOKEN_VALUE(case_keyword), + .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword), + .conditions = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Append a new condition to a CaseNode node. +static void +yp_case_node_condition_append(yp_case_node_t *node, yp_node_t *condition) { + assert(YP_NODE_TYPE_P(condition, YP_WHEN_NODE) || YP_NODE_TYPE_P(condition, YP_IN_NODE)); + + yp_node_list_append(&node->conditions, condition); + node->base.location.end = condition->location.end; +} + +// Set the consequent of a CaseNode node. +static void +yp_case_node_consequent_set(yp_case_node_t *node, yp_else_node_t *consequent) { + node->consequent = consequent; + node->base.location.end = consequent->base.location.end; +} + +// Set the end location for a CaseNode node. +static void +yp_case_node_end_keyword_loc_set(yp_case_node_t *node, const yp_token_t *end_keyword) { + node->base.location.end = end_keyword->end; + node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword); +} + +// Allocate a new ClassNode node. 
+static yp_class_node_t * +yp_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, yp_node_t *constant_path, const yp_token_t *name, const yp_token_t *inheritance_operator, yp_node_t *superclass, yp_node_t *body, const yp_token_t *end_keyword) { + yp_class_node_t *node = YP_ALLOC_NODE(parser, yp_class_node_t); + + *node = (yp_class_node_t) { + { + .type = YP_CLASS_NODE, + .location = { .start = class_keyword->start, .end = end_keyword->end }, + }, + .locals = *locals, + .class_keyword_loc = YP_LOCATION_TOKEN_VALUE(class_keyword), + .constant_path = constant_path, + .inheritance_operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator), + .superclass = superclass, + .body = body, + .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword), + .name = yp_parser_constant_id_token(parser, name) + }; + + return node; +} + +// Allocate and initialize a new ClassVariableAndWriteNode node. +static yp_class_variable_and_write_node_t * +yp_class_variable_and_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_class_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_and_write_node_t); + + *node = (yp_class_variable_and_write_node_t) { + { + .type = YP_CLASS_VARIABLE_AND_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ClassVariableOperatorWriteNode node. 
+static yp_class_variable_operator_write_node_t * +yp_class_variable_operator_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_class_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_operator_write_node_t); + + *node = (yp_class_variable_operator_write_node_t) { + { + .type = YP_CLASS_VARIABLE_OPERATOR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1) + }; + + return node; +} + +// Allocate and initialize a new ClassVariableOrWriteNode node. +static yp_class_variable_or_write_node_t * +yp_class_variable_or_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_class_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_or_write_node_t); + + *node = (yp_class_variable_or_write_node_t) { + { + .type = YP_CLASS_VARIABLE_OR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ClassVariableReadNode node. 
+static yp_class_variable_read_node_t * +yp_class_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_CLASS_VARIABLE); + yp_class_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_read_node_t); + + *node = (yp_class_variable_read_node_t) { + { + .type = YP_CLASS_VARIABLE_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .name = yp_parser_constant_id_token(parser, token) + }; + + return node; +} + +// Initialize a new ClassVariableWriteNode node from a ClassVariableRead node. +static yp_class_variable_write_node_t * +yp_class_variable_write_node_create(yp_parser_t *parser, yp_class_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) { + yp_class_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_class_variable_write_node_t); + + *node = (yp_class_variable_write_node_t) { + { + .type = YP_CLASS_VARIABLE_WRITE_NODE, + .location = { + .start = read_node->base.location.start, + .end = value->location.end + }, + }, + .name = read_node->name, + .name_loc = YP_LOCATION_NODE_VALUE((yp_node_t *) read_node), + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ConstantPathAndWriteNode node. 
+static yp_constant_path_and_write_node_t * +yp_constant_path_and_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_constant_path_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_and_write_node_t); + + *node = (yp_constant_path_and_write_node_t) { + { + .type = YP_CONSTANT_PATH_AND_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .target = target, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ConstantPathOperatorWriteNode node. +static yp_constant_path_operator_write_node_t * +yp_constant_path_operator_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_constant_path_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_operator_write_node_t); + + *node = (yp_constant_path_operator_write_node_t) { + { + .type = YP_CONSTANT_PATH_OPERATOR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .target = target, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1) + }; + + return node; +} + +// Allocate and initialize a new ConstantPathOrWriteNode node. 
+static yp_constant_path_or_write_node_t * +yp_constant_path_or_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_constant_path_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_or_write_node_t); + + *node = (yp_constant_path_or_write_node_t) { + { + .type = YP_CONSTANT_PATH_OR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .target = target, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ConstantPathNode node. +static yp_constant_path_node_t * +yp_constant_path_node_create(yp_parser_t *parser, yp_node_t *parent, const yp_token_t *delimiter, yp_node_t *child) { + yp_constant_path_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_node_t); + + *node = (yp_constant_path_node_t) { + { + .type = YP_CONSTANT_PATH_NODE, + .location = { + .start = parent == NULL ? delimiter->start : parent->location.start, + .end = child->location.end + }, + }, + .parent = parent, + .child = child, + .delimiter_loc = YP_LOCATION_TOKEN_VALUE(delimiter) + }; + + return node; +} + +// Allocate a new ConstantPathWriteNode node. +static yp_constant_path_write_node_t * +yp_constant_path_write_node_create(yp_parser_t *parser, yp_constant_path_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_constant_path_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_path_write_node_t); + + *node = (yp_constant_path_write_node_t) { + { + .type = YP_CONSTANT_PATH_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + }, + }, + .target = target, + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ConstantAndWriteNode node. 
+static yp_constant_and_write_node_t * +yp_constant_and_write_node_create(yp_parser_t *parser, yp_constant_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_constant_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_and_write_node_t); + + *node = (yp_constant_and_write_node_t) { + { + .type = YP_CONSTANT_AND_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ConstantOperatorWriteNode node. +static yp_constant_operator_write_node_t * +yp_constant_operator_write_node_create(yp_parser_t *parser, yp_constant_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_constant_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_operator_write_node_t); + + *node = (yp_constant_operator_write_node_t) { + { + .type = YP_CONSTANT_OPERATOR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1) + }; + + return node; +} + +// Allocate and initialize a new ConstantOrWriteNode node. 
+static yp_constant_or_write_node_t * +yp_constant_or_write_node_create(yp_parser_t *parser, yp_constant_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_constant_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_or_write_node_t); + + *node = (yp_constant_or_write_node_t) { + { + .type = YP_CONSTANT_OR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new ConstantReadNode node. +static yp_constant_read_node_t * +yp_constant_read_node_create(yp_parser_t *parser, const yp_token_t *name) { + assert(name->type == YP_TOKEN_CONSTANT || name->type == YP_TOKEN_MISSING); + yp_constant_read_node_t *node = YP_ALLOC_NODE(parser, yp_constant_read_node_t); + + *node = (yp_constant_read_node_t) { + { + .type = YP_CONSTANT_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name) + }, + .name = yp_parser_constant_id_token(parser, name) + }; + + return node; +} + +// Allocate a new ConstantWriteNode node. +static yp_constant_write_node_t * +yp_constant_write_node_create(yp_parser_t *parser, yp_constant_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_constant_write_node_t *node = YP_ALLOC_NODE(parser, yp_constant_write_node_t); + + *node = (yp_constant_write_node_t) { + { + .type = YP_CONSTANT_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new DefNode node. 
+static yp_def_node_t * +yp_def_node_create( + yp_parser_t *parser, + const yp_token_t *name, + yp_node_t *receiver, + yp_parameters_node_t *parameters, + yp_node_t *body, + yp_constant_id_list_t *locals, + const yp_token_t *def_keyword, + const yp_token_t *operator, + const yp_token_t *lparen, + const yp_token_t *rparen, + const yp_token_t *equal, + const yp_token_t *end_keyword +) { + yp_def_node_t *node = YP_ALLOC_NODE(parser, yp_def_node_t); + const uint8_t *end; + + if (end_keyword->type == YP_TOKEN_NOT_PROVIDED) { + end = body->location.end; + } else { + end = end_keyword->end; + } + + *node = (yp_def_node_t) { + { + .type = YP_DEF_NODE, + .location = { .start = def_keyword->start, .end = end }, + }, + .name = yp_parser_constant_id_token(parser, name), + .name_loc = YP_LOCATION_TOKEN_VALUE(name), + .receiver = receiver, + .parameters = parameters, + .body = body, + .locals = *locals, + .def_keyword_loc = YP_LOCATION_TOKEN_VALUE(def_keyword), + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .lparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), + .rparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), + .equal_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(equal), + .end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + }; + + return node; +} + +// Allocate a new DefinedNode node. +static yp_defined_node_t * +yp_defined_node_create(yp_parser_t *parser, const yp_token_t *lparen, yp_node_t *value, const yp_token_t *rparen, const yp_location_t *keyword_loc) { + yp_defined_node_t *node = YP_ALLOC_NODE(parser, yp_defined_node_t); + + *node = (yp_defined_node_t) { + { + .type = YP_DEFINED_NODE, + .location = { + .start = keyword_loc->start, + .end = (rparen->type == YP_TOKEN_NOT_PROVIDED ? 
value->location.end : rparen->end) + }, + }, + .lparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(lparen), + .value = value, + .rparen_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(rparen), + .keyword_loc = *keyword_loc + }; + + return node; +} + +// Allocate and initialize a new ElseNode node. +static yp_else_node_t * +yp_else_node_create(yp_parser_t *parser, const yp_token_t *else_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) { + yp_else_node_t *node = YP_ALLOC_NODE(parser, yp_else_node_t); + const uint8_t *end = NULL; + if ((end_keyword->type == YP_TOKEN_NOT_PROVIDED) && (statements != NULL)) { + end = statements->base.location.end; + } else { + end = end_keyword->end; + } + + *node = (yp_else_node_t) { + { + .type = YP_ELSE_NODE, + .location = { + .start = else_keyword->start, + .end = end, + }, + }, + .else_keyword_loc = YP_LOCATION_TOKEN_VALUE(else_keyword), + .statements = statements, + .end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + }; + + return node; +} + +// Allocate and initialize a new EmbeddedStatementsNode node. +static yp_embedded_statements_node_t * +yp_embedded_statements_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_statements_node_t *statements, const yp_token_t *closing) { + yp_embedded_statements_node_t *node = YP_ALLOC_NODE(parser, yp_embedded_statements_node_t); + + *node = (yp_embedded_statements_node_t) { + { + .type = YP_EMBEDDED_STATEMENTS_NODE, + .location = { + .start = opening->start, + .end = closing->end + } + }, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .statements = statements, + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing) + }; + + return node; +} + +// Allocate and initialize a new EmbeddedVariableNode node. 
+static yp_embedded_variable_node_t * +yp_embedded_variable_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *variable) { + yp_embedded_variable_node_t *node = YP_ALLOC_NODE(parser, yp_embedded_variable_node_t); + + *node = (yp_embedded_variable_node_t) { + { + .type = YP_EMBEDDED_VARIABLE_NODE, + .location = { + .start = operator->start, + .end = variable->location.end + } + }, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .variable = variable + }; + + return node; +} + +// Allocate a new EnsureNode node. +static yp_ensure_node_t * +yp_ensure_node_create(yp_parser_t *parser, const yp_token_t *ensure_keyword, yp_statements_node_t *statements, const yp_token_t *end_keyword) { + yp_ensure_node_t *node = YP_ALLOC_NODE(parser, yp_ensure_node_t); + + *node = (yp_ensure_node_t) { + { + .type = YP_ENSURE_NODE, + .location = { + .start = ensure_keyword->start, + .end = end_keyword->end + }, + }, + .ensure_keyword_loc = YP_LOCATION_TOKEN_VALUE(ensure_keyword), + .statements = statements, + .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword) + }; + + return node; +} + +// Allocate and initialize a new FalseNode node. +static yp_false_node_t * +yp_false_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_FALSE); + yp_false_node_t *node = YP_ALLOC_NODE(parser, yp_false_node_t); + + *node = (yp_false_node_t) {{ + .type = YP_FALSE_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate and initialize a new find pattern node. The node list given in the +// nodes parameter is guaranteed to have at least two nodes. 
+static yp_find_pattern_node_t * +yp_find_pattern_node_create(yp_parser_t *parser, yp_node_list_t *nodes) { + yp_find_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_find_pattern_node_t); + + yp_node_t *left = nodes->nodes[0]; + yp_node_t *right; + + if (nodes->size == 1) { + right = (yp_node_t *) yp_missing_node_create(parser, left->location.end, left->location.end); + } else { + right = nodes->nodes[nodes->size - 1]; + } + + *node = (yp_find_pattern_node_t) { + { + .type = YP_FIND_PATTERN_NODE, + .location = { + .start = left->location.start, + .end = right->location.end, + }, + }, + .constant = NULL, + .left = left, + .right = right, + .requireds = YP_EMPTY_NODE_LIST, + .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + // For now we're going to just copy over each pointer manually. This could be + // much more efficient, as we could instead resize the node list to only point + // to 1...-1. + for (size_t index = 1; index < nodes->size - 1; index++) { + yp_node_list_append(&node->requireds, nodes->nodes[index]); + } + + return node; +} + +// Allocate and initialize a new FloatNode node. +static yp_float_node_t * +yp_float_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT); + yp_float_node_t *node = YP_ALLOC_NODE(parser, yp_float_node_t); + + *node = (yp_float_node_t) {{ + .type = YP_FLOAT_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate and initialize a new FloatNode node from a FLOAT_IMAGINARY token. 
+static yp_imaginary_node_t * +yp_float_node_imaginary_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_IMAGINARY_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_float_node_create(parser, &((yp_token_t) { + .type = YP_TOKEN_FLOAT, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL token. +static yp_rational_node_t * +yp_float_node_rational_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT_RATIONAL); + + yp_rational_node_t *node = YP_ALLOC_NODE(parser, yp_rational_node_t); + *node = (yp_rational_node_t) { + { + .type = YP_RATIONAL_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_float_node_create(parser, &((yp_token_t) { + .type = YP_TOKEN_FLOAT, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new FloatNode node from a FLOAT_RATIONAL_IMAGINARY token. +static yp_imaginary_node_t * +yp_float_node_rational_imaginary_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_FLOAT_RATIONAL_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_IMAGINARY_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_float_node_rational_create(parser, &((yp_token_t) { + .type = YP_TOKEN_FLOAT_RATIONAL, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new ForNode node. 
+static yp_for_node_t * +yp_for_node_create( + yp_parser_t *parser, + yp_node_t *index, + yp_node_t *collection, + yp_statements_node_t *statements, + const yp_token_t *for_keyword, + const yp_token_t *in_keyword, + const yp_token_t *do_keyword, + const yp_token_t *end_keyword +) { + yp_for_node_t *node = YP_ALLOC_NODE(parser, yp_for_node_t); + + *node = (yp_for_node_t) { + { + .type = YP_FOR_NODE, + .location = { + .start = for_keyword->start, + .end = end_keyword->end + }, + }, + .index = index, + .collection = collection, + .statements = statements, + .for_keyword_loc = YP_LOCATION_TOKEN_VALUE(for_keyword), + .in_keyword_loc = YP_LOCATION_TOKEN_VALUE(in_keyword), + .do_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword), + .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword) + }; + + return node; +} + +// Allocate and initialize a new ForwardingArgumentsNode node. +static yp_forwarding_arguments_node_t * +yp_forwarding_arguments_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_UDOT_DOT_DOT); + yp_forwarding_arguments_node_t *node = YP_ALLOC_NODE(parser, yp_forwarding_arguments_node_t); + *node = (yp_forwarding_arguments_node_t) {{ .type = YP_FORWARDING_ARGUMENTS_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }}; + return node; +} + +// Allocate and initialize a new ForwardingParameterNode node. +static yp_forwarding_parameter_node_t * +yp_forwarding_parameter_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_UDOT_DOT_DOT); + yp_forwarding_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_forwarding_parameter_node_t); + *node = (yp_forwarding_parameter_node_t) {{ .type = YP_FORWARDING_PARAMETER_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }}; + return node; +} + +// Allocate and initialize a new ForwardingSuper node. 
+static yp_forwarding_super_node_t * +yp_forwarding_super_node_create(yp_parser_t *parser, const yp_token_t *token, yp_arguments_t *arguments) { + assert(arguments->block == NULL || YP_NODE_TYPE_P(arguments->block, YP_BLOCK_NODE)); + assert(token->type == YP_TOKEN_KEYWORD_SUPER); + yp_forwarding_super_node_t *node = YP_ALLOC_NODE(parser, yp_forwarding_super_node_t); + + yp_block_node_t *block = NULL; + if (arguments->block != NULL) { + block = (yp_block_node_t *) arguments->block; + } + + *node = (yp_forwarding_super_node_t) { + { + .type = YP_FORWARDING_SUPER_NODE, + .location = { + .start = token->start, + .end = block != NULL ? block->base.location.end : token->end + }, + }, + .block = block + }; + + return node; +} + +// Allocate and initialize a new hash pattern node from an opening and closing +// token. +static yp_hash_pattern_node_t * +yp_hash_pattern_node_empty_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) { + yp_hash_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_hash_pattern_node_t); + + *node = (yp_hash_pattern_node_t) { + { + .type = YP_HASH_PATTERN_NODE, + .location = { + .start = opening->start, + .end = closing->end + }, + }, + .constant = NULL, + .kwrest = NULL, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing), + .assocs = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Allocate and initialize a new hash pattern node. 
+static yp_hash_pattern_node_t * +yp_hash_pattern_node_node_list_create(yp_parser_t *parser, yp_node_list_t *assocs) { + yp_hash_pattern_node_t *node = YP_ALLOC_NODE(parser, yp_hash_pattern_node_t); + + *node = (yp_hash_pattern_node_t) { + { + .type = YP_HASH_PATTERN_NODE, + .location = { + .start = assocs->nodes[0]->location.start, + .end = assocs->nodes[assocs->size - 1]->location.end + }, + }, + .constant = NULL, + .kwrest = NULL, + .assocs = YP_EMPTY_NODE_LIST, + .opening_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + for (size_t index = 0; index < assocs->size; index++) { + yp_node_t *assoc = assocs->nodes[index]; + yp_node_list_append(&node->assocs, assoc); + } + + return node; +} + +// Retrieve the name from a node that will become a global variable write node. +static yp_constant_id_t +yp_global_variable_write_name(yp_parser_t *parser, yp_node_t *target) { + if (YP_NODE_TYPE_P(target, YP_GLOBAL_VARIABLE_READ_NODE)) { + return ((yp_global_variable_read_node_t *) target)->name; + } + + assert(YP_NODE_TYPE_P(target, YP_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(target, YP_NUMBERED_REFERENCE_READ_NODE)); + + // This will only ever happen in the event of a syntax error, but we + // still need to provide something for the node. + return yp_parser_constant_id_location(parser, target->location.start, target->location.end); +} + +// Allocate and initialize a new GlobalVariableAndWriteNode node. 
+static yp_global_variable_and_write_node_t * +yp_global_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_global_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_and_write_node_t); + + *node = (yp_global_variable_and_write_node_t) { + { + .type = YP_GLOBAL_VARIABLE_AND_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + } + }, + .name = yp_global_variable_write_name(parser, target), + .name_loc = target->location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new GlobalVariableOperatorWriteNode node. +static yp_global_variable_operator_write_node_t * +yp_global_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_global_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_operator_write_node_t); + + *node = (yp_global_variable_operator_write_node_t) { + { + .type = YP_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + } + }, + .name = yp_global_variable_write_name(parser, target), + .name_loc = target->location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1) + }; + + return node; +} + +// Allocate and initialize a new GlobalVariableOrWriteNode node. 
+static yp_global_variable_or_write_node_t * +yp_global_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_global_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_or_write_node_t); + + *node = (yp_global_variable_or_write_node_t) { + { + .type = YP_GLOBAL_VARIABLE_OR_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + } + }, + .name = yp_global_variable_write_name(parser, target), + .name_loc = target->location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate a new GlobalVariableReadNode node. +static yp_global_variable_read_node_t * +yp_global_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name) { + yp_global_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_read_node_t); + + *node = (yp_global_variable_read_node_t) { + { + .type = YP_GLOBAL_VARIABLE_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name), + }, + .name = yp_parser_constant_id_token(parser, name) + }; + + return node; +} + +// Allocate a new GlobalVariableWriteNode node. +static yp_global_variable_write_node_t * +yp_global_variable_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_global_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_global_variable_write_node_t); + + *node = (yp_global_variable_write_node_t) { + { + .type = YP_GLOBAL_VARIABLE_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + }, + }, + .name = yp_global_variable_write_name(parser, target), + .name_loc = YP_LOCATION_NODE_VALUE(target), + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate a new HashNode node. 
+static yp_hash_node_t * +yp_hash_node_create(yp_parser_t *parser, const yp_token_t *opening) { + assert(opening != NULL); + yp_hash_node_t *node = YP_ALLOC_NODE(parser, yp_hash_node_t); + + *node = (yp_hash_node_t) { + { + .type = YP_HASH_NODE, + .location = YP_LOCATION_TOKEN_VALUE(opening) + }, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_NULL_VALUE(parser), + .elements = YP_EMPTY_NODE_LIST + }; + + return node; +} + +static inline void +yp_hash_node_elements_append(yp_hash_node_t *hash, yp_node_t *element) { + yp_node_list_append(&hash->elements, element); +} + +static inline void +yp_hash_node_closing_loc_set(yp_hash_node_t *hash, yp_token_t *token) { + hash->base.location.end = token->end; + hash->closing_loc = YP_LOCATION_TOKEN_VALUE(token); +} + +// Allocate a new IfNode node. +static yp_if_node_t * +yp_if_node_create(yp_parser_t *parser, + const yp_token_t *if_keyword, + yp_node_t *predicate, + yp_statements_node_t *statements, + yp_node_t *consequent, + const yp_token_t *end_keyword +) { + yp_conditional_predicate(predicate); + yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t); + + const uint8_t *end; + if (end_keyword->type != YP_TOKEN_NOT_PROVIDED) { + end = end_keyword->end; + } else if (consequent != NULL) { + end = consequent->location.end; + } else if (yp_statements_node_body_length(statements) != 0) { + end = statements->base.location.end; + } else { + end = predicate->location.end; + } + + *node = (yp_if_node_t) { + { + .type = YP_IF_NODE, + .flags = YP_NODE_FLAG_NEWLINE, + .location = { + .start = if_keyword->start, + .end = end + }, + }, + .if_keyword_loc = YP_LOCATION_TOKEN_VALUE(if_keyword), + .predicate = predicate, + .statements = statements, + .consequent = consequent, + .end_keyword_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword) + }; + + return node; +} + +// Allocate and initialize new IfNode node in the modifier form. 
+static yp_if_node_t * +yp_if_node_modifier_create(yp_parser_t *parser, yp_node_t *statement, const yp_token_t *if_keyword, yp_node_t *predicate) { + yp_conditional_predicate(predicate); + yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t); + + yp_statements_node_t *statements = yp_statements_node_create(parser); + yp_statements_node_body_append(statements, statement); + + *node = (yp_if_node_t) { + { + .type = YP_IF_NODE, + .flags = YP_NODE_FLAG_NEWLINE, + .location = { + .start = statement->location.start, + .end = predicate->location.end + }, + }, + .if_keyword_loc = YP_LOCATION_TOKEN_VALUE(if_keyword), + .predicate = predicate, + .statements = statements, + .consequent = NULL, + .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return node; +} + +// Allocate and initialize an if node from a ternary expression. +static yp_if_node_t * +yp_if_node_ternary_create(yp_parser_t *parser, yp_node_t *predicate, yp_node_t *true_expression, const yp_token_t *colon, yp_node_t *false_expression) { + yp_conditional_predicate(predicate); + + yp_statements_node_t *if_statements = yp_statements_node_create(parser); + yp_statements_node_body_append(if_statements, true_expression); + + yp_statements_node_t *else_statements = yp_statements_node_create(parser); + yp_statements_node_body_append(else_statements, false_expression); + + yp_token_t end_keyword = not_provided(parser); + yp_else_node_t *else_node = yp_else_node_create(parser, colon, else_statements, &end_keyword); + + yp_if_node_t *node = YP_ALLOC_NODE(parser, yp_if_node_t); + + *node = (yp_if_node_t) { + { + .type = YP_IF_NODE, + .flags = YP_NODE_FLAG_NEWLINE, + .location = { + .start = predicate->location.start, + .end = false_expression->location.end, + }, + }, + .if_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .predicate = predicate, + .statements = if_statements, + .consequent = (yp_node_t *)else_node, + .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return 
node; + +} + +static inline void +yp_if_node_end_keyword_loc_set(yp_if_node_t *node, const yp_token_t *keyword) { + node->base.location.end = keyword->end; + node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword); +} + +static inline void +yp_else_node_end_keyword_loc_set(yp_else_node_t *node, const yp_token_t *keyword) { + node->base.location.end = keyword->end; + node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword); +} + +// Allocate and initialize a new ImplicitNode node. +static yp_implicit_node_t * +yp_implicit_node_create(yp_parser_t *parser, yp_node_t *value) { + yp_implicit_node_t *node = YP_ALLOC_NODE(parser, yp_implicit_node_t); + + *node = (yp_implicit_node_t) { + { + .type = YP_IMPLICIT_NODE, + .location = value->location + }, + .value = value + }; + + return node; +} + +// Allocate and initialize a new IntegerNode node. +static yp_integer_node_t * +yp_integer_node_create(yp_parser_t *parser, yp_node_flags_t base, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER); + yp_integer_node_t *node = YP_ALLOC_NODE(parser, yp_integer_node_t); + + *node = (yp_integer_node_t) {{ + .type = YP_INTEGER_NODE, + .flags = base | YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate and initialize a new IntegerNode node from an INTEGER_IMAGINARY token. 
+static yp_imaginary_node_t * +yp_integer_node_imaginary_create(yp_parser_t *parser, yp_node_flags_t base, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_IMAGINARY_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_integer_node_create(parser, base, &((yp_token_t) { + .type = YP_TOKEN_INTEGER, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL token. +static yp_rational_node_t * +yp_integer_node_rational_create(yp_parser_t *parser, yp_node_flags_t base, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER_RATIONAL); + + yp_rational_node_t *node = YP_ALLOC_NODE(parser, yp_rational_node_t); + *node = (yp_rational_node_t) { + { + .type = YP_RATIONAL_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_integer_node_create(parser, base, &((yp_token_t) { + .type = YP_TOKEN_INTEGER, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new IntegerNode node from an INTEGER_RATIONAL_IMAGINARY token. 
+static yp_imaginary_node_t * +yp_integer_node_rational_imaginary_create(yp_parser_t *parser, yp_node_flags_t base, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INTEGER_RATIONAL_IMAGINARY); + + yp_imaginary_node_t *node = YP_ALLOC_NODE(parser, yp_imaginary_node_t); + *node = (yp_imaginary_node_t) { + { + .type = YP_IMAGINARY_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .numeric = (yp_node_t *) yp_integer_node_rational_create(parser, base, &((yp_token_t) { + .type = YP_TOKEN_INTEGER_RATIONAL, + .start = token->start, + .end = token->end - 1 + })) + }; + + return node; +} + +// Allocate and initialize a new InNode node. +static yp_in_node_t * +yp_in_node_create(yp_parser_t *parser, yp_node_t *pattern, yp_statements_node_t *statements, const yp_token_t *in_keyword, const yp_token_t *then_keyword) { + yp_in_node_t *node = YP_ALLOC_NODE(parser, yp_in_node_t); + + const uint8_t *end; + if (statements != NULL) { + end = statements->base.location.end; + } else if (then_keyword->type != YP_TOKEN_NOT_PROVIDED) { + end = then_keyword->end; + } else { + end = pattern->location.end; + } + + *node = (yp_in_node_t) { + { + .type = YP_IN_NODE, + .location = { + .start = in_keyword->start, + .end = end + }, + }, + .pattern = pattern, + .statements = statements, + .in_loc = YP_LOCATION_TOKEN_VALUE(in_keyword), + .then_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword) + }; + + return node; +} + +// Allocate and initialize a new InstanceVariableAndWriteNode node. 
+static yp_instance_variable_and_write_node_t * +yp_instance_variable_and_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_instance_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_and_write_node_t); + + *node = (yp_instance_variable_and_write_node_t) { + { + .type = YP_INSTANCE_VARIABLE_AND_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new InstanceVariableOperatorWriteNode node. +static yp_instance_variable_operator_write_node_t * +yp_instance_variable_operator_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_instance_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_operator_write_node_t); + + *node = (yp_instance_variable_operator_write_node_t) { + { + .type = YP_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1) + }; + + return node; +} + +// Allocate and initialize a new InstanceVariableOrWriteNode node. 
+static yp_instance_variable_or_write_node_t * +yp_instance_variable_or_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *target, const yp_token_t *operator, yp_node_t *value) { + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_instance_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_or_write_node_t); + + *node = (yp_instance_variable_or_write_node_t) { + { + .type = YP_INSTANCE_VARIABLE_OR_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .name = target->name, + .name_loc = target->base.location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new InstanceVariableReadNode node. +static yp_instance_variable_read_node_t * +yp_instance_variable_read_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_INSTANCE_VARIABLE); + yp_instance_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_read_node_t); + + *node = (yp_instance_variable_read_node_t) { + { + .type = YP_INSTANCE_VARIABLE_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .name = yp_parser_constant_id_token(parser, token) + }; + + return node; +} + +// Initialize a new InstanceVariableWriteNode node from an InstanceVariableRead node. 
+static yp_instance_variable_write_node_t * +yp_instance_variable_write_node_create(yp_parser_t *parser, yp_instance_variable_read_node_t *read_node, yp_token_t *operator, yp_node_t *value) { + yp_instance_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_instance_variable_write_node_t); + *node = (yp_instance_variable_write_node_t) { + { + .type = YP_INSTANCE_VARIABLE_WRITE_NODE, + .location = { + .start = read_node->base.location.start, + .end = value->location.end + } + }, + .name = read_node->name, + .name_loc = YP_LOCATION_NODE_BASE_VALUE(read_node), + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate a new InterpolatedRegularExpressionNode node. +static yp_interpolated_regular_expression_node_t * +yp_interpolated_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening) { + yp_interpolated_regular_expression_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_regular_expression_node_t); + + *node = (yp_interpolated_regular_expression_node_t) { + { + .type = YP_INTERPOLATED_REGULAR_EXPRESSION_NODE, + .location = { + .start = opening->start, + .end = NULL, + }, + }, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(opening), + .parts = YP_EMPTY_NODE_LIST + }; + + return node; +} + +static inline void +yp_interpolated_regular_expression_node_append(yp_interpolated_regular_expression_node_t *node, yp_node_t *part) { + if (node->base.location.start > part->location.start) { + node->base.location.start = part->location.start; + } + if (node->base.location.end < part->location.end) { + node->base.location.end = part->location.end; + } + yp_node_list_append(&node->parts, part); +} + +static inline void +yp_interpolated_regular_expression_node_closing_set(yp_interpolated_regular_expression_node_t *node, const yp_token_t *closing) { + node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing); + node->base.location.end = closing->end; + 
node->base.flags |= yp_regular_expression_flags_create(closing); +} + +// Allocate and initialize a new InterpolatedStringNode node. +static yp_interpolated_string_node_t * +yp_interpolated_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_node_list_t *parts, const yp_token_t *closing) { + yp_interpolated_string_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_string_node_t); + + *node = (yp_interpolated_string_node_t) { + { + .type = YP_INTERPOLATED_STRING_NODE, + .location = { + .start = opening->start, + .end = closing->end, + }, + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .parts = parts == NULL ? YP_EMPTY_NODE_LIST : *parts + }; + + return node; +} + +// Append a part to an InterpolatedStringNode node. +static inline void +yp_interpolated_string_node_append(yp_interpolated_string_node_t *node, yp_node_t *part) { + if (node->parts.size == 0 && node->opening_loc.start == NULL) { + node->base.location.start = part->location.start; + } + + yp_node_list_append(&node->parts, part); + node->base.location.end = part->location.end; +} + +// Set the closing token of the given InterpolatedStringNode node. +static void +yp_interpolated_string_node_closing_set(yp_interpolated_string_node_t *node, const yp_token_t *closing) { + node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing); + node->base.location.end = closing->end; +} + +// Allocate and initialize a new InterpolatedSymbolNode node. 
+static yp_interpolated_symbol_node_t * +yp_interpolated_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_node_list_t *parts, const yp_token_t *closing) { + yp_interpolated_symbol_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_symbol_node_t); + + *node = (yp_interpolated_symbol_node_t) { + { + .type = YP_INTERPOLATED_SYMBOL_NODE, + .location = { + .start = opening->start, + .end = closing->end, + }, + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .parts = parts == NULL ? YP_EMPTY_NODE_LIST : *parts + }; + + return node; +} + +static inline void +yp_interpolated_symbol_node_append(yp_interpolated_symbol_node_t *node, yp_node_t *part) { + if (node->parts.size == 0 && node->opening_loc.start == NULL) { + node->base.location.start = part->location.start; + } + + yp_node_list_append(&node->parts, part); + node->base.location.end = part->location.end; +} + +// Allocate a new InterpolatedXStringNode node. 
+static yp_interpolated_x_string_node_t * +yp_interpolated_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *closing) { + yp_interpolated_x_string_node_t *node = YP_ALLOC_NODE(parser, yp_interpolated_x_string_node_t); + + *node = (yp_interpolated_x_string_node_t) { + { + .type = YP_INTERPOLATED_X_STRING_NODE, + .location = { + .start = opening->start, + .end = closing->end + }, + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .parts = YP_EMPTY_NODE_LIST + }; + + return node; +} + +static inline void +yp_interpolated_xstring_node_append(yp_interpolated_x_string_node_t *node, yp_node_t *part) { + yp_node_list_append(&node->parts, part); + node->base.location.end = part->location.end; +} + +static inline void +yp_interpolated_xstring_node_closing_set(yp_interpolated_x_string_node_t *node, const yp_token_t *closing) { + node->closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing); + node->base.location.end = closing->end; +} + +// Allocate a new KeywordHashNode node. +static yp_keyword_hash_node_t * +yp_keyword_hash_node_create(yp_parser_t *parser) { + yp_keyword_hash_node_t *node = YP_ALLOC_NODE(parser, yp_keyword_hash_node_t); + + *node = (yp_keyword_hash_node_t) { + .base = { + .type = YP_KEYWORD_HASH_NODE, + .location = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }, + .elements = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Append an element to a KeywordHashNode node. +static void +yp_keyword_hash_node_elements_append(yp_keyword_hash_node_t *hash, yp_node_t *element) { + yp_node_list_append(&hash->elements, element); + if (hash->base.location.start == NULL) { + hash->base.location.start = element->location.start; + } + hash->base.location.end = element->location.end; +} + +// Allocate a new KeywordParameterNode node. 
+static yp_keyword_parameter_node_t * +yp_keyword_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, yp_node_t *value) { + yp_keyword_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_keyword_parameter_node_t); + + *node = (yp_keyword_parameter_node_t) { + { + .type = YP_KEYWORD_PARAMETER_NODE, + .location = { + .start = name->start, + .end = value == NULL ? name->end : value->location.end + }, + }, + .name = yp_parser_constant_id_location(parser, name->start, name->end - 1), + .name_loc = YP_LOCATION_TOKEN_VALUE(name), + .value = value + }; + + return node; +} + +// Allocate a new KeywordRestParameterNode node. +static yp_keyword_rest_parameter_node_t * +yp_keyword_rest_parameter_node_create(yp_parser_t *parser, const yp_token_t *operator, const yp_token_t *name) { + yp_keyword_rest_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_keyword_rest_parameter_node_t); + + *node = (yp_keyword_rest_parameter_node_t) { + { + .type = YP_KEYWORD_REST_PARAMETER_NODE, + .location = { + .start = operator->start, + .end = (name->type == YP_TOKEN_NOT_PROVIDED ? operator->end : name->end) + }, + }, + .name = yp_parser_optional_constant_id_token(parser, name), + .name_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(name), + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate a new LambdaNode node. 
+static yp_lambda_node_t * +yp_lambda_node_create( + yp_parser_t *parser, + yp_constant_id_list_t *locals, + const yp_token_t *operator, + const yp_token_t *opening, + const yp_token_t *closing, + yp_block_parameters_node_t *parameters, + yp_node_t *body +) { + yp_lambda_node_t *node = YP_ALLOC_NODE(parser, yp_lambda_node_t); + + *node = (yp_lambda_node_t) { + { + .type = YP_LAMBDA_NODE, + .location = { + .start = operator->start, + .end = closing->end + }, + }, + .locals = *locals, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing), + .parameters = parameters, + .body = body + }; + + return node; +} + +// Allocate and initialize a new LocalVariableAndWriteNode node. +static yp_local_variable_and_write_node_t * +yp_local_variable_and_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) { + assert(YP_NODE_TYPE_P(target, YP_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_CALL_NODE)); + assert(operator->type == YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + yp_local_variable_and_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_and_write_node_t); + + *node = (yp_local_variable_and_write_node_t) { + { + .type = YP_LOCAL_VARIABLE_AND_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + } + }, + .name_loc = target->location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .name = name, + .depth = depth + }; + + return node; +} + +// Allocate and initialize a new LocalVariableOperatorWriteNode node. 
+static yp_local_variable_operator_write_node_t * +yp_local_variable_operator_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) { + yp_local_variable_operator_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_operator_write_node_t); + + *node = (yp_local_variable_operator_write_node_t) { + { + .type = YP_LOCAL_VARIABLE_OPERATOR_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + } + }, + .name_loc = target->location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .name = name, + .operator = yp_parser_constant_id_location(parser, operator->start, operator->end - 1), + .depth = depth + }; + + return node; +} + +// Allocate and initialize a new LocalVariableOrWriteNode node. +static yp_local_variable_or_write_node_t * +yp_local_variable_or_write_node_create(yp_parser_t *parser, yp_node_t *target, const yp_token_t *operator, yp_node_t *value, yp_constant_id_t name, uint32_t depth) { + assert(YP_NODE_TYPE_P(target, YP_LOCAL_VARIABLE_READ_NODE) || YP_NODE_TYPE_P(target, YP_CALL_NODE)); + assert(operator->type == YP_TOKEN_PIPE_PIPE_EQUAL); + yp_local_variable_or_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_or_write_node_t); + + *node = (yp_local_variable_or_write_node_t) { + { + .type = YP_LOCAL_VARIABLE_OR_WRITE_NODE, + .location = { + .start = target->location.start, + .end = value->location.end + } + }, + .name_loc = target->location, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value, + .name = name, + .depth = depth + }; + + return node; +} + +// Allocate a new LocalVariableReadNode node. 
+static yp_local_variable_read_node_t * +yp_local_variable_read_node_create(yp_parser_t *parser, const yp_token_t *name, uint32_t depth) { + yp_local_variable_read_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_read_node_t); + + *node = (yp_local_variable_read_node_t) { + { + .type = YP_LOCAL_VARIABLE_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name) + }, + .name = yp_parser_constant_id_token(parser, name), + .depth = depth + }; + + return node; +} + +// Allocate and initialize a new LocalVariableWriteNode node. +static yp_local_variable_write_node_t * +yp_local_variable_write_node_create(yp_parser_t *parser, yp_constant_id_t name, uint32_t depth, yp_node_t *value, const yp_location_t *name_loc, const yp_token_t *operator) { + yp_local_variable_write_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_write_node_t); + + *node = (yp_local_variable_write_node_t) { + { + .type = YP_LOCAL_VARIABLE_WRITE_NODE, + .location = { + .start = name_loc->start, + .end = value->location.end + } + }, + .name = name, + .depth = depth, + .value = value, + .name_loc = *name_loc, + .operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new LocalVariableTargetNode node. +static yp_local_variable_target_node_t * +yp_local_variable_target_node_create(yp_parser_t *parser, const yp_token_t *name) { + yp_local_variable_target_node_t *node = YP_ALLOC_NODE(parser, yp_local_variable_target_node_t); + + *node = (yp_local_variable_target_node_t) { + { + .type = YP_LOCAL_VARIABLE_TARGET_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name) + }, + .name = yp_parser_constant_id_token(parser, name), + .depth = 0 + }; + + return node; +} + +// Allocate and initialize a new MatchPredicateNode node. 
+static yp_match_predicate_node_t * +yp_match_predicate_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *pattern, const yp_token_t *operator) { + yp_match_predicate_node_t *node = YP_ALLOC_NODE(parser, yp_match_predicate_node_t); + + *node = (yp_match_predicate_node_t) { + { + .type = YP_MATCH_PREDICATE_NODE, + .location = { + .start = value->location.start, + .end = pattern->location.end + } + }, + .value = value, + .pattern = pattern, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new MatchRequiredNode node. +static yp_match_required_node_t * +yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *pattern, const yp_token_t *operator) { + yp_match_required_node_t *node = YP_ALLOC_NODE(parser, yp_match_required_node_t); + + *node = (yp_match_required_node_t) { + { + .type = YP_MATCH_REQUIRED_NODE, + .location = { + .start = value->location.start, + .end = pattern->location.end + } + }, + .value = value, + .pattern = pattern, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new MatchWriteNode node. +static yp_match_write_node_t * +yp_match_write_node_create(yp_parser_t *parser, yp_call_node_t *call) { + yp_match_write_node_t *node = YP_ALLOC_NODE(parser, yp_match_write_node_t); + + *node = (yp_match_write_node_t) { + { + .type = YP_MATCH_WRITE_NODE, + .location = call->base.location + }, + .call = call + }; + + yp_constant_id_list_init(&node->locals); + return node; +} + +// Allocate a new ModuleNode node. 
+static yp_module_node_t * +yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) { + yp_module_node_t *node = YP_ALLOC_NODE(parser, yp_module_node_t); + + *node = (yp_module_node_t) { + { + .type = YP_MODULE_NODE, + .location = { + .start = module_keyword->start, + .end = end_keyword->end + } + }, + .locals = (locals == NULL ? ((yp_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals), + .module_keyword_loc = YP_LOCATION_TOKEN_VALUE(module_keyword), + .constant_path = constant_path, + .body = body, + .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword), + .name = yp_parser_constant_id_token(parser, name) + }; + + return node; +} + +// Allocate and initialize new MultiTargetNode node. +static yp_multi_target_node_t * +yp_multi_target_node_create(yp_parser_t *parser) { + yp_multi_target_node_t *node = YP_ALLOC_NODE(parser, yp_multi_target_node_t); + + *node = (yp_multi_target_node_t) { + { + .type = YP_MULTI_TARGET_NODE, + .location = { .start = NULL, .end = NULL } + }, + .targets = YP_EMPTY_NODE_LIST, + .lparen_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .rparen_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return node; +} + +// Append a target to a MultiTargetNode node. +static void +yp_multi_target_node_targets_append(yp_multi_target_node_t *node, yp_node_t *target) { + yp_node_list_append(&node->targets, target); + + if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) { + node->base.location.start = target->location.start; + } + + if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) { + node->base.location.end = target->location.end; + } +} + +// Allocate a new MultiWriteNode node. 
+static yp_multi_write_node_t * +yp_multi_write_node_create(yp_parser_t *parser, yp_multi_target_node_t *target, const yp_token_t *operator, yp_node_t *value) { + yp_multi_write_node_t *node = YP_ALLOC_NODE(parser, yp_multi_write_node_t); + + *node = (yp_multi_write_node_t) { + { + .type = YP_MULTI_WRITE_NODE, + .location = { + .start = target->base.location.start, + .end = value->location.end + } + }, + .targets = target->targets, + .lparen_loc = target->lparen_loc, + .rparen_loc = target->rparen_loc, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + // Explicitly do not call yp_node_destroy here because we want to keep + // around all of the information within the MultiWriteNode node. + free(target); + + return node; +} + +// Allocate and initialize a new NextNode node. +static yp_next_node_t * +yp_next_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_node_t *arguments) { + assert(keyword->type == YP_TOKEN_KEYWORD_NEXT); + yp_next_node_t *node = YP_ALLOC_NODE(parser, yp_next_node_t); + + *node = (yp_next_node_t) { + { + .type = YP_NEXT_NODE, + .location = { + .start = keyword->start, + .end = (arguments == NULL ? keyword->end : arguments->base.location.end) + } + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .arguments = arguments + }; + + return node; +} + +// Allocate and initialize a new NilNode node. +static yp_nil_node_t * +yp_nil_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_NIL); + yp_nil_node_t *node = YP_ALLOC_NODE(parser, yp_nil_node_t); + + *node = (yp_nil_node_t) {{ + .type = YP_NIL_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate and initialize a new NoKeywordsParameterNode node. 
+static yp_no_keywords_parameter_node_t * +yp_no_keywords_parameter_node_create(yp_parser_t *parser, const yp_token_t *operator, const yp_token_t *keyword) { + assert(operator->type == YP_TOKEN_USTAR_STAR || operator->type == YP_TOKEN_STAR_STAR); + assert(keyword->type == YP_TOKEN_KEYWORD_NIL); + yp_no_keywords_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_no_keywords_parameter_node_t); + + *node = (yp_no_keywords_parameter_node_t) { + { + .type = YP_NO_KEYWORDS_PARAMETER_NODE, + .location = { + .start = operator->start, + .end = keyword->end + } + }, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword) + }; + + return node; +} + +// Allocate a new NthReferenceReadNode node. +static yp_numbered_reference_read_node_t * +yp_numbered_reference_read_node_create(yp_parser_t *parser, const yp_token_t *name) { + assert(name->type == YP_TOKEN_NUMBERED_REFERENCE); + yp_numbered_reference_read_node_t *node = YP_ALLOC_NODE(parser, yp_numbered_reference_read_node_t); + + *node = (yp_numbered_reference_read_node_t) { + { + .type = YP_NUMBERED_REFERENCE_READ_NODE, + .location = YP_LOCATION_TOKEN_VALUE(name), + }, + .number = parse_decimal_number(parser, name->start + 1, name->end) + }; + + return node; +} + +// Allocate a new OptionalParameterNode node. +static yp_optional_parameter_node_t * +yp_optional_parameter_node_create(yp_parser_t *parser, const yp_token_t *name, const yp_token_t *operator, yp_node_t *value) { + yp_optional_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_optional_parameter_node_t); + + *node = (yp_optional_parameter_node_t) { + { + .type = YP_OPTIONAL_PARAMETER_NODE, + .location = { + .start = name->start, + .end = value->location.end + } + }, + .name = yp_parser_constant_id_token(parser, name), + .name_loc = YP_LOCATION_TOKEN_VALUE(name), + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .value = value + }; + + return node; +} + +// Allocate and initialize a new OrNode node. 
+static yp_or_node_t * +yp_or_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operator, yp_node_t *right) { + yp_or_node_t *node = YP_ALLOC_NODE(parser, yp_or_node_t); + + *node = (yp_or_node_t) { + { + .type = YP_OR_NODE, + .location = { + .start = left->location.start, + .end = right->location.end + } + }, + .left = left, + .right = right, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new ParametersNode node. +static yp_parameters_node_t * +yp_parameters_node_create(yp_parser_t *parser) { + yp_parameters_node_t *node = YP_ALLOC_NODE(parser, yp_parameters_node_t); + + *node = (yp_parameters_node_t) { + { + .type = YP_PARAMETERS_NODE, + .location = YP_LOCATION_TOKEN_VALUE(&parser->current) + }, + .rest = NULL, + .keyword_rest = NULL, + .block = NULL, + .requireds = YP_EMPTY_NODE_LIST, + .optionals = YP_EMPTY_NODE_LIST, + .posts = YP_EMPTY_NODE_LIST, + .keywords = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Set the location properly for the parameters node. +static void +yp_parameters_node_location_set(yp_parameters_node_t *params, yp_node_t *param) { + if (params->base.location.start == NULL) { + params->base.location.start = param->location.start; + } else { + params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start; + } + + if (params->base.location.end == NULL) { + params->base.location.end = param->location.end; + } else { + params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end; + } +} + +// Append a required parameter to a ParametersNode node. +static void +yp_parameters_node_requireds_append(yp_parameters_node_t *params, yp_node_t *param) { + yp_parameters_node_location_set(params, param); + yp_node_list_append(¶ms->requireds, param); +} + +// Append an optional parameter to a ParametersNode node. 
+static void +yp_parameters_node_optionals_append(yp_parameters_node_t *params, yp_optional_parameter_node_t *param) { + yp_parameters_node_location_set(params, (yp_node_t *) param); + yp_node_list_append(¶ms->optionals, (yp_node_t *) param); +} + +// Append a post optional arguments parameter to a ParametersNode node. +static void +yp_parameters_node_posts_append(yp_parameters_node_t *params, yp_node_t *param) { + yp_parameters_node_location_set(params, param); + yp_node_list_append(¶ms->posts, param); +} + +// Set the rest parameter on a ParametersNode node. +static void +yp_parameters_node_rest_set(yp_parameters_node_t *params, yp_rest_parameter_node_t *param) { + assert(params->rest == NULL); + yp_parameters_node_location_set(params, (yp_node_t *) param); + params->rest = param; +} + +// Append a keyword parameter to a ParametersNode node. +static void +yp_parameters_node_keywords_append(yp_parameters_node_t *params, yp_node_t *param) { + yp_parameters_node_location_set(params, param); + yp_node_list_append(¶ms->keywords, param); +} + +// Set the keyword rest parameter on a ParametersNode node. +static void +yp_parameters_node_keyword_rest_set(yp_parameters_node_t *params, yp_node_t *param) { + assert(params->keyword_rest == NULL); + yp_parameters_node_location_set(params, param); + params->keyword_rest = param; +} + +// Set the block parameter on a ParametersNode node. +static void +yp_parameters_node_block_set(yp_parameters_node_t *params, yp_block_parameter_node_t *param) { + assert(params->block == NULL); + yp_parameters_node_location_set(params, (yp_node_t *) param); + params->block = param; +} + +// Allocate a new ProgramNode node. +static yp_program_node_t * +yp_program_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, yp_statements_node_t *statements) { + yp_program_node_t *node = YP_ALLOC_NODE(parser, yp_program_node_t); + + *node = (yp_program_node_t) { + { + .type = YP_PROGRAM_NODE, + .location = { + .start = statements == NULL ? 
parser->start : statements->base.location.start, + .end = statements == NULL ? parser->end : statements->base.location.end + } + }, + .locals = *locals, + .statements = statements + }; + + return node; +} + +// Allocate and initialize new ParenthesesNode node. +static yp_parentheses_node_t * +yp_parentheses_node_create(yp_parser_t *parser, const yp_token_t *opening, yp_node_t *body, const yp_token_t *closing) { + yp_parentheses_node_t *node = YP_ALLOC_NODE(parser, yp_parentheses_node_t); + + *node = (yp_parentheses_node_t) { + { + .type = YP_PARENTHESES_NODE, + .location = { + .start = opening->start, + .end = closing->end + } + }, + .body = body, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing) + }; + + return node; +} + +// Allocate and initialize a new PinnedExpressionNode node. +static yp_pinned_expression_node_t * +yp_pinned_expression_node_create(yp_parser_t *parser, yp_node_t *expression, const yp_token_t *operator, const yp_token_t *lparen, const yp_token_t *rparen) { + yp_pinned_expression_node_t *node = YP_ALLOC_NODE(parser, yp_pinned_expression_node_t); + + *node = (yp_pinned_expression_node_t) { + { + .type = YP_PINNED_EXPRESSION_NODE, + .location = { + .start = operator->start, + .end = rparen->end + } + }, + .expression = expression, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .lparen_loc = YP_LOCATION_TOKEN_VALUE(lparen), + .rparen_loc = YP_LOCATION_TOKEN_VALUE(rparen) + }; + + return node; +} + +// Allocate and initialize a new PinnedVariableNode node. 
+static yp_pinned_variable_node_t * +yp_pinned_variable_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *variable) { + yp_pinned_variable_node_t *node = YP_ALLOC_NODE(parser, yp_pinned_variable_node_t); + + *node = (yp_pinned_variable_node_t) { + { + .type = YP_PINNED_VARIABLE_NODE, + .location = { + .start = operator->start, + .end = variable->location.end + } + }, + .variable = variable, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new PostExecutionNode node. +static yp_post_execution_node_t * +yp_post_execution_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *opening, yp_statements_node_t *statements, const yp_token_t *closing) { + yp_post_execution_node_t *node = YP_ALLOC_NODE(parser, yp_post_execution_node_t); + + *node = (yp_post_execution_node_t) { + { + .type = YP_POST_EXECUTION_NODE, + .location = { + .start = keyword->start, + .end = closing->end + } + }, + .statements = statements, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing) + }; + + return node; +} + +// Allocate and initialize a new PreExecutionNode node. +static yp_pre_execution_node_t * +yp_pre_execution_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *opening, yp_statements_node_t *statements, const yp_token_t *closing) { + yp_pre_execution_node_t *node = YP_ALLOC_NODE(parser, yp_pre_execution_node_t); + + *node = (yp_pre_execution_node_t) { + { + .type = YP_PRE_EXECUTION_NODE, + .location = { + .start = keyword->start, + .end = closing->end + } + }, + .statements = statements, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing) + }; + + return node; +} + +// Allocate and initialize new RangeNode node. 
+static yp_range_node_t * +yp_range_node_create(yp_parser_t *parser, yp_node_t *left, const yp_token_t *operator, yp_node_t *right) { + yp_range_node_t *node = YP_ALLOC_NODE(parser, yp_range_node_t); + + *node = (yp_range_node_t) { + { + .type = YP_RANGE_NODE, + .location = { + .start = (left == NULL ? operator->start : left->location.start), + .end = (right == NULL ? operator->end : right->location.end) + } + }, + .left = left, + .right = right, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + switch (operator->type) { + case YP_TOKEN_DOT_DOT_DOT: + case YP_TOKEN_UDOT_DOT_DOT: + node->base.flags |= YP_RANGE_FLAGS_EXCLUDE_END; + break; + default: + break; + } + + return node; +} + +// Allocate and initialize a new RedoNode node. +static yp_redo_node_t * +yp_redo_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_REDO); + yp_redo_node_t *node = YP_ALLOC_NODE(parser, yp_redo_node_t); + + *node = (yp_redo_node_t) {{ .type = YP_REDO_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }}; + return node; +} + +// Allocate a new RegularExpressionNode node. +static yp_regular_expression_node_t * +yp_regular_expression_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) { + yp_regular_expression_node_t *node = YP_ALLOC_NODE(parser, yp_regular_expression_node_t); + + *node = (yp_regular_expression_node_t) { + { + .type = YP_REGULAR_EXPRESSION_NODE, + .flags = yp_regular_expression_flags_create(closing), + .location = { + .start = MIN(opening->start, closing->start), + .end = MAX(opening->end, closing->end) + } + }, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .content_loc = YP_LOCATION_TOKEN_VALUE(content), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing), + .unescaped = YP_EMPTY_STRING + }; + + return node; +} + +// Allocate a new RequiredDestructuredParameterNode node. 
+static yp_required_destructured_parameter_node_t * +yp_required_destructured_parameter_node_create(yp_parser_t *parser, const yp_token_t *opening) { + yp_required_destructured_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_required_destructured_parameter_node_t); + + *node = (yp_required_destructured_parameter_node_t) { + { + .type = YP_REQUIRED_DESTRUCTURED_PARAMETER_NODE, + .location = YP_LOCATION_TOKEN_VALUE(opening) + }, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .parameters = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Append a new parameter to the given RequiredDestructuredParameterNode node. +static void +yp_required_destructured_parameter_node_append_parameter(yp_required_destructured_parameter_node_t *node, yp_node_t *parameter) { + yp_node_list_append(&node->parameters, parameter); +} + +// Set the closing token of the given RequiredDestructuredParameterNode node. +static void +yp_required_destructured_parameter_node_closing_set(yp_required_destructured_parameter_node_t *node, const yp_token_t *closing) { + node->closing_loc = YP_LOCATION_TOKEN_VALUE(closing); + node->base.location.end = closing->end; +} + +// Allocate a new RequiredParameterNode node. +static yp_required_parameter_node_t * +yp_required_parameter_node_create(yp_parser_t *parser, const yp_token_t *token) { + yp_required_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_required_parameter_node_t); + + *node = (yp_required_parameter_node_t) { + { + .type = YP_REQUIRED_PARAMETER_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token) + }, + .name = yp_parser_constant_id_token(parser, token) + }; + + return node; +} + +// Allocate a new RescueModifierNode node. 
+static yp_rescue_modifier_node_t * +yp_rescue_modifier_node_create(yp_parser_t *parser, yp_node_t *expression, const yp_token_t *keyword, yp_node_t *rescue_expression) { + yp_rescue_modifier_node_t *node = YP_ALLOC_NODE(parser, yp_rescue_modifier_node_t); + + *node = (yp_rescue_modifier_node_t) { + { + .type = YP_RESCUE_MODIFIER_NODE, + .location = { + .start = expression->location.start, + .end = rescue_expression->location.end + } + }, + .expression = expression, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .rescue_expression = rescue_expression + }; + + return node; +} + +// Allocate and initiliaze a new RescueNode node. +static yp_rescue_node_t * +yp_rescue_node_create(yp_parser_t *parser, const yp_token_t *keyword) { + yp_rescue_node_t *node = YP_ALLOC_NODE(parser, yp_rescue_node_t); + + *node = (yp_rescue_node_t) { + { + .type = YP_RESCUE_NODE, + .location = YP_LOCATION_TOKEN_VALUE(keyword) + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .operator_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .reference = NULL, + .statements = NULL, + .consequent = NULL, + .exceptions = YP_EMPTY_NODE_LIST + }; + + return node; +} + +static inline void +yp_rescue_node_operator_set(yp_rescue_node_t *node, const yp_token_t *operator) { + node->operator_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(operator); +} + +// Set the reference of a rescue node, and update the location of the node. +static void +yp_rescue_node_reference_set(yp_rescue_node_t *node, yp_node_t *reference) { + node->reference = reference; + node->base.location.end = reference->location.end; +} + +// Set the statements of a rescue node, and update the location of the node. +static void +yp_rescue_node_statements_set(yp_rescue_node_t *node, yp_statements_node_t *statements) { + node->statements = statements; + if (yp_statements_node_body_length(statements) > 0) { + node->base.location.end = statements->base.location.end; + } +} + +// Set the consequent of a rescue node, and update the location. 
+static void +yp_rescue_node_consequent_set(yp_rescue_node_t *node, yp_rescue_node_t *consequent) { + node->consequent = consequent; + node->base.location.end = consequent->base.location.end; +} + +// Append an exception node to a rescue node, and update the location. +static void +yp_rescue_node_exceptions_append(yp_rescue_node_t *node, yp_node_t *exception) { + yp_node_list_append(&node->exceptions, exception); + node->base.location.end = exception->location.end; +} + +// Allocate a new RestParameterNode node. +static yp_rest_parameter_node_t * +yp_rest_parameter_node_create(yp_parser_t *parser, const yp_token_t *operator, const yp_token_t *name) { + yp_rest_parameter_node_t *node = YP_ALLOC_NODE(parser, yp_rest_parameter_node_t); + + *node = (yp_rest_parameter_node_t) { + { + .type = YP_REST_PARAMETER_NODE, + .location = { + .start = operator->start, + .end = (name->type == YP_TOKEN_NOT_PROVIDED ? operator->end : name->end) + } + }, + .name = yp_parser_optional_constant_id_token(parser, name), + .name_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(name), + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator) + }; + + return node; +} + +// Allocate and initialize a new RetryNode node. +static yp_retry_node_t * +yp_retry_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_RETRY); + yp_retry_node_t *node = YP_ALLOC_NODE(parser, yp_retry_node_t); + + *node = (yp_retry_node_t) {{ .type = YP_RETRY_NODE, .location = YP_LOCATION_TOKEN_VALUE(token) }}; + return node; +} + +// Allocate a new ReturnNode node. +static yp_return_node_t * +yp_return_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_node_t *arguments) { + yp_return_node_t *node = YP_ALLOC_NODE(parser, yp_return_node_t); + + *node = (yp_return_node_t) { + { + .type = YP_RETURN_NODE, + .location = { + .start = keyword->start, + .end = (arguments == NULL ? 
keyword->end : arguments->base.location.end) + } + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .arguments = arguments + }; + + return node; +} + +// Allocate and initialize a new SelfNode node. +static yp_self_node_t * +yp_self_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_SELF); + yp_self_node_t *node = YP_ALLOC_NODE(parser, yp_self_node_t); + + *node = (yp_self_node_t) {{ + .type = YP_SELF_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate a new SingletonClassNode node. +static yp_singleton_class_node_t * +yp_singleton_class_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *class_keyword, const yp_token_t *operator, yp_node_t *expression, yp_node_t *body, const yp_token_t *end_keyword) { + yp_singleton_class_node_t *node = YP_ALLOC_NODE(parser, yp_singleton_class_node_t); + + *node = (yp_singleton_class_node_t) { + { + .type = YP_SINGLETON_CLASS_NODE, + .location = { + .start = class_keyword->start, + .end = end_keyword->end + } + }, + .locals = *locals, + .class_keyword_loc = YP_LOCATION_TOKEN_VALUE(class_keyword), + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .expression = expression, + .body = body, + .end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword) + }; + + return node; +} + +// Allocate and initialize a new SourceEncodingNode node. +static yp_source_encoding_node_t * +yp_source_encoding_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD___ENCODING__); + yp_source_encoding_node_t *node = YP_ALLOC_NODE(parser, yp_source_encoding_node_t); + + *node = (yp_source_encoding_node_t) {{ + .type = YP_SOURCE_ENCODING_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate and initialize a new SourceFileNode node. 
+static yp_source_file_node_t* +yp_source_file_node_create(yp_parser_t *parser, const yp_token_t *file_keyword) { + yp_source_file_node_t *node = YP_ALLOC_NODE(parser, yp_source_file_node_t); + assert(file_keyword->type == YP_TOKEN_KEYWORD___FILE__); + + *node = (yp_source_file_node_t) { + { + .type = YP_SOURCE_FILE_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(file_keyword), + }, + .filepath = parser->filepath_string, + }; + + return node; +} + +// Allocate and initialize a new SourceLineNode node. +static yp_source_line_node_t * +yp_source_line_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD___LINE__); + yp_source_line_node_t *node = YP_ALLOC_NODE(parser, yp_source_line_node_t); + + *node = (yp_source_line_node_t) {{ + .type = YP_SOURCE_LINE_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate a new SplatNode node. +static yp_splat_node_t * +yp_splat_node_create(yp_parser_t *parser, const yp_token_t *operator, yp_node_t *expression) { + yp_splat_node_t *node = YP_ALLOC_NODE(parser, yp_splat_node_t); + + *node = (yp_splat_node_t) { + { + .type = YP_SPLAT_NODE, + .location = { + .start = operator->start, + .end = (expression == NULL ? operator->end : expression->location.end) + } + }, + .operator_loc = YP_LOCATION_TOKEN_VALUE(operator), + .expression = expression + }; + + return node; +} + +// Allocate and initialize a new StatementsNode node. +static yp_statements_node_t * +yp_statements_node_create(yp_parser_t *parser) { + yp_statements_node_t *node = YP_ALLOC_NODE(parser, yp_statements_node_t); + + *node = (yp_statements_node_t) { + { + .type = YP_STATEMENTS_NODE, + .location = YP_LOCATION_NULL_VALUE(parser) + }, + .body = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Get the length of the given StatementsNode node's body. 
+static size_t +yp_statements_node_body_length(yp_statements_node_t *node) { + return node && node->body.size; +} + +// Set the location of the given StatementsNode. +static void +yp_statements_node_location_set(yp_statements_node_t *node, const uint8_t *start, const uint8_t *end) { + node->base.location = (yp_location_t) { .start = start, .end = end }; +} + +// Append a new node to the given StatementsNode node's body. +static void +yp_statements_node_body_append(yp_statements_node_t *node, yp_node_t *statement) { + if (yp_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) { + node->base.location.start = statement->location.start; + } + if (statement->location.end > node->base.location.end) { + node->base.location.end = statement->location.end; + } + + yp_node_list_append(&node->body, statement); + + // Every statement gets marked as a place where a newline can occur. + statement->flags |= YP_NODE_FLAG_NEWLINE; +} + +// Allocate a new StringConcatNode node. +static yp_string_concat_node_t * +yp_string_concat_node_create(yp_parser_t *parser, yp_node_t *left, yp_node_t *right) { + yp_string_concat_node_t *node = YP_ALLOC_NODE(parser, yp_string_concat_node_t); + + *node = (yp_string_concat_node_t) { + { + .type = YP_STRING_CONCAT_NODE, + .location = { + .start = left->location.start, + .end = right->location.end + } + }, + .left = left, + .right = right + }; + + return node; +} + +// Allocate a new StringNode node. +static yp_string_node_t * +yp_string_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) { + yp_string_node_t *node = YP_ALLOC_NODE(parser, yp_string_node_t); + yp_node_flags_t flags = 0; + + if (parser->frozen_string_literal) { + flags = YP_NODE_FLAG_STATIC_LITERAL | YP_STRING_FLAGS_FROZEN; + } + + *node = (yp_string_node_t) { + { + .type = YP_STRING_NODE, + .flags = flags, + .location = { + .start = (opening->type == YP_TOKEN_NOT_PROVIDED ? 
content->start : opening->start), + .end = (closing->type == YP_TOKEN_NOT_PROVIDED ? content->end : closing->end) + } + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .content_loc = YP_LOCATION_TOKEN_VALUE(content), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .unescaped = YP_EMPTY_STRING + }; + + return node; +} + +// Allocate and initialize a new SuperNode node. +static yp_super_node_t * +yp_super_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_arguments_t *arguments) { + assert(keyword->type == YP_TOKEN_KEYWORD_SUPER); + yp_super_node_t *node = YP_ALLOC_NODE(parser, yp_super_node_t); + + const uint8_t *end; + if (arguments->block != NULL) { + end = arguments->block->location.end; + } else if (arguments->closing_loc.start != NULL) { + end = arguments->closing_loc.end; + } else if (arguments->arguments != NULL) { + end = arguments->arguments->base.location.end; + } else { + assert(false && "unreachable"); + end = NULL; + } + + *node = (yp_super_node_t) { + { + .type = YP_SUPER_NODE, + .location = { + .start = keyword->start, + .end = end, + } + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .lparen_loc = arguments->opening_loc, + .arguments = arguments->arguments, + .rparen_loc = arguments->closing_loc, + .block = arguments->block + }; + + return node; +} + +// Allocate a new SymbolNode node. +static yp_symbol_node_t * +yp_symbol_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *value, const yp_token_t *closing) { + yp_symbol_node_t *node = YP_ALLOC_NODE(parser, yp_symbol_node_t); + + *node = (yp_symbol_node_t) { + { + .type = YP_SYMBOL_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = { + .start = (opening->type == YP_TOKEN_NOT_PROVIDED ? value->start : opening->start), + .end = (closing->type == YP_TOKEN_NOT_PROVIDED ? 
value->end : closing->end) + } + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .value_loc = YP_LOCATION_TOKEN_VALUE(value), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .unescaped = YP_EMPTY_STRING + }; + + return node; +} + +// Allocate and initialize a new SymbolNode node from a label. +static yp_symbol_node_t * +yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) { + yp_symbol_node_t *node; + + switch (token->type) { + case YP_TOKEN_LABEL: { + yp_token_t opening = not_provided(parser); + yp_token_t closing = { .type = YP_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end }; + + yp_token_t label = { .type = YP_TOKEN_LABEL, .start = token->start, .end = token->end - 1 }; + node = yp_symbol_node_create(parser, &opening, &label, &closing); + + assert((label.end - label.start) >= 0); + yp_string_shared_init(&node->unescaped, label.start, label.end); + + yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL); + break; + } + case YP_TOKEN_MISSING: { + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + + yp_token_t label = { .type = YP_TOKEN_LABEL, .start = token->start, .end = token->end }; + node = yp_symbol_node_create(parser, &opening, &label, &closing); + break; + } + default: + assert(false && "unreachable"); + node = NULL; + break; + } + + return node; +} + +// Check if the given node is a label in a hash. +static bool +yp_symbol_node_label_p(yp_node_t *node) { + const uint8_t *end = NULL; + + switch (YP_NODE_TYPE(node)) { + case YP_SYMBOL_NODE: + end = ((yp_symbol_node_t *) node)->closing_loc.end; + break; + case YP_INTERPOLATED_SYMBOL_NODE: + end = ((yp_interpolated_symbol_node_t *) node)->closing_loc.end; + break; + default: + return false; + } + + return (end != NULL) && (end[-1] == ':'); +} + +// Convert the given StringNode node to a SymbolNode node. 
+static yp_symbol_node_t * +yp_string_node_to_symbol_node(yp_parser_t *parser, yp_string_node_t *node, const yp_token_t *opening, const yp_token_t *closing) { + yp_symbol_node_t *new_node = YP_ALLOC_NODE(parser, yp_symbol_node_t); + + *new_node = (yp_symbol_node_t) { + { + .type = YP_SYMBOL_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = { + .start = opening->start, + .end = closing->end + } + }, + .opening_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(opening), + .value_loc = node->content_loc, + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .unescaped = node->unescaped + }; + + // We are explicitly _not_ using yp_node_destroy here because we don't want + // to trash the unescaped string. We could instead copy the string if we + // know that it is owned, but we're taking the fast path for now. + free(node); + + return new_node; +} + +// Convert the given SymbolNode node to a StringNode node. +static yp_string_node_t * +yp_symbol_node_to_string_node(yp_parser_t *parser, yp_symbol_node_t *node) { + yp_string_node_t *new_node = YP_ALLOC_NODE(parser, yp_string_node_t); + yp_node_flags_t flags = 0; + + if (parser->frozen_string_literal) { + flags = YP_NODE_FLAG_STATIC_LITERAL | YP_STRING_FLAGS_FROZEN; + } + + *new_node = (yp_string_node_t) { + { + .type = YP_STRING_NODE, + .flags = flags, + .location = node->base.location + }, + .opening_loc = node->opening_loc, + .content_loc = node->value_loc, + .closing_loc = node->closing_loc, + .unescaped = node->unescaped + }; + + // We are explicitly _not_ using yp_node_destroy here because we don't want + // to trash the unescaped string. We could instead copy the string if we + // know that it is owned, but we're taking the fast path for now. + free(node); + + return new_node; +} + +// Allocate and initialize a new TrueNode node. 
+static yp_true_node_t * +yp_true_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_TRUE); + yp_true_node_t *node = YP_ALLOC_NODE(parser, yp_true_node_t); + + *node = (yp_true_node_t) {{ + .type = YP_TRUE_NODE, + .flags = YP_NODE_FLAG_STATIC_LITERAL, + .location = YP_LOCATION_TOKEN_VALUE(token) + }}; + + return node; +} + +// Allocate and initialize a new UndefNode node. +static yp_undef_node_t * +yp_undef_node_create(yp_parser_t *parser, const yp_token_t *token) { + assert(token->type == YP_TOKEN_KEYWORD_UNDEF); + yp_undef_node_t *node = YP_ALLOC_NODE(parser, yp_undef_node_t); + + *node = (yp_undef_node_t) { + { + .type = YP_UNDEF_NODE, + .location = YP_LOCATION_TOKEN_VALUE(token), + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(token), + .names = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Append a name to an undef node. +static void +yp_undef_node_append(yp_undef_node_t *node, yp_node_t *name) { + node->base.location.end = name->location.end; + yp_node_list_append(&node->names, name); +} + +// Allocate a new UnlessNode node. +static yp_unless_node_t * +yp_unless_node_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements) { + yp_conditional_predicate(predicate); + yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t); + + const uint8_t *end; + if (statements != NULL) { + end = statements->base.location.end; + } else { + end = predicate->location.end; + } + + *node = (yp_unless_node_t) { + { + .type = YP_UNLESS_NODE, + .flags = YP_NODE_FLAG_NEWLINE, + .location = { + .start = keyword->start, + .end = end + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .predicate = predicate, + .statements = statements, + .consequent = NULL, + .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return node; +} + +// Allocate and initialize new UnlessNode node in the modifier form. 
+static yp_unless_node_t * +yp_unless_node_modifier_create(yp_parser_t *parser, yp_node_t *statement, const yp_token_t *unless_keyword, yp_node_t *predicate) { + yp_conditional_predicate(predicate); + yp_unless_node_t *node = YP_ALLOC_NODE(parser, yp_unless_node_t); + + yp_statements_node_t *statements = yp_statements_node_create(parser); + yp_statements_node_body_append(statements, statement); + + *node = (yp_unless_node_t) { + { + .type = YP_UNLESS_NODE, + .flags = YP_NODE_FLAG_NEWLINE, + .location = { + .start = statement->location.start, + .end = predicate->location.end + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(unless_keyword), + .predicate = predicate, + .statements = statements, + .consequent = NULL, + .end_keyword_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE + }; + + return node; +} + +static inline void +yp_unless_node_end_keyword_loc_set(yp_unless_node_t *node, const yp_token_t *end_keyword) { + node->end_keyword_loc = YP_LOCATION_TOKEN_VALUE(end_keyword); + node->base.location.end = end_keyword->end; +} + +// Allocate a new UntilNode node. +static yp_until_node_t * +yp_until_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) { + yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t); + + *node = (yp_until_node_t) { + { + .type = YP_UNTIL_NODE, + .flags = flags, + .location = { + .start = keyword->start, + .end = closing->end, + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .predicate = predicate, + .statements = statements + }; + + return node; +} + +// Allocate a new UntilNode node. 
+static yp_until_node_t * +yp_until_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) { + yp_until_node_t *node = YP_ALLOC_NODE(parser, yp_until_node_t); + + *node = (yp_until_node_t) { + { + .type = YP_UNTIL_NODE, + .flags = flags, + .location = { + .start = statements->base.location.start, + .end = predicate->location.end, + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .predicate = predicate, + .statements = statements + }; + + return node; +} + +// Allocate and initialize a new WhenNode node. +static yp_when_node_t * +yp_when_node_create(yp_parser_t *parser, const yp_token_t *keyword) { + yp_when_node_t *node = YP_ALLOC_NODE(parser, yp_when_node_t); + + *node = (yp_when_node_t) { + { + .type = YP_WHEN_NODE, + .location = { + .start = keyword->start, + .end = NULL + } + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .statements = NULL, + .conditions = YP_EMPTY_NODE_LIST + }; + + return node; +} + +// Append a new condition to a when node. +static void +yp_when_node_conditions_append(yp_when_node_t *node, yp_node_t *condition) { + node->base.location.end = condition->location.end; + yp_node_list_append(&node->conditions, condition); +} + +// Set the statements list of a when node. +static void +yp_when_node_statements_set(yp_when_node_t *node, yp_statements_node_t *statements) { + if (statements->base.location.end > node->base.location.end) { + node->base.location.end = statements->base.location.end; + } + + node->statements = statements; +} + +// Allocate a new WhileNode node. 
+static yp_while_node_t * +yp_while_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_token_t *closing, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) { + yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t); + + *node = (yp_while_node_t) { + { + .type = YP_WHILE_NODE, + .flags = flags, + .location = { + .start = keyword->start, + .end = closing->end + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .closing_loc = YP_OPTIONAL_LOCATION_TOKEN_VALUE(closing), + .predicate = predicate, + .statements = statements + }; + + return node; +} + +// Allocate a new WhileNode node. +static yp_while_node_t * +yp_while_node_modifier_create(yp_parser_t *parser, const yp_token_t *keyword, yp_node_t *predicate, yp_statements_node_t *statements, yp_node_flags_t flags) { + yp_while_node_t *node = YP_ALLOC_NODE(parser, yp_while_node_t); + + *node = (yp_while_node_t) { + { + .type = YP_WHILE_NODE, + .flags = flags, + .location = { + .start = statements->base.location.start, + .end = predicate->location.end + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .closing_loc = YP_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE, + .predicate = predicate, + .statements = statements + }; + + return node; +} + +// Allocate and initialize a new XStringNode node. +static yp_x_string_node_t * +yp_xstring_node_create(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) { + yp_x_string_node_t *node = YP_ALLOC_NODE(parser, yp_x_string_node_t); + + *node = (yp_x_string_node_t) { + { + .type = YP_X_STRING_NODE, + .location = { + .start = opening->start, + .end = closing->end + }, + }, + .opening_loc = YP_LOCATION_TOKEN_VALUE(opening), + .content_loc = YP_LOCATION_TOKEN_VALUE(content), + .closing_loc = YP_LOCATION_TOKEN_VALUE(closing), + .unescaped = YP_EMPTY_STRING + }; + + return node; +} + +// Allocate a new YieldNode node. 
+static yp_yield_node_t * +yp_yield_node_create(yp_parser_t *parser, const yp_token_t *keyword, const yp_location_t *lparen_loc, yp_arguments_node_t *arguments, const yp_location_t *rparen_loc) { + yp_yield_node_t *node = YP_ALLOC_NODE(parser, yp_yield_node_t); + + const uint8_t *end; + if (rparen_loc->start != NULL) { + end = rparen_loc->end; + } else if (arguments != NULL) { + end = arguments->base.location.end; + } else if (lparen_loc->start != NULL) { + end = lparen_loc->end; + } else { + end = keyword->end; + } + + *node = (yp_yield_node_t) { + { + .type = YP_YIELD_NODE, + .location = { + .start = keyword->start, + .end = end + }, + }, + .keyword_loc = YP_LOCATION_TOKEN_VALUE(keyword), + .lparen_loc = *lparen_loc, + .arguments = arguments, + .rparen_loc = *rparen_loc + }; + + return node; +} + + +#undef YP_EMPTY_STRING +#undef YP_ALLOC_NODE + +/******************************************************************************/ +/* Scope-related functions */ +/******************************************************************************/ + +// Allocate and initialize a new scope. Push it onto the scope stack. +static bool +yp_parser_scope_push(yp_parser_t *parser, bool closed) { + yp_scope_t *scope = (yp_scope_t *) malloc(sizeof(yp_scope_t)); + if (scope == NULL) return false; + + *scope = (yp_scope_t) { + .previous = parser->current_scope, + .closed = closed, + .explicit_params = false, + .numbered_params = false + }; + + yp_constant_id_list_init(&scope->locals); + parser->current_scope = scope; + + return true; +} + +// Check if the current scope has a given local variables. 
+static int +yp_parser_local_depth(yp_parser_t *parser, yp_token_t *token) { + yp_constant_id_t constant_id = yp_parser_constant_id_token(parser, token); + yp_scope_t *scope = parser->current_scope; + int depth = 0; + + while (scope != NULL) { + if (yp_constant_id_list_includes(&scope->locals, constant_id)) return depth; + if (scope->closed) break; + + scope = scope->previous; + depth++; + } + + return -1; +} + +// Add a constant id to the local table of the current scope. +static inline void +yp_parser_local_add(yp_parser_t *parser, yp_constant_id_t constant_id) { + if (!yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) { + yp_constant_id_list_append(&parser->current_scope->locals, constant_id); + } +} + +// Add a local variable from a location to the current scope. +static yp_constant_id_t +yp_parser_local_add_location(yp_parser_t *parser, const uint8_t *start, const uint8_t *end) { + yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, start, end); + if (constant_id != 0) yp_parser_local_add(parser, constant_id); + return constant_id; +} + +// Add a local variable from a token to the current scope. +static inline void +yp_parser_local_add_token(yp_parser_t *parser, yp_token_t *token) { + yp_parser_local_add_location(parser, token->start, token->end); +} + +// Add a local variable from an owned string to the current scope. +static inline void +yp_parser_local_add_owned(yp_parser_t *parser, const uint8_t *start, size_t length) { + yp_constant_id_t constant_id = yp_parser_constant_id_owned(parser, start, length); + if (constant_id != 0) yp_parser_local_add(parser, constant_id); +} + +static inline bool +token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) { + return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (yp_char_is_decimal_digit(start[1])); +} + +// Add a parameter name to the current scope and check whether the name of the +// parameter is unique or not. 
+static void +yp_parser_parameter_name_check(yp_parser_t *parser, yp_token_t *name) { + // We want to check whether the parameter name is a numbered parameter or not. + if (token_is_numbered_parameter(name->start, name->end)) { + yp_diagnostic_list_append(&parser->error_list, name->start, name->end, YP_ERR_PARAMETER_NUMBERED_RESERVED); + } + + // We want to ignore any parameter name that starts with an underscore. + if ((*name->start == '_')) return; + + // Otherwise we'll fetch the constant id for the parameter name and check + // whether it's already in the current scope. + yp_constant_id_t constant_id = yp_parser_constant_id_token(parser, name); + + if (yp_constant_id_list_includes(&parser->current_scope->locals, constant_id)) { + yp_diagnostic_list_append(&parser->error_list, name->start, name->end, YP_ERR_PARAMETER_NAME_REPEAT); + } +} + +// Pop the current scope off the scope stack. Note that we specifically do not +// free the associated constant list because we assume that we have already +// transferred ownership of the list to the AST somewhere. +static void +yp_parser_scope_pop(yp_parser_t *parser) { + yp_scope_t *scope = parser->current_scope; + parser->current_scope = scope->previous; + free(scope); +} + +/******************************************************************************/ +/* Basic character checks */ +/******************************************************************************/ + +// This function is used extremely frequently to lex all of the identifiers in a +// source file, so it's important that it be as fast as possible. For this +// reason we have the encoding_changed boolean to check if we need to go through +// the function pointer or can just directly use the UTF-8 functions. 
+static inline size_t +char_is_identifier_start(yp_parser_t *parser, const uint8_t *b) { + if (parser->encoding_changed) { + return parser->encoding.alpha_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80); + } else if (*b < 0x80) { + return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_'); + } else { + return (size_t) (yp_encoding_utf_8_alpha_char(b, parser->end - b) || 1u); + } +} + +// Like the above, this function is also used extremely frequently to lex all of +// the identifiers in a source file once the first character has been found. So +// it's important that it be as fast as possible. +static inline size_t +char_is_identifier(yp_parser_t *parser, const uint8_t *b) { + if (parser->encoding_changed) { + return parser->encoding.alnum_char(b, parser->end - b) || (*b == '_') || (*b >= 0x80); + } else if (*b < 0x80) { + return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHANUMERIC_BIT ? 1 : 0) || (*b == '_'); + } else { + return (size_t) (yp_encoding_utf_8_alnum_char(b, parser->end - b) || 1u); + } +} + +// Here we're defining a perfect hash for the characters that are allowed in +// global names. This is used to quickly check the next character after a $ to +// see if it's a valid character for a global name. +#define BIT(c, idx) (((c) / 32 - 1 == idx) ? 
(1U << ((c) % 32)) : 0) +#define PUNCT(idx) ( \ + BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \ + BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \ + BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \ + BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \ + BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \ + BIT('0', idx)) + +const unsigned int yp_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) }; + +#undef BIT +#undef PUNCT + +static inline bool +char_is_global_name_punctuation(const uint8_t b) { + const unsigned int i = (const unsigned int) b; + if (i <= 0x20 || 0x7e < i) return false; + + return (yp_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1; +} + +static inline bool +token_is_setter_name(yp_token_t *token) { + return ( + (token->type == YP_TOKEN_IDENTIFIER) && + (token->end - token->start >= 2) && + (token->end[-1] == '=') + ); +} + +/******************************************************************************/ +/* Stack helpers */ +/******************************************************************************/ + +static inline void +yp_accepts_block_stack_push(yp_parser_t *parser, bool value) { + // Use the negation of the value to prevent stack overflow. 
+ yp_state_stack_push(&parser->accepts_block_stack, !value); +} + +static inline void +yp_accepts_block_stack_pop(yp_parser_t *parser) { + yp_state_stack_pop(&parser->accepts_block_stack); +} + +static inline bool +yp_accepts_block_stack_p(yp_parser_t *parser) { + return !yp_state_stack_p(&parser->accepts_block_stack); +} + +static inline void +yp_do_loop_stack_push(yp_parser_t *parser, bool value) { + yp_state_stack_push(&parser->do_loop_stack, value); +} + +static inline void +yp_do_loop_stack_pop(yp_parser_t *parser) { + yp_state_stack_pop(&parser->do_loop_stack); +} + +static inline bool +yp_do_loop_stack_p(yp_parser_t *parser) { + return yp_state_stack_p(&parser->do_loop_stack); +} + +/******************************************************************************/ +/* Lexer check helpers */ +/******************************************************************************/ + +// Get the next character in the source starting from +cursor+. If that position +// is beyond the end of the source then return '\0'. +static inline uint8_t +peek_at(yp_parser_t *parser, const uint8_t *cursor) { + if (cursor < parser->end) { + return *cursor; + } else { + return '\0'; + } +} + +// Get the next character in the source starting from parser->current.end and +// adding the given offset. If that position is beyond the end of the source +// then return '\0'. +static inline uint8_t +peek_offset(yp_parser_t *parser, ptrdiff_t offset) { + return peek_at(parser, parser->current.end + offset); +} + +// Get the next character in the source starting from parser->current.end. If +// that position is beyond the end of the source then return '\0'. +static inline uint8_t +peek(yp_parser_t *parser) { + return peek_at(parser, parser->current.end); +} + +// Get the next string of length len in the source starting from parser->current.end. 
+// If the string extends beyond the end of the source, return the empty string "" +static inline const uint8_t * +peek_string(yp_parser_t *parser, size_t len) { + if (parser->current.end + len <= parser->end) { + return parser->current.end; + } else { + return (const uint8_t *) ""; + } +} + +// If the character to be read matches the given value, then returns true and +// advanced the current pointer. +static inline bool +match(yp_parser_t *parser, uint8_t value) { + if (peek(parser) == value) { + parser->current.end++; + return true; + } + return false; +} + +// Return the length of the line ending string starting at +cursor+, or 0 if it +// is not a line ending. This function is intended to be CRLF/LF agnostic. +static inline size_t +match_eol_at(yp_parser_t *parser, const uint8_t *cursor) { + if (peek_at(parser, cursor) == '\n') { + return 1; + } + if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') { + return 2; + } + return 0; +} + +// Return the length of the line ending string starting at +// parser->current.end + offset, or 0 if it is not a line ending. This function +// is intended to be CRLF/LF agnostic. +static inline size_t +match_eol_offset(yp_parser_t *parser, ptrdiff_t offset) { + return match_eol_at(parser, parser->current.end + offset); +} + +// Return the length of the line ending string starting at parser->current.end, +// or 0 if it is not a line ending. This function is intended to be CRLF/LF +// agnostic. +static inline size_t +match_eol(yp_parser_t *parser) { + return match_eol_at(parser, parser->current.end); +} + +// Skip to the next newline character or NUL byte. +static inline const uint8_t * +next_newline(const uint8_t *cursor, ptrdiff_t length) { + assert(length >= 0); + + // Note that it's okay for us to use memchr here to look for \n because none + // of the encodings that we support have \n as a component of a multi-byte + // character. 
+ return memchr(cursor, '\n', (size_t) length); +} + +// Find the start of the encoding comment. This is effectively an inlined +// version of strnstr with some modifications. +static inline const uint8_t * +parser_lex_encoding_comment_start(yp_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) { + assert(remaining >= 0); + size_t length = (size_t) remaining; + + size_t key_length = strlen("coding:"); + if (key_length > length) return NULL; + + const uint8_t *cursor_limit = cursor + length - key_length + 1; + while ((cursor = yp_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) { + if (memcmp(cursor, "coding", key_length - 1) == 0) { + size_t whitespace_after_coding = yp_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1)); + size_t cur_pos = key_length + whitespace_after_coding; + + if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') { + return cursor + cur_pos; + } + } + + cursor++; + } + + return NULL; +} + +// Here we're going to check if this is a "magic" comment, and perform whatever +// actions are necessary for it here. +static void +parser_lex_encoding_comment(yp_parser_t *parser) { + const uint8_t *start = parser->current.start + 1; + const uint8_t *end = parser->current.end; + + // These are the patterns we're going to match to find the encoding comment. + // This is definitely not complete or even really correct. + const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start); + + // If we didn't find anything that matched our patterns, then return. Note + // that this does a _very_ poor job of actually finding the encoding, and + // there is a lot of work to do here to better reflect actual magic comment + // parsing from CRuby, but this at least gets us part of the way there. + if (encoding_start == NULL) return; + + // Skip any non-newline whitespace after the "coding:" or "coding=". 
+ encoding_start += yp_strspn_inline_whitespace(encoding_start, end - encoding_start); + + // Now determine the end of the encoding string. This is either the end of + // the line, the first whitespace character, or a punctuation mark. + const uint8_t *encoding_end = yp_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start); + encoding_end = encoding_end == NULL ? end : encoding_end; + + // Finally, we can determine the width of the encoding string. + size_t width = (size_t) (encoding_end - encoding_start); + + // First, we're going to call out to a user-defined callback if one was + // provided. If they return an encoding struct that we can use, then we'll + // use that here. + if (parser->encoding_decode_callback != NULL) { + yp_encoding_t *encoding = parser->encoding_decode_callback(parser, encoding_start, width); + + if (encoding != NULL) { + parser->encoding = *encoding; + return; + } + } + + // Next, we're going to check for UTF-8. This is the most common encoding. + // Extensions like utf-8 can contain extra encoding details like, + // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should + // treat any encoding starting utf-8 as utf-8. + if ((encoding_start + 5 <= parser->end) && (yp_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) { + // We don't need to do anything here because the default encoding is + // already UTF-8. We'll just return. + return; + } + + // Next, we're going to loop through each of the encodings that we handle + // explicitly. If we found one that we understand, we'll use that value. 
+#define ENCODING(value, prebuilt) \ + if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && yp_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \ + parser->encoding = prebuilt; \ + parser->encoding_changed |= true; \ + if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \ + return; \ + } + + // Check most common first. (This is pretty arbitrary.) + ENCODING("ascii", yp_encoding_ascii); + ENCODING("ascii-8bit", yp_encoding_ascii_8bit); + ENCODING("us-ascii", yp_encoding_ascii); + ENCODING("binary", yp_encoding_ascii_8bit); + ENCODING("shift_jis", yp_encoding_shift_jis); + ENCODING("euc-jp", yp_encoding_euc_jp); + + // Then check all the others. + ENCODING("big5", yp_encoding_big5); + ENCODING("gbk", yp_encoding_gbk); + ENCODING("iso-8859-1", yp_encoding_iso_8859_1); + ENCODING("iso-8859-2", yp_encoding_iso_8859_2); + ENCODING("iso-8859-3", yp_encoding_iso_8859_3); + ENCODING("iso-8859-4", yp_encoding_iso_8859_4); + ENCODING("iso-8859-5", yp_encoding_iso_8859_5); + ENCODING("iso-8859-6", yp_encoding_iso_8859_6); + ENCODING("iso-8859-7", yp_encoding_iso_8859_7); + ENCODING("iso-8859-8", yp_encoding_iso_8859_8); + ENCODING("iso-8859-9", yp_encoding_iso_8859_9); + ENCODING("iso-8859-10", yp_encoding_iso_8859_10); + ENCODING("iso-8859-11", yp_encoding_iso_8859_11); + ENCODING("iso-8859-13", yp_encoding_iso_8859_13); + ENCODING("iso-8859-14", yp_encoding_iso_8859_14); + ENCODING("iso-8859-15", yp_encoding_iso_8859_15); + ENCODING("iso-8859-16", yp_encoding_iso_8859_16); + ENCODING("koi8-r", yp_encoding_koi8_r); + ENCODING("windows-31j", yp_encoding_windows_31j); + ENCODING("windows-1251", yp_encoding_windows_1251); + ENCODING("windows-1252", yp_encoding_windows_1252); + ENCODING("cp1251", yp_encoding_windows_1251); + ENCODING("cp1252", yp_encoding_windows_1252); + ENCODING("cp932", yp_encoding_windows_31j); + ENCODING("sjis", yp_encoding_windows_31j); + ENCODING("utf8-mac", 
yp_encoding_utf8_mac); + +#undef ENCODING + + // If nothing was returned by this point, then we've got an issue because we + // didn't understand the encoding that the user was trying to use. In this + // case we'll keep using the default encoding but add an error to the + // parser to indicate an unsuccessful parse. + yp_diagnostic_list_append(&parser->error_list, encoding_start, encoding_end, YP_ERR_INVALID_ENCODING_MAGIC_COMMENT); +} + +// Check if this is a magic comment that includes the frozen_string_literal +// pragma. If it does, set that field on the parser. +static void +parser_lex_frozen_string_literal_comment(yp_parser_t *parser) { + const uint8_t *cursor = parser->current.start + 1; + const uint8_t *end = parser->current.end; + + size_t key_length = strlen("frozen_string_literal"); + if (key_length > (size_t) (end - cursor)) return; + + const uint8_t *cursor_limit = cursor + (end - cursor) - key_length + 1; + + while ((cursor = yp_memchr(cursor, 'f', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) { + if (memcmp(cursor, "frozen_string_literal", key_length) == 0) { + cursor += key_length; + cursor += yp_strspn_inline_whitespace(cursor, end - cursor); + + if (*cursor == ':' || *cursor == '=') { + cursor++; + cursor += yp_strspn_inline_whitespace(cursor, end - cursor); + + if (cursor + 4 <= end && yp_strncasecmp(cursor, (const uint8_t *) "true", 4) == 0) { + parser->frozen_string_literal = true; + } + + return; + } + } + + cursor++; + } +} + +/******************************************************************************/ +/* Context manipulations */ +/******************************************************************************/ + +static bool +context_terminator(yp_context_t context, yp_token_t *token) { + switch (context) { + case YP_CONTEXT_MAIN: + case YP_CONTEXT_DEF_PARAMS: + return token->type == YP_TOKEN_EOF; + case YP_CONTEXT_DEFAULT_PARAMS: + return token->type == YP_TOKEN_COMMA || token->type == 
YP_TOKEN_PARENTHESIS_RIGHT; + case YP_CONTEXT_PREEXE: + case YP_CONTEXT_POSTEXE: + return token->type == YP_TOKEN_BRACE_RIGHT; + case YP_CONTEXT_MODULE: + case YP_CONTEXT_CLASS: + case YP_CONTEXT_SCLASS: + case YP_CONTEXT_LAMBDA_DO_END: + case YP_CONTEXT_DEF: + case YP_CONTEXT_BLOCK_KEYWORDS: + return token->type == YP_TOKEN_KEYWORD_END || token->type == YP_TOKEN_KEYWORD_RESCUE || token->type == YP_TOKEN_KEYWORD_ENSURE; + case YP_CONTEXT_WHILE: + case YP_CONTEXT_UNTIL: + case YP_CONTEXT_ELSE: + case YP_CONTEXT_FOR: + case YP_CONTEXT_ENSURE: + return token->type == YP_TOKEN_KEYWORD_END; + case YP_CONTEXT_CASE_WHEN: + return token->type == YP_TOKEN_KEYWORD_WHEN || token->type == YP_TOKEN_KEYWORD_END || token->type == YP_TOKEN_KEYWORD_ELSE; + case YP_CONTEXT_CASE_IN: + return token->type == YP_TOKEN_KEYWORD_IN || token->type == YP_TOKEN_KEYWORD_END || token->type == YP_TOKEN_KEYWORD_ELSE; + case YP_CONTEXT_IF: + case YP_CONTEXT_ELSIF: + return token->type == YP_TOKEN_KEYWORD_ELSE || token->type == YP_TOKEN_KEYWORD_ELSIF || token->type == YP_TOKEN_KEYWORD_END; + case YP_CONTEXT_UNLESS: + return token->type == YP_TOKEN_KEYWORD_ELSE || token->type == YP_TOKEN_KEYWORD_END; + case YP_CONTEXT_EMBEXPR: + return token->type == YP_TOKEN_EMBEXPR_END; + case YP_CONTEXT_BLOCK_BRACES: + return token->type == YP_TOKEN_BRACE_RIGHT; + case YP_CONTEXT_PARENS: + return token->type == YP_TOKEN_PARENTHESIS_RIGHT; + case YP_CONTEXT_BEGIN: + case YP_CONTEXT_RESCUE: + return token->type == YP_TOKEN_KEYWORD_ENSURE || token->type == YP_TOKEN_KEYWORD_RESCUE || token->type == YP_TOKEN_KEYWORD_ELSE || token->type == YP_TOKEN_KEYWORD_END; + case YP_CONTEXT_RESCUE_ELSE: + return token->type == YP_TOKEN_KEYWORD_ENSURE || token->type == YP_TOKEN_KEYWORD_END; + case YP_CONTEXT_LAMBDA_BRACES: + return token->type == YP_TOKEN_BRACE_RIGHT; + case YP_CONTEXT_PREDICATE: + return token->type == YP_TOKEN_KEYWORD_THEN || token->type == YP_TOKEN_NEWLINE || token->type == YP_TOKEN_SEMICOLON; + } + + return 
false; +} + +static bool +context_recoverable(yp_parser_t *parser, yp_token_t *token) { + yp_context_node_t *context_node = parser->current_context; + + while (context_node != NULL) { + if (context_terminator(context_node->context, token)) return true; + context_node = context_node->prev; + } + + return false; +} + +static bool +context_push(yp_parser_t *parser, yp_context_t context) { + yp_context_node_t *context_node = (yp_context_node_t *) malloc(sizeof(yp_context_node_t)); + if (context_node == NULL) return false; + + *context_node = (yp_context_node_t) { .context = context, .prev = NULL }; + + if (parser->current_context == NULL) { + parser->current_context = context_node; + } else { + context_node->prev = parser->current_context; + parser->current_context = context_node; + } + + return true; +} + +static void +context_pop(yp_parser_t *parser) { + yp_context_node_t *prev = parser->current_context->prev; + free(parser->current_context); + parser->current_context = prev; +} + +static bool +context_p(yp_parser_t *parser, yp_context_t context) { + yp_context_node_t *context_node = parser->current_context; + + while (context_node != NULL) { + if (context_node->context == context) return true; + context_node = context_node->prev; + } + + return false; +} + +static bool +context_def_p(yp_parser_t *parser) { + yp_context_node_t *context_node = parser->current_context; + + while (context_node != NULL) { + switch (context_node->context) { + case YP_CONTEXT_DEF: + return true; + case YP_CONTEXT_CLASS: + case YP_CONTEXT_MODULE: + case YP_CONTEXT_SCLASS: + return false; + default: + context_node = context_node->prev; + } + } + + return false; +} + +/******************************************************************************/ +/* Specific token lexers */ +/******************************************************************************/ + +static void +yp_strspn_number_validate(yp_parser_t *parser, const uint8_t *invalid) { + if (invalid != NULL) { + 
yp_diagnostic_list_append(&parser->error_list, invalid, invalid + 1, YP_ERR_INVALID_NUMBER_UNDERSCORE); + } +} + +static size_t +yp_strspn_binary_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_binary_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static size_t +yp_strspn_octal_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_octal_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static size_t +yp_strspn_decimal_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_decimal_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static size_t +yp_strspn_hexadecimal_number_validate(yp_parser_t *parser, const uint8_t *string) { + const uint8_t *invalid = NULL; + size_t length = yp_strspn_hexadecimal_number(string, parser->end - string, &invalid); + yp_strspn_number_validate(parser, invalid); + return length; +} + +static yp_token_type_t +lex_optional_float_suffix(yp_parser_t *parser) { + yp_token_type_t type = YP_TOKEN_INTEGER; + + // Here we're going to attempt to parse the optional decimal portion of a + // float. If it's not there, then it's okay and we'll just continue on. + if (peek(parser) == '.') { + if (yp_char_is_decimal_digit(peek_offset(parser, 1))) { + parser->current.end += 2; + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); + type = YP_TOKEN_FLOAT; + } else { + // If we had a . and then something else, then it's not a float suffix on + // a number it's a method call or something else. + return type; + } + } + + // Here we're going to attempt to parse the optional exponent portion of a + // float. 
If it's not there, it's okay and we'll just continue on. + if (match(parser, 'e') || match(parser, 'E')) { + (void) (match(parser, '+') || match(parser, '-')); + + if (yp_char_is_decimal_digit(*parser->current.end)) { + parser->current.end++; + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); + type = YP_TOKEN_FLOAT; + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_FLOAT_EXPONENT); + type = YP_TOKEN_FLOAT; + } + } + + return type; +} + +static yp_token_type_t +lex_numeric_prefix(yp_parser_t *parser) { + yp_token_type_t type = YP_TOKEN_INTEGER; + + if (peek_offset(parser, -1) == '0') { + switch (*parser->current.end) { + // 0d1111 is a decimal number + case 'd': + case 'D': + parser->current.end++; + if (yp_char_is_decimal_digit(peek(parser))) { + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_DECIMAL); + } + + break; + + // 0b1111 is a binary number + case 'b': + case 'B': + parser->current.end++; + if (yp_char_is_binary_digit(peek(parser))) { + parser->current.end += yp_strspn_binary_number_validate(parser, parser->current.end); + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_BINARY); + } + + parser->integer_base = YP_INTEGER_BASE_FLAGS_BINARY; + break; + + // 0o1111 is an octal number + case 'o': + case 'O': + parser->current.end++; + if (yp_char_is_octal_digit(peek(parser))) { + parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end); + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_OCTAL); + } + + parser->integer_base = YP_INTEGER_BASE_FLAGS_OCTAL; + break; + + // 01111 is an octal number + case '_': + case '0': 
+ case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + parser->current.end += yp_strspn_octal_number_validate(parser, parser->current.end); + parser->integer_base = YP_INTEGER_BASE_FLAGS_OCTAL; + break; + + // 0x1111 is a hexadecimal number + case 'x': + case 'X': + parser->current.end++; + if (yp_char_is_hexadecimal_digit(peek(parser))) { + parser->current.end += yp_strspn_hexadecimal_number_validate(parser, parser->current.end); + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_NUMBER_HEXADECIMAL); + } + + parser->integer_base = YP_INTEGER_BASE_FLAGS_HEXADECIMAL; + break; + + // 0.xxx is a float + case '.': { + type = lex_optional_float_suffix(parser); + break; + } + + // 0exxx is a float + case 'e': + case 'E': { + type = lex_optional_float_suffix(parser); + break; + } + } + } else { + // If it didn't start with a 0, then we'll lex as far as we can into a + // decimal number. + parser->current.end += yp_strspn_decimal_number_validate(parser, parser->current.end); + + // Afterward, we'll lex as far as we can into an optional float suffix. 
+ type = lex_optional_float_suffix(parser); + } + + return type; +} + +static yp_token_type_t +lex_numeric(yp_parser_t *parser) { + yp_token_type_t type = YP_TOKEN_INTEGER; + parser->integer_base = YP_INTEGER_BASE_FLAGS_DECIMAL; + + if (parser->current.end < parser->end) { + type = lex_numeric_prefix(parser); + + const uint8_t *end = parser->current.end; + yp_token_type_t suffix_type = type; + + if (type == YP_TOKEN_INTEGER) { + if (match(parser, 'r')) { + suffix_type = YP_TOKEN_INTEGER_RATIONAL; + + if (match(parser, 'i')) { + suffix_type = YP_TOKEN_INTEGER_RATIONAL_IMAGINARY; + } + } else if (match(parser, 'i')) { + suffix_type = YP_TOKEN_INTEGER_IMAGINARY; + } + } else { + if (match(parser, 'r')) { + suffix_type = YP_TOKEN_FLOAT_RATIONAL; + + if (match(parser, 'i')) { + suffix_type = YP_TOKEN_FLOAT_RATIONAL_IMAGINARY; + } + } else if (match(parser, 'i')) { + suffix_type = YP_TOKEN_FLOAT_IMAGINARY; + } + } + + const uint8_t b = peek(parser); + if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) { + parser->current.end = end; + } else { + type = suffix_type; + } + } + + return type; +} + +static yp_token_type_t +lex_global_variable(yp_parser_t *parser) { + if (parser->current.end >= parser->end) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_VARIABLE_GLOBAL); + return YP_TOKEN_GLOBAL_VARIABLE; + } + + switch (*parser->current.end) { + case '~': // $~: match-data + case '*': // $*: argv + case '$': // $$: pid + case '?': // $?: last status + case '!': // $!: error string + case '@': // $@: error position + case '/': // $/: input record separator + case '\\': // $\: output record separator + case ';': // $;: field separator + case ',': // $,: output field separator + case '.': // $.: last read line number + case '=': // $=: ignorecase + case ':': // $:: load path + case '<': // $<: reading filename + case '>': // $>: default output handle + case '\"': // $": 
already loaded files + parser->current.end++; + return YP_TOKEN_GLOBAL_VARIABLE; + + case '&': // $&: last match + case '`': // $`: string before last match + case '\'': // $': string after last match + case '+': // $+: string matches last paren. + parser->current.end++; + return lex_state_p(parser, YP_LEX_STATE_FNAME) ? YP_TOKEN_GLOBAL_VARIABLE : YP_TOKEN_BACK_REFERENCE; + + case '0': { + parser->current.end++; + size_t width; + + if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) { + do { + parser->current.end += width; + } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0); + + // $0 isn't allowed to be followed by anything. + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_VARIABLE_GLOBAL); + } + + return YP_TOKEN_GLOBAL_VARIABLE; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + parser->current.end += yp_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end); + return lex_state_p(parser, YP_LEX_STATE_FNAME) ? YP_TOKEN_GLOBAL_VARIABLE : YP_TOKEN_NUMBERED_REFERENCE; + + case '-': + parser->current.end++; + /* fallthrough */ + default: { + size_t width; + + if ((width = char_is_identifier(parser, parser->current.end)) > 0) { + do { + parser->current.end += width; + } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0); + } else { + // If we get here, then we have a $ followed by something that isn't + // recognized as a global variable. + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_VARIABLE_GLOBAL); + } + + return YP_TOKEN_GLOBAL_VARIABLE; + } + } +} + +// This function checks if the current token matches a keyword. If it does, it +// returns true. Otherwise, it returns false. 
The arguments are as follows: +// +// * `value` - the literal string that we're checking for +// * `width` - the length of the token +// * `state` - the state that we should transition to if the token matches +// +static inline yp_token_type_t +lex_keyword(yp_parser_t *parser, const char *value, size_t vlen, yp_lex_state_t state, yp_token_type_t type, yp_token_type_t modifier_type) { + yp_lex_state_t last_state = parser->lex_state; + + if (parser->current.start + vlen <= parser->end && memcmp(parser->current.start, value, vlen) == 0) { + if (parser->lex_state & YP_LEX_STATE_FNAME) { + lex_state_set(parser, YP_LEX_STATE_ENDFN); + } else { + lex_state_set(parser, state); + if (state == YP_LEX_STATE_BEG) { + parser->command_start = true; + } + + if ((modifier_type != YP_TOKEN_EOF) && !(last_state & (YP_LEX_STATE_BEG | YP_LEX_STATE_LABELED | YP_LEX_STATE_CLASS))) { + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + return modifier_type; + } + } + + return type; + } + + return YP_TOKEN_EOF; +} + +static yp_token_type_t +lex_identifier(yp_parser_t *parser, bool previous_command_start) { + // Lex as far as we can into the current identifier. + size_t width; + while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) { + parser->current.end += width; + } + + // Now cache the length of the identifier so that we can quickly compare it + // against known keywords. + width = (size_t) (parser->current.end - parser->current.start); + + if (parser->current.end < parser->end) { + if (((parser->current.end + 1 >= parser->end) || (parser->current.end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) { + // First we'll attempt to extend the identifier by a ! or ?. Then we'll + // check if we're returning the defined? keyword or just an identifier. 
+ width++; + + if ( + ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) && + (peek(parser) == ':') && (peek_offset(parser, 1) != ':') + ) { + // If we're in a position where we can accept a : at the end of an + // identifier, then we'll optionally accept it. + lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED); + (void) match(parser, ':'); + return YP_TOKEN_LABEL; + } + + if (parser->lex_state != YP_LEX_STATE_DOT) { + if (width == 8 && (lex_keyword(parser, "defined?", width, YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_DEFINED, YP_TOKEN_EOF) != YP_TOKEN_EOF)) { + return YP_TOKEN_KEYWORD_DEFINED; + } + } + + return YP_TOKEN_METHOD_NAME; + } + + if (lex_state_p(parser, YP_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) { + // If we're in a position where we can accept a = at the end of an + // identifier, then we'll optionally accept it. + return YP_TOKEN_IDENTIFIER; + } + + if ( + ((lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) && + peek(parser) == ':' && peek_offset(parser, 1) != ':' + ) { + // If we're in a position where we can accept a : at the end of an + // identifier, then we'll optionally accept it. 
+ lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED); + (void) match(parser, ':'); + return YP_TOKEN_LABEL; + } + } + + if (parser->lex_state != YP_LEX_STATE_DOT) { + yp_token_type_t type; + + switch (width) { + case 2: + if (lex_keyword(parser, "do", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_DO, YP_TOKEN_EOF) != YP_TOKEN_EOF) { + if (yp_do_loop_stack_p(parser)) { + return YP_TOKEN_KEYWORD_DO_LOOP; + } + return YP_TOKEN_KEYWORD_DO; + } + + if ((type = lex_keyword(parser, "if", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_IF, YP_TOKEN_KEYWORD_IF_MODIFIER)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "in", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_IN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "or", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_OR, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + break; + case 3: + if ((type = lex_keyword(parser, "and", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_AND, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "def", width, YP_LEX_STATE_FNAME, YP_TOKEN_KEYWORD_DEF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "end", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_END, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "END", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_END_UPCASE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "for", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_FOR, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "nil", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_NIL, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "not", width, YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_NOT, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + break; + case 4: + if ((type = lex_keyword(parser, "case", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_CASE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = 
lex_keyword(parser, "else", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "next", width, YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_NEXT, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "redo", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_REDO, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "self", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_SELF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "then", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "true", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_TRUE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "when", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_WHEN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + break; + case 5: + if ((type = lex_keyword(parser, "alias", width, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM, YP_TOKEN_KEYWORD_ALIAS, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "begin", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_BEGIN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "BEGIN", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_BEGIN_UPCASE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "break", width, YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_BREAK, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "class", width, YP_LEX_STATE_CLASS, YP_TOKEN_KEYWORD_CLASS, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "elsif", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_ELSIF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "false", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD_FALSE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "retry", width, 
YP_LEX_STATE_END, YP_TOKEN_KEYWORD_RETRY, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "super", width, YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_SUPER, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "undef", width, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM, YP_TOKEN_KEYWORD_UNDEF, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "until", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_UNTIL, YP_TOKEN_KEYWORD_UNTIL_MODIFIER)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "while", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_WHILE, YP_TOKEN_KEYWORD_WHILE_MODIFIER)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "yield", width, YP_LEX_STATE_ARG, YP_TOKEN_KEYWORD_YIELD, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + break; + case 6: + if ((type = lex_keyword(parser, "ensure", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "module", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_MODULE, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "rescue", width, YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_RESCUE_MODIFIER)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "return", width, YP_LEX_STATE_MID, YP_TOKEN_KEYWORD_RETURN, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "unless", width, YP_LEX_STATE_BEG, YP_TOKEN_KEYWORD_UNLESS, YP_TOKEN_KEYWORD_UNLESS_MODIFIER)) != YP_TOKEN_EOF) return type; + break; + case 8: + if ((type = lex_keyword(parser, "__LINE__", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD___LINE__, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + if ((type = lex_keyword(parser, "__FILE__", width, YP_LEX_STATE_END, YP_TOKEN_KEYWORD___FILE__, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + break; + case 12: + if ((type = lex_keyword(parser, "__ENCODING__", width, YP_LEX_STATE_END, 
YP_TOKEN_KEYWORD___ENCODING__, YP_TOKEN_EOF)) != YP_TOKEN_EOF) return type; + break; + } + } + + return parser->encoding.isupper_char(parser->current.start, parser->end - parser->current.start) ? YP_TOKEN_CONSTANT : YP_TOKEN_IDENTIFIER; +} + +// Returns true if the current token that the parser is considering is at the +// beginning of a line or the beginning of the source. +static bool +current_token_starts_line(yp_parser_t *parser) { + return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n'); +} + +// When we hit a # while lexing something like a string, we need to potentially +// handle interpolation. This function performs that check. It returns a token +// type representing what it found. Those cases are: +// +// * YP_TOKEN_NOT_PROVIDED - No interpolation was found at this point. The +// caller should keep lexing. +// * YP_TOKEN_STRING_CONTENT - No interpolation was found at this point. The +// caller should return this token type. +// * YP_TOKEN_EMBEXPR_BEGIN - An embedded expression was found. The caller +// should return this token type. +// * YP_TOKEN_EMBVAR - An embedded variable was found. The caller should return +// this token type. +// +static yp_token_type_t +lex_interpolation(yp_parser_t *parser, const uint8_t *pound) { + // If there is no content following this #, then we're at the end of + // the string and we can safely return string content. + if (pound + 1 >= parser->end) { + parser->current.end = pound + 1; + return YP_TOKEN_STRING_CONTENT; + } + + // Now we'll check against the character the follows the #. If it constitutes + // valid interplation, we'll handle that, otherwise we'll return + // YP_TOKEN_NOT_PROVIDED. + switch (pound[1]) { + case '@': { + // In this case we may have hit an embedded instance or class variable. 
+ if (pound + 2 >= parser->end) { + parser->current.end = pound + 1; + return YP_TOKEN_STRING_CONTENT; + } + + // If we're looking at a @ and there's another @, then we'll skip past the + // second @. + const uint8_t *variable = pound + 2; + if (*variable == '@' && pound + 3 < parser->end) variable++; + + if (char_is_identifier_start(parser, variable)) { + // At this point we're sure that we've either hit an embedded instance + // or class variable. In this case we'll first need to check if we've + // already consumed content. + if (pound > parser->current.start) { + parser->current.end = pound; + return YP_TOKEN_STRING_CONTENT; + } + + // Otherwise we need to return the embedded variable token + // and then switch to the embedded variable lex mode. + lex_mode_push(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBVAR }); + parser->current.end = pound + 1; + return YP_TOKEN_EMBVAR; + } + + // If we didn't get an valid interpolation, then this is just regular + // string content. This is like if we get "#@-". In this case the caller + // should keep lexing. + parser->current.end = pound + 1; + return YP_TOKEN_NOT_PROVIDED; + } + case '$': + // In this case we may have hit an embedded global variable. If there's + // not enough room, then we'll just return string content. + if (pound + 2 >= parser->end) { + parser->current.end = pound + 1; + return YP_TOKEN_STRING_CONTENT; + } + + // This is the character that we're going to check to see if it is the + // start of an identifier that would indicate that this is a global + // variable. + const uint8_t *check = pound + 2; + + if (pound[2] == '-') { + if (pound + 3 >= parser->end) { + parser->current.end = pound + 2; + return YP_TOKEN_STRING_CONTENT; + } + + check++; + } + + // If the character that we're going to check is the start of an + // identifier, or we don't have a - and the character is a decimal number + // or a global name punctuation character, then we've hit an embedded + // global variable. 
+ if ( + char_is_identifier_start(parser, check) || + (pound[2] != '-' && (yp_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2]))) + ) { + // In this case we've hit an embedded global variable. First check to + // see if we've already consumed content. If we have, then we need to + // return that content as string content first. + if (pound > parser->current.start) { + parser->current.end = pound; + return YP_TOKEN_STRING_CONTENT; + } + + // Otherwise, we need to return the embedded variable token and switch + // to the embedded variable lex mode. + lex_mode_push(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBVAR }); + parser->current.end = pound + 1; + return YP_TOKEN_EMBVAR; + } + + // In this case we've hit a #$ that does not indicate a global variable. + // In this case we'll continue lexing past it. + parser->current.end = pound + 1; + return YP_TOKEN_NOT_PROVIDED; + case '{': + // In this case it's the start of an embedded expression. If we have + // already consumed content, then we need to return that content as string + // content first. + if (pound > parser->current.start) { + parser->current.end = pound; + return YP_TOKEN_STRING_CONTENT; + } + + parser->enclosure_nesting++; + + // Otherwise we'll skip past the #{ and begin lexing the embedded + // expression. + lex_mode_push(parser, (yp_lex_mode_t) { .mode = YP_LEX_EMBEXPR }); + parser->current.end = pound + 2; + parser->command_start = true; + yp_do_loop_stack_push(parser, false); + return YP_TOKEN_EMBEXPR_BEGIN; + default: + // In this case we've hit a # that doesn't constitute interpolation. We'll + // mark that by returning the not provided token type. This tells the + // consumer to keep lexing forward. + parser->current.end = pound + 1; + return YP_TOKEN_NOT_PROVIDED; + } +} + +// This function is responsible for lexing either a character literal or the ? +// operator. The supported character literals are described below. 
+// +// \a bell, ASCII 07h (BEL) +// \b backspace, ASCII 08h (BS) +// \t horizontal tab, ASCII 09h (TAB) +// \n newline (line feed), ASCII 0Ah (LF) +// \v vertical tab, ASCII 0Bh (VT) +// \f form feed, ASCII 0Ch (FF) +// \r carriage return, ASCII 0Dh (CR) +// \e escape, ASCII 1Bh (ESC) +// \s space, ASCII 20h (SPC) +// \\ backslash +// \nnn octal bit pattern, where nnn is 1-3 octal digits ([0-7]) +// \xnn hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F]) +// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F]) +// \u{nnnn ...} Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F]) +// \cx or \C-x control character, where x is an ASCII printable character +// \M-x meta character, where x is an ASCII printable character +// \M-\C-x meta control character, where x is an ASCII printable character +// \M-\cx same as above +// \c\M-x same as above +// \c? or \C-? delete, ASCII 7Fh (DEL) +// +static yp_token_type_t +lex_question_mark(yp_parser_t *parser) { + if (lex_state_end_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_BEG); + return YP_TOKEN_QUESTION_MARK; + } + + if (parser->current.end >= parser->end) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INCOMPLETE_QUESTION_MARK); + return YP_TOKEN_CHARACTER_LITERAL; + } + + if (yp_char_is_whitespace(*parser->current.end)) { + lex_state_set(parser, YP_LEX_STATE_BEG); + return YP_TOKEN_QUESTION_MARK; + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + + if (parser->current.start[1] == '\\') { + lex_state_set(parser, YP_LEX_STATE_END); + parser->current.end += yp_unescape_calculate_difference(parser, parser->current.start + 1, YP_UNESCAPE_ALL, true); + return YP_TOKEN_CHARACTER_LITERAL; + } else { + size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end); + + // Ternary operators can have a ? 
immediately followed by an identifier which starts with + // an underscore. We check for this case + if ( + !(parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end) || + peek(parser) == '_') || + ( + (parser->current.end + encoding_width >= parser->end) || + !char_is_identifier(parser, parser->current.end + encoding_width) + ) + ) { + lex_state_set(parser, YP_LEX_STATE_END); + parser->current.end += encoding_width; + return YP_TOKEN_CHARACTER_LITERAL; + } + } + + return YP_TOKEN_QUESTION_MARK; +} + +// Lex a variable that starts with an @ sign (either an instance or class +// variable). +static yp_token_type_t +lex_at_variable(yp_parser_t *parser) { + yp_token_type_t type = match(parser, '@') ? YP_TOKEN_CLASS_VARIABLE : YP_TOKEN_INSTANCE_VARIABLE; + size_t width; + + if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) { + parser->current.end += width; + + while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) { + parser->current.end += width; + } + } else if (type == YP_TOKEN_CLASS_VARIABLE) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INCOMPLETE_VARIABLE_CLASS); + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INCOMPLETE_VARIABLE_INSTANCE); + } + + // If we're lexing an embedded variable, then we need to pop back into the + // parent lex context. + if (parser->lex_modes.current->mode == YP_LEX_EMBVAR) { + lex_mode_pop(parser); + } + + return type; +} + +// Optionally call out to the lex callback if one is provided. +static inline void +parser_lex_callback(yp_parser_t *parser) { + if (parser->lex_callback) { + parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current); + } +} + +// Return a new comment node of the specified type. 
+static inline yp_comment_t * +parser_comment(yp_parser_t *parser, yp_comment_type_t type) { + yp_comment_t *comment = (yp_comment_t *) malloc(sizeof(yp_comment_t)); + if (comment == NULL) return NULL; + + *comment = (yp_comment_t) { + .type = type, + .start = parser->current.start, + .end = parser->current.end + }; + + return comment; +} + +// Lex out embedded documentation, and return when we have either hit the end of +// the file or the end of the embedded documentation. This calls the callback +// manually because only the lexer should see these tokens, not the parser. +static yp_token_type_t +lex_embdoc(yp_parser_t *parser) { + // First, lex out the EMBDOC_BEGIN token. + const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end); + + if (newline == NULL) { + parser->current.end = parser->end; + } else { + yp_newline_list_append(&parser->newline_list, newline); + parser->current.end = newline + 1; + } + + parser->current.type = YP_TOKEN_EMBDOC_BEGIN; + parser_lex_callback(parser); + + // Now, create a comment that is going to be attached to the parser. + yp_comment_t *comment = parser_comment(parser, YP_COMMENT_EMBDOC); + if (comment == NULL) return YP_TOKEN_EOF; + + // Now, loop until we find the end of the embedded documentation or the end of + // the file. + while (parser->current.end + 4 <= parser->end) { + parser->current.start = parser->current.end; + + // If we've hit the end of the embedded documentation then we'll return that + // token here. 
+ if (memcmp(parser->current.end, "=end", 4) == 0 && + (parser->current.end + 4 == parser->end || yp_char_is_whitespace(parser->current.end[4]))) { + const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end); + + if (newline == NULL) { + parser->current.end = parser->end; + } else { + yp_newline_list_append(&parser->newline_list, newline); + parser->current.end = newline + 1; + } + + parser->current.type = YP_TOKEN_EMBDOC_END; + parser_lex_callback(parser); + + comment->end = parser->current.end; + yp_list_append(&parser->comment_list, (yp_list_node_t *) comment); + + return YP_TOKEN_EMBDOC_END; + } + + // Otherwise, we'll parse until the end of the line and return a line of + // embedded documentation. + const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end); + + if (newline == NULL) { + parser->current.end = parser->end; + } else { + yp_newline_list_append(&parser->newline_list, newline); + parser->current.end = newline + 1; + } + + parser->current.type = YP_TOKEN_EMBDOC_LINE; + parser_lex_callback(parser); + } + + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_EMBDOC_TERM); + + comment->end = parser->current.end; + yp_list_append(&parser->comment_list, (yp_list_node_t *) comment); + + return YP_TOKEN_EOF; +} + +// Set the current type to an ignored newline and then call the lex callback. +// This happens in a couple places depending on whether or not we have already +// lexed a comment. +static inline void +parser_lex_ignored_newline(yp_parser_t *parser) { + parser->current.type = YP_TOKEN_IGNORED_NEWLINE; + parser_lex_callback(parser); +} + +// This function will be called when a newline is encountered. In some newlines, +// we need to check if there is a heredoc or heredocs that we have already lexed +// the body of that we need to now skip past. That will be indicated by the +// heredoc_end field on the parser. 
+// +// If it is set, then we need to skip past the heredoc body and then clear the +// heredoc_end field. +static inline void +parser_flush_heredoc_end(yp_parser_t *parser) { + assert(parser->heredoc_end <= parser->end); + parser->next_start = parser->heredoc_end; + parser->heredoc_end = NULL; +} + +// This is a convenience macro that will set the current token type, call the +// lex callback, and then return from the parser_lex function. +#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return + +// Called when the parser requires a new token. The parser maintains a moving +// window of two tokens at a time: parser.previous and parser.current. This +// function will move the current token into the previous token and then +// lex a new token into the current token. +static void +parser_lex(yp_parser_t *parser) { + assert(parser->current.end <= parser->end); + parser->previous = parser->current; + + // This value mirrors cmd_state from CRuby. + bool previous_command_start = parser->command_start; + parser->command_start = false; + + // This is used to communicate to the newline lexing function that we've + // already seen a comment. + bool lexed_comment = false; + + // Here we cache the current value of the semantic token seen flag. This is + // used to reset it in case we find a token that shouldn't flip this flag. + unsigned int semantic_token_seen = parser->semantic_token_seen; + parser->semantic_token_seen = true; + + switch (parser->lex_modes.current->mode) { + case YP_LEX_DEFAULT: + case YP_LEX_EMBEXPR: + case YP_LEX_EMBVAR: + + // We have a specific named label here because we are going to jump back to + // this location in the event that we have lexed a token that should not be + // returned to the parser. This includes comments, ignored newlines, and + // invalid tokens of some form. 
+ lex_next_token: { + // If we have the special next_start pointer set, then we're going to jump + // to that location and start lexing from there. + if (parser->next_start != NULL) { + parser->current.end = parser->next_start; + parser->next_start = NULL; + } + + // This value mirrors space_seen from CRuby. It tracks whether or not + // space has been eaten before the start of the next token. + bool space_seen = false; + + // First, we're going to skip past any whitespace at the front of the next + // token. + bool chomping = true; + while (parser->current.end < parser->end && chomping) { + switch (*parser->current.end) { + case ' ': + case '\t': + case '\f': + case '\v': + parser->current.end++; + space_seen = true; + break; + case '\r': + if (match_eol_offset(parser, 1)) { + chomping = false; + } else { + parser->current.end++; + space_seen = true; + } + break; + case '\\': { + size_t eol_length = match_eol_offset(parser, 1); + if (eol_length) { + if (parser->heredoc_end) { + parser->current.end = parser->heredoc_end; + parser->heredoc_end = NULL; + } else { + parser->current.end += eol_length + 1; + yp_newline_list_append(&parser->newline_list, parser->current.end - 1); + space_seen = true; + } + } else if (yp_char_is_inline_whitespace(*parser->current.end)) { + parser->current.end += 2; + } else { + chomping = false; + } + + break; + } + default: + chomping = false; + break; + } + } + + // Next, we'll set to start of this token to be the current end. + parser->current.start = parser->current.end; + + // We'll check if we're at the end of the file. If we are, then we + // need to return the EOF token. + if (parser->current.end >= parser->end) { + LEX(YP_TOKEN_EOF); + } + + // Finally, we'll check the current character to determine the next + // token. 
+ switch (*parser->current.end++) { + case '\0': // NUL or end of script + case '\004': // ^D + case '\032': // ^Z + parser->current.end--; + LEX(YP_TOKEN_EOF); + + case '#': { // comments + const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end); + parser->current.end = ending == NULL ? parser->end : ending; + + // If we found a comment while lexing, then we're going to + // add it to the list of comments in the file and keep + // lexing. + yp_comment_t *comment = parser_comment(parser, YP_COMMENT_INLINE); + yp_list_append(&parser->comment_list, (yp_list_node_t *) comment); + + if (ending) parser->current.end++; + parser->current.type = YP_TOKEN_COMMENT; + parser_lex_callback(parser); + + if (parser->current.start == parser->encoding_comment_start) { + parser_lex_encoding_comment(parser); + } + + if (!semantic_token_seen) { + parser_lex_frozen_string_literal_comment(parser); + } + + lexed_comment = true; + } + /* fallthrough */ + case '\r': + case '\n': { + parser->semantic_token_seen = semantic_token_seen & 0x1; + size_t eol_length = match_eol_at(parser, parser->current.end - 1); + + if (eol_length) { + // The only way you can have carriage returns in this + // particular loop is if you have a carriage return + // followed by a newline. In that case we'll just skip + // over the carriage return and continue lexing, in + // order to make it so that the newline token + // encapsulates both the carriage return and the + // newline. Note that we need to check that we haven't + // already lexed a comment here because that falls + // through into here as well. 
+ if (!lexed_comment) { + parser->current.end += eol_length - 1; // skip CR + } + + if (parser->heredoc_end == NULL) { + yp_newline_list_append(&parser->newline_list, parser->current.end - 1); + } + } + + if (parser->heredoc_end) { + parser_flush_heredoc_end(parser); + } + + // If this is an ignored newline, then we can continue lexing after + // calling the callback with the ignored newline token. + switch (lex_state_ignored_p(parser)) { + case YP_IGNORED_NEWLINE_NONE: + break; + case YP_IGNORED_NEWLINE_PATTERN: + if (parser->pattern_matching_newlines || parser->in_keyword_arg) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, YP_LEX_STATE_BEG); + parser->command_start = true; + parser->current.type = YP_TOKEN_NEWLINE; + return; + } + /* fallthrough */ + case YP_IGNORED_NEWLINE_ALL: + if (!lexed_comment) parser_lex_ignored_newline(parser); + lexed_comment = false; + goto lex_next_token; + } + + // Here we need to look ahead and see if there is a call operator + // (either . or &.) that starts the next line. If there is, then this + // is going to become an ignored newline and we're going to instead + // return the call operator. + const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start; + next_content += yp_strspn_inline_whitespace(next_content, parser->end - next_content); + + if (next_content < parser->end) { + // If we hit a comment after a newline, then we're going to check + // if it's ignored or if it's followed by a method call ('.'). + // If it is, then we're going to call the + // callback with an ignored newline and then continue lexing. + // Otherwise we'll return a regular newline. + if (next_content[0] == '#') { + // Here we look for a "." or "&." following a "\n". 
+ const uint8_t *following = next_newline(next_content, parser->end - next_content); + + while (following && (following + 1 < parser->end)) { + following++; + following += yp_strspn_inline_whitespace(following, parser->end - following); + + // If this is not followed by a comment, then we can break out + // of this loop. + if (peek_at(parser, following) != '#') break; + + // If there is a comment, then we need to find the end of the + // comment and continue searching from there. + following = next_newline(following, parser->end - following); + } + + // If the lex state was ignored, or we hit a '.' or a '&.', + // we will lex the ignored newline + if ( + lex_state_ignored_p(parser) || + (following && ( + (peek_at(parser, following) == '.') || + (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.') + )) + ) { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lexed_comment = false; + goto lex_next_token; + } + } + + // If we hit a . after a newline, then we're in a call chain and + // we need to return the call operator. + if (next_content[0] == '.') { + // To match ripper, we need to emit an ignored newline even though + // its a real newline in the case that we have a beginless range + // on a subsequent line. + if (peek_at(parser, next_content + 1) == '.') { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, YP_LEX_STATE_BEG); + parser->command_start = true; + parser->current.type = YP_TOKEN_NEWLINE; + return; + } + + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, YP_LEX_STATE_DOT); + parser->current.start = next_content; + parser->current.end = next_content + 1; + parser->next_start = NULL; + LEX(YP_TOKEN_DOT); + } + + // If we hit a &. after a newline, then we're in a call chain and + // we need to return the call operator. 
+ if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') { + if (!lexed_comment) parser_lex_ignored_newline(parser); + lex_state_set(parser, YP_LEX_STATE_DOT); + parser->current.start = next_content; + parser->current.end = next_content + 2; + parser->next_start = NULL; + LEX(YP_TOKEN_AMPERSAND_DOT); + } + } + + // At this point we know this is a regular newline, and we can set the + // necessary state and return the token. + lex_state_set(parser, YP_LEX_STATE_BEG); + parser->command_start = true; + parser->current.type = YP_TOKEN_NEWLINE; + if (!lexed_comment) parser_lex_callback(parser); + return; + } + + // , + case ',': + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + LEX(YP_TOKEN_COMMA); + + // ( + case '(': { + yp_token_type_t type = YP_TOKEN_PARENTHESIS_LEFT; + + if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (YP_LEX_STATE_END | YP_LEX_STATE_LABEL))) { + type = YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES; + } + + parser->enclosure_nesting++; + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + yp_do_loop_stack_push(parser, false); + LEX(type); + } + + // ) + case ')': + parser->enclosure_nesting--; + lex_state_set(parser, YP_LEX_STATE_ENDFN); + yp_do_loop_stack_pop(parser); + LEX(YP_TOKEN_PARENTHESIS_RIGHT); + + // ; + case ';': + lex_state_set(parser, YP_LEX_STATE_BEG); + parser->command_start = true; + LEX(YP_TOKEN_SEMICOLON); + + // [ [] []= + case '[': + parser->enclosure_nesting++; + yp_token_type_t type = YP_TOKEN_BRACKET_LEFT; + + if (lex_state_operator_p(parser)) { + if (match(parser, ']')) { + parser->enclosure_nesting--; + lex_state_set(parser, YP_LEX_STATE_ARG); + LEX(match(parser, '=') ? 
YP_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : YP_TOKEN_BRACKET_LEFT_RIGHT); + } + + lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABEL); + LEX(type); + } + + if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, YP_LEX_STATE_LABELED)))) { + type = YP_TOKEN_BRACKET_LEFT_ARRAY; + } + + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + yp_do_loop_stack_push(parser, false); + LEX(type); + + // ] + case ']': + parser->enclosure_nesting--; + lex_state_set(parser, YP_LEX_STATE_END); + yp_do_loop_stack_pop(parser); + LEX(YP_TOKEN_BRACKET_RIGHT); + + // { + case '{': { + yp_token_type_t type = YP_TOKEN_BRACE_LEFT; + + if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) { + // This { begins a lambda + parser->command_start = true; + lex_state_set(parser, YP_LEX_STATE_BEG); + type = YP_TOKEN_LAMBDA_BEGIN; + } else if (lex_state_p(parser, YP_LEX_STATE_LABELED)) { + // This { begins a hash literal + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + } else if (lex_state_p(parser, YP_LEX_STATE_ARG_ANY | YP_LEX_STATE_END | YP_LEX_STATE_ENDFN)) { + // This { begins a block + parser->command_start = true; + lex_state_set(parser, YP_LEX_STATE_BEG); + } else if (lex_state_p(parser, YP_LEX_STATE_ENDARG)) { + // This { begins a block on a command + parser->command_start = true; + lex_state_set(parser, YP_LEX_STATE_BEG); + } else { + // This { begins a hash literal + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + } + + parser->enclosure_nesting++; + parser->brace_nesting++; + yp_do_loop_stack_push(parser, false); + + LEX(type); + } + + // } + case '}': + parser->enclosure_nesting--; + yp_do_loop_stack_pop(parser); + + if ((parser->lex_modes.current->mode == YP_LEX_EMBEXPR) && (parser->brace_nesting == 0)) { + lex_mode_pop(parser); + LEX(YP_TOKEN_EMBEXPR_END); + } + + parser->brace_nesting--; + lex_state_set(parser, YP_LEX_STATE_END); + LEX(YP_TOKEN_BRACE_RIGHT); + + // * ** **= *= + case 
'*': { + if (match(parser, '*')) { + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_STAR_STAR_EQUAL); + } + + yp_token_type_t type = YP_TOKEN_STAR_STAR; + + if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) { + type = YP_TOKEN_USTAR_STAR; + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(type); + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_STAR_EQUAL); + } + + yp_token_type_t type = YP_TOKEN_STAR; + + if (lex_state_spcarg_p(parser, space_seen)) { + yp_diagnostic_list_append(&parser->warning_list, parser->current.start, parser->current.end, YP_WARN_AMBIGUOUS_PREFIX_STAR); + type = YP_TOKEN_USTAR; + } else if (lex_state_beg_p(parser)) { + type = YP_TOKEN_USTAR; + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(type); + } + + // ! 
!= !~ !@ + case '!': + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + if (match(parser, '@')) { + LEX(YP_TOKEN_BANG); + } + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + if (match(parser, '=')) { + LEX(YP_TOKEN_BANG_EQUAL); + } + + if (match(parser, '~')) { + LEX(YP_TOKEN_BANG_TILDE); + } + + LEX(YP_TOKEN_BANG); + + // = => =~ == === =begin + case '=': + if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && yp_char_is_whitespace(peek_offset(parser, 5))) { + yp_token_type_t type = lex_embdoc(parser); + + if (type == YP_TOKEN_EOF) { + LEX(type); + } + + goto lex_next_token; + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + if (match(parser, '>')) { + LEX(YP_TOKEN_EQUAL_GREATER); + } + + if (match(parser, '~')) { + LEX(YP_TOKEN_EQUAL_TILDE); + } + + if (match(parser, '=')) { + LEX(match(parser, '=') ? YP_TOKEN_EQUAL_EQUAL_EQUAL : YP_TOKEN_EQUAL_EQUAL); + } + + LEX(YP_TOKEN_EQUAL); + + // < << <<= <= <=> + case '<': + if (match(parser, '<')) { + if ( + !lex_state_p(parser, YP_LEX_STATE_DOT | YP_LEX_STATE_CLASS) && + !lex_state_end_p(parser) && + (!lex_state_p(parser, YP_LEX_STATE_ARG_ANY) || lex_state_p(parser, YP_LEX_STATE_LABELED) || space_seen) + ) { + const uint8_t *end = parser->current.end; + + yp_heredoc_quote_t quote = YP_HEREDOC_QUOTE_NONE; + yp_heredoc_indent_t indent = YP_HEREDOC_INDENT_NONE; + + if (match(parser, '-')) { + indent = YP_HEREDOC_INDENT_DASH; + } + else if (match(parser, '~')) { + indent = YP_HEREDOC_INDENT_TILDE; + } + + if (match(parser, '`')) { + quote = YP_HEREDOC_QUOTE_BACKTICK; + } + else if (match(parser, '"')) { + quote = YP_HEREDOC_QUOTE_DOUBLE; + } + else if (match(parser, '\'')) { + quote = YP_HEREDOC_QUOTE_SINGLE; + } + + const uint8_t *ident_start = parser->current.end; + size_t width = 0; + + if (parser->current.end >= parser->end) { + 
parser->current.end = end; + } else if (quote == YP_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) { + parser->current.end = end; + } else { + if (quote == YP_HEREDOC_QUOTE_NONE) { + parser->current.end += width; + + while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) { + parser->current.end += width; + } + } else { + // If we have quotes, then we're going to go until we find the + // end quote. + while ((parser->current.end < parser->end) && quote != (yp_heredoc_quote_t) (*parser->current.end)) { + parser->current.end++; + } + } + + size_t ident_length = (size_t) (parser->current.end - ident_start); + if (quote != YP_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) { + // TODO: handle unterminated heredoc + } + + lex_mode_push(parser, (yp_lex_mode_t) { + .mode = YP_LEX_HEREDOC, + .as.heredoc = { + .ident_start = ident_start, + .ident_length = ident_length, + .next_start = parser->current.end, + .quote = quote, + .indent = indent + } + }); + + if (parser->heredoc_end == NULL) { + const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end); + + if (body_start == NULL) { + // If there is no newline after the heredoc identifier, then + // this is not a valid heredoc declaration. In this case we + // will add an error, but we will still return a heredoc + // start. + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_EMBDOC_TERM); + body_start = parser->end; + } else { + // Otherwise, we want to indicate that the body of the + // heredoc starts on the character after the next newline. 
+ yp_newline_list_append(&parser->newline_list, body_start); + body_start++; + } + + parser->next_start = body_start; + } else { + parser->next_start = parser->heredoc_end; + } + + LEX(YP_TOKEN_HEREDOC_START); + } + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_LESS_LESS_EQUAL); + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + if (lex_state_p(parser, YP_LEX_STATE_CLASS)) parser->command_start = true; + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(YP_TOKEN_LESS_LESS); + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + if (lex_state_p(parser, YP_LEX_STATE_CLASS)) parser->command_start = true; + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + if (match(parser, '=')) { + if (match(parser, '>')) { + LEX(YP_TOKEN_LESS_EQUAL_GREATER); + } + + LEX(YP_TOKEN_LESS_EQUAL); + } + + LEX(YP_TOKEN_LESS); + + // > >> >>= >= + case '>': + if (match(parser, '>')) { + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + LEX(match(parser, '=') ? YP_TOKEN_GREATER_GREATER_EQUAL : YP_TOKEN_GREATER_GREATER); + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(match(parser, '=') ? 
YP_TOKEN_GREATER_EQUAL : YP_TOKEN_GREATER); + + // double-quoted string literal + case '"': { + bool label_allowed = (lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser); + lex_mode_push_string(parser, true, label_allowed, '\0', '"'); + LEX(YP_TOKEN_STRING_BEGIN); + } + + // xstring literal + case '`': { + if (lex_state_p(parser, YP_LEX_STATE_FNAME)) { + lex_state_set(parser, YP_LEX_STATE_ENDFN); + LEX(YP_TOKEN_BACKTICK); + } + + if (lex_state_p(parser, YP_LEX_STATE_DOT)) { + if (previous_command_start) { + lex_state_set(parser, YP_LEX_STATE_CMDARG); + } else { + lex_state_set(parser, YP_LEX_STATE_ARG); + } + + LEX(YP_TOKEN_BACKTICK); + } + + lex_mode_push_string(parser, true, false, '\0', '`'); + LEX(YP_TOKEN_BACKTICK); + } + + // single-quoted string literal + case '\'': { + bool label_allowed = (lex_state_p(parser, YP_LEX_STATE_LABEL | YP_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser); + lex_mode_push_string(parser, false, label_allowed, '\0', '\''); + LEX(YP_TOKEN_STRING_BEGIN); + } + + // ? 
character literal + case '?': + LEX(lex_question_mark(parser)); + + // & && &&= &= + case '&': { + if (match(parser, '&')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + + if (match(parser, '=')) { + LEX(YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL); + } + + LEX(YP_TOKEN_AMPERSAND_AMPERSAND); + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_AMPERSAND_EQUAL); + } + + if (match(parser, '.')) { + lex_state_set(parser, YP_LEX_STATE_DOT); + LEX(YP_TOKEN_AMPERSAND_DOT); + } + + yp_token_type_t type = YP_TOKEN_AMPERSAND; + if (lex_state_spcarg_p(parser, space_seen) || lex_state_beg_p(parser)) { + type = YP_TOKEN_UAMPERSAND; + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(type); + } + + // | || ||= |= + case '|': + if (match(parser, '|')) { + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PIPE_PIPE_EQUAL); + } + + if (lex_state_p(parser, YP_LEX_STATE_BEG)) { + parser->current.end--; + LEX(YP_TOKEN_PIPE); + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PIPE_PIPE); + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PIPE_EQUAL); + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + } + + LEX(YP_TOKEN_PIPE); + + // + += +@ + case '+': { + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + + if (match(parser, '@')) { + LEX(YP_TOKEN_UPLUS); + } + + LEX(YP_TOKEN_PLUS); + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PLUS_EQUAL); + } + + bool spcarg = lex_state_spcarg_p(parser, space_seen); + if (spcarg) { + yp_diagnostic_list_append( + &parser->warning_list, + parser->current.start, + parser->current.end, + YP_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS + ); + } + + if 
(lex_state_beg_p(parser) || spcarg) { + lex_state_set(parser, YP_LEX_STATE_BEG); + + if (yp_char_is_decimal_digit(peek(parser))) { + parser->current.end++; + yp_token_type_t type = lex_numeric(parser); + lex_state_set(parser, YP_LEX_STATE_END); + LEX(type); + } + + LEX(YP_TOKEN_UPLUS); + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PLUS); + } + + // - -= -@ + case '-': { + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + + if (match(parser, '@')) { + LEX(YP_TOKEN_UMINUS); + } + + LEX(YP_TOKEN_MINUS); + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_MINUS_EQUAL); + } + + if (match(parser, '>')) { + lex_state_set(parser, YP_LEX_STATE_ENDFN); + LEX(YP_TOKEN_MINUS_GREATER); + } + + bool spcarg = lex_state_spcarg_p(parser, space_seen); + if (spcarg) { + yp_diagnostic_list_append( + &parser->warning_list, + parser->current.start, + parser->current.end, + YP_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS + ); + } + + if (lex_state_beg_p(parser) || spcarg) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(yp_char_is_decimal_digit(peek(parser)) ? YP_TOKEN_UMINUS_NUM : YP_TOKEN_UMINUS); + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_MINUS); + } + + // . .. ... + case '.': { + bool beg_p = lex_state_beg_p(parser); + + if (match(parser, '.')) { + if (match(parser, '.')) { + // If we're _not_ inside a range within default parameters + if ( + !context_p(parser, YP_CONTEXT_DEFAULT_PARAMS) && + context_p(parser, YP_CONTEXT_DEF_PARAMS) + ) { + if (lex_state_p(parser, YP_LEX_STATE_END)) { + lex_state_set(parser, YP_LEX_STATE_BEG); + } else { + lex_state_set(parser, YP_LEX_STATE_ENDARG); + } + LEX(YP_TOKEN_UDOT_DOT_DOT); + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(beg_p ? YP_TOKEN_UDOT_DOT_DOT : YP_TOKEN_DOT_DOT_DOT); + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(beg_p ? 
YP_TOKEN_UDOT_DOT : YP_TOKEN_DOT_DOT); + } + + lex_state_set(parser, YP_LEX_STATE_DOT); + LEX(YP_TOKEN_DOT); + } + + // integer + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + yp_token_type_t type = lex_numeric(parser); + lex_state_set(parser, YP_LEX_STATE_END); + LEX(type); + } + + // :: symbol + case ':': + if (match(parser, ':')) { + if (lex_state_beg_p(parser) || lex_state_p(parser, YP_LEX_STATE_CLASS) || (lex_state_p(parser, YP_LEX_STATE_ARG_ANY) && space_seen)) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_UCOLON_COLON); + } + + lex_state_set(parser, YP_LEX_STATE_DOT); + LEX(YP_TOKEN_COLON_COLON); + } + + if (lex_state_end_p(parser) || yp_char_is_whitespace(peek(parser)) || peek(parser) == '#') { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_COLON); + } + + if (peek(parser) == '"' || peek(parser) == '\'') { + lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end); + parser->current.end++; + } + + lex_state_set(parser, YP_LEX_STATE_FNAME); + LEX(YP_TOKEN_SYMBOL_BEGIN); + + // / /= + case '/': + if (lex_state_beg_p(parser)) { + lex_mode_push_regexp(parser, '\0', '/'); + LEX(YP_TOKEN_REGEXP_BEGIN); + } + + if (match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_SLASH_EQUAL); + } + + if (lex_state_spcarg_p(parser, space_seen)) { + yp_diagnostic_list_append(&parser->warning_list, parser->current.start, parser->current.end, YP_WARN_AMBIGUOUS_SLASH); + lex_mode_push_regexp(parser, '\0', '/'); + LEX(YP_TOKEN_REGEXP_BEGIN); + } + + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(YP_TOKEN_SLASH); + + // ^ ^= + case '^': + if (lex_state_operator_p(parser)) { + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + LEX(match(parser, '=') ? 
YP_TOKEN_CARET_EQUAL : YP_TOKEN_CARET); + + // ~ ~@ + case '~': + if (lex_state_operator_p(parser)) { + (void) match(parser, '@'); + lex_state_set(parser, YP_LEX_STATE_ARG); + } else { + lex_state_set(parser, YP_LEX_STATE_BEG); + } + + LEX(YP_TOKEN_TILDE); + + // % %= %i %I %q %Q %w %W + case '%': { + // If there is no subsequent character then we have an + // invalid token. We're going to say it's the percent + // operator because we don't want to move into the string + // lex mode unnecessarily. + if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_PERCENT); + LEX(YP_TOKEN_PERCENT); + } + + if (!lex_state_beg_p(parser) && match(parser, '=')) { + lex_state_set(parser, YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PERCENT_EQUAL); + } + else if( + lex_state_beg_p(parser) || + (lex_state_p(parser, YP_LEX_STATE_FITEM) && (peek(parser) == 's')) || + lex_state_spcarg_p(parser, space_seen) + ) { + if (!parser->encoding.alnum_char(parser->current.end, parser->end - parser->current.end)) { + lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + + size_t eol_length = match_eol(parser); + if (eol_length) { + parser->current.end += eol_length; + yp_newline_list_append(&parser->newline_list, parser->current.end - 1); + } else { + parser->current.end++; + } + + if (parser->current.end < parser->end) { + LEX(YP_TOKEN_STRING_BEGIN); + } + } + + // Delimiters for %-literals cannot be alphanumeric. We + // validate that here. 
+ uint8_t delimiter = peek_offset(parser, 1); + if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_PERCENT); + goto lex_next_token; + } + + switch (peek(parser)) { + case 'i': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_list(parser, false, *parser->current.end++); + } + + LEX(YP_TOKEN_PERCENT_LOWER_I); + } + case 'I': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_list(parser, true, *parser->current.end++); + } + + LEX(YP_TOKEN_PERCENT_UPPER_I); + } + case 'r': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_regexp(parser, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + yp_newline_list_check_append(&parser->newline_list, parser->current.end); + parser->current.end++; + } + + LEX(YP_TOKEN_REGEXP_BEGIN); + } + case 'q': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + yp_newline_list_check_append(&parser->newline_list, parser->current.end); + parser->current.end++; + } + + LEX(YP_TOKEN_STRING_BEGIN); + } + case 'Q': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + yp_newline_list_check_append(&parser->newline_list, parser->current.end); + parser->current.end++; + } + + LEX(YP_TOKEN_STRING_BEGIN); + } + case 's': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_string(parser, false, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + lex_state_set(parser, YP_LEX_STATE_FNAME | 
YP_LEX_STATE_FITEM); + parser->current.end++; + } + + LEX(YP_TOKEN_SYMBOL_BEGIN); + } + case 'w': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_list(parser, false, *parser->current.end++); + } + + LEX(YP_TOKEN_PERCENT_LOWER_W); + } + case 'W': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_list(parser, true, *parser->current.end++); + } + + LEX(YP_TOKEN_PERCENT_UPPER_W); + } + case 'x': { + parser->current.end++; + + if (parser->current.end < parser->end) { + lex_mode_push_string(parser, true, false, lex_mode_incrementor(*parser->current.end), lex_mode_terminator(*parser->current.end)); + parser->current.end++; + } + + LEX(YP_TOKEN_PERCENT_LOWER_X); + } + default: + // If we get to this point, then we have a % that is completely + // unparseable. In this case we'll just drop it from the parser + // and skip past it and hope that the next token is something + // that we can parse. + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_PERCENT); + goto lex_next_token; + } + } + + lex_state_set(parser, lex_state_operator_p(parser) ? YP_LEX_STATE_ARG : YP_LEX_STATE_BEG); + LEX(YP_TOKEN_PERCENT); + } + + // global variable + case '$': { + yp_token_type_t type = lex_global_variable(parser); + + // If we're lexing an embedded variable, then we need to pop back into + // the parent lex context. + if (parser->lex_modes.current->mode == YP_LEX_EMBVAR) { + lex_mode_pop(parser); + } + + lex_state_set(parser, YP_LEX_STATE_END); + LEX(type); + } + + // instance variable, class variable + case '@': + lex_state_set(parser, parser->lex_state & YP_LEX_STATE_FNAME ? 
YP_LEX_STATE_ENDFN : YP_LEX_STATE_END); + LEX(lex_at_variable(parser)); + + default: { + if (*parser->current.start != '_') { + size_t width = char_is_identifier_start(parser, parser->current.start); + + // If this isn't the beginning of an identifier, then it's an invalid + // token as we've exhausted all of the other options. We'll skip past + // it and return the next token. + if (!width) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_INVALID_TOKEN); + goto lex_next_token; + } + + parser->current.end = parser->current.start + width; + } + + yp_token_type_t type = lex_identifier(parser, previous_command_start); + + // If we've hit a __END__ and it was at the start of the line or the + // start of the file and it is followed by either a \n or a \r\n, then + // this is the last token of the file. + if ( + ((parser->current.end - parser->current.start) == 7) && + current_token_starts_line(parser) && + (memcmp(parser->current.start, "__END__", 7) == 0) && + (parser->current.end == parser->end || match_eol(parser)) + ) + { + // Since we know we're about to add an __END__ comment, we know we + // need at add all of the newlines to get the correct column + // information for it. 
+ const uint8_t *cursor = parser->current.end; + while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) { + yp_newline_list_append(&parser->newline_list, cursor++); + } + + parser->current.end = parser->end; + parser->current.type = YP_TOKEN___END__; + parser_lex_callback(parser); + + yp_comment_t *comment = parser_comment(parser, YP_COMMENT___END__); + yp_list_append(&parser->comment_list, (yp_list_node_t *) comment); + + LEX(YP_TOKEN_EOF); + } + + yp_lex_state_t last_state = parser->lex_state; + + if (type == YP_TOKEN_IDENTIFIER || type == YP_TOKEN_CONSTANT || type == YP_TOKEN_METHOD_NAME) { + if (lex_state_p(parser, YP_LEX_STATE_BEG_ANY | YP_LEX_STATE_ARG_ANY | YP_LEX_STATE_DOT)) { + if (previous_command_start) { + lex_state_set(parser, YP_LEX_STATE_CMDARG); + } else { + lex_state_set(parser, YP_LEX_STATE_ARG); + } + } else if (parser->lex_state == YP_LEX_STATE_FNAME) { + lex_state_set(parser, YP_LEX_STATE_ENDFN); + } else { + lex_state_set(parser, YP_LEX_STATE_END); + } + } + + if ( + !(last_state & (YP_LEX_STATE_DOT | YP_LEX_STATE_FNAME)) && + (type == YP_TOKEN_IDENTIFIER) && + ((yp_parser_local_depth(parser, &parser->current) != -1) || + token_is_numbered_parameter(parser->current.start, parser->current.end)) + ) { + lex_state_set(parser, YP_LEX_STATE_END | YP_LEX_STATE_LABEL); + } + + LEX(type); + } + } + } + case YP_LEX_LIST: + if (parser->next_start != NULL) { + parser->current.end = parser->next_start; + parser->next_start = NULL; + } + + // First we'll set the beginning of the token. + parser->current.start = parser->current.end; + + // If there's any whitespace at the start of the list, then we're + // going to trim it off the beginning and create a new token. 
+ size_t whitespace; + + if (parser->heredoc_end) { + whitespace = yp_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end); + if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') { + whitespace += 1; + } + } else { + whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list); + } + + if (whitespace > 0) { + parser->current.end += whitespace; + if (peek_offset(parser, -1) == '\n') { + // mutates next_start + parser_flush_heredoc_end(parser); + } + LEX(YP_TOKEN_WORDS_SEP); + } + + // We'll check if we're at the end of the file. If we are, then we + // need to return the EOF token. + if (parser->current.end >= parser->end) { + LEX(YP_TOKEN_EOF); + } + + // Here we'll get a list of the places where strpbrk should break, + // and then find the first one. + yp_lex_mode_t *lex_mode = parser->lex_modes.current; + const uint8_t *breakpoints = lex_mode->as.list.breakpoints; + const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + + while (breakpoint != NULL) { + // If we hit a null byte, skip directly past it. + if (*breakpoint == '\0') { + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + continue; + } + + // If we hit whitespace, then we must have received content by + // now, so we can return an element of the list. + if (yp_char_is_whitespace(*breakpoint)) { + parser->current.end = breakpoint; + LEX(YP_TOKEN_STRING_CONTENT); + } + + //If we hit the terminator, we need to check which token to + // return. + if (*breakpoint == lex_mode->as.list.terminator) { + // If this terminator doesn't actually close the list, then + // we need to continue on past it. 
+ if (lex_mode->as.list.nesting > 0) { + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + lex_mode->as.list.nesting--; + continue; + } + + // If we've hit the terminator and we've already skipped + // past content, then we can return a list node. + if (breakpoint > parser->current.start) { + parser->current.end = breakpoint; + LEX(YP_TOKEN_STRING_CONTENT); + } + + // Otherwise, switch back to the default state and return + // the end of the list. + parser->current.end = breakpoint + 1; + lex_mode_pop(parser); + lex_state_set(parser, YP_LEX_STATE_END); + LEX(YP_TOKEN_STRING_END); + } + + // If we hit escapes, then we need to treat the next token + // literally. In this case we'll skip past the next character + // and find the next breakpoint. + if (*breakpoint == '\\') { + yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL; + size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false); + if (difference == 0) { + // we're at the end of the file + breakpoint = NULL; + continue; + } + + // If the result is an escaped newline ... + if (breakpoint[difference - 1] == '\n') { + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, flush the heredoc and + // continue parsing after heredoc_end. + parser->current.end = breakpoint + difference; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); + } + } + + breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); + continue; + } + + // If we hit a #, then we will attempt to lex interpolation. 
+ if (*breakpoint == '#') { + yp_token_type_t type = lex_interpolation(parser, breakpoint); + if (type != YP_TOKEN_NOT_PROVIDED) { + LEX(type); + } + + // If we haven't returned at this point then we had something + // that looked like an interpolated class or instance variable + // like "#@" but wasn't actually. In this case we'll just skip + // to the next breakpoint. + breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + continue; + } + + // If we've hit the incrementor, then we need to skip past it + // and find the next breakpoint. + assert(*breakpoint == lex_mode->as.list.incrementor); + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + lex_mode->as.list.nesting++; + continue; + } + + // If we were unable to find a breakpoint, then this token hits the end of + // the file. + LEX(YP_TOKEN_EOF); + + case YP_LEX_REGEXP: { + // First, we'll set to start of this token to be the current end. + if (parser->next_start == NULL) { + parser->current.start = parser->current.end; + } else { + parser->current.start = parser->next_start; + parser->current.end = parser->next_start; + parser->next_start = NULL; + } + + // We'll check if we're at the end of the file. If we are, then we need to + // return the EOF token. + if (parser->current.end >= parser->end) { + LEX(YP_TOKEN_EOF); + } + + // Get a reference to the current mode. + yp_lex_mode_t *lex_mode = parser->lex_modes.current; + + // These are the places where we need to split up the content of the + // regular expression. We'll use strpbrk to find the first of these + // characters. + const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints; + const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + + while (breakpoint != NULL) { + // If we hit a null byte, skip directly past it. 
+ if (*breakpoint == '\0') { + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + continue; + } + + // If we've hit a newline, then we need to track that in the + // list of newlines. + if (*breakpoint == '\n') { + // For the special case of a newline-terminated regular expression, we will pass + // through this branch twice -- once with YP_TOKEN_REGEXP_BEGIN and then again + // with YP_TOKEN_STRING_CONTENT. Let's avoid tracking the newline twice, by + // tracking it only in the REGEXP_BEGIN case. + if ( + !(lex_mode->as.regexp.terminator == '\n' && parser->current.type != YP_TOKEN_REGEXP_BEGIN) + && parser->heredoc_end == NULL + ) { + yp_newline_list_append(&parser->newline_list, breakpoint); + } + + if (lex_mode->as.regexp.terminator != '\n') { + // If the terminator is not a newline, then we can set + // the next breakpoint and continue. + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + continue; + } + } + + // If we hit the terminator, we need to determine what kind of + // token to return. + if (*breakpoint == lex_mode->as.regexp.terminator) { + if (lex_mode->as.regexp.nesting > 0) { + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + lex_mode->as.regexp.nesting--; + continue; + } + + // Here we've hit the terminator. If we have already consumed + // content then we need to return that content as string content + // first. + if (breakpoint > parser->current.start) { + parser->current.end = breakpoint; + LEX(YP_TOKEN_STRING_CONTENT); + } + + // Since we've hit the terminator of the regular expression, we now + // need to parse the options. 
+ parser->current.end = breakpoint + 1; + parser->current.end += yp_strspn_regexp_option(parser->current.end, parser->end - parser->current.end); + + lex_mode_pop(parser); + lex_state_set(parser, YP_LEX_STATE_END); + LEX(YP_TOKEN_REGEXP_END); + } + + // If we hit escapes, then we need to treat the next token + // literally. In this case we'll skip past the next character + // and find the next breakpoint. + if (*breakpoint == '\\') { + size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false); + if (difference == 0) { + // we're at the end of the file + breakpoint = NULL; + continue; + } + + // If the result is an escaped newline ... + if (breakpoint[difference - 1] == '\n') { + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, flush the heredoc and + // continue parsing after heredoc_end. + parser->current.end = breakpoint + difference; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); + } + } + + breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); + continue; + } + + // If we hit a #, then we will attempt to lex interpolation. + if (*breakpoint == '#') { + yp_token_type_t type = lex_interpolation(parser, breakpoint); + if (type != YP_TOKEN_NOT_PROVIDED) { + LEX(type); + } + + // If we haven't returned at this point then we had + // something that looked like an interpolated class or + // instance variable like "#@" but wasn't actually. In this + // case we'll just skip to the next breakpoint. + breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + continue; + } + + // If we've hit the incrementor, then we need to skip past it + // and find the next breakpoint. 
+ assert(*breakpoint == lex_mode->as.regexp.incrementor); + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + lex_mode->as.regexp.nesting++; + continue; + } + + // At this point, the breakpoint is NULL which means we were unable to + // find anything before the end of the file. + LEX(YP_TOKEN_EOF); + } + case YP_LEX_STRING: { + // First, we'll set to start of this token to be the current end. + if (parser->next_start == NULL) { + parser->current.start = parser->current.end; + } else { + parser->current.start = parser->next_start; + parser->current.end = parser->next_start; + parser->next_start = NULL; + } + + // We'll check if we're at the end of the file. If we are, then we need to + // return the EOF token. + if (parser->current.end >= parser->end) { + LEX(YP_TOKEN_EOF); + } + + // These are the places where we need to split up the content of the + // string. We'll use strpbrk to find the first of these characters. + const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints; + const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + + while (breakpoint != NULL) { + // If we hit the incrementor, then we'll increment then nesting and + // continue lexing. + if ( + parser->lex_modes.current->as.string.incrementor != '\0' && + *breakpoint == parser->lex_modes.current->as.string.incrementor + ) { + parser->lex_modes.current->as.string.nesting++; + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + continue; + } + + // Note that we have to check the terminator here first because we could + // potentially be parsing a % string that has a # character as the + // terminator. + if (*breakpoint == parser->lex_modes.current->as.string.terminator) { + // If this terminator doesn't actually close the string, then we need + // to continue on past it. 
+ if (parser->lex_modes.current->as.string.nesting > 0) { + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + parser->lex_modes.current->as.string.nesting--; + continue; + } + + // Here we've hit the terminator. If we have already consumed content + // then we need to return that content as string content first. + if (breakpoint > parser->current.start) { + parser->current.end = breakpoint; + LEX(YP_TOKEN_STRING_CONTENT); + } + + // Otherwise we need to switch back to the parent lex mode and + // return the end of the string. + size_t eol_length = match_eol_at(parser, breakpoint); + if (eol_length) { + parser->current.end = breakpoint + eol_length; + yp_newline_list_append(&parser->newline_list, parser->current.end - 1); + } else { + parser->current.end = breakpoint + 1; + } + + if ( + parser->lex_modes.current->as.string.label_allowed && + (peek(parser) == ':') && + (peek_offset(parser, 1) != ':') + ) { + parser->current.end++; + lex_state_set(parser, YP_LEX_STATE_ARG | YP_LEX_STATE_LABELED); + lex_mode_pop(parser); + LEX(YP_TOKEN_LABEL_END); + } + + lex_state_set(parser, YP_LEX_STATE_END); + lex_mode_pop(parser); + LEX(YP_TOKEN_STRING_END); + } + + // When we hit a newline, we need to flush any potential heredocs. Note + // that this has to happen after we check for the terminator in case the + // terminator is a newline character. + if (*breakpoint == '\n') { + if (parser->heredoc_end == NULL) { + yp_newline_list_append(&parser->newline_list, breakpoint); + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + continue; + } else { + parser->current.end = breakpoint + 1; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } + } + + switch (*breakpoint) { + case '\0': + // Skip directly past the null character. 
+ breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + break; + case '\\': { + // If we hit escapes, then we need to treat the next token + // literally. In this case we'll skip past the next character and + // find the next breakpoint. + yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL; + size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false); + if (difference == 0) { + // we're at the end of the file + breakpoint = NULL; + break; + } + + // If the result is an escaped newline ... + if (breakpoint[difference - 1] == '\n') { + if (parser->heredoc_end) { + // ... if we are on the same line as a heredoc, flush the heredoc and + // continue parsing after heredoc_end. + parser->current.end = breakpoint + difference; + parser_flush_heredoc_end(parser); + LEX(YP_TOKEN_STRING_CONTENT); + } else { + // ... else track the newline. + yp_newline_list_append(&parser->newline_list, breakpoint + difference - 1); + } + } + + breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); + break; + } + case '#': { + yp_token_type_t type = lex_interpolation(parser, breakpoint); + if (type != YP_TOKEN_NOT_PROVIDED) { + LEX(type); + } + + // If we haven't returned at this point then we had something that + // looked like an interpolated class or instance variable like "#@" + // but wasn't actually. In this case we'll just skip to the next + // breakpoint. + breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + break; + } + default: + assert(false && "unreachable"); + } + } + + // If we've hit the end of the string, then this is an unterminated + // string. In that case we'll return the EOF token. + parser->current.end = parser->end; + LEX(YP_TOKEN_EOF); + } + case YP_LEX_HEREDOC: { + // First, we'll set to start of this token. 
+ if (parser->next_start == NULL) { + parser->current.start = parser->current.end; + } else { + parser->current.start = parser->next_start; + parser->current.end = parser->next_start; + parser->heredoc_end = NULL; + parser->next_start = NULL; + } + + // We'll check if we're at the end of the file. If we are, then we need to + // return the EOF token. + if (parser->current.end >= parser->end) { + LEX(YP_TOKEN_EOF); + } + + // Now let's grab the information about the identifier off of the current + // lex mode. + const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start; + size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length; + + // If we are immediately following a newline and we have hit the + // terminator, then we need to return the ending of the heredoc. + if (current_token_starts_line(parser)) { + const uint8_t *start = parser->current.start; + if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) { + start += yp_strspn_inline_whitespace(start, parser->end - start); + } + + if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) { + bool matched = true; + bool at_end = false; + + size_t eol_length = match_eol_at(parser, start + ident_length); + if (eol_length) { + parser->current.end = start + ident_length + eol_length; + yp_newline_list_append(&parser->newline_list, parser->current.end - 1); + } else if (parser->end == (start + ident_length)) { + parser->current.end = start + ident_length; + at_end = true; + } else { + matched = false; + } + + if (matched) { + if (*parser->lex_modes.current->as.heredoc.next_start == '\\') { + parser->next_start = NULL; + } else { + parser->next_start = parser->lex_modes.current->as.heredoc.next_start; + parser->heredoc_end = parser->current.end; + } + + lex_mode_pop(parser); + if (!at_end) { + lex_state_set(parser, YP_LEX_STATE_END); + } + LEX(YP_TOKEN_HEREDOC_END); + } + } + } + + // Otherwise we'll be parsing string content. 
These are the places where + // we need to split up the content of the heredoc. We'll use strpbrk to + // find the first of these characters. + uint8_t breakpoints[] = "\n\\#"; + + yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote; + if (quote == YP_HEREDOC_QUOTE_SINGLE) { + breakpoints[2] = '\0'; + } + + const uint8_t *breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + + while (breakpoint != NULL) { + switch (*breakpoint) { + case '\0': + // Skip directly past the null character. + breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + break; + case '\n': { + if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) { + parser_flush_heredoc_end(parser); + parser->current.end = breakpoint + 1; + LEX(YP_TOKEN_STRING_CONTENT); + } + + yp_newline_list_append(&parser->newline_list, breakpoint); + + const uint8_t *start = breakpoint + 1; + if (parser->lex_modes.current->as.heredoc.indent != YP_HEREDOC_INDENT_NONE) { + start += yp_strspn_inline_whitespace(start, parser->end - start); + } + + // If we have hit a newline that is followed by a valid terminator, + // then we need to return the content of the heredoc here as string + // content. Then, the next time a token is lexed, it will match + // again and return the end of the heredoc. + if ( + (start + ident_length <= parser->end) && + (memcmp(start, ident_start, ident_length) == 0) + ) { + // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid. + if ( + start + ident_length == parser->end || + match_eol_at(parser, start + ident_length) + ) { + parser->current.end = breakpoint + 1; + LEX(YP_TOKEN_STRING_CONTENT); + } + } + + // Otherwise we hit a newline and it wasn't followed by a + // terminator, so we can continue parsing. 
+ breakpoint = yp_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1)); + break; + } + case '\\': { + // If we hit an escape, then we need to skip past + // however many characters the escape takes up. However + // it's important that if \n or \r\n are escaped that we + // stop looping before the newline and not after the + // newline so that we can still potentially find the + // terminator of the heredoc. + size_t eol_length = match_eol_at(parser, breakpoint + 1); + if (eol_length) { + breakpoint += eol_length; + } else { + yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL; + size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false); + if (difference == 0) { + // we're at the end of the file + breakpoint = NULL; + break; + } + + yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1); + + breakpoint = yp_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference)); + } + + break; + } + case '#': { + yp_token_type_t type = lex_interpolation(parser, breakpoint); + if (type != YP_TOKEN_NOT_PROVIDED) { + LEX(type); + } + + // If we haven't returned at this point then we had something + // that looked like an interpolated class or instance variable + // like "#@" but wasn't actually. In this case we'll just skip + // to the next breakpoint. + breakpoint = yp_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end); + break; + } + default: + assert(false && "unreachable"); + } + } + + // If we've hit the end of the string, then this is an unterminated + // heredoc. In that case we'll return the EOF token. 
+ parser->current.end = parser->end; + LEX(YP_TOKEN_EOF); + } + } + + assert(false && "unreachable"); +} + +#undef LEX + +/******************************************************************************/ +/* Parse functions */ +/******************************************************************************/ + +// When we are parsing certain content, we need to unescape the content to +// provide to the consumers of the parser. The following functions accept a range +// of characters from the source and unescapes into the provided type. +// +// We have functions for unescaping regular expression nodes, string nodes, +// symbol nodes, and xstring nodes +static yp_regular_expression_node_t * +yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) { + yp_regular_expression_node_t *node = yp_regular_expression_node_create(parser, opening, content, closing); + + assert((content->end - content->start) >= 0); + yp_string_shared_init(&node->unescaped, content->start, content->end); + + yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type); + return node; +} + +static yp_symbol_node_t * +yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) { + yp_symbol_node_t *node = yp_symbol_node_create(parser, opening, content, closing); + + assert((content->end - content->start) >= 0); + yp_string_shared_init(&node->unescaped, content->start, content->end); + + yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type); + return node; +} + +static yp_string_node_t * +yp_char_literal_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) { + yp_string_node_t *node = yp_string_node_create(parser, opening, content, 
closing); + + assert((content->end - content->start) >= 0); + yp_string_shared_init(&node->unescaped, content->start, content->end); + + yp_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type); + return node; +} + +static yp_string_node_t * +yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing, yp_unescape_type_t unescape_type) { + yp_string_node_t *node = yp_string_node_create(parser, opening, content, closing); + + assert((content->end - content->start) >= 0); + yp_string_shared_init(&node->unescaped, content->start, content->end); + + yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type); + return node; +} + +static yp_x_string_node_t * +yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *opening, const yp_token_t *content, const yp_token_t *closing) { + yp_x_string_node_t *node = yp_xstring_node_create(parser, opening, content, closing); + + assert((content->end - content->start) >= 0); + yp_string_shared_init(&node->unescaped, content->start, content->end); + + yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL); + return node; +} + +// These are the various precedence rules. Because we are using a Pratt parser, +// they are named binding power to represent the manner in which nodes are bound +// together in the stack. +// +// We increment by 2 because we want to leave room for the infix operators to +// specify their associativity by adding or subtracting one. +typedef enum { + YP_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator + YP_BINDING_POWER_STATEMENT = 2, + YP_BINDING_POWER_MODIFIER = 4, // if unless until while in + YP_BINDING_POWER_MODIFIER_RESCUE = 6, // rescue + YP_BINDING_POWER_COMPOSITION = 8, // and or + YP_BINDING_POWER_NOT = 10, // not + YP_BINDING_POWER_MATCH = 12, // => + YP_BINDING_POWER_DEFINED = 14, // defined? 
+ YP_BINDING_POWER_ASSIGNMENT = 16, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= + YP_BINDING_POWER_TERNARY = 18, // ?: + YP_BINDING_POWER_RANGE = 20, // .. ... + YP_BINDING_POWER_LOGICAL_OR = 22, // || + YP_BINDING_POWER_LOGICAL_AND = 24, // && + YP_BINDING_POWER_EQUALITY = 26, // <=> == === != =~ !~ + YP_BINDING_POWER_COMPARISON = 28, // > >= < <= + YP_BINDING_POWER_BITWISE_OR = 30, // | ^ + YP_BINDING_POWER_BITWISE_AND = 32, // & + YP_BINDING_POWER_SHIFT = 34, // << >> + YP_BINDING_POWER_TERM = 36, // + - + YP_BINDING_POWER_FACTOR = 38, // * / % + YP_BINDING_POWER_UMINUS = 40, // -@ + YP_BINDING_POWER_EXPONENT = 42, // ** + YP_BINDING_POWER_UNARY = 44, // ! ~ +@ + YP_BINDING_POWER_INDEX = 46, // [] []= + YP_BINDING_POWER_CALL = 48, // :: . + YP_BINDING_POWER_MAX = 50 +} yp_binding_power_t; + +// This struct represents a set of binding powers used for a given token. They +// are combined in this way to make it easier to represent associativity. +typedef struct { + yp_binding_power_t left; + yp_binding_power_t right; + bool binary; +} yp_binding_powers_t; + +#define BINDING_POWER_ASSIGNMENT { YP_BINDING_POWER_UNARY, YP_BINDING_POWER_ASSIGNMENT, true } +#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true } +#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true } +#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false } + +yp_binding_powers_t yp_binding_powers[YP_TOKEN_MAXIMUM] = { + // if unless until while in rescue + [YP_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER), + [YP_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER), + [YP_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER), + [YP_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER), + [YP_TOKEN_KEYWORD_IN] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MODIFIER), + + // rescue modifier + [YP_TOKEN_KEYWORD_RESCUE_MODIFIER] = { + YP_BINDING_POWER_ASSIGNMENT, + 
YP_BINDING_POWER_MODIFIER_RESCUE + 1, + true + }, + + // and or + [YP_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPOSITION), + [YP_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPOSITION), + + // => + [YP_TOKEN_EQUAL_GREATER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_MATCH), + + // &&= &= ^= = >>= <<= -= %= |= += /= *= **= + [YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT, + [YP_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT, + + // ?: + [YP_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_TERNARY), + + // .. ... 
+ [YP_TOKEN_DOT_DOT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_RANGE), + [YP_TOKEN_DOT_DOT_DOT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_RANGE), + + // || + [YP_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_LOGICAL_OR), + + // && + [YP_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_LOGICAL_AND), + + // != !~ == === =~ <=> + [YP_TOKEN_BANG_EQUAL] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY), + [YP_TOKEN_BANG_TILDE] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY), + [YP_TOKEN_EQUAL_EQUAL] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY), + [YP_TOKEN_EQUAL_EQUAL_EQUAL] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY), + [YP_TOKEN_EQUAL_TILDE] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY), + [YP_TOKEN_LESS_EQUAL_GREATER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_EQUALITY), + + // > >= < <= + [YP_TOKEN_GREATER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON), + [YP_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON), + [YP_TOKEN_LESS] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON), + [YP_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_COMPARISON), + + // ^ | + [YP_TOKEN_CARET] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_BITWISE_OR), + [YP_TOKEN_PIPE] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_BITWISE_OR), + + // & + [YP_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_BITWISE_AND), + + // >> << + [YP_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_SHIFT), + [YP_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_SHIFT), + + // - + + [YP_TOKEN_MINUS] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_TERM), + [YP_TOKEN_PLUS] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_TERM), + + // % / * + [YP_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR), + [YP_TOKEN_SLASH] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR), + [YP_TOKEN_STAR] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR), + [YP_TOKEN_USTAR] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_FACTOR), + + // -@ + [YP_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UMINUS), + [YP_TOKEN_UMINUS_NUM] = { 
YP_BINDING_POWER_UMINUS, YP_BINDING_POWER_MAX, false }, + + // ** + [YP_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_EXPONENT), + [YP_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY), + + // ! ~ +@ + [YP_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY), + [YP_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY), + [YP_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(YP_BINDING_POWER_UNARY), + + // [ + [YP_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(YP_BINDING_POWER_INDEX), + + // :: . &. + [YP_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_CALL), + [YP_TOKEN_DOT] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_CALL), + [YP_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(YP_BINDING_POWER_CALL) +}; + +#undef BINDING_POWER_ASSIGNMENT +#undef LEFT_ASSOCIATIVE +#undef RIGHT_ASSOCIATIVE +#undef RIGHT_ASSOCIATIVE_UNARY + +// Returns true if the current token is of the given type. +static inline bool +match1(const yp_parser_t *parser, yp_token_type_t type) { + return parser->current.type == type; +} + +// Returns true if the current token is of either of the given types. +static inline bool +match2(const yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2) { + return match1(parser, type1) || match1(parser, type2); +} + +// Returns true if the current token is any of the three given types. +static inline bool +match3(const yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3) { + return match1(parser, type1) || match1(parser, type2) || match1(parser, type3); +} + +// Returns true if the current token is any of the five given types. 
+static inline bool +match5(const yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3, yp_token_type_t type4, yp_token_type_t type5) { + return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5); +} + +// Returns true if the current token is any of the six given types. +static inline bool +match6(const yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3, yp_token_type_t type4, yp_token_type_t type5, yp_token_type_t type6) { + return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6); +} + +// Returns true if the current token is any of the seven given types. +static inline bool +match7(const yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3, yp_token_type_t type4, yp_token_type_t type5, yp_token_type_t type6, yp_token_type_t type7) { + return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7); +} + +// Returns true if the current token is any of the eight given types. +static inline bool +match8(const yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3, yp_token_type_t type4, yp_token_type_t type5, yp_token_type_t type6, yp_token_type_t type7, yp_token_type_t type8) { + return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8); +} + +// If the current token is of the specified type, lex forward by one token and +// return true. Otherwise, return false. For example: +// +// if (accept1(parser, YP_TOKEN_COLON)) { ... 
} +// +static bool +accept1(yp_parser_t *parser, yp_token_type_t type) { + if (match1(parser, type)) { + parser_lex(parser); + return true; + } + return false; +} + +// If the current token is either of the two given types, lex forward by one +// token and return true. Otherwise return false. +static inline bool +accept2(yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2) { + if (match2(parser, type1, type2)) { + parser_lex(parser); + return true; + } + return false; +} + +// If the current token is any of the three given types, lex forward by one +// token and return true. Otherwise return false. +static inline bool +accept3(yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3) { + if (match3(parser, type1, type2, type3)) { + parser_lex(parser); + return true; + } + return false; +} + +// This function indicates that the parser expects a token in a specific +// position. For example, if you're parsing a BEGIN block, you know that a { is +// expected immediately after the keyword. In that case you would call this +// function to indicate that that token should be found. +// +// If we didn't find the token that we were expecting, then we're going to add +// an error to the parser's list of errors (to indicate that the tree is not +// valid) and create an artificial token instead. This allows us to recover from +// the fact that the token isn't present and continue parsing. +static void +expect1(yp_parser_t *parser, yp_token_type_t type, yp_diagnostic_id_t diag_id) { + if (accept1(parser, type)) return; + + const uint8_t *location = parser->previous.end; + yp_diagnostic_list_append(&parser->error_list, location, location, diag_id); + + parser->previous.start = location; + parser->previous.type = YP_TOKEN_MISSING; +} + +// This function is the same as expect1, but it expects either of two token +// types. 
+static void +expect2(yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_diagnostic_id_t diag_id) { + if (accept2(parser, type1, type2)) return; + + const uint8_t *location = parser->previous.end; + yp_diagnostic_list_append(&parser->error_list, location, location, diag_id); + + parser->previous.start = location; + parser->previous.type = YP_TOKEN_MISSING; +} + +// This function is the same as expect2, but it expects one of three token types. +static void +expect3(yp_parser_t *parser, yp_token_type_t type1, yp_token_type_t type2, yp_token_type_t type3, yp_diagnostic_id_t diag_id) { + if (accept3(parser, type1, type2, type3)) return; + + const uint8_t *location = parser->previous.end; + yp_diagnostic_list_append(&parser->error_list, location, location, diag_id); + + parser->previous.start = location; + parser->previous.type = YP_TOKEN_MISSING; +} + +static yp_node_t * +parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, yp_diagnostic_id_t diag_id); + +// This function controls whether or not we will attempt to parse an expression +// beginning at the subsequent token. It is used when we are in a context where +// an expression is optional. +// +// For example, looking at a range object when we've already lexed the operator, +// we need to know if we should attempt to parse an expression on the right. +// +// For another example, if we've parsed an identifier or a method call and we do +// not have parentheses, then the next token may be the start of an argument or +// it may not. +// +// CRuby parsers that are generated would resolve this by using a lookahead and +// potentially backtracking. We attempt to do this by just looking at the next +// token and making a decision based on that. I am not sure if this is going to +// work in all cases, it may need to be refactored later. But it appears to work +// for now. 
+static inline bool +token_begins_expression_p(yp_token_type_t type) { + switch (type) { + case YP_TOKEN_EQUAL_GREATER: + case YP_TOKEN_KEYWORD_IN: + // We need to special case this because it is a binary operator that + // should not be marked as beginning an expression. + return false; + case YP_TOKEN_BRACE_RIGHT: + case YP_TOKEN_BRACKET_RIGHT: + case YP_TOKEN_COLON: + case YP_TOKEN_COMMA: + case YP_TOKEN_EMBEXPR_END: + case YP_TOKEN_EOF: + case YP_TOKEN_LAMBDA_BEGIN: + case YP_TOKEN_KEYWORD_DO: + case YP_TOKEN_KEYWORD_DO_LOOP: + case YP_TOKEN_KEYWORD_END: + case YP_TOKEN_KEYWORD_ELSE: + case YP_TOKEN_KEYWORD_ELSIF: + case YP_TOKEN_KEYWORD_ENSURE: + case YP_TOKEN_KEYWORD_THEN: + case YP_TOKEN_KEYWORD_RESCUE: + case YP_TOKEN_KEYWORD_WHEN: + case YP_TOKEN_NEWLINE: + case YP_TOKEN_PARENTHESIS_RIGHT: + case YP_TOKEN_SEMICOLON: + // The reason we need this short-circuit is because we're using the + // binding powers table to tell us if the subsequent token could + // potentially be the start of an expression . If there _is_ a binding + // power for one of these tokens, then we should remove it from this list + // and let it be handled by the default case below. + assert(yp_binding_powers[type].left == YP_BINDING_POWER_UNSET); + return false; + case YP_TOKEN_UAMPERSAND: + // This is a special case because this unary operator cannot appear + // as a general operator, it only appears in certain circumstances. + return false; + case YP_TOKEN_UCOLON_COLON: + case YP_TOKEN_UMINUS: + case YP_TOKEN_UMINUS_NUM: + case YP_TOKEN_UPLUS: + case YP_TOKEN_BANG: + case YP_TOKEN_TILDE: + case YP_TOKEN_UDOT_DOT: + case YP_TOKEN_UDOT_DOT_DOT: + // These unary tokens actually do have binding power associated with them + // so that we can correctly place them into the precedence order. But we + // want them to be marked as beginning an expression, so we need to + // special case them here. 
+ return true; + default: + return yp_binding_powers[type].left == YP_BINDING_POWER_UNSET; + } +} + +// Parse an expression with the given binding power that may be optionally +// prefixed by the * operator. +static yp_node_t * +parse_starred_expression(yp_parser_t *parser, yp_binding_power_t binding_power, yp_diagnostic_id_t diag_id) { + if (accept1(parser, YP_TOKEN_USTAR)) { + yp_token_t operator = parser->previous; + yp_node_t *expression = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_STAR); + return (yp_node_t *) yp_splat_node_create(parser, &operator, expression); + } + + return parse_expression(parser, binding_power, diag_id); +} + +// Convert the name of a method into the corresponding write method name. For +// exmaple, foo would be turned into foo=. +static void +parse_write_name(yp_string_t *string) { + // The method name needs to change. If we previously had + // foo, we now need foo=. In this case we'll allocate a new + // owned string, copy the previous method name in, and + // append an =. + size_t length = yp_string_length(string); + uint8_t *name = calloc(length + 1, sizeof(uint8_t)); + if (name == NULL) return; + + memcpy(name, yp_string_source(string), length); + name[length] = '='; + + // Now switch the name to the new string. + yp_string_free(string); + yp_string_owned_init(string, name, length + 1); +} + +// Convert the given node into a valid target node. 
static yp_node_t *
parse_target(yp_parser_t *parser, yp_node_t *target) {
    // Most read nodes are converted by retagging them in place: the node keeps
    // its memory and only its type changes. Each of those cases asserts that
    // the read struct and the target struct have the same size.
    switch (YP_NODE_TYPE(target)) {
        case YP_MISSING_NODE:
            // Nothing to convert; the node is handed back unchanged.
            return target;
        case YP_CLASS_VARIABLE_READ_NODE:
            assert(sizeof(yp_class_variable_target_node_t) == sizeof(yp_class_variable_read_node_t));
            target->type = YP_CLASS_VARIABLE_TARGET_NODE;
            return target;
        case YP_CONSTANT_PATH_NODE:
            assert(sizeof(yp_constant_path_target_node_t) == sizeof(yp_constant_path_node_t));
            target->type = YP_CONSTANT_PATH_TARGET_NODE;
            return target;
        case YP_CONSTANT_READ_NODE:
            assert(sizeof(yp_constant_target_node_t) == sizeof(yp_constant_read_node_t));
            target->type = YP_CONSTANT_TARGET_NODE;
            return target;
        case YP_BACK_REFERENCE_READ_NODE:
        case YP_NUMBERED_REFERENCE_READ_NODE:
            // These are reported as read-only write targets. The node is
            // returned as is, without being converted.
            yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, YP_ERR_WRITE_TARGET_READONLY);
            return target;
        case YP_GLOBAL_VARIABLE_READ_NODE:
            assert(sizeof(yp_global_variable_target_node_t) == sizeof(yp_global_variable_read_node_t));
            target->type = YP_GLOBAL_VARIABLE_TARGET_NODE;
            return target;
        case YP_LOCAL_VARIABLE_READ_NODE:
            // A name that looks like a numbered parameter is reported as
            // reserved and left as a read node; any other local is retagged.
            if (token_is_numbered_parameter(target->location.start, target->location.end)) {
                yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, YP_ERR_PARAMETER_NUMBERED_RESERVED);
            } else {
                assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
                target->type = YP_LOCAL_VARIABLE_TARGET_NODE;
            }

            return target;
        case YP_INSTANCE_VARIABLE_READ_NODE:
            assert(sizeof(yp_instance_variable_target_node_t) == sizeof(yp_instance_variable_read_node_t));
            target->type = YP_INSTANCE_VARIABLE_TARGET_NODE;
            return target;
        case YP_MULTI_TARGET_NODE:
            // Already a target node.
            return target;
        case YP_SPLAT_NODE: {
            yp_splat_node_t *splat = (yp_splat_node_t *) target;

            // Convert the splatted expression (if there is one) recursively,
            // then wrap the splat in a fresh multi-target node.
            if (splat->expression != NULL) {
                splat->expression = parse_target(parser, splat->expression);
            }

            yp_multi_target_node_t *multi_target = yp_multi_target_node_create(parser);
            yp_multi_target_node_targets_append(multi_target, (yp_node_t *) splat);

            return (yp_node_t *) multi_target;
        }
        case YP_CALL_NODE: {
            yp_call_node_t *call = (yp_call_node_t *) target;

            // If we have no arguments to the call node and we need this to be a
            // target then this is either a method call or a local variable write.
            // The checks below require a message that does not end in ! or ?,
            // no opening location, no arguments and no block.
            if (
                (call->message_loc.start != NULL) &&
                (call->message_loc.end[-1] != '!') &&
                (call->message_loc.end[-1] != '?') &&
                (call->opening_loc.start == NULL) &&
                (call->arguments == NULL) &&
                (call->block == NULL)
            ) {
                if (call->receiver == NULL) {
                    // When we get here, we have a local variable write, because it
                    // was previously marked as a method call but now we have an =.
                    // This looks like:
                    //
                    //     foo = 1
                    //
                    // When it was parsed in the prefix position, foo was seen as a
                    // method call with no receiver and no arguments. Now we have an
                    // =, so we know it's a local variable write.
                    //
                    // The message location is copied out first because the call
                    // node is destroyed before the replacement node is built.
                    const yp_location_t message = call->message_loc;

                    yp_parser_local_add_location(parser, message.start, message.end);
                    yp_node_destroy(parser, target);

                    const yp_token_t name = { .type = YP_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
                    target = (yp_node_t *) yp_local_variable_read_node_create(parser, &name, 0);

                    assert(sizeof(yp_local_variable_target_node_t) == sizeof(yp_local_variable_read_node_t));
                    target->type = YP_LOCAL_VARIABLE_TARGET_NODE;

                    // Unlike the YP_LOCAL_VARIABLE_READ_NODE case above, the
                    // node is converted even when the name is reported as a
                    // reserved numbered parameter.
                    if (token_is_numbered_parameter(message.start, message.end)) {
                        yp_diagnostic_list_append(&parser->error_list, message.start, message.end, YP_ERR_PARAMETER_NUMBERED_RESERVED);
                    }

                    return target;
                }

                // There is a receiver, so this is an attribute write such as
                // foo.bar = 1. When the message starts with an underscore or
                // passes the encoding's alnum_char check, the call node is kept
                // and its name is switched to the writer name (bar=).
                if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
                    parse_write_name(&call->name);
                    return (yp_node_t *) call;
                }
            }

            // If there is no call operator and the message is "[]" then this is
            // an aref expression, and we can transform it into an aset
            // expression.
            if (
                (call->call_operator_loc.start == NULL) &&
                (call->message_loc.start != NULL) &&
                (call->message_loc.start[0] == '[') &&
                (call->message_loc.end[-1] == ']') &&
                (call->block == NULL)
            ) {
                // Free the previous name and replace it with "[]=".
                yp_string_free(&call->name);
                yp_string_constant_init(&call->name, "[]=", 3);
                return target;
            }
        }
        // Any other kind of call is not a valid target and is reported by the
        // default case.
        /* fallthrough */
        default:
            // In this case we have a node that we don't know how to convert
            // into a target. We need to treat it as an error. For now, we'll
            // mark it as an error and just skip right past it.
            yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, YP_ERR_WRITE_TARGET_UNEXPECTED);
            return target;
    }
}

// Parse a write target and validate that it is in a valid position for
// assignment.
+static yp_node_t * +parse_target_validate(yp_parser_t *parser, yp_node_t *target) { + yp_node_t *result = parse_target(parser, target); + + // Ensure that we have either an = or a ) after the targets. + if (!match3(parser, YP_TOKEN_EQUAL, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_KEYWORD_IN)) { + yp_diagnostic_list_append(&parser->error_list, result->location.start, result->location.end, YP_ERR_WRITE_TARGET_UNEXPECTED); + } + + return result; +} + +// Convert the given node into a valid write node. +static yp_node_t * +parse_write(yp_parser_t *parser, yp_node_t *target, yp_token_t *operator, yp_node_t *value) { + switch (YP_NODE_TYPE(target)) { + case YP_MISSING_NODE: + return target; + case YP_CLASS_VARIABLE_READ_NODE: { + yp_class_variable_write_node_t *node = yp_class_variable_write_node_create(parser, (yp_class_variable_read_node_t *) target, operator, value); + yp_node_destroy(parser, target); + return (yp_node_t *) node; + } + case YP_CONSTANT_PATH_NODE: + return (yp_node_t *) yp_constant_path_write_node_create(parser, (yp_constant_path_node_t *) target, operator, value); + case YP_CONSTANT_READ_NODE: { + yp_constant_write_node_t *node = yp_constant_write_node_create(parser, (yp_constant_read_node_t *) target, operator, value); + yp_node_destroy(parser, target); + return (yp_node_t *) node; + } + case YP_BACK_REFERENCE_READ_NODE: + case YP_NUMBERED_REFERENCE_READ_NODE: + yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, YP_ERR_WRITE_TARGET_READONLY); + /* fallthrough */ + case YP_GLOBAL_VARIABLE_READ_NODE: { + yp_global_variable_write_node_t *node = yp_global_variable_write_node_create(parser, target, operator, value); + yp_node_destroy(parser, target); + return (yp_node_t *) node; + } + case YP_LOCAL_VARIABLE_READ_NODE: { + if (token_is_numbered_parameter(target->location.start, target->location.end)) { + yp_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, 
YP_ERR_PARAMETER_NUMBERED_RESERVED); + } + + yp_local_variable_read_node_t *local_read = (yp_local_variable_read_node_t *) target; + + yp_constant_id_t constant_id = local_read->name; + uint32_t depth = local_read->depth; + + yp_location_t name_loc = target->location; + yp_node_destroy(parser, target); + + return (yp_node_t *) yp_local_variable_write_node_create(parser, constant_id, depth, value, &name_loc, operator); + } + case YP_INSTANCE_VARIABLE_READ_NODE: { + yp_node_t *write_node = (yp_node_t *) yp_instance_variable_write_node_create(parser, (yp_instance_variable_read_node_t *) target, operator, value); + yp_node_destroy(parser, target); + return write_node; + } + case YP_MULTI_TARGET_NODE: + return (yp_node_t *) yp_multi_write_node_create(parser, (yp_multi_target_node_t *) target, operator, value); + case YP_SPLAT_NODE: { + yp_splat_node_t *splat = (yp_splat_node_t *) target; + + if (splat->expression != NULL) { + splat->expression = parse_write(parser, splat->expression, operator, value); + } + + yp_multi_target_node_t *multi_target = yp_multi_target_node_create(parser); + yp_multi_target_node_targets_append(multi_target, (yp_node_t *) splat); + + return (yp_node_t *) yp_multi_write_node_create(parser, multi_target, operator, value); + } + case YP_CALL_NODE: { + yp_call_node_t *call = (yp_call_node_t *) target; + + // If we have no arguments to the call node and we need this to be a + // target then this is either a method call or a local variable + // write. + if ( + (call->message_loc.start != NULL) && + (call->message_loc.end[-1] != '!') && + (call->message_loc.end[-1] != '?') && + (call->opening_loc.start == NULL) && + (call->arguments == NULL) && + (call->block == NULL) + ) { + if (call->receiver == NULL) { + // When we get here, we have a local variable write, because it + // was previously marked as a method call but now we have an =. 
+ // This looks like: + // + // foo = 1 + // + // When it was parsed in the prefix position, foo was seen as a + // method call with no receiver and no arguments. Now we have an + // =, so we know it's a local variable write. + const yp_location_t message = call->message_loc; + + yp_parser_local_add_location(parser, message.start, message.end); + yp_node_destroy(parser, target); + + yp_constant_id_t constant_id = yp_parser_constant_id_location(parser, message.start, message.end); + target = (yp_node_t *) yp_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator); + + if (token_is_numbered_parameter(message.start, message.end)) { + yp_diagnostic_list_append(&parser->error_list, message.start, message.end, YP_ERR_PARAMETER_NUMBERED_RESERVED); + } + + return target; + } + + if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) { + // When we get here, we have a method call, because it was + // previously marked as a method call but now we have an =. This + // looks like: + // + // foo.bar = 1 + // + // When it was parsed in the prefix position, foo.bar was seen as a + // method call with no arguments. Now we have an =, so we know it's + // a method call with an argument. In this case we will create the + // arguments node, parse the argument, and add it to the list. + yp_arguments_node_t *arguments = yp_arguments_node_create(parser); + call->arguments = arguments; + + yp_arguments_node_arguments_append(arguments, value); + call->base.location.end = arguments->base.location.end; + + parse_write_name(&call->name); + return (yp_node_t *) call; + } + } + + // If there is no call operator and the message is "[]" then this is + // an aref expression, and we can transform it into an aset + // expression. 
+ if ( + (call->call_operator_loc.start == NULL) && + (call->message_loc.start[0] == '[') && + (call->message_loc.end[-1] == ']') && + (call->block == NULL) + ) { + if (call->arguments == NULL) { + call->arguments = yp_arguments_node_create(parser); + } + + yp_arguments_node_arguments_append(call->arguments, value); + target->location.end = value->location.end; + + // Free the previous name and replace it with "[]=". + yp_string_free(&call->name); + yp_string_constant_init(&call->name, "[]=", 3); + return target; + } + + // If there are arguments on the call node, then it can't be a method + // call ending with = or a local variable write, so it must be a + // syntax error. In this case we'll fall through to our default + // handling. We need to free the value that we parsed because there + // is no way for us to attach it to the tree at this point. + yp_node_destroy(parser, value); + } + /* fallthrough */ + default: + // In this case we have a node that we don't know how to convert into a + // target. We need to treat it as an error. For now, we'll mark it as an + // error and just skip right past it. + yp_diagnostic_list_append(&parser->error_list, operator->start, operator->end, YP_ERR_WRITE_TARGET_UNEXPECTED); + return target; + } +} + +// Parse a list of targets for assignment. This is used in the case of a for +// loop or a multi-assignment. For example, in the following code: +// +// for foo, bar in baz +// ^^^^^^^^ +// +// The targets are `foo` and `bar`. This function will either return a single +// target node or a multi-target node. 
+static yp_node_t * +parse_targets(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t binding_power) { + bool has_splat = YP_NODE_TYPE_P(first_target, YP_SPLAT_NODE); + + yp_multi_target_node_t *result = yp_multi_target_node_create(parser); + yp_multi_target_node_targets_append(result, parse_target(parser, first_target)); + + while (accept1(parser, YP_TOKEN_COMMA)) { + if (accept1(parser, YP_TOKEN_USTAR)) { + // Here we have a splat operator. It can have a name or be + // anonymous. It can be the final target or be in the middle if + // there haven't been any others yet. + if (has_splat) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_MULTI_ASSIGN_MULTI_SPLATS); + } + + yp_token_t star_operator = parser->previous; + yp_node_t *name = NULL; + + if (token_begins_expression_p(parser->current.type)) { + name = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_STAR); + name = parse_target(parser, name); + } + + yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name); + yp_multi_target_node_targets_append(result, splat); + has_splat = true; + } else if (token_begins_expression_p(parser->current.type)) { + yp_node_t *target = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_COMMA); + target = parse_target(parser, target); + + yp_multi_target_node_targets_append(result, target); + } else { + // If we get here, then we have a trailing , in a multi target node. + // We need to indicate this somehow in the tree, so we'll add an + // anonymous splat. + yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &parser->previous, NULL); + yp_multi_target_node_targets_append(result, splat); + break; + } + } + + return (yp_node_t *) result; +} + +// Parse a list of targets and validate that it is in a valid position for +// assignment. 
static yp_node_t *
parse_targets_validate(yp_parser_t *parser, yp_node_t *first_target, yp_binding_power_t binding_power) {
    yp_node_t *result = parse_targets(parser, first_target, binding_power);

    // Ensure that we have either an = or a ) after the targets. If not, an
    // error is reported against the whole target list, which is still returned.
    if (!match2(parser, YP_TOKEN_EQUAL, YP_TOKEN_PARENTHESIS_RIGHT)) {
        yp_diagnostic_list_append(&parser->error_list, result->location.start, result->location.end, YP_ERR_WRITE_TARGET_UNEXPECTED);
    }

    return result;
}

// Parse a list of statements separated by newlines or semicolons.
//
// Returns NULL when the current token already terminates the given context,
// i.e. when there are no statements at all. Otherwise returns a statements node
// holding every statement that was parsed. The context is pushed for the
// duration of the parse and popped before returning.
static yp_statements_node_t *
parse_statements(yp_parser_t *parser, yp_context_t context) {
    // First, skip past any optional terminators that might be at the beginning of
    // the statements.
    while (accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE));

    // If the current token terminates this context, then there are no
    // statements and we can just return NULL.
    if (context_terminator(context, &parser->current)) return NULL;

    yp_statements_node_t *statements = yp_statements_node_create(parser);

    // At this point we know we have at least one statement, and that it
    // immediately follows the current token.
    context_push(parser, context);

    while (true) {
        yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION);
        yp_statements_node_body_append(statements, node);

        // If we're recovering from a syntax error, then we need to stop parsing the
        // statements now.
        if (parser->recovering) {
            // If this is the level of context where the recovery has happened, then
            // we can mark the parser as done recovering.
            if (context_terminator(context, &parser->current)) parser->recovering = false;
            break;
        }

        // If we have a terminator, then we will parse all consecutive terminators
        // and then continue parsing the statements list.
        if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) {
            // Skip any further newlines or semicolons, then stop if the
            // context ends here.
            while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
            if (context_terminator(context, &parser->current)) break;

            // Now we can continue parsing the list of statements.
            continue;
        }

        // At this point we have a list of statements that are not terminated by a
        // newline or semicolon. At this point we need to check if we're at the end
        // of the statements list. If we are, then we should break out of the loop.
        if (context_terminator(context, &parser->current)) break;

        // At this point, we have a syntax error, because the statement was not
        // terminated by a newline or semicolon, and we're not at the end of the
        // statements list. Ideally we should scan forward to determine if we should
        // insert a missing terminator or break out of parsing the statements list
        // at this point.
        //
        // We don't have that yet, so instead we'll do a more naive approach. If we
        // were unable to parse an expression, then we will skip past this token and
        // continue parsing the statements list. Otherwise we'll add an error and
        // continue parsing the statements list.
        if (YP_NODE_TYPE_P(node, YP_MISSING_NODE)) {
            // Skip the token that could not be parsed, along with any
            // terminators that follow it.
            parser_lex(parser);

            while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON));
            if (context_terminator(context, &parser->current)) break;
        } else {
            // expect1 records the missing-terminator error, since a newline
            // is known not to be the current token at this point.
            expect1(parser, YP_TOKEN_NEWLINE, YP_ERR_EXPECT_EOL_AFTER_STATEMENT);
        }
    }

    context_pop(parser);
    return statements;
}

// Parse all of the elements of a hash.
+static void +parse_assocs(yp_parser_t *parser, yp_node_t *node) { + assert(YP_NODE_TYPE_P(node, YP_HASH_NODE) || YP_NODE_TYPE_P(node, YP_KEYWORD_HASH_NODE)); + + while (true) { + yp_node_t *element; + + switch (parser->current.type) { + case YP_TOKEN_USTAR_STAR: { + parser_lex(parser); + yp_token_t operator = parser->previous; + yp_node_t *value = NULL; + + if (token_begins_expression_p(parser->current.type)) { + value = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH); + } else if (yp_parser_local_depth(parser, &operator) == -1) { + yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH); + } + + element = (yp_node_t *) yp_assoc_splat_node_create(parser, value, &operator); + break; + } + case YP_TOKEN_LABEL: { + yp_token_t label = parser->current; + parser_lex(parser); + + yp_node_t *key = (yp_node_t *) yp_symbol_node_label_create(parser, &label); + yp_token_t operator = not_provided(parser); + yp_node_t *value = NULL; + + if (token_begins_expression_p(parser->current.type)) { + value = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_HASH_EXPRESSION_AFTER_LABEL); + } else { + if (parser->encoding.isupper_char(label.start, (label.end - 1) - label.start)) { + yp_token_t constant = { .type = YP_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 }; + value = (yp_node_t *) yp_constant_read_node_create(parser, &constant); + } else { + int depth = yp_parser_local_depth(parser, &((yp_token_t) { .type = YP_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 })); + yp_token_t identifier = { .type = YP_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 }; + + if (depth == -1) { + value = (yp_node_t *) yp_call_node_variable_call_create(parser, &identifier); + } else { + value = (yp_node_t *) yp_local_variable_read_node_create(parser, &identifier, (uint32_t) depth); + } + } + + value->location.end++; + value = (yp_node_t *) 
yp_implicit_node_create(parser, value); + } + + element = (yp_node_t *) yp_assoc_node_create(parser, key, &operator, value); + break; + } + default: { + yp_node_t *key = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_HASH_KEY); + yp_token_t operator; + + if (yp_symbol_node_label_p(key)) { + operator = not_provided(parser); + } else { + expect1(parser, YP_TOKEN_EQUAL_GREATER, YP_ERR_HASH_ROCKET); + operator = parser->previous; + } + + yp_node_t *value = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_HASH_VALUE); + element = (yp_node_t *) yp_assoc_node_create(parser, key, &operator, value); + break; + } + } + + if (YP_NODE_TYPE_P(node, YP_HASH_NODE)) { + yp_hash_node_elements_append((yp_hash_node_t *) node, element); + } else { + yp_keyword_hash_node_elements_append((yp_keyword_hash_node_t *) node, element); + } + + // If there's no comma after the element, then we're done. + if (!accept1(parser, YP_TOKEN_COMMA)) return; + + // If the next element starts with a label or a **, then we know we have + // another element in the hash, so we'll continue parsing. + if (match2(parser, YP_TOKEN_USTAR_STAR, YP_TOKEN_LABEL)) continue; + + // Otherwise we need to check if the subsequent token begins an expression. + // If it does, then we'll continue parsing. + if (token_begins_expression_p(parser->current.type)) continue; + + // Otherwise by default we will exit out of this loop. + return; + } +} + +// Append an argument to a list of arguments. +static inline void +parse_arguments_append(yp_parser_t *parser, yp_arguments_t *arguments, yp_node_t *argument) { + if (arguments->arguments == NULL) { + arguments->arguments = yp_arguments_node_create(parser); + } + + yp_arguments_node_arguments_append(arguments->arguments, argument); +} + +// Parse a list of arguments. 
// Parsed arguments are stored on `arguments`: positional and keyword arguments
// are appended to its arguments node, and the first block argument (&expr) is
// stored in its block slot.
//
// accepts_forwarding: when true, a leading ... that is not followed by an
//     expression is parsed as argument forwarding.
// terminator: the token that closes the argument list. When it is not
//     YP_TOKEN_EOF, a newline is accepted after each argument and a trailing
//     comma before the terminator is allowed.
static void
parse_arguments(yp_parser_t *parser, yp_arguments_t *arguments, bool accepts_forwarding, yp_token_type_t terminator) {
    yp_binding_power_t binding_power = yp_binding_powers[parser->current.type].left;

    // First we need to check if the next token is one that could be the start of
    // an argument. If it's not, then we can just return.
    if (
        match2(parser, terminator, YP_TOKEN_EOF) ||
        (binding_power != YP_BINDING_POWER_UNSET && binding_power < YP_BINDING_POWER_RANGE) ||
        context_terminator(parser->current_context->context, &parser->current)
    ) {
        return;
    }

    // Track what has been seen so far so that misplaced arguments can be
    // reported: only one bare hash is allowed, and nothing may follow a block
    // argument.
    bool parsed_bare_hash = false;
    bool parsed_block_argument = false;

    while (!match1(parser, YP_TOKEN_EOF)) {
        if (parsed_block_argument) {
            yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_ARGUMENT_AFTER_BLOCK);
        }

        yp_node_t *argument = NULL;

        switch (parser->current.type) {
            case YP_TOKEN_USTAR_STAR:
            case YP_TOKEN_LABEL: {
                // A ** or a label starts a bare (brace-less) keyword hash.
                if (parsed_bare_hash) {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_ARGUMENT_BARE_HASH);
                }

                yp_keyword_hash_node_t *hash = yp_keyword_hash_node_create(parser);
                argument = (yp_node_t *) hash;

                // NOTE(review): nothing appears to have been lexed since the
                // switch, so the current token is presumably still the ** or
                // label that selected this case; this check would then only
                // fail if `terminator` were one of those tokens. Confirm
                // whether it is still needed.
                if (!match7(parser, terminator, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_TOKEN_EOF, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_KEYWORD_DO, YP_TOKEN_PARENTHESIS_RIGHT)) {
                    parse_assocs(parser, (yp_node_t *) hash);
                }

                parsed_bare_hash = true;
                parse_arguments_append(parser, arguments, argument);
                break;
            }
            case YP_TOKEN_UAMPERSAND: {
                // A block argument: & optionally followed by an expression.
                parser_lex(parser);
                yp_token_t operator = parser->previous;
                yp_node_t *expression = NULL;

                if (token_begins_expression_p(parser->current.type)) {
                    expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_EXPECT_ARGUMENT);
                } else if (yp_parser_local_depth(parser, &operator) == -1) {
                    // A bare & is only valid when the operator is a known local
                    // in an enclosing scope.
                    yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, YP_ERR_ARGUMENT_NO_FORWARDING_AMP);
                }

                argument = (yp_node_t *) yp_block_argument_node_create(parser, &operator, expression);

                // Only the first block argument goes into the block slot. Any
                // further ones are kept in the regular argument list.
                if (parsed_block_argument) {
                    parse_arguments_append(parser, arguments, argument);
                } else {
                    arguments->block = argument;
                }

                parsed_block_argument = true;
                break;
            }
            case YP_TOKEN_USTAR: {
                parser_lex(parser);
                yp_token_t operator = parser->previous;

                if (match2(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_COMMA)) {
                    // A bare * with no expression: only valid when the operator
                    // is a known local in an enclosing scope.
                    if (yp_parser_local_depth(parser, &parser->previous) == -1) {
                        yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, YP_ERR_ARGUMENT_NO_FORWARDING_STAR);
                    }

                    argument = (yp_node_t *) yp_splat_node_create(parser, &operator, NULL);
                } else {
                    yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);

                    // A splat may not follow a bare hash.
                    if (parsed_bare_hash) {
                        yp_diagnostic_list_append(&parser->error_list, operator.start, expression->location.end, YP_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
                    }

                    argument = (yp_node_t *) yp_splat_node_create(parser, &operator, expression);
                }

                parse_arguments_append(parser, arguments, argument);
                break;
            }
            case YP_TOKEN_UDOT_DOT_DOT: {
                if (accepts_forwarding) {
                    parser_lex(parser);

                    if (token_begins_expression_p(parser->current.type)) {
                        // If the token begins an expression then this ... was not actually
                        // argument forwarding but was instead a range.
                        yp_token_t operator = parser->previous;
                        yp_node_t *right = parse_expression(parser, YP_BINDING_POWER_RANGE, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
                        argument = (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right);
                    } else {
                        // Argument forwarding is only valid when ... is a known
                        // local in an enclosing scope.
                        if (yp_parser_local_depth(parser, &parser->previous) == -1) {
                            yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
                        }

                        argument = (yp_node_t *) yp_forwarding_arguments_node_create(parser, &parser->previous);
                        parse_arguments_append(parser, arguments, argument);
                        break;
                    }
                }
            }
            // When forwarding is not accepted, or the ... turned out to be a
            // range, control continues into the default case. In the range case
            // `argument` is already set and is not parsed again there.
            /* fallthrough */
            default: {
                if (argument == NULL) {
                    argument = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_EXPECT_ARGUMENT);
                }

                // If the argument is a label, or is followed by =>, then it is
                // actually the first key of a bare hash.
                if (yp_symbol_node_label_p(argument) || accept1(parser, YP_TOKEN_EQUAL_GREATER)) {
                    if (parsed_bare_hash) {
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_BARE_HASH);
                    }

                    yp_token_t operator;
                    if (parser->previous.type == YP_TOKEN_EQUAL_GREATER) {
                        operator = parser->previous;
                    } else {
                        operator = not_provided(parser);
                    }

                    yp_keyword_hash_node_t *bare_hash = yp_keyword_hash_node_create(parser);

                    // Finish parsing the one we are part way through
                    yp_node_t *value = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_HASH_VALUE);

                    argument = (yp_node_t *) yp_assoc_node_create(parser, argument, &operator, value);
                    yp_keyword_hash_node_elements_append(bare_hash, argument);
                    argument = (yp_node_t *) bare_hash;

                    // Then parse more if we have a comma
                    if (accept1(parser, YP_TOKEN_COMMA) && (
                        token_begins_expression_p(parser->current.type) ||
                        match2(parser, YP_TOKEN_USTAR_STAR, YP_TOKEN_LABEL)
                    )) {
                        parse_assocs(parser, (yp_node_t *) bare_hash);
                    }

                    parsed_bare_hash = true;
                }

                parse_arguments_append(parser, arguments, argument);
                break;
            }
        }

        // If parsing the argument failed, we need to stop parsing arguments.
        if (YP_NODE_TYPE_P(argument, YP_MISSING_NODE) || parser->recovering) break;

        // If the terminator of these arguments is not EOF, then we have a specific
        // token we're looking for. In that case we can accept a newline here
        // because it is not functioning as a statement terminator.
        if (terminator != YP_TOKEN_EOF) accept1(parser, YP_TOKEN_NEWLINE);

        if (parser->previous.type == YP_TOKEN_COMMA && parsed_bare_hash) {
            // If we previously were on a comma and we just parsed a bare hash, then
            // we want to continue parsing arguments. This is because the comma was
            // grabbed up by the hash parser.
        } else {
            // If there is no comma at the end of the argument list then we're done
            // parsing arguments and can break out of this loop.
            if (!accept1(parser, YP_TOKEN_COMMA)) break;
        }

        // If we hit the terminator, then that means we have a trailing comma so we
        // can accept that output as well.
        if (match1(parser, terminator)) break;
    }
}

// Required parameters on method, block, and lambda declarations can be
// destructured using parentheses. This looks like:
//
//     def foo((bar, baz))
//     end
//
// It can recurse infinitely down, and splats are allowed to group arguments.
+static yp_required_destructured_parameter_node_t * +parse_required_destructured_parameter(yp_parser_t *parser) { + expect1(parser, YP_TOKEN_PARENTHESIS_LEFT, YP_ERR_EXPECT_LPAREN_REQ_PARAMETER); + + yp_token_t opening = parser->previous; + yp_required_destructured_parameter_node_t *node = yp_required_destructured_parameter_node_create(parser, &opening); + bool parsed_splat = false; + + do { + yp_node_t *param; + + if (node->parameters.size > 0 && match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + if (parsed_splat) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_SPLAT_AFTER_SPLAT); + } + + param = (yp_node_t *) yp_splat_node_create(parser, &parser->previous, NULL); + yp_required_destructured_parameter_node_append_parameter(node, param); + break; + } + + if (match1(parser, YP_TOKEN_PARENTHESIS_LEFT)) { + param = (yp_node_t *) parse_required_destructured_parameter(parser); + } else if (accept1(parser, YP_TOKEN_USTAR)) { + if (parsed_splat) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_SPLAT_AFTER_SPLAT); + } + + yp_token_t star = parser->previous; + yp_node_t *value = NULL; + + if (accept1(parser, YP_TOKEN_IDENTIFIER)) { + yp_token_t name = parser->previous; + value = (yp_node_t *) yp_required_parameter_node_create(parser, &name); + yp_parser_local_add_token(parser, &name); + } + + param = (yp_node_t *) yp_splat_node_create(parser, &star, value); + parsed_splat = true; + } else { + expect1(parser, YP_TOKEN_IDENTIFIER, YP_ERR_EXPECT_IDENT_REQ_PARAMETER); + yp_token_t name = parser->previous; + + param = (yp_node_t *) yp_required_parameter_node_create(parser, &name); + yp_parser_local_add_token(parser, &name); + } + + yp_required_destructured_parameter_node_append_parameter(node, param); + } while (accept1(parser, YP_TOKEN_COMMA)); + + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN_REQ_PARAMETER); + 
yp_required_destructured_parameter_node_closing_set(node, &parser->previous); + + return node; +} + +// This represents the different order states we can be in when parsing +// method parameters. +typedef enum { + YP_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state + YP_PARAMETERS_ORDER_NOTHING_AFTER = 1, + YP_PARAMETERS_ORDER_KEYWORDS_REST, + YP_PARAMETERS_ORDER_KEYWORDS, + YP_PARAMETERS_ORDER_REST, + YP_PARAMETERS_ORDER_AFTER_OPTIONAL, + YP_PARAMETERS_ORDER_OPTIONAL, + YP_PARAMETERS_ORDER_NAMED, + YP_PARAMETERS_ORDER_NONE, + +} yp_parameters_order_t; + +// This matches parameters tokens with parameters state. +static yp_parameters_order_t parameters_ordering[YP_TOKEN_MAXIMUM] = { + [0] = YP_PARAMETERS_NO_CHANGE, + [YP_TOKEN_UAMPERSAND] = YP_PARAMETERS_ORDER_NOTHING_AFTER, + [YP_TOKEN_AMPERSAND] = YP_PARAMETERS_ORDER_NOTHING_AFTER, + [YP_TOKEN_UDOT_DOT_DOT] = YP_PARAMETERS_ORDER_NOTHING_AFTER, + [YP_TOKEN_IDENTIFIER] = YP_PARAMETERS_ORDER_NAMED, + [YP_TOKEN_PARENTHESIS_LEFT] = YP_PARAMETERS_ORDER_NAMED, + [YP_TOKEN_EQUAL] = YP_PARAMETERS_ORDER_OPTIONAL, + [YP_TOKEN_LABEL] = YP_PARAMETERS_ORDER_KEYWORDS, + [YP_TOKEN_USTAR] = YP_PARAMETERS_ORDER_AFTER_OPTIONAL, + [YP_TOKEN_STAR] = YP_PARAMETERS_ORDER_AFTER_OPTIONAL, + [YP_TOKEN_USTAR_STAR] = YP_PARAMETERS_ORDER_KEYWORDS_REST, + [YP_TOKEN_STAR_STAR] = YP_PARAMETERS_ORDER_KEYWORDS_REST +}; + +// Check if current parameter follows valid parameters ordering. If not it adds an +// error to the list without stopping the parsing, otherwise sets the parameters state +// to the one corresponding to the current parameter. 
+static void +update_parameter_state(yp_parser_t *parser, yp_token_t *token, yp_parameters_order_t *current) { + yp_parameters_order_t state = parameters_ordering[token->type]; + if (state == YP_PARAMETERS_NO_CHANGE) return; + + // If we see another ordered argument after a optional argument + // we only continue parsing ordered arguments until we stop seeing ordered arguments + if (*current == YP_PARAMETERS_ORDER_OPTIONAL && state == YP_PARAMETERS_ORDER_NAMED) { + *current = YP_PARAMETERS_ORDER_AFTER_OPTIONAL; + return; + } else if (*current == YP_PARAMETERS_ORDER_AFTER_OPTIONAL && state == YP_PARAMETERS_ORDER_NAMED) { + return; + } + + if (token->type == YP_TOKEN_USTAR && *current == YP_PARAMETERS_ORDER_AFTER_OPTIONAL) { + yp_diagnostic_list_append(&parser->error_list, token->start, token->end, YP_ERR_PARAMETER_STAR); + } + + if (*current == YP_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) { + // We know what transition we failed on, so we can provide a better error here. + yp_diagnostic_list_append(&parser->error_list, token->start, token->end, YP_ERR_PARAMETER_ORDER); + } else if (state < *current) { + *current = state; + } +} + +// Parse a list of parameters on a method definition. 
// Every parameter name that is parsed is also added to the current scope as a
// local. Ordering violations and duplicates are reported as errors but do not
// stop parsing.
//
// binding_power: the binding power used when parsing default value
//     expressions of optional and keyword parameters.
// uses_parentheses: when true, a newline is accepted after each parameter.
//     When false, parser->in_keyword_arg is set while a keyword parameter is
//     being parsed.
// allows_trailing_comma: when true, a trailing comma is turned into an
//     anonymous rest parameter; when false it is reported as an error.
// allows_forwarding_parameter: when false, a ... parameter is reported as an
//     error.
//
// Returns NULL if the resulting parameters node has an empty location (no
// parameters were parsed).
static yp_parameters_node_t *
parse_parameters(
    yp_parser_t *parser,
    yp_binding_power_t binding_power,
    bool uses_parentheses,
    bool allows_trailing_comma,
    bool allows_forwarding_parameter
) {
    yp_parameters_node_t *params = yp_parameters_node_create(parser);
    bool looping = true;

    yp_do_loop_stack_push(parser, false);
    yp_parameters_order_t order = YP_PARAMETERS_ORDER_NONE;

    do {
        switch (parser->current.type) {
            case YP_TOKEN_PARENTHESIS_LEFT: {
                // A destructured required parameter, e.g. (a, b).
                update_parameter_state(parser, &parser->current, &order);
                yp_node_t *param = (yp_node_t *) parse_required_destructured_parameter(parser);

                // Required parameters seen after the optional/rest section are
                // stored as "posts" rather than "requireds".
                if (order > YP_PARAMETERS_ORDER_AFTER_OPTIONAL) {
                    yp_parameters_node_requireds_append(params, param);
                } else {
                    yp_parameters_node_posts_append(params, param);
                }
                break;
            }
            case YP_TOKEN_UAMPERSAND:
            case YP_TOKEN_AMPERSAND: {
                // A block parameter, with or without a name.
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_token_t name;

                if (accept1(parser, YP_TOKEN_IDENTIFIER)) {
                    name = parser->previous;
                    yp_parser_parameter_name_check(parser, &name);
                    yp_parser_local_add_token(parser, &name);
                } else {
                    // An anonymous block parameter registers the operator itself
                    // as a local.
                    name = not_provided(parser);
                    yp_parser_local_add_token(parser, &operator);
                }

                yp_block_parameter_node_t *param = yp_block_parameter_node_create(parser, &name, &operator);
                if (params->block == NULL) {
                    yp_parameters_node_block_set(params, param);
                } else {
                    // Only one block parameter is allowed. Extra ones are
                    // reported and kept in the posts list.
                    yp_diagnostic_list_append(&parser->error_list, param->base.location.start, param->base.location.end, YP_ERR_PARAMETER_BLOCK_MULTI);
                    yp_parameters_node_posts_append(params, (yp_node_t *) param);
                }

                break;
            }
            case YP_TOKEN_UDOT_DOT_DOT: {
                // A forwarding parameter (...).
                if (!allows_forwarding_parameter) {
                    yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
                }
                if (order > YP_PARAMETERS_ORDER_NOTHING_AFTER) {
                    update_parameter_state(parser, &parser->current, &order);
                    parser_lex(parser);

                    // The ... token itself is registered as a local, and the
                    // node is stored in the keyword rest slot.
                    yp_parser_local_add_token(parser, &parser->previous);
                    yp_forwarding_parameter_node_t *param = yp_forwarding_parameter_node_create(parser, &parser->previous);
                    yp_parameters_node_keyword_rest_set(params, (yp_node_t *)param);
                } else {
                    // Nothing may follow the previous parameter:
                    // update_parameter_state reports the ordering error, and the
                    // token is consumed without creating a node.
                    update_parameter_state(parser, &parser->current, &order);
                    parser_lex(parser);
                }
                break;
            }
            case YP_TOKEN_CLASS_VARIABLE:
            case YP_TOKEN_IDENTIFIER:
            case YP_TOKEN_CONSTANT:
            case YP_TOKEN_INSTANCE_VARIABLE:
            case YP_TOKEN_GLOBAL_VARIABLE:
            case YP_TOKEN_METHOD_NAME: {
                // A required or optional positional parameter. Anything other
                // than a plain identifier is reported as an error but is still
                // parsed as if it were a parameter name.
                parser_lex(parser);
                switch (parser->previous.type) {
                    case YP_TOKEN_CONSTANT:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_FORMAL_CONSTANT);
                        break;
                    case YP_TOKEN_INSTANCE_VARIABLE:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_FORMAL_IVAR);
                        break;
                    case YP_TOKEN_GLOBAL_VARIABLE:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_FORMAL_GLOBAL);
                        break;
                    case YP_TOKEN_CLASS_VARIABLE:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_ARGUMENT_FORMAL_CLASS);
                        break;
                    case YP_TOKEN_METHOD_NAME:
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_PARAMETER_METHOD_NAME);
                        break;
                    default: break;
                }

                // An = after the name makes this an optional parameter, so the
                // ordering state is driven by the = token in that case and by
                // the name token otherwise.
                if (parser->current.type == YP_TOKEN_EQUAL) {
                    update_parameter_state(parser, &parser->current, &order);
                } else {
                    update_parameter_state(parser, &parser->previous, &order);
                }

                yp_token_t name = parser->previous;
                yp_parser_parameter_name_check(parser, &name);
                yp_parser_local_add_token(parser, &name);

                if (accept1(parser, YP_TOKEN_EQUAL)) {
                    yp_token_t operator = parser->previous;
                    context_push(parser, YP_CONTEXT_DEFAULT_PARAMS);
                    yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_PARAMETER_NO_DEFAULT);

                    yp_optional_parameter_node_t *param = yp_optional_parameter_node_create(parser, &name, &operator, value);
                    yp_parameters_node_optionals_append(params, param);
                    context_pop(parser);

                    // If parsing the value of the parameter resulted in error
                    // recovery, then we stop parsing the parameters entirely
                    // now. The parameter parsed so far has already been
                    // appended above.
                    if (parser->recovering) {
                        looping = false;
                        break;
                    }
                } else if (order > YP_PARAMETERS_ORDER_AFTER_OPTIONAL) {
                    yp_required_parameter_node_t *param = yp_required_parameter_node_create(parser, &name);
                    yp_parameters_node_requireds_append(params, (yp_node_t *) param);
                } else {
                    yp_required_parameter_node_t *param = yp_required_parameter_node_create(parser, &name);
                    yp_parameters_node_posts_append(params, (yp_node_t *) param);
                }

                break;
            }
            case YP_TOKEN_LABEL: {
                // A keyword parameter, with or without a default value.
                if (!uses_parentheses) parser->in_keyword_arg = true;
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                // The local name is the label token without its final byte.
                yp_token_t name = parser->previous;
                yp_token_t local = name;
                local.end -= 1;

                yp_parser_parameter_name_check(parser, &local);
                yp_parser_local_add_token(parser, &local);

                switch (parser->current.type) {
                    case YP_TOKEN_COMMA:
                    case YP_TOKEN_PARENTHESIS_RIGHT:
                    case YP_TOKEN_PIPE: {
                        // No default value follows.
                        yp_node_t *param = (yp_node_t *) yp_keyword_parameter_node_create(parser, &name, NULL);
                        yp_parameters_node_keywords_append(params, param);
                        break;
                    }
                    case YP_TOKEN_SEMICOLON:
                    case YP_TOKEN_NEWLINE: {
                        // NOTE(review): with parentheses, the loop stops here
                        // and no keyword parameter node is created, although
                        // the name was already added as a local above. Confirm
                        // this is intended.
                        if (uses_parentheses) {
                            looping = false;
                            break;
                        }

                        yp_node_t *param = (yp_node_t *) yp_keyword_parameter_node_create(parser, &name, NULL);
                        yp_parameters_node_keywords_append(params, param);
                        break;
                    }
                    default: {
                        yp_node_t *value = NULL;
                        if (token_begins_expression_p(parser->current.type)) {
                            context_push(parser, YP_CONTEXT_DEFAULT_PARAMS);
                            value = parse_expression(parser, binding_power, YP_ERR_PARAMETER_NO_DEFAULT_KW);
                            context_pop(parser);
                        }

                        yp_node_t *param = (yp_node_t *) yp_keyword_parameter_node_create(parser, &name, value);
                        yp_parameters_node_keywords_append(params, param);

                        // If parsing the value of the parameter resulted in
                        // error recovery, then we stop parsing the parameters
                        // entirely now. This break only leaves the inner
                        // switch, so in_keyword_arg is still reset below.
                        if (parser->recovering) {
                            looping = false;
                            break;
                        }
                    }
                }

                parser->in_keyword_arg = false;
                break;
            }
            case YP_TOKEN_USTAR:
            case YP_TOKEN_STAR: {
                // A rest parameter, with or without a name.
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_token_t name;

                if (accept1(parser, YP_TOKEN_IDENTIFIER)) {
                    name = parser->previous;
                    yp_parser_parameter_name_check(parser, &name);
                    yp_parser_local_add_token(parser, &name);
                } else {
                    // An anonymous rest parameter registers the operator itself
                    // as a local.
                    name = not_provided(parser);
                    yp_parser_local_add_token(parser, &operator);
                }

                yp_rest_parameter_node_t *param = yp_rest_parameter_node_create(parser, &operator, &name);
                if (params->rest == NULL) {
                    yp_parameters_node_rest_set(params, param);
                } else {
                    // Only one rest parameter is allowed. Extra ones are
                    // reported and kept in the posts list.
                    yp_diagnostic_list_append(&parser->error_list, param->base.location.start, param->base.location.end, YP_ERR_PARAMETER_SPLAT_MULTI);
                    yp_parameters_node_posts_append(params, (yp_node_t *) param);
                }

                break;
            }
            case YP_TOKEN_STAR_STAR:
            case YP_TOKEN_USTAR_STAR: {
                // A keyword rest parameter (**name or **), or **nil.
                update_parameter_state(parser, &parser->current, &order);
                parser_lex(parser);

                yp_token_t operator = parser->previous;
                yp_node_t *param;

                if (accept1(parser, YP_TOKEN_KEYWORD_NIL)) {
                    param = (yp_node_t *) yp_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
                } else {
                    yp_token_t name;

                    if (accept1(parser, YP_TOKEN_IDENTIFIER)) {
                        name = parser->previous;
                        yp_parser_parameter_name_check(parser, &name);
                        yp_parser_local_add_token(parser, &name);
                    } else {
                        // An anonymous keyword rest parameter registers the
                        // operator itself as a local.
                        name = not_provided(parser);
                        yp_parser_local_add_token(parser, &operator);
                    }

                    param = (yp_node_t *) yp_keyword_rest_parameter_node_create(parser, &operator, &name);
                }

                if (params->keyword_rest == NULL) {
                    yp_parameters_node_keyword_rest_set(params, param);
                } else {
                    // The keyword rest slot is already taken. Extra ones are
                    // reported and kept in the posts list.
                    yp_diagnostic_list_append(&parser->error_list, param->location.start, param->location.end, YP_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
                    yp_parameters_node_posts_append(params, param);
                }

                break;
            }
            default:
                // Any other token ends the parameter list. If we got here
                // directly after a comma, that comma was a trailing one.
                if (parser->previous.type == YP_TOKEN_COMMA) {
                    if (allows_trailing_comma) {
                        // If we get here, then we have a trailing comma in a block
                        // parameter list. We need to create an anonymous rest parameter to
                        // represent it.
                        yp_token_t name = not_provided(parser);
                        yp_rest_parameter_node_t *param = yp_rest_parameter_node_create(parser, &parser->previous, &name);

                        if (params->rest == NULL) {
                            yp_parameters_node_rest_set(params, param);
                        } else {
                            yp_diagnostic_list_append(&parser->error_list, param->base.location.start, param->base.location.end, YP_ERR_PARAMETER_SPLAT_MULTI);
                            yp_parameters_node_posts_append(params, (yp_node_t *) param);
                        }
                    } else {
                        yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_PARAMETER_WILD_LOOSE_COMMA);
                    }
                }

                looping = false;
                break;
        }

        // Inside parentheses a newline may separate a parameter from the comma
        // that follows it.
        if (looping && uses_parentheses) {
            accept1(parser, YP_TOKEN_NEWLINE);
        }
    } while (looping && accept1(parser, YP_TOKEN_COMMA));

    yp_do_loop_stack_pop(parser);

    // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
    if (params->base.location.start == params->base.location.end) {
        yp_node_destroy(parser, (yp_node_t *) params);
        return NULL;
    }

    return params;
}

// Parse any number of rescue clauses. This will form a linked list of rescue
// nodes pointing to each other from the top.
+static inline void +parse_rescues(yp_parser_t *parser, yp_begin_node_t *parent_node) { + yp_rescue_node_t *current = NULL; + + while (accept1(parser, YP_TOKEN_KEYWORD_RESCUE)) { + yp_rescue_node_t *rescue = yp_rescue_node_create(parser, &parser->previous); + + switch (parser->current.type) { + case YP_TOKEN_EQUAL_GREATER: { + // Here we have an immediate => after the rescue keyword, in which case + // we're going to have an empty list of exceptions to rescue (which + // implies StandardError). + parser_lex(parser); + yp_rescue_node_operator_set(rescue, &parser->previous); + + yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_RESCUE_VARIABLE); + reference = parse_target(parser, reference); + + yp_rescue_node_reference_set(rescue, reference); + break; + } + case YP_TOKEN_NEWLINE: + case YP_TOKEN_SEMICOLON: + case YP_TOKEN_KEYWORD_THEN: + // Here we have a terminator for the rescue keyword, in which case we're + // going to just continue on. + break; + default: { + if (token_begins_expression_p(parser->current.type) || match1(parser, YP_TOKEN_USTAR)) { + // Here we have something that could be an exception expression, so + // we'll attempt to parse it here and any others delimited by commas. + + do { + yp_node_t *expression = parse_starred_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_RESCUE_EXPRESSION); + yp_rescue_node_exceptions_append(rescue, expression); + + // If we hit a newline, then this is the end of the rescue expression. We + // can continue on to parse the statements. + if (match3(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_TOKEN_KEYWORD_THEN)) break; + + // If we hit a `=>` then we're going to parse the exception variable. Once + // we've done that, we'll break out of the loop and parse the statements. 
+ if (accept1(parser, YP_TOKEN_EQUAL_GREATER)) { + yp_rescue_node_operator_set(rescue, &parser->previous); + + yp_node_t *reference = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_RESCUE_VARIABLE); + reference = parse_target(parser, reference); + + yp_rescue_node_reference_set(rescue, reference); + break; + } + } while (accept1(parser, YP_TOKEN_COMMA)); + } + } + } + + if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + accept1(parser, YP_TOKEN_KEYWORD_THEN); + } else { + expect1(parser, YP_TOKEN_KEYWORD_THEN, YP_ERR_RESCUE_TERM); + } + + if (!match3(parser, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_RESCUE); + if (statements) { + yp_rescue_node_statements_set(rescue, statements); + } + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + } + + if (current == NULL) { + yp_begin_node_rescue_clause_set(parent_node, rescue); + } else { + yp_rescue_node_consequent_set(current, rescue); + } + + current = rescue; + } + + // The end node locations on rescue nodes will not be set correctly + // since we won't know the end until we've found all consequent + // clauses. 
This sets the end location on all rescues once we know it + if (current) { + const uint8_t *end_to_set = current->base.location.end; + current = parent_node->rescue_clause; + while (current) { + current->base.location.end = end_to_set; + current = current->consequent; + } + } + + if (accept1(parser, YP_TOKEN_KEYWORD_ELSE)) { + yp_token_t else_keyword = parser->previous; + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + + yp_statements_node_t *else_statements = NULL; + if (!match2(parser, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_ENSURE)) { + yp_accepts_block_stack_push(parser, true); + else_statements = parse_statements(parser, YP_CONTEXT_RESCUE_ELSE); + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + } + + yp_else_node_t *else_clause = yp_else_node_create(parser, &else_keyword, else_statements, &parser->current); + yp_begin_node_else_clause_set(parent_node, else_clause); + } + + if (accept1(parser, YP_TOKEN_KEYWORD_ENSURE)) { + yp_token_t ensure_keyword = parser->previous; + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + + yp_statements_node_t *ensure_statements = NULL; + if (!match1(parser, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + ensure_statements = parse_statements(parser, YP_CONTEXT_ENSURE); + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + } + + yp_ensure_node_t *ensure_clause = yp_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current); + yp_begin_node_ensure_clause_set(parent_node, ensure_clause); + } + + if (parser->current.type == YP_TOKEN_KEYWORD_END) { + yp_begin_node_end_keyword_set(parent_node, &parser->current); + } else { + yp_token_t end_keyword = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + yp_begin_node_end_keyword_set(parent_node, &end_keyword); + } +} + +static inline yp_begin_node_t * +parse_rescues_as_begin(yp_parser_t 
*parser, yp_statements_node_t *statements) { + yp_token_t no_begin_token = not_provided(parser); + yp_begin_node_t *begin_node = yp_begin_node_create(parser, &no_begin_token, statements); + parse_rescues(parser, begin_node); + + // All nodes within a begin node are optional, so we look + // for the earliest possible node that we can use to set + // the BeginNode's start location + const uint8_t *start = begin_node->base.location.start; + if (begin_node->statements) { + start = begin_node->statements->base.location.start; + } else if (begin_node->rescue_clause) { + start = begin_node->rescue_clause->base.location.start; + } else if (begin_node->else_clause) { + start = begin_node->else_clause->base.location.start; + } else if (begin_node->ensure_clause) { + start = begin_node->ensure_clause->base.location.start; + } + + begin_node->base.location.start = start; + return begin_node; +} + +// Parse a list of parameters and local on a block definition. +static yp_block_parameters_node_t * +parse_block_parameters( + yp_parser_t *parser, + bool allows_trailing_comma, + const yp_token_t *opening, + bool is_lambda_literal +) { + yp_parameters_node_t *parameters = NULL; + if (!match1(parser, YP_TOKEN_SEMICOLON)) { + parameters = parse_parameters( + parser, + is_lambda_literal ? YP_BINDING_POWER_DEFINED : YP_BINDING_POWER_INDEX, + false, + allows_trailing_comma, + false + ); + } + + yp_block_parameters_node_t *block_parameters = yp_block_parameters_node_create(parser, parameters, opening); + if (accept1(parser, YP_TOKEN_SEMICOLON)) { + do { + expect1(parser, YP_TOKEN_IDENTIFIER, YP_ERR_BLOCK_PARAM_LOCAL_VARIABLE); + yp_parser_local_add_token(parser, &parser->previous); + + yp_block_local_variable_node_t *local = yp_block_local_variable_node_create(parser, &parser->previous); + yp_block_parameters_node_append_local(block_parameters, local); + } while (accept1(parser, YP_TOKEN_COMMA)); + } + + return block_parameters; +} + +// Parse a block. 
+static yp_block_node_t * +parse_block(yp_parser_t *parser) { + yp_token_t opening = parser->previous; + accept1(parser, YP_TOKEN_NEWLINE); + + yp_accepts_block_stack_push(parser, true); + yp_parser_scope_push(parser, false); + yp_block_parameters_node_t *parameters = NULL; + + if (accept1(parser, YP_TOKEN_PIPE)) { + parser->current_scope->explicit_params = true; + yp_token_t block_parameters_opening = parser->previous; + + if (match1(parser, YP_TOKEN_PIPE)) { + parameters = yp_block_parameters_node_create(parser, NULL, &block_parameters_opening); + parser->command_start = true; + parser_lex(parser); + } else { + parameters = parse_block_parameters(parser, true, &block_parameters_opening, false); + accept1(parser, YP_TOKEN_NEWLINE); + parser->command_start = true; + expect1(parser, YP_TOKEN_PIPE, YP_ERR_BLOCK_PARAM_PIPE_TERM); + } + + yp_block_parameters_node_closing_set(parameters, &parser->previous); + } + + accept1(parser, YP_TOKEN_NEWLINE); + yp_node_t *statements = NULL; + + if (opening.type == YP_TOKEN_BRACE_LEFT) { + if (!match1(parser, YP_TOKEN_BRACE_RIGHT)) { + statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_BRACES); + } + + expect1(parser, YP_TOKEN_BRACE_RIGHT, YP_ERR_BLOCK_TERM_BRACE); + } else { + if (!match1(parser, YP_TOKEN_KEYWORD_END)) { + if (!match3(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_ENSURE)) { + yp_accepts_block_stack_push(parser, true); + statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_BLOCK_KEYWORDS); + yp_accepts_block_stack_pop(parser); + } + + if (match2(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || YP_NODE_TYPE_P(statements, YP_STATEMENTS_NODE)); + statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements); + } + } + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_BLOCK_TERM_END); + } + + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + 
yp_accepts_block_stack_pop(parser); + return yp_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous); +} + +// Parse a list of arguments and their surrounding parentheses if they are +// present. It returns true if it found any pieces of arguments (parentheses, +// arguments, or blocks). +static bool +parse_arguments_list(yp_parser_t *parser, yp_arguments_t *arguments, bool accepts_block) { + bool found = false; + + if (accept1(parser, YP_TOKEN_PARENTHESIS_LEFT)) { + found |= true; + arguments->opening_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + + if (accept1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + arguments->closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + } else { + yp_accepts_block_stack_push(parser, true); + parse_arguments(parser, arguments, true, YP_TOKEN_PARENTHESIS_RIGHT); + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_ARGUMENT_TERM_PAREN); + yp_accepts_block_stack_pop(parser); + + arguments->closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + } + } else if ((token_begins_expression_p(parser->current.type) || match3(parser, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR, YP_TOKEN_UAMPERSAND)) && !match1(parser, YP_TOKEN_BRACE_LEFT)) { + found |= true; + yp_accepts_block_stack_push(parser, false); + + // If we get here, then the subsequent token cannot be used as an infix + // operator. In this case we assume the subsequent token is part of an + // argument to this method call. + parse_arguments(parser, arguments, true, YP_TOKEN_EOF); + + yp_accepts_block_stack_pop(parser); + } + + // If we're at the end of the arguments, we can now check if there is a block + // node that starts with a {. If there is, then we can parse it and add it to + // the arguments. 
+ if (accepts_block) { + yp_block_node_t *block = NULL; + + if (accept1(parser, YP_TOKEN_BRACE_LEFT)) { + found |= true; + block = parse_block(parser); + yp_arguments_validate_block(parser, arguments, block); + } else if (yp_accepts_block_stack_p(parser) && accept1(parser, YP_TOKEN_KEYWORD_DO)) { + found |= true; + block = parse_block(parser); + } + + if (block != NULL) { + if (arguments->block == NULL) { + arguments->block = (yp_node_t *) block; + } else { + yp_diagnostic_list_append(&parser->error_list, block->base.location.start, block->base.location.end, YP_ERR_ARGUMENT_BLOCK_MULTI); + if (arguments->arguments == NULL) { + arguments->arguments = yp_arguments_node_create(parser); + } + yp_arguments_node_arguments_append(arguments->arguments, arguments->block); + arguments->block = (yp_node_t *) block; + } + } + } + + return found; +} + +static inline yp_node_t * +parse_conditional(yp_parser_t *parser, yp_context_t context) { + yp_token_t keyword = parser->previous; + + context_push(parser, YP_CONTEXT_PREDICATE); + yp_diagnostic_id_t error_id = context == YP_CONTEXT_IF ? YP_ERR_CONDITIONAL_IF_PREDICATE : YP_ERR_CONDITIONAL_UNLESS_PREDICATE; + yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_MODIFIER, error_id); + + // Predicates are closed by a term, a "then", or a term and then a "then". 
+ bool predicate_closed = accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + predicate_closed |= accept1(parser, YP_TOKEN_KEYWORD_THEN); + if (!predicate_closed) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_CONDITIONAL_PREDICATE_TERM); + } + + context_pop(parser); + yp_statements_node_t *statements = NULL; + + if (!match3(parser, YP_TOKEN_KEYWORD_ELSIF, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = parse_statements(parser, context); + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + } + + yp_token_t end_keyword = not_provided(parser); + yp_node_t *parent = NULL; + + switch (context) { + case YP_CONTEXT_IF: + parent = (yp_node_t *) yp_if_node_create(parser, &keyword, predicate, statements, NULL, &end_keyword); + break; + case YP_CONTEXT_UNLESS: + parent = (yp_node_t *) yp_unless_node_create(parser, &keyword, predicate, statements); + break; + default: + assert(false && "unreachable"); + break; + } + + yp_node_t *current = parent; + + // Parse any number of elsif clauses. This will form a linked list of if + // nodes pointing to each other from the top. + if (context == YP_CONTEXT_IF) { + while (accept1(parser, YP_TOKEN_KEYWORD_ELSIF)) { + yp_token_t elsif_keyword = parser->previous; + yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_MODIFIER, YP_ERR_CONDITIONAL_ELSIF_PREDICATE); + + // Predicates are closed by a term, a "then", or a term and then a "then". 
+ accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + accept1(parser, YP_TOKEN_KEYWORD_THEN); + + yp_accepts_block_stack_push(parser, true); + yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_ELSIF); + yp_accepts_block_stack_pop(parser); + + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + + yp_node_t *elsif = (yp_node_t *) yp_if_node_create(parser, &elsif_keyword, predicate, statements, NULL, &end_keyword); + ((yp_if_node_t *) current)->consequent = elsif; + current = elsif; + } + } + + if (match1(parser, YP_TOKEN_KEYWORD_ELSE)) { + parser_lex(parser); + yp_token_t else_keyword = parser->previous; + + yp_accepts_block_stack_push(parser, true); + yp_statements_node_t *else_statements = parse_statements(parser, YP_CONTEXT_ELSE); + yp_accepts_block_stack_pop(parser); + + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_CONDITIONAL_TERM_ELSE); + + yp_else_node_t *else_node = yp_else_node_create(parser, &else_keyword, else_statements, &parser->previous); + + switch (context) { + case YP_CONTEXT_IF: + ((yp_if_node_t *) current)->consequent = (yp_node_t *) else_node; + break; + case YP_CONTEXT_UNLESS: + ((yp_unless_node_t *) parent)->consequent = else_node; + break; + default: + assert(false && "unreachable"); + break; + } + } else { + // We should specialize this error message to refer to 'if' or 'unless' explicitly. + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_CONDITIONAL_TERM); + } + + // Set the appropriate end location for all of the nodes in the subtree. 
+ switch (context) { + case YP_CONTEXT_IF: { + yp_node_t *current = parent; + bool recursing = true; + + while (recursing) { + switch (YP_NODE_TYPE(current)) { + case YP_IF_NODE: + yp_if_node_end_keyword_loc_set((yp_if_node_t *) current, &parser->previous); + current = ((yp_if_node_t *) current)->consequent; + recursing = current != NULL; + break; + case YP_ELSE_NODE: + yp_else_node_end_keyword_loc_set((yp_else_node_t *) current, &parser->previous); + recursing = false; + break; + default: { + recursing = false; + break; + } + } + } + break; + } + case YP_CONTEXT_UNLESS: + yp_unless_node_end_keyword_loc_set((yp_unless_node_t *) parent, &parser->previous); + break; + default: + assert(false && "unreachable"); + break; + } + + return parent; +} + +// This macro allows you to define a case statement for all of the keywords. +// It's meant to be used in a switch statement. +#define YP_CASE_KEYWORD YP_TOKEN_KEYWORD___ENCODING__: case YP_TOKEN_KEYWORD___FILE__: case YP_TOKEN_KEYWORD___LINE__: \ + case YP_TOKEN_KEYWORD_ALIAS: case YP_TOKEN_KEYWORD_AND: case YP_TOKEN_KEYWORD_BEGIN: case YP_TOKEN_KEYWORD_BEGIN_UPCASE: \ + case YP_TOKEN_KEYWORD_BREAK: case YP_TOKEN_KEYWORD_CASE: case YP_TOKEN_KEYWORD_CLASS: case YP_TOKEN_KEYWORD_DEF: \ + case YP_TOKEN_KEYWORD_DEFINED: case YP_TOKEN_KEYWORD_DO: case YP_TOKEN_KEYWORD_DO_LOOP: case YP_TOKEN_KEYWORD_ELSE: \ + case YP_TOKEN_KEYWORD_ELSIF: case YP_TOKEN_KEYWORD_END: case YP_TOKEN_KEYWORD_END_UPCASE: case YP_TOKEN_KEYWORD_ENSURE: \ + case YP_TOKEN_KEYWORD_FALSE: case YP_TOKEN_KEYWORD_FOR: case YP_TOKEN_KEYWORD_IF: case YP_TOKEN_KEYWORD_IN: \ + case YP_TOKEN_KEYWORD_MODULE: case YP_TOKEN_KEYWORD_NEXT: case YP_TOKEN_KEYWORD_NIL: case YP_TOKEN_KEYWORD_NOT: \ + case YP_TOKEN_KEYWORD_OR: case YP_TOKEN_KEYWORD_REDO: case YP_TOKEN_KEYWORD_RESCUE: case YP_TOKEN_KEYWORD_RETRY: \ + case YP_TOKEN_KEYWORD_RETURN: case YP_TOKEN_KEYWORD_SELF: case YP_TOKEN_KEYWORD_SUPER: case YP_TOKEN_KEYWORD_THEN: \ + case YP_TOKEN_KEYWORD_TRUE: case 
YP_TOKEN_KEYWORD_UNDEF: case YP_TOKEN_KEYWORD_UNLESS: case YP_TOKEN_KEYWORD_UNTIL: \ + case YP_TOKEN_KEYWORD_WHEN: case YP_TOKEN_KEYWORD_WHILE: case YP_TOKEN_KEYWORD_YIELD + +// This macro allows you to define a case statement for all of the operators. +// It's meant to be used in a switch statement. +#define YP_CASE_OPERATOR YP_TOKEN_AMPERSAND: case YP_TOKEN_BACKTICK: case YP_TOKEN_BANG_EQUAL: \ + case YP_TOKEN_BANG_TILDE: case YP_TOKEN_BANG: case YP_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \ + case YP_TOKEN_BRACKET_LEFT_RIGHT: case YP_TOKEN_CARET: case YP_TOKEN_EQUAL_EQUAL_EQUAL: case YP_TOKEN_EQUAL_EQUAL: \ + case YP_TOKEN_EQUAL_TILDE: case YP_TOKEN_GREATER_EQUAL: case YP_TOKEN_GREATER_GREATER: case YP_TOKEN_GREATER: \ + case YP_TOKEN_LESS_EQUAL_GREATER: case YP_TOKEN_LESS_EQUAL: case YP_TOKEN_LESS_LESS: case YP_TOKEN_LESS: \ + case YP_TOKEN_MINUS: case YP_TOKEN_PERCENT: case YP_TOKEN_PIPE: case YP_TOKEN_PLUS: case YP_TOKEN_SLASH: \ + case YP_TOKEN_STAR_STAR: case YP_TOKEN_STAR: case YP_TOKEN_TILDE: case YP_TOKEN_UAMPERSAND: case YP_TOKEN_UMINUS: \ + case YP_TOKEN_UMINUS_NUM: case YP_TOKEN_UPLUS: case YP_TOKEN_USTAR: case YP_TOKEN_USTAR_STAR + +// This macro allows you to define a case statement for all of the token types +// that represent the beginning of nodes that are "primitives" in a pattern +// matching expression. 
+#define YP_CASE_PRIMITIVE YP_TOKEN_INTEGER: case YP_TOKEN_INTEGER_IMAGINARY: case YP_TOKEN_INTEGER_RATIONAL: \ + case YP_TOKEN_INTEGER_RATIONAL_IMAGINARY: case YP_TOKEN_FLOAT: case YP_TOKEN_FLOAT_IMAGINARY: \ + case YP_TOKEN_FLOAT_RATIONAL: case YP_TOKEN_FLOAT_RATIONAL_IMAGINARY: case YP_TOKEN_SYMBOL_BEGIN: \ + case YP_TOKEN_REGEXP_BEGIN: case YP_TOKEN_BACKTICK: case YP_TOKEN_PERCENT_LOWER_X: case YP_TOKEN_PERCENT_LOWER_I: \ + case YP_TOKEN_PERCENT_LOWER_W: case YP_TOKEN_PERCENT_UPPER_I: case YP_TOKEN_PERCENT_UPPER_W: \ + case YP_TOKEN_STRING_BEGIN: case YP_TOKEN_KEYWORD_NIL: case YP_TOKEN_KEYWORD_SELF: case YP_TOKEN_KEYWORD_TRUE: \ + case YP_TOKEN_KEYWORD_FALSE: case YP_TOKEN_KEYWORD___FILE__: case YP_TOKEN_KEYWORD___LINE__: \ + case YP_TOKEN_KEYWORD___ENCODING__: case YP_TOKEN_MINUS_GREATER: case YP_TOKEN_HEREDOC_START: \ + case YP_TOKEN_UMINUS_NUM: case YP_TOKEN_CHARACTER_LITERAL + +// This macro allows you to define a case statement for all of the token types +// that could begin a parameter. +#define YP_CASE_PARAMETER YP_TOKEN_UAMPERSAND: case YP_TOKEN_AMPERSAND: case YP_TOKEN_UDOT_DOT_DOT: \ + case YP_TOKEN_IDENTIFIER: case YP_TOKEN_LABEL: case YP_TOKEN_USTAR: case YP_TOKEN_STAR: case YP_TOKEN_STAR_STAR: \ + case YP_TOKEN_USTAR_STAR: case YP_TOKEN_CONSTANT: case YP_TOKEN_INSTANCE_VARIABLE: case YP_TOKEN_GLOBAL_VARIABLE: \ + case YP_TOKEN_CLASS_VARIABLE + +// This macro allows you to define a case statement for all of the nodes that +// can be transformed into write targets. +#define YP_CASE_WRITABLE YP_CLASS_VARIABLE_READ_NODE: case YP_CONSTANT_PATH_NODE: \ + case YP_CONSTANT_READ_NODE: case YP_GLOBAL_VARIABLE_READ_NODE: case YP_LOCAL_VARIABLE_READ_NODE: \ + case YP_INSTANCE_VARIABLE_READ_NODE: case YP_MULTI_TARGET_NODE: case YP_BACK_REFERENCE_READ_NODE: \ + case YP_NUMBERED_REFERENCE_READ_NODE + +// Parse a node that is part of a string. If the subsequent tokens cannot be +// parsed as a string part, then NULL is returned. 
+static yp_node_t * +parse_string_part(yp_parser_t *parser) { + switch (parser->current.type) { + // Here the lexer has returned to us plain string content. In this case + // we'll create a string node that has no opening or closing and return that + // as the part. These kinds of parts look like: + // + // "aaa #{bbb} #@ccc ddd" + // ^^^^ ^ ^^^^ + case YP_TOKEN_STRING_CONTENT: { + yp_unescape_type_t unescape_type = YP_UNESCAPE_ALL; + + if (parser->lex_modes.current->mode == YP_LEX_HEREDOC) { + if (parser->lex_modes.current->as.heredoc.indent == YP_HEREDOC_INDENT_TILDE) { + // If we're in a tilde heredoc, we want to unescape it later + // because we don't want unescaped newlines to disappear + // before we handle them in the dedent. + unescape_type = YP_UNESCAPE_NONE; + } else if (parser->lex_modes.current->as.heredoc.quote == YP_HEREDOC_QUOTE_SINGLE) { + unescape_type = YP_UNESCAPE_MINIMAL; + } + } + + parser_lex(parser); + + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + + return (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, unescape_type); + } + // Here the lexer has returned the beginning of an embedded expression. In + // that case we'll parse the inner statements and return that as the part. 
+ // These kinds of parts look like: + // + // "aaa #{bbb} #@ccc ddd" + // ^^^^^^ + case YP_TOKEN_EMBEXPR_BEGIN: { + yp_lex_state_t state = parser->lex_state; + int brace_nesting = parser->brace_nesting; + + parser->brace_nesting = 0; + lex_state_set(parser, YP_LEX_STATE_BEG); + parser_lex(parser); + + yp_token_t opening = parser->previous; + yp_statements_node_t *statements = NULL; + + if (!match1(parser, YP_TOKEN_EMBEXPR_END)) { + yp_accepts_block_stack_push(parser, true); + statements = parse_statements(parser, YP_CONTEXT_EMBEXPR); + yp_accepts_block_stack_pop(parser); + } + + parser->brace_nesting = brace_nesting; + lex_state_set(parser, state); + + expect1(parser, YP_TOKEN_EMBEXPR_END, YP_ERR_EMBEXPR_END); + yp_token_t closing = parser->previous; + + return (yp_node_t *) yp_embedded_statements_node_create(parser, &opening, statements, &closing); + } + + // Here the lexer has returned the beginning of an embedded variable. + // In that case we'll parse the variable and create an appropriate node + // for it and then return that node. These kinds of parts look like: + // + // "aaa #{bbb} #@ccc ddd" + // ^^^^^ + case YP_TOKEN_EMBVAR: { + lex_state_set(parser, YP_LEX_STATE_BEG); + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *variable; + + switch (parser->current.type) { + // In this case a back reference is being interpolated. We'll + // create a global variable read node. + case YP_TOKEN_BACK_REFERENCE: + parser_lex(parser); + variable = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous); + break; + // In this case an nth reference is being interpolated. We'll + // create a global variable read node. + case YP_TOKEN_NUMBERED_REFERENCE: + parser_lex(parser); + variable = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous); + break; + // In this case a global variable is being interpolated. We'll + // create a global variable read node. 
+ case YP_TOKEN_GLOBAL_VARIABLE: + parser_lex(parser); + variable = (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous); + break; + // In this case an instance variable is being interpolated. + // We'll create an instance variable read node. + case YP_TOKEN_INSTANCE_VARIABLE: + parser_lex(parser); + variable = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous); + break; + // In this case a class variable is being interpolated. We'll + // create a class variable read node. + case YP_TOKEN_CLASS_VARIABLE: + parser_lex(parser); + variable = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous); + break; + // We can hit here if we got an invalid token. In that case + // we'll not attempt to lex this token and instead just return a + // missing node. + default: + expect1(parser, YP_TOKEN_IDENTIFIER, YP_ERR_EMBVAR_INVALID); + variable = (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end); + break; + } + + return (yp_node_t *) yp_embedded_variable_node_create(parser, &operator, variable); + } + default: + parser_lex(parser); + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_CANNOT_PARSE_STRING_PART); + return NULL; + } +} + +static yp_node_t * +parse_symbol(yp_parser_t *parser, yp_lex_mode_t *lex_mode, yp_lex_state_t next_state) { + yp_token_t opening = parser->previous; + + if (lex_mode->mode != YP_LEX_STRING) { + if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state); + yp_token_t symbol; + + switch (parser->current.type) { + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_CONSTANT: + case YP_TOKEN_INSTANCE_VARIABLE: + case YP_TOKEN_METHOD_NAME: + case YP_TOKEN_CLASS_VARIABLE: + case YP_TOKEN_GLOBAL_VARIABLE: + case YP_TOKEN_NUMBERED_REFERENCE: + case YP_TOKEN_BACK_REFERENCE: + case YP_CASE_KEYWORD: + parser_lex(parser); + symbol = parser->previous; + break; + case YP_CASE_OPERATOR: + 
lex_state_set(parser, next_state == YP_LEX_STATE_NONE ? YP_LEX_STATE_ENDFN : next_state); + parser_lex(parser); + symbol = parser->previous; + break; + default: + expect2(parser, YP_TOKEN_IDENTIFIER, YP_TOKEN_METHOD_NAME, YP_ERR_SYMBOL_INVALID); + symbol = parser->previous; + break; + } + + yp_token_t closing = not_provided(parser); + return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, YP_UNESCAPE_ALL); + } + + if (lex_mode->as.string.interpolation) { + // If we have the end of the symbol, then we can return an empty symbol. + if (match1(parser, YP_TOKEN_STRING_END)) { + if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state); + parser_lex(parser); + + yp_token_t content = not_provided(parser); + yp_token_t closing = parser->previous; + return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_NONE); + } + + // Now we can parse the first part of the symbol. + yp_node_t *part = parse_string_part(parser); + + // If we got a string part, then it's possible that we could transform + // what looks like an interpolated symbol into a regular symbol. + if (part && YP_NODE_TYPE_P(part, YP_STRING_NODE) && match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state); + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_SYMBOL_TERM_INTERPOLATED); + + return (yp_node_t *) yp_string_node_to_symbol_node(parser, (yp_string_node_t *) part, &opening, &parser->previous); + } + + // Create a node_list first. We'll use this to check if it should be an + // InterpolatedSymbolNode or a SymbolNode. 
+ yp_node_list_t node_list = YP_EMPTY_NODE_LIST; + if (part) yp_node_list_append(&node_list, part); + + while (!match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + if ((part = parse_string_part(parser)) != NULL) { + yp_node_list_append(&node_list, part); + } + } + + if (next_state != YP_LEX_STATE_NONE) lex_state_set(parser, next_state); + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_SYMBOL_TERM_INTERPOLATED); + + return (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &node_list, &parser->previous); + } + + yp_token_t content; + if (accept1(parser, YP_TOKEN_STRING_CONTENT)) { + content = parser->previous; + } else { + content = (yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end }; + } + + if (next_state != YP_LEX_STATE_NONE) { + lex_state_set(parser, next_state); + } + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_SYMBOL_TERM_DYNAMIC); + + return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); +} + +// Parse an argument to undef which can either be a bare word, a +// symbol, a constant, or an interpolated symbol. 
+static inline yp_node_t * +parse_undef_argument(yp_parser_t *parser) { + switch (parser->current.type) { + case YP_CASE_KEYWORD: + case YP_CASE_OPERATOR: + case YP_TOKEN_CONSTANT: + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: { + parser_lex(parser); + + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + + return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); + } + case YP_TOKEN_SYMBOL_BEGIN: { + yp_lex_mode_t lex_mode = *parser->lex_modes.current; + parser_lex(parser); + + return parse_symbol(parser, &lex_mode, YP_LEX_STATE_NONE); + } + default: + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_UNDEF_ARGUMENT); + return (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end); + } +} + +// Parse an argument to alias which can either be a bare word, a symbol, an +// interpolated symbol or a global variable. If this is the first argument, then +// we need to set the lex state to YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM +// between the first and second arguments. +static inline yp_node_t * +parse_alias_argument(yp_parser_t *parser, bool first) { + switch (parser->current.type) { + case YP_CASE_OPERATOR: + case YP_CASE_KEYWORD: + case YP_TOKEN_CONSTANT: + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: { + if (first) { + lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM); + } + + parser_lex(parser); + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + + return (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); + } + case YP_TOKEN_SYMBOL_BEGIN: { + yp_lex_mode_t lex_mode = *parser->lex_modes.current; + parser_lex(parser); + + return parse_symbol(parser, &lex_mode, first ? 
YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM : YP_LEX_STATE_NONE); + } + case YP_TOKEN_BACK_REFERENCE: + parser_lex(parser); + return (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous); + case YP_TOKEN_NUMBERED_REFERENCE: + parser_lex(parser); + return (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous); + case YP_TOKEN_GLOBAL_VARIABLE: + parser_lex(parser); + return (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous); + default: + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_ALIAS_ARGUMENT); + return (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end); + } +} + +// Return true if any of the visible scopes to the current context are using +// numbered parameters. +static bool +outer_scope_using_numbered_params_p(yp_parser_t *parser) { + for (yp_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) { + if (scope->numbered_params) return true; + } + + return false; +} + +// Parse an identifier into either a local variable read or a call. +static yp_node_t * +parse_variable_call(yp_parser_t *parser) { + yp_node_flags_t flags = 0; + + if (!match1(parser, YP_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) { + int depth; + if ((depth = yp_parser_local_depth(parser, &parser->previous)) != -1) { + return (yp_node_t *) yp_local_variable_read_node_create(parser, &parser->previous, (uint32_t) depth); + } + + if (!parser->current_scope->closed && token_is_numbered_parameter(parser->previous.start, parser->previous.end)) { + // Indicate that this scope is using numbered params so that child + // scopes cannot. + parser->current_scope->numbered_params = true; + + // Now that we know we have a numbered parameter, we need to check + // if it's allowed in this context. 
If it is, then we will create a + // local variable read. If it's not, then we'll create a normal call + // node but add an error. + if (parser->current_scope->explicit_params) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_NUMBERED_PARAMETER_NOT_ALLOWED); + } else if (outer_scope_using_numbered_params_p(parser)) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_NUMBERED_PARAMETER_OUTER_SCOPE); + } else { + // When you use a numbered parameter, it implies the existence + // of all of the locals that exist before it. For example, + // referencing _2 means that _1 must exist. Therefore here we + // loop through all of the possibilities and add them into the + // constant pool. + uint8_t number = parser->previous.start[1]; + uint8_t current = '1'; + uint8_t *value; + + while (current < number) { + value = malloc(2); + value[0] = '_'; + value[1] = current++; + yp_parser_local_add_owned(parser, value, 2); + } + + // Now we can add the actual token that is being used. For + // this one we can add a shared version since it is directly + // referenced in the source. 
+ yp_parser_local_add_token(parser, &parser->previous); + return (yp_node_t *) yp_local_variable_read_node_create(parser, &parser->previous, 0); + } + } + + flags |= YP_CALL_NODE_FLAGS_VARIABLE_CALL; + } + + yp_call_node_t *node = yp_call_node_variable_call_create(parser, &parser->previous); + node->base.flags |= flags; + + return (yp_node_t *) node; +} + +static inline yp_token_t +parse_method_definition_name(yp_parser_t *parser) { + switch (parser->current.type) { + case YP_CASE_KEYWORD: + case YP_TOKEN_CONSTANT: + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: + parser_lex(parser); + return parser->previous; + case YP_CASE_OPERATOR: + lex_state_set(parser, YP_LEX_STATE_ENDFN); + parser_lex(parser); + return parser->previous; + default: + return (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end }; + } +} + +static int +parse_heredoc_common_whitespace_for_single_node(yp_parser_t *parser, yp_node_t *node, int common_whitespace) +{ + const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc; + int cur_whitespace; + const uint8_t *cur_char = content_loc->start; + + while (cur_char && cur_char < content_loc->end) { + // Any empty newlines aren't included in the minimum whitespace + // calculation. + size_t eol_length; + while ((eol_length = match_eol_at(parser, cur_char))) { + cur_char += eol_length; + } + + if (cur_char == content_loc->end) break; + + cur_whitespace = 0; + + while (yp_char_is_inline_whitespace(*cur_char) && cur_char < content_loc->end) { + if (cur_char[0] == '\t') { + cur_whitespace = (cur_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE; + } else { + cur_whitespace++; + } + cur_char++; + } + + // If we hit a newline, then we have encountered a line that + // contains only whitespace, and it shouldn't be considered in + // the calculation of common leading whitespace. 
+ eol_length = match_eol_at(parser, cur_char); + if (eol_length) { + cur_char += eol_length; + continue; + } + + if (cur_whitespace < common_whitespace || common_whitespace == -1) { + common_whitespace = cur_whitespace; + } + + cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1)); + if (cur_char) cur_char++; + } + return common_whitespace; +} + +// Calculate the common leading whitespace for each line in a heredoc. +static int +parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) { + int common_whitespace = -1; + + for (size_t index = 0; index < nodes->size; index++) { + yp_node_t *node = nodes->nodes[index]; + if (!YP_NODE_TYPE_P(node, YP_STRING_NODE)) continue; + + // If the previous node wasn't a string node, we don't want to trim + // whitespace. This could happen after an interpolated expression or + // variable. + if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_STRING_NODE)) { + common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, common_whitespace); + } + } + + return common_whitespace; +} + +static yp_string_t * +parse_heredoc_dedent_single_node(yp_parser_t *parser, yp_string_t *string, bool dedent_node, int common_whitespace, yp_heredoc_quote_t quote) +{ + // Get a reference to the string struct that is being held by the string + // node. This is the value we're going to actually manipulate. + yp_string_ensure_owned(string); + + // Now get the bounds of the existing string. We'll use this as a + // destination to move bytes into. We'll also use it for bounds checking + // since we don't require that these strings be null terminated. 
+ size_t dest_length = yp_string_length(string); + uint8_t *source_start = (uint8_t *) string->source; + + const uint8_t *source_cursor = source_start; + const uint8_t *source_end = source_cursor + dest_length; + + // We're going to move bytes backward in the string when we get leading + // whitespace, so we'll maintain a pointer to the current position in the + // string that we're writing to. + uint8_t *dest_cursor = source_start; + + while (source_cursor < source_end) { + // If we need to dedent the next element within the heredoc or the next + // line within the string node, then we'll do it here. + if (dedent_node) { + int trimmed_whitespace = 0; + + // While we haven't reached the amount of common whitespace that we need + // to trim and we haven't reached the end of the string, we'll keep + // trimming whitespace. Trimming in this context means skipping over + // these bytes such that they aren't copied into the new string. + while ((source_cursor < source_end) && yp_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) { + if (*source_cursor == '\t') { + trimmed_whitespace = (trimmed_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE; + if (trimmed_whitespace > common_whitespace) break; + } else { + trimmed_whitespace++; + } + + source_cursor++; + dest_length--; + } + } + + // At this point we have dedented all that we need to, so we need to find + // the next newline. + const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor); + + if (breakpoint == NULL) { + // If there isn't another newline, then we can just move the rest of the + // string and break from the loop. + memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor)); + break; + } + + // Otherwise, we need to move everything including the newline, and + // then set the dedent_node flag to true. 
+ if (breakpoint < source_end) breakpoint++; + memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor)); + dest_cursor += (breakpoint - source_cursor); + source_cursor = breakpoint; + dedent_node = true; + } + + // We only want to write this node into the list if it has any content. + string->length = dest_length; + + if (dest_length != 0) { + yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL); + } + return string; +} + +// Take a heredoc node that is indented by a ~ and trim the leading whitespace. +static void +parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *heredoc_node, yp_heredoc_quote_t quote) +{ + yp_node_list_t *nodes; + + if (quote == YP_HEREDOC_QUOTE_BACKTICK) { + nodes = &((yp_interpolated_x_string_node_t *) heredoc_node)->parts; + } else { + nodes = &((yp_interpolated_string_node_t *) heredoc_node)->parts; + } + + // First, calculate how much common whitespace we need to trim. If there is + // none or it's 0, then we can return early. + int common_whitespace; + if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return; + + // The next node should be dedented if it's the first node in the list or if + // if follows a string node. + bool dedent_next = true; + + // Iterate over all nodes, and trim whitespace accordingly. We're going to + // keep around two indices: a read and a write. If we end up trimming all of + // the whitespace from a node, then we'll drop it from the list entirely. + size_t write_index = 0; + + for (size_t read_index = 0; read_index < nodes->size; read_index++) { + yp_node_t *node = nodes->nodes[read_index]; + + // We're not manipulating child nodes that aren't strings. In this case + // we'll skip past it and indicate that the subsequent node should not + // be dedented. 
+ if (!YP_NODE_TYPE_P(node, YP_STRING_NODE)) { + nodes->nodes[write_index++] = node; + dedent_next = false; + continue; + } + + yp_string_node_t *string_node = ((yp_string_node_t *) node); + parse_heredoc_dedent_single_node(parser, &string_node->unescaped, dedent_next, common_whitespace, quote); + if (string_node->unescaped.length == 0) { + yp_node_destroy(parser, node); + } else { + nodes->nodes[write_index++] = node; + } + + // We always dedent the next node if it follows a string node. + dedent_next = true; + } + + nodes->size = write_index; +} + +static yp_node_t * +parse_pattern(yp_parser_t *parser, bool top_pattern, yp_diagnostic_id_t diag_id); + +// Accept any number of constants joined by :: delimiters. +static yp_node_t * +parse_pattern_constant_path(yp_parser_t *parser, yp_node_t *node) { + // Now, if there are any :: operators that follow, parse them as constant + // path nodes. + while (accept1(parser, YP_TOKEN_COLON_COLON)) { + yp_token_t delimiter = parser->previous; + expect1(parser, YP_TOKEN_CONSTANT, YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + + yp_node_t *child = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous); + node = (yp_node_t *)yp_constant_path_node_create(parser, node, &delimiter, child); + } + + // If there is a [ or ( that follows, then this is part of a larger pattern + // expression. We'll parse the inner pattern here, then modify the returned + // inner pattern with our constant path attached. 
+ if (!match2(parser, YP_TOKEN_BRACKET_LEFT, YP_TOKEN_PARENTHESIS_LEFT)) { + return node; + } + + yp_token_t opening; + yp_token_t closing; + yp_node_t *inner = NULL; + + if (accept1(parser, YP_TOKEN_BRACKET_LEFT)) { + opening = parser->previous; + accept1(parser, YP_TOKEN_NEWLINE); + + if (!accept1(parser, YP_TOKEN_BRACKET_RIGHT)) { + inner = parse_pattern(parser, true, YP_ERR_PATTERN_EXPRESSION_AFTER_BRACKET); + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_BRACKET_RIGHT, YP_ERR_PATTERN_TERM_BRACKET); + } + + closing = parser->previous; + } else { + parser_lex(parser); + opening = parser->previous; + + if (!accept1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + inner = parse_pattern(parser, true, YP_ERR_PATTERN_EXPRESSION_AFTER_PAREN); + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_PATTERN_TERM_PAREN); + } + + closing = parser->previous; + } + + if (!inner) { + // If there was no inner pattern, then we have something like Foo() or + // Foo[]. In that case we'll create an array pattern with no requireds. + return (yp_node_t *) yp_array_pattern_node_constant_create(parser, node, &opening, &closing); + } + + // Now that we have the inner pattern, check to see if it's an array, find, + // or hash pattern. If it is, then we'll attach our constant path to it if + // it doesn't already have a constant. If it's not one of those node types + // or it does have a constant, then we'll create an array pattern. 
+ switch (YP_NODE_TYPE(inner)) { + case YP_ARRAY_PATTERN_NODE: { + yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner; + + if (pattern_node->constant == NULL) { + pattern_node->base.location.start = node->location.start; + pattern_node->base.location.end = closing.end; + + pattern_node->constant = node; + pattern_node->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + pattern_node->closing_loc = YP_LOCATION_TOKEN_VALUE(&closing); + + return (yp_node_t *) pattern_node; + } + + break; + } + case YP_FIND_PATTERN_NODE: { + yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner; + + if (pattern_node->constant == NULL) { + pattern_node->base.location.start = node->location.start; + pattern_node->base.location.end = closing.end; + + pattern_node->constant = node; + pattern_node->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + pattern_node->closing_loc = YP_LOCATION_TOKEN_VALUE(&closing); + + return (yp_node_t *) pattern_node; + } + + break; + } + case YP_HASH_PATTERN_NODE: { + yp_hash_pattern_node_t *pattern_node = (yp_hash_pattern_node_t *) inner; + + if (pattern_node->constant == NULL) { + pattern_node->base.location.start = node->location.start; + pattern_node->base.location.end = closing.end; + + pattern_node->constant = node; + pattern_node->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + pattern_node->closing_loc = YP_LOCATION_TOKEN_VALUE(&closing); + + return (yp_node_t *) pattern_node; + } + + break; + } + default: + break; + } + + // If we got here, then we didn't return one of the inner patterns by + // attaching its constant. In this case we'll create an array pattern and + // attach our constant to it. + yp_array_pattern_node_t *pattern_node = yp_array_pattern_node_constant_create(parser, node, &opening, &closing); + yp_array_pattern_node_requireds_append(pattern_node, inner); + return (yp_node_t *) pattern_node; +} + +// Parse a rest pattern. 
+static yp_splat_node_t * +parse_pattern_rest(yp_parser_t *parser) { + assert(parser->previous.type == YP_TOKEN_USTAR); + yp_token_t operator = parser->previous; + yp_node_t *name = NULL; + + // Rest patterns don't necessarily have a name associated with them. So we + // will check for that here. If they do, then we'll add it to the local table + // since this pattern will cause it to become a local variable. + if (accept1(parser, YP_TOKEN_IDENTIFIER)) { + yp_token_t identifier = parser->previous; + yp_parser_local_add_token(parser, &identifier); + name = (yp_node_t *) yp_local_variable_target_node_create(parser, &identifier); + } + + // Finally we can return the created node. + return yp_splat_node_create(parser, &operator, name); +} + +// Parse a keyword rest node. +static yp_node_t * +parse_pattern_keyword_rest(yp_parser_t *parser) { + assert(parser->current.type == YP_TOKEN_USTAR_STAR); + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *value = NULL; + + if (accept1(parser, YP_TOKEN_KEYWORD_NIL)) { + return (yp_node_t *) yp_no_keywords_parameter_node_create(parser, &operator, &parser->previous); + } + + if (accept1(parser, YP_TOKEN_IDENTIFIER)) { + yp_parser_local_add_token(parser, &parser->previous); + value = (yp_node_t *) yp_local_variable_target_node_create(parser, &parser->previous); + } + + return (yp_node_t *) yp_assoc_splat_node_create(parser, value, &operator); +} + +// Parse a hash pattern. +static yp_hash_pattern_node_t * +parse_pattern_hash(yp_parser_t *parser, yp_node_t *first_assoc) { + if (YP_NODE_TYPE_P(first_assoc, YP_ASSOC_NODE)) { + if (!match7(parser, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + // Here we have a value for the first assoc in the list, so we will parse it + // now and update the first assoc. 
+ yp_node_t *value = parse_pattern(parser, false, YP_ERR_PATTERN_EXPRESSION_AFTER_KEY); + + yp_assoc_node_t *assoc = (yp_assoc_node_t *) first_assoc; + assoc->base.location.end = value->location.end; + assoc->value = value; + } else { + yp_node_t *key = ((yp_assoc_node_t *) first_assoc)->key; + + if (YP_NODE_TYPE_P(key, YP_SYMBOL_NODE)) { + const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc; + yp_parser_local_add_location(parser, value_loc->start, value_loc->end); + } + } + } + + yp_node_list_t assocs = YP_EMPTY_NODE_LIST; + yp_node_list_append(&assocs, first_assoc); + + // If there are any other assocs, then we'll parse them now. + while (accept1(parser, YP_TOKEN_COMMA)) { + // Here we need to break to support trailing commas. + if (match6(parser, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + break; + } + + yp_node_t *assoc; + + if (match1(parser, YP_TOKEN_USTAR_STAR)) { + assoc = parse_pattern_keyword_rest(parser); + } else { + expect1(parser, YP_TOKEN_LABEL, YP_ERR_PATTERN_LABEL_AFTER_COMMA); + yp_node_t *key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous); + yp_node_t *value = NULL; + + if (!match7(parser, YP_TOKEN_COMMA, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + value = parse_pattern(parser, false, YP_ERR_PATTERN_EXPRESSION_AFTER_KEY); + } else { + const yp_location_t *value_loc = &((yp_symbol_node_t *) key)->value_loc; + yp_parser_local_add_location(parser, value_loc->start, value_loc->end); + } + + yp_token_t operator = not_provided(parser); + assoc = (yp_node_t *) yp_assoc_node_create(parser, key, &operator, value); + } + + yp_node_list_append(&assocs, assoc); + } + + yp_hash_pattern_node_t *node = yp_hash_pattern_node_node_list_create(parser, &assocs); + free(assocs.nodes); + + return node; +} + +// Parse a pattern 
expression primitive. +static yp_node_t * +parse_pattern_primitive(yp_parser_t *parser, yp_diagnostic_id_t diag_id) { + switch (parser->current.type) { + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: { + parser_lex(parser); + yp_parser_local_add_token(parser, &parser->previous); + return (yp_node_t *) yp_local_variable_target_node_create(parser, &parser->previous); + } + case YP_TOKEN_BRACKET_LEFT_ARRAY: { + yp_token_t opening = parser->current; + parser_lex(parser); + + if (accept1(parser, YP_TOKEN_BRACKET_RIGHT)) { + // If we have an empty array pattern, then we'll just return a new + // array pattern node. + return (yp_node_t *)yp_array_pattern_node_empty_create(parser, &opening, &parser->previous); + } + + // Otherwise, we'll parse the inner pattern, then deal with it depending + // on the type it returns. + yp_node_t *inner = parse_pattern(parser, true, YP_ERR_PATTERN_EXPRESSION_AFTER_BRACKET); + + accept1(parser, YP_TOKEN_NEWLINE); + + expect1(parser, YP_TOKEN_BRACKET_RIGHT, YP_ERR_PATTERN_TERM_BRACKET); + yp_token_t closing = parser->previous; + + switch (YP_NODE_TYPE(inner)) { + case YP_ARRAY_PATTERN_NODE: { + yp_array_pattern_node_t *pattern_node = (yp_array_pattern_node_t *) inner; + if (pattern_node->opening_loc.start == NULL) { + pattern_node->base.location.start = opening.start; + pattern_node->base.location.end = closing.end; + + pattern_node->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + pattern_node->closing_loc = YP_LOCATION_TOKEN_VALUE(&closing); + + return (yp_node_t *) pattern_node; + } + + break; + } + case YP_FIND_PATTERN_NODE: { + yp_find_pattern_node_t *pattern_node = (yp_find_pattern_node_t *) inner; + if (pattern_node->opening_loc.start == NULL) { + pattern_node->base.location.start = opening.start; + pattern_node->base.location.end = closing.end; + + pattern_node->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + pattern_node->closing_loc = YP_LOCATION_TOKEN_VALUE(&closing); + + return (yp_node_t *) pattern_node; + } + + 
break; + } + default: + break; + } + + yp_array_pattern_node_t *node = yp_array_pattern_node_empty_create(parser, &opening, &closing); + yp_array_pattern_node_requireds_append(node, inner); + return (yp_node_t *) node; + } + case YP_TOKEN_BRACE_LEFT: { + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = false; + + yp_hash_pattern_node_t *node; + yp_token_t opening = parser->current; + parser_lex(parser); + + if (accept1(parser, YP_TOKEN_BRACE_RIGHT)) { + // If we have an empty hash pattern, then we'll just return a new hash + // pattern node. + node = yp_hash_pattern_node_empty_create(parser, &opening, &parser->previous); + } else { + yp_node_t *key; + + switch (parser->current.type) { + case YP_TOKEN_LABEL: + parser_lex(parser); + key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous); + break; + case YP_TOKEN_USTAR_STAR: + key = parse_pattern_keyword_rest(parser); + break; + case YP_TOKEN_STRING_BEGIN: + key = parse_expression(parser, YP_BINDING_POWER_MAX, YP_ERR_PATTERN_HASH_KEY); + if (!yp_symbol_node_label_p(key)) { + yp_diagnostic_list_append(&parser->error_list, key->location.start, key->location.end, YP_ERR_PATTERN_HASH_KEY_LABEL); + } + + break; + default: + parser_lex(parser); + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_PATTERN_HASH_KEY); + key = (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end); + break; + } + + yp_token_t operator = not_provided(parser); + node = parse_pattern_hash(parser, (yp_node_t *) yp_assoc_node_create(parser, key, &operator, NULL)); + + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_BRACE_RIGHT, YP_ERR_PATTERN_TERM_BRACE); + yp_token_t closing = parser->previous; + + node->base.location.start = opening.start; + node->base.location.end = closing.end; + + node->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + node->closing_loc = 
YP_LOCATION_TOKEN_VALUE(&closing); + } + + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + return (yp_node_t *) node; + } + case YP_TOKEN_UDOT_DOT: + case YP_TOKEN_UDOT_DOT_DOT: { + yp_token_t operator = parser->current; + parser_lex(parser); + + // Since we have a unary range operator, we need to parse the subsequent + // expression as the right side of the range. + switch (parser->current.type) { + case YP_CASE_PRIMITIVE: { + yp_node_t *right = parse_expression(parser, YP_BINDING_POWER_MAX, YP_ERR_PATTERN_EXPRESSION_AFTER_RANGE); + return (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right); + } + default: { + yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, YP_ERR_PATTERN_EXPRESSION_AFTER_RANGE); + yp_node_t *right = (yp_node_t *) yp_missing_node_create(parser, operator.start, operator.end); + return (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right); + } + } + } + case YP_CASE_PRIMITIVE: { + yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_MAX, diag_id); + + // Now that we have a primitive, we need to check if it's part of a range. + if (accept2(parser, YP_TOKEN_DOT_DOT, YP_TOKEN_DOT_DOT_DOT)) { + yp_token_t operator = parser->previous; + + // Now that we have the operator, we need to check if this is followed + // by another expression. If it is, then we will create a full range + // node. Otherwise, we'll create an endless range. + switch (parser->current.type) { + case YP_CASE_PRIMITIVE: { + yp_node_t *right = parse_expression(parser, YP_BINDING_POWER_MAX, YP_ERR_PATTERN_EXPRESSION_AFTER_RANGE); + return (yp_node_t *) yp_range_node_create(parser, node, &operator, right); + } + default: + return (yp_node_t *) yp_range_node_create(parser, node, &operator, NULL); + } + } + + return node; + } + case YP_TOKEN_CARET: { + parser_lex(parser); + yp_token_t operator = parser->previous; + + // At this point we have a pin operator. 
We need to check the subsequent + // expression to determine if it's a variable or an expression. + switch (parser->current.type) { + case YP_TOKEN_IDENTIFIER: { + parser_lex(parser); + yp_node_t *variable = (yp_node_t *) yp_local_variable_read_node_create(parser, &parser->previous, 0); + + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + case YP_TOKEN_INSTANCE_VARIABLE: { + parser_lex(parser); + yp_node_t *variable = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous); + + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + case YP_TOKEN_CLASS_VARIABLE: { + parser_lex(parser); + yp_node_t *variable = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous); + + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + case YP_TOKEN_GLOBAL_VARIABLE: { + parser_lex(parser); + yp_node_t *variable = (yp_node_t *) yp_global_variable_read_node_create(parser, &parser->previous); + + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + case YP_TOKEN_NUMBERED_REFERENCE: { + parser_lex(parser); + yp_node_t *variable = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous); + + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + case YP_TOKEN_BACK_REFERENCE: { + parser_lex(parser); + yp_node_t *variable = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous); + + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + case YP_TOKEN_PARENTHESIS_LEFT: { + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = false; + + yp_token_t lparen = parser->current; + parser_lex(parser); + + yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_PATTERN_EXPRESSION_AFTER_PIN); + 
parser->pattern_matching_newlines = previous_pattern_matching_newlines; + + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_PATTERN_TERM_PAREN); + return (yp_node_t *) yp_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous); + } + default: { + // If we get here, then we have a pin operator followed by something + // not understood. We'll create a missing node and return that. + yp_diagnostic_list_append(&parser->error_list, operator.start, operator.end, YP_ERR_PATTERN_EXPRESSION_AFTER_PIN); + yp_node_t *variable = (yp_node_t *) yp_missing_node_create(parser, operator.start, operator.end); + return (yp_node_t *) yp_pinned_variable_node_create(parser, &operator, variable); + } + } + } + case YP_TOKEN_UCOLON_COLON: { + yp_token_t delimiter = parser->current; + parser_lex(parser); + + expect1(parser, YP_TOKEN_CONSTANT, YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + yp_node_t *child = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous); + yp_constant_path_node_t *node = yp_constant_path_node_create(parser, NULL, &delimiter, child); + + return parse_pattern_constant_path(parser, (yp_node_t *)node); + } + case YP_TOKEN_CONSTANT: { + yp_token_t constant = parser->current; + parser_lex(parser); + + yp_node_t *node = (yp_node_t *) yp_constant_read_node_create(parser, &constant); + return parse_pattern_constant_path(parser, node); + } + default: + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, diag_id); + return (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end); + } +} + +// Parse any number of primitives joined by alternation and ended optionally by +// assignment. 
+static yp_node_t * +parse_pattern_primitives(yp_parser_t *parser, yp_diagnostic_id_t diag_id) { + yp_node_t *node = NULL; + + do { + yp_token_t operator = parser->previous; + + switch (parser->current.type) { + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_BRACKET_LEFT_ARRAY: + case YP_TOKEN_BRACE_LEFT: + case YP_TOKEN_CARET: + case YP_TOKEN_CONSTANT: + case YP_TOKEN_UCOLON_COLON: + case YP_TOKEN_UDOT_DOT: + case YP_TOKEN_UDOT_DOT_DOT: + case YP_CASE_PRIMITIVE: { + if (node == NULL) { + node = parse_pattern_primitive(parser, diag_id); + } else { + yp_node_t *right = parse_pattern_primitive(parser, YP_ERR_PATTERN_EXPRESSION_AFTER_PIPE); + node = (yp_node_t *) yp_alternation_pattern_node_create(parser, node, right, &operator); + } + + break; + } + case YP_TOKEN_PARENTHESIS_LEFT: { + parser_lex(parser); + if (node != NULL) { + yp_node_destroy(parser, node); + } + node = parse_pattern(parser, false, YP_ERR_PATTERN_EXPRESSION_AFTER_PAREN); + + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_PATTERN_TERM_PAREN); + break; + } + default: { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, diag_id); + yp_node_t *right = (yp_node_t *) yp_missing_node_create(parser, parser->current.start, parser->current.end); + + if (node == NULL) { + node = right; + } else { + node = (yp_node_t *) yp_alternation_pattern_node_create(parser, node, right, &operator); + } + + break; + } + } + } while (accept1(parser, YP_TOKEN_PIPE)); + + // If we have an =>, then we are assigning this pattern to a variable. + // In this case we should create an assignment node. 
+ while (accept1(parser, YP_TOKEN_EQUAL_GREATER)) { + yp_token_t operator = parser->previous; + + expect1(parser, YP_TOKEN_IDENTIFIER, YP_ERR_PATTERN_IDENT_AFTER_HROCKET); + yp_token_t identifier = parser->previous; + yp_parser_local_add_token(parser, &identifier); + + yp_node_t *target = (yp_node_t *) yp_local_variable_target_node_create(parser, &identifier); + node = (yp_node_t *) yp_capture_pattern_node_create(parser, node, target, &operator); + } + + return node; +} + +// Parse a pattern matching expression. +static yp_node_t * +parse_pattern(yp_parser_t *parser, bool top_pattern, yp_diagnostic_id_t diag_id) { + yp_node_t *node = NULL; + + bool leading_rest = false; + bool trailing_rest = false; + + switch (parser->current.type) { + case YP_TOKEN_LABEL: { + parser_lex(parser); + yp_node_t *key = (yp_node_t *) yp_symbol_node_label_create(parser, &parser->previous); + yp_token_t operator = not_provided(parser); + + return (yp_node_t *) parse_pattern_hash(parser, (yp_node_t *) yp_assoc_node_create(parser, key, &operator, NULL)); + } + case YP_TOKEN_USTAR_STAR: { + node = parse_pattern_keyword_rest(parser); + return (yp_node_t *) parse_pattern_hash(parser, node); + } + case YP_TOKEN_USTAR: { + if (top_pattern) { + parser_lex(parser); + node = (yp_node_t *) parse_pattern_rest(parser); + leading_rest = true; + break; + } + } + /* fallthrough */ + default: + node = parse_pattern_primitives(parser, diag_id); + break; + } + + // If we got a dynamic label symbol, then we need to treat it like the + // beginning of a hash pattern. + if (yp_symbol_node_label_p(node)) { + yp_token_t operator = not_provided(parser); + return (yp_node_t *) parse_pattern_hash(parser, (yp_node_t *) yp_assoc_node_create(parser, node, &operator, NULL)); + } + + if (top_pattern && match1(parser, YP_TOKEN_COMMA)) { + // If we have a comma, then we are now parsing either an array pattern or a + // find pattern. 
We need to parse all of the patterns, put them into a big + // list, and then determine which type of node we have. + yp_node_list_t nodes = YP_EMPTY_NODE_LIST; + yp_node_list_append(&nodes, node); + + // Gather up all of the patterns into the list. + while (accept1(parser, YP_TOKEN_COMMA)) { + // Break early here in case we have a trailing comma. + if (match5(parser, YP_TOKEN_KEYWORD_THEN, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + break; + } + + if (accept1(parser, YP_TOKEN_USTAR)) { + node = (yp_node_t *) parse_pattern_rest(parser); + + // If we have already parsed a splat pattern, then this is an error. We + // will continue to parse the rest of the patterns, but we will indicate + // it as an error. + if (trailing_rest) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_PATTERN_REST); + } + + trailing_rest = true; + } else { + node = parse_pattern_primitives(parser, YP_ERR_PATTERN_EXPRESSION_AFTER_COMMA); + } + + yp_node_list_append(&nodes, node); + } + + // If the first pattern and the last pattern are rest patterns, then we will + // call this a find pattern, regardless of how many rest patterns are in + // between because we know we already added the appropriate errors. + // Otherwise we will create an array pattern. + if (YP_NODE_TYPE_P(nodes.nodes[0], YP_SPLAT_NODE) && YP_NODE_TYPE_P(nodes.nodes[nodes.size - 1], YP_SPLAT_NODE)) { + node = (yp_node_t *) yp_find_pattern_node_create(parser, &nodes); + } else { + node = (yp_node_t *) yp_array_pattern_node_node_list_create(parser, &nodes); + } + + free(nodes.nodes); + } else if (leading_rest) { + // Otherwise, if we parsed a single splat pattern, then we know we have an + // array pattern, so we can go ahead and create that node. 
+ node = (yp_node_t *) yp_array_pattern_node_rest_create(parser, node); + } + + return node; +} + +// Incorporate a negative sign into a numeric node by subtracting 1 character +// from its start bounds. If it's a compound node, then we will recursively +// apply this function to its value. +static inline void +parse_negative_numeric(yp_node_t *node) { + switch (YP_NODE_TYPE(node)) { + case YP_INTEGER_NODE: + case YP_FLOAT_NODE: + node->location.start--; + break; + case YP_RATIONAL_NODE: + node->location.start--; + parse_negative_numeric(((yp_rational_node_t *) node)->numeric); + break; + case YP_IMAGINARY_NODE: + node->location.start--; + parse_negative_numeric(((yp_imaginary_node_t *) node)->numeric); + break; + default: + assert(false && "unreachable"); + break; + } +} + +// Returns a string content token at a particular location that is empty. +static yp_token_t +parse_strings_empty_content(const uint8_t *location) { + return (yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = location, .end = location }; +} + +// Parse a set of strings that could be concatenated together. +static inline yp_node_t * +parse_strings(yp_parser_t *parser) { + assert(parser->current.type == YP_TOKEN_STRING_BEGIN); + yp_node_t *result = NULL; + + while (match1(parser, YP_TOKEN_STRING_BEGIN)) { + yp_node_t *node = NULL; + + // Here we have found a string literal. We'll parse it and add it to + // the list of strings. + assert(parser->lex_modes.current->mode == YP_LEX_STRING); + bool lex_interpolation = parser->lex_modes.current->as.string.interpolation; + + yp_token_t opening = parser->current; + parser_lex(parser); + + if (accept1(parser, YP_TOKEN_STRING_END)) { + // If we get here, then we have an end immediately after a + // start. In that case we'll create an empty content token and + // return an uninterpolated string. 
+ yp_token_t content = parse_strings_empty_content(parser->previous.start); + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE); + } else if (accept1(parser, YP_TOKEN_LABEL_END)) { + // If we get here, then we have an end of a label immediately + // after a start. In that case we'll create an empty symbol + // node. + yp_token_t opening = not_provided(parser); + yp_token_t content = parse_strings_empty_content(parser->previous.start); + node = (yp_node_t *) yp_symbol_node_create(parser, &opening, &content, &parser->previous); + } else if (!lex_interpolation) { + // If we don't accept interpolation then we expect the string to + // start with a single string content node. + expect1(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_EXPECT_STRING_CONTENT); + yp_token_t content = parser->previous; + + // It is unfortunately possible to have multiple string content + // nodes in a row in the case that there's heredoc content in + // the middle of the string, like this cursed example: + // + // <<-END+'b + // a + // END + // c'+'d' + // + // In that case we need to switch to an interpolated string to + // be able to contain all of the parts. 
+ if (match1(parser, YP_TOKEN_STRING_CONTENT)) { + yp_node_list_t parts = YP_EMPTY_NODE_LIST; + + yp_token_t delimiters = not_provided(parser); + yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, YP_UNESCAPE_MINIMAL); + yp_node_list_append(&parts, part); + + while (accept1(parser, YP_TOKEN_STRING_CONTENT)) { + part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, YP_UNESCAPE_MINIMAL); + yp_node_list_append(&parts, part); + } + + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM); + node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + } else if (accept1(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } else { + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_LITERAL_TERM); + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_MINIMAL); + } + } else if (match1(parser, YP_TOKEN_STRING_CONTENT)) { + // In this case we've hit string content so we know the string + // at least has something in it. We'll need to check if the + // following token is the end (in which case we can return a + // plain string) or if it's not then it has interpolation. + yp_token_t content = parser->current; + parser_lex(parser); + + if (accept1(parser, YP_TOKEN_STRING_END)) { + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } else if (accept1(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } else { + // If we get here, then we have interpolation so we'll need + // to create a string or symbol node with interpolation. 
+ yp_node_list_t parts = YP_EMPTY_NODE_LIST; + yp_token_t string_opening = not_provided(parser); + yp_token_t string_closing = not_provided(parser); + + yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, YP_UNESCAPE_ALL); + yp_node_list_append(&parts, part); + + while (!match3(parser, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { + if ((part = parse_string_part(parser)) != NULL) { + yp_node_list_append(&parts, part); + } + } + + if (accept1(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); + } else { + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM); + node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + } + } + } else { + // If we get here, then the first part of the string is not + // plain string content, in which case we need to parse the + // string as an interpolated string. + yp_node_list_t parts = YP_EMPTY_NODE_LIST; + yp_node_t *part = NULL; + + while (!match3(parser, YP_TOKEN_STRING_END, YP_TOKEN_LABEL_END, YP_TOKEN_EOF)) { + if ((part = parse_string_part(parser)) != NULL) { + yp_node_list_append(&parts, part); + } + } + + if (accept1(parser, YP_TOKEN_LABEL_END)) { + node = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous); + } else { + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_STRING_INTERPOLATED_TERM); + node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, &parts, &parser->previous); + } + } + + if (result == NULL) { + // If the node we just parsed is a symbol node, then we can't + // concatenate it with anything else, so we can now return that + // node. 
+ if (YP_NODE_TYPE_P(node, YP_SYMBOL_NODE) || YP_NODE_TYPE_P(node, YP_INTERPOLATED_SYMBOL_NODE)) { + return node; + } + + // If we don't already have a node, then it's fine and we can just + // set the result to be the node we just parsed. + result = node; + } else { + // Otherwise we need to check the type of the node we just parsed. + // If it cannot be concatenated with the previous node, then we'll + // need to add a syntax error. + if (!YP_NODE_TYPE_P(node, YP_STRING_NODE) && !YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE)) { + yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_STRING_CONCATENATION); + } + + // Either way we will create a concat node to hold the strings + // together. + result = (yp_node_t *) yp_string_concat_node_create(parser, result, node); + } + } + + return result; +} + +// Parse an expression that begins with the previous node that we just lexed. +static inline yp_node_t * +parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) { + switch (parser->current.type) { + case YP_TOKEN_BRACKET_LEFT_ARRAY: { + parser_lex(parser); + + yp_array_node_t *array = yp_array_node_create(parser, &parser->previous); + yp_accepts_block_stack_push(parser, true); + bool parsed_bare_hash = false; + + while (!match2(parser, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_EOF)) { + // Handle the case where we don't have a comma and we have a newline followed by a right bracket. + if (accept1(parser, YP_TOKEN_NEWLINE) && match1(parser, YP_TOKEN_BRACKET_RIGHT)) { + break; + } + + if (yp_array_node_size(array) != 0) { + expect1(parser, YP_TOKEN_COMMA, YP_ERR_ARRAY_SEPARATOR); + } + + // If we have a right bracket immediately following a comma, this is + // allowed since it's a trailing comma. In this case we can break out of + // the loop. 
+ if (match1(parser, YP_TOKEN_BRACKET_RIGHT)) break; + + yp_node_t *element; + + if (accept1(parser, YP_TOKEN_USTAR)) { + yp_token_t operator = parser->previous; + yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_ARRAY_EXPRESSION_AFTER_STAR); + element = (yp_node_t *) yp_splat_node_create(parser, &operator, expression); + } else if (match2(parser, YP_TOKEN_LABEL, YP_TOKEN_USTAR_STAR)) { + if (parsed_bare_hash) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_EXPRESSION_BARE_HASH); + } + + yp_keyword_hash_node_t *hash = yp_keyword_hash_node_create(parser); + element = (yp_node_t *)hash; + + if (!match8(parser, YP_TOKEN_EOF, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_TOKEN_EOF, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_BRACKET_RIGHT, YP_TOKEN_KEYWORD_DO, YP_TOKEN_PARENTHESIS_RIGHT)) { + parse_assocs(parser, (yp_node_t *) hash); + } + + parsed_bare_hash = true; + } else { + element = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_ARRAY_EXPRESSION); + + if (yp_symbol_node_label_p(element) || accept1(parser, YP_TOKEN_EQUAL_GREATER)) { + if (parsed_bare_hash) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_EXPRESSION_BARE_HASH); + } + + yp_keyword_hash_node_t *hash = yp_keyword_hash_node_create(parser); + + yp_token_t operator; + if (parser->previous.type == YP_TOKEN_EQUAL_GREATER) { + operator = parser->previous; + } else { + operator = not_provided(parser); + } + + yp_node_t *value = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_HASH_VALUE); + yp_node_t *assoc = (yp_node_t *) yp_assoc_node_create(parser, element, &operator, value); + yp_keyword_hash_node_elements_append(hash, assoc); + + element = (yp_node_t *)hash; + if (accept1(parser, YP_TOKEN_COMMA) && !match1(parser, YP_TOKEN_BRACKET_RIGHT)) { + parse_assocs(parser, (yp_node_t *) hash); + } + + parsed_bare_hash = true; + } + } + + 
yp_array_node_elements_append(array, element); + if (YP_NODE_TYPE_P(element, YP_MISSING_NODE)) break; + } + + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_BRACKET_RIGHT, YP_ERR_ARRAY_TERM); + yp_array_node_close_set(array, &parser->previous); + yp_accepts_block_stack_pop(parser); + + return (yp_node_t *) array; + } + case YP_TOKEN_PARENTHESIS_LEFT: + case YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES: { + yp_token_t opening = parser->current; + parser_lex(parser); + while (accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE)); + + // If this is the end of the file or we match a right parenthesis, then + // we have an empty parentheses node, and we can immediately return. + if (match2(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_TOKEN_EOF)) { + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN); + return (yp_node_t *) yp_parentheses_node_create(parser, &opening, NULL, &parser->previous); + } + + // Otherwise, we're going to parse the first statement in the list + // of statements within the parentheses. + yp_accepts_block_stack_push(parser, true); + yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION); + + // Determine if this statement is followed by a terminator. In the + // case of a single statement, this is fine. But in the case of + // multiple statements it's required. + bool terminator_found = accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + if (terminator_found) { + while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); + } + + // If we hit a right parenthesis, then we're done parsing the + // parentheses node, and we can check which kind of node we should + // return. 
+ if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + if (opening.type == YP_TOKEN_PARENTHESIS_LEFT_PARENTHESES) { + lex_state_set(parser, YP_LEX_STATE_ENDARG); + } + parser_lex(parser); + yp_accepts_block_stack_pop(parser); + + // If we have a single statement and are ending on a right + // parenthesis, then we need to check if this is possibly a + // multiple target node. + if (YP_NODE_TYPE_P(statement, YP_MULTI_TARGET_NODE)) { + yp_multi_target_node_t *multi_target; + if (((yp_multi_target_node_t *) statement)->lparen_loc.start == NULL) { + multi_target = (yp_multi_target_node_t *) statement; + } else { + multi_target = yp_multi_target_node_create(parser); + yp_multi_target_node_targets_append(multi_target, statement); + } + + yp_location_t lparen_loc = YP_LOCATION_TOKEN_VALUE(&opening); + yp_location_t rparen_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + + multi_target->lparen_loc = lparen_loc; + multi_target->rparen_loc = rparen_loc; + multi_target->base.location.start = lparen_loc.start; + multi_target->base.location.end = rparen_loc.end; + + if (match1(parser, YP_TOKEN_COMMA)) { + return parse_targets_validate(parser, (yp_node_t *) multi_target, YP_BINDING_POWER_INDEX); + } else { + return parse_target_validate(parser, (yp_node_t *) multi_target); + } + } + + // If we have a single statement and are ending on a right parenthesis + // and we didn't return a multiple assignment node, then we can return a + // regular parentheses node now. + yp_statements_node_t *statements = yp_statements_node_create(parser); + yp_statements_node_body_append(statements, statement); + + return (yp_node_t *) yp_parentheses_node_create(parser, &opening, (yp_node_t *) statements, &parser->previous); + } + + // If we have more than one statement in the set of parentheses, + // then we are going to parse all of them as a list of statements. + // We'll do that here. 
+ context_push(parser, YP_CONTEXT_PARENS); + yp_statements_node_t *statements = yp_statements_node_create(parser); + yp_statements_node_body_append(statements, statement); + + // If we didn't find a terminator and we didn't find a right + // parenthesis, then this is a syntax error. + if (!terminator_found) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT); + } + + // Parse each statement within the parentheses. + while (true) { + yp_node_t *node = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_CANNOT_PARSE_EXPRESSION); + yp_statements_node_body_append(statements, node); + + // If we're recovering from a syntax error, then we need to stop + // parsing the statements now. + if (parser->recovering) { + // If this is the level of context where the recovery has + // happened, then we can mark the parser as done recovering. + if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false; + break; + } + + // If we couldn't parse an expression at all, then we need to + // bail out of the loop. + if (YP_NODE_TYPE_P(node, YP_MISSING_NODE)) break; + + // If we successfully parsed a statement, then we are going to + // need terminator to delimit them. 
+ if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); + if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) break; + } else if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + break; + } else { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, YP_ERR_EXPECT_EOL_AFTER_STATEMENT); + } + } + + context_pop(parser); + yp_accepts_block_stack_pop(parser); + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN); + + return (yp_node_t *) yp_parentheses_node_create(parser, &opening, (yp_node_t *) statements, &parser->previous); + } + case YP_TOKEN_BRACE_LEFT: { + yp_accepts_block_stack_push(parser, true); + parser_lex(parser); + yp_hash_node_t *node = yp_hash_node_create(parser, &parser->previous); + + if (!match2(parser, YP_TOKEN_BRACE_RIGHT, YP_TOKEN_EOF)) { + parse_assocs(parser, (yp_node_t *) node); + accept1(parser, YP_TOKEN_NEWLINE); + } + + yp_accepts_block_stack_pop(parser); + expect1(parser, YP_TOKEN_BRACE_RIGHT, YP_ERR_HASH_TERM); + yp_hash_node_closing_loc_set(node, &parser->previous); + + return (yp_node_t *) node; + } + case YP_TOKEN_CHARACTER_LITERAL: { + parser_lex(parser); + + yp_token_t opening = parser->previous; + opening.type = YP_TOKEN_STRING_BEGIN; + opening.end = opening.start + 1; + + yp_token_t content = parser->previous; + content.type = YP_TOKEN_STRING_CONTENT; + content.start = content.start + 1; + + yp_token_t closing = not_provided(parser); + yp_node_t *node = (yp_node_t *) yp_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, YP_UNESCAPE_ALL); + + // Characters can be followed by strings in which case they are + // automatically concatenated. 
+ if (match1(parser, YP_TOKEN_STRING_BEGIN)) { + yp_node_t *concat = parse_strings(parser); + return (yp_node_t *) yp_string_concat_node_create(parser, node, concat); + } + + return node; + } + case YP_TOKEN_CLASS_VARIABLE: { + parser_lex(parser); + yp_node_t *node = (yp_node_t *) yp_class_variable_read_node_create(parser, &parser->previous); + + if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_CONSTANT: { + parser_lex(parser); + yp_token_t constant = parser->previous; + + // If a constant is immediately followed by parentheses, then this is in + // fact a method call, not a constant read. + if ( + match1(parser, YP_TOKEN_PARENTHESIS_LEFT) || + (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match3(parser, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) || + (yp_accepts_block_stack_p(parser) && match2(parser, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT)) + ) { + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + parse_arguments_list(parser, &arguments, true); + return (yp_node_t *) yp_call_node_fcall_create(parser, &constant, &arguments); + } + + yp_node_t *node = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous); + + if ((binding_power == YP_BINDING_POWER_STATEMENT) && match1(parser, YP_TOKEN_COMMA)) { + // If we get here, then we have a comma immediately following a + // constant, so we're going to parse this as a multiple assignment. 
+ node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_UCOLON_COLON: { + parser_lex(parser); + + yp_token_t delimiter = parser->previous; + expect1(parser, YP_TOKEN_CONSTANT, YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + + yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous); + yp_node_t *node = (yp_node_t *)yp_constant_path_node_create(parser, NULL, &delimiter, constant); + + if ((binding_power == YP_BINDING_POWER_STATEMENT) && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_UDOT_DOT: + case YP_TOKEN_UDOT_DOT_DOT: { + yp_token_t operator = parser->current; + parser_lex(parser); + + yp_node_t *right = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return (yp_node_t *) yp_range_node_create(parser, NULL, &operator, right); + } + case YP_TOKEN_FLOAT: + parser_lex(parser); + return (yp_node_t *) yp_float_node_create(parser, &parser->previous); + case YP_TOKEN_FLOAT_IMAGINARY: + parser_lex(parser); + return (yp_node_t *) yp_float_node_imaginary_create(parser, &parser->previous); + case YP_TOKEN_FLOAT_RATIONAL: + parser_lex(parser); + return (yp_node_t *) yp_float_node_rational_create(parser, &parser->previous); + case YP_TOKEN_FLOAT_RATIONAL_IMAGINARY: + parser_lex(parser); + return (yp_node_t *) yp_float_node_rational_imaginary_create(parser, &parser->previous); + case YP_TOKEN_NUMBERED_REFERENCE: { + parser_lex(parser); + yp_node_t *node = (yp_node_t *) yp_numbered_reference_read_node_create(parser, &parser->previous); + + if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_GLOBAL_VARIABLE: { + parser_lex(parser); + yp_node_t *node = (yp_node_t *) yp_global_variable_read_node_create(parser, 
&parser->previous); + + if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_BACK_REFERENCE: { + parser_lex(parser); + yp_node_t *node = (yp_node_t *) yp_back_reference_read_node_create(parser, &parser->previous); + + if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: { + parser_lex(parser); + yp_token_t identifier = parser->previous; + yp_node_t *node = parse_variable_call(parser); + + if (YP_NODE_TYPE_P(node, YP_CALL_NODE)) { + // If parse_variable_call returned with a call node, then we + // know the identifier is not in the local table. In that case + // we need to check if there are arguments following the + // identifier. + yp_call_node_t *call = (yp_call_node_t *) node; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + + if (parse_arguments_list(parser, &arguments, true)) { + // Since we found arguments, we need to turn off the + // variable call bit in the flags. + call->base.flags &= (yp_node_flags_t) ~YP_CALL_NODE_FLAGS_VARIABLE_CALL; + + call->opening_loc = arguments.opening_loc; + call->arguments = arguments.arguments; + call->closing_loc = arguments.closing_loc; + call->block = arguments.block; + + if (arguments.block != NULL) { + call->base.location.end = arguments.block->location.end; + } else if (arguments.closing_loc.start == NULL) { + if (arguments.arguments != NULL) { + call->base.location.end = arguments.arguments->base.location.end; + } else { + call->base.location.end = call->message_loc.end; + } + } else { + call->base.location.end = arguments.closing_loc.end; + } + } + } else { + // Otherwise, we know the identifier is in the local table. 
This + // can still be a method call if it is followed by arguments or + // a block, so we need to check for that here. + if ( + (binding_power <= YP_BINDING_POWER_ASSIGNMENT && (token_begins_expression_p(parser->current.type) || match3(parser, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR))) || + (yp_accepts_block_stack_p(parser) && match2(parser, YP_TOKEN_KEYWORD_DO, YP_TOKEN_BRACE_LEFT)) + ) { + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + parse_arguments_list(parser, &arguments, true); + + yp_call_node_t *fcall = yp_call_node_fcall_create(parser, &identifier, &arguments); + yp_node_destroy(parser, node); + return (yp_node_t *) fcall; + } + } + + if ((binding_power == YP_BINDING_POWER_STATEMENT) && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_HEREDOC_START: { + // Here we have found a heredoc. We'll parse it and add it to the + // list of strings. + assert(parser->lex_modes.current->mode == YP_LEX_HEREDOC); + yp_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote; + yp_heredoc_indent_t indent = parser->lex_modes.current->as.heredoc.indent; + + parser_lex(parser); + yp_token_t opening = parser->previous; + + yp_node_t *node; + yp_node_t *part; + + if (match2(parser, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) { + // If we get here, then we have an empty heredoc. We'll create + // an empty content token and return an empty string node. 
+ lex_state_set(parser, YP_LEX_STATE_END); + expect1(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM); + yp_token_t content = parse_strings_empty_content(parser->previous.start); + + if (quote == YP_HEREDOC_QUOTE_BACKTICK) { + node = (yp_node_t *) yp_xstring_node_create_and_unescape(parser, &opening, &content, &parser->previous); + } else { + node = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_NONE); + } + + node->location.end = opening.end; + } else if ((part = parse_string_part(parser)) == NULL) { + // If we get here, then we tried to find something in the + // heredoc but couldn't actually parse anything, so we'll just + // return a missing node. + node = (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end); + } else if (YP_NODE_TYPE_P(part, YP_STRING_NODE) && match2(parser, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) { + // If we get here, then the part that we parsed was plain string + // content and we're at the end of the heredoc, so we can return + // just a string node with the heredoc opening and closing as + // its opening and closing. 
+ yp_string_node_t *cast = (yp_string_node_t *) part; + + cast->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening); + cast->closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->current); + cast->base.location = cast->opening_loc; + + if (quote == YP_HEREDOC_QUOTE_BACKTICK) { + assert(sizeof(yp_string_node_t) == sizeof(yp_x_string_node_t)); + cast->base.type = YP_X_STRING_NODE; + } + + lex_state_set(parser, YP_LEX_STATE_END); + expect1(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM); + + node = (yp_node_t *) cast; + + if (indent == YP_HEREDOC_INDENT_TILDE) { + int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1); + parse_heredoc_dedent_single_node(parser, &cast->unescaped, true, common_whitespace, quote); + } + } else { + // If we get here, then we have multiple parts in the heredoc, + // so we'll need to create an interpolated string node to hold + // them all. + yp_node_list_t parts = YP_EMPTY_NODE_LIST; + yp_node_list_append(&parts, part); + + while (!match2(parser, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) { + if ((part = parse_string_part(parser)) != NULL) { + yp_node_list_append(&parts, part); + } + } + + // Now that we have all of the parts, create the correct type of + // interpolated node. 
+ if (quote == YP_HEREDOC_QUOTE_BACKTICK) { + yp_interpolated_x_string_node_t *cast = yp_interpolated_xstring_node_create(parser, &opening, &opening); + cast->parts = parts; + + lex_state_set(parser, YP_LEX_STATE_END); + expect1(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM); + + yp_interpolated_xstring_node_closing_set(cast, &parser->previous); + cast->base.location = cast->opening_loc; + node = (yp_node_t *) cast; + } else { + yp_interpolated_string_node_t *cast = yp_interpolated_string_node_create(parser, &opening, &parts, &opening); + + lex_state_set(parser, YP_LEX_STATE_END); + expect1(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM); + + yp_interpolated_string_node_closing_set(cast, &parser->previous); + cast->base.location = cast->opening_loc; + node = (yp_node_t *) cast; + } + + // If this is a heredoc that is indented with a ~, then we need + // to dedent each line by the common leading whitespace. + if (indent == YP_HEREDOC_INDENT_TILDE) { + parse_heredoc_dedent(parser, node, quote); + } + } + + if (match1(parser, YP_TOKEN_STRING_BEGIN)) { + yp_node_t *concat = parse_strings(parser); + return (yp_node_t *) yp_string_concat_node_create(parser, node, concat); + } + + return node; + } + case YP_TOKEN_INSTANCE_VARIABLE: { + parser_lex(parser); + yp_node_t *node = (yp_node_t *) yp_instance_variable_read_node_create(parser, &parser->previous); + + if (binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + node = parse_targets_validate(parser, node, YP_BINDING_POWER_INDEX); + } + + return node; + } + case YP_TOKEN_INTEGER: { + yp_node_flags_t base = parser->integer_base; + parser_lex(parser); + return (yp_node_t *) yp_integer_node_create(parser, base, &parser->previous); + } + case YP_TOKEN_INTEGER_IMAGINARY: { + yp_node_flags_t base = parser->integer_base; + parser_lex(parser); + return (yp_node_t *) yp_integer_node_imaginary_create(parser, base, &parser->previous); + } + case YP_TOKEN_INTEGER_RATIONAL: { + yp_node_flags_t base 
= parser->integer_base; + parser_lex(parser); + return (yp_node_t *) yp_integer_node_rational_create(parser, base, &parser->previous); + } + case YP_TOKEN_INTEGER_RATIONAL_IMAGINARY: { + yp_node_flags_t base = parser->integer_base; + parser_lex(parser); + return (yp_node_t *) yp_integer_node_rational_imaginary_create(parser, base, &parser->previous); + } + case YP_TOKEN_KEYWORD___ENCODING__: + parser_lex(parser); + return (yp_node_t *) yp_source_encoding_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD___FILE__: + parser_lex(parser); + return (yp_node_t *) yp_source_file_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD___LINE__: + parser_lex(parser); + return (yp_node_t *) yp_source_line_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_ALIAS: { + parser_lex(parser); + yp_token_t keyword = parser->previous; + + yp_node_t *new_name = parse_alias_argument(parser, true); + yp_node_t *old_name = parse_alias_argument(parser, false); + + switch (YP_NODE_TYPE(new_name)) { + case YP_BACK_REFERENCE_READ_NODE: + case YP_NUMBERED_REFERENCE_READ_NODE: + case YP_GLOBAL_VARIABLE_READ_NODE: { + if (YP_NODE_TYPE_P(old_name, YP_BACK_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(old_name, YP_NUMBERED_REFERENCE_READ_NODE) || YP_NODE_TYPE_P(old_name, YP_GLOBAL_VARIABLE_READ_NODE)) { + if (YP_NODE_TYPE_P(old_name, YP_NUMBERED_REFERENCE_READ_NODE)) { + yp_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, YP_ERR_ALIAS_ARGUMENT); + } + } else { + yp_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, YP_ERR_ALIAS_ARGUMENT); + } + + return (yp_node_t *) yp_alias_global_variable_node_create(parser, &keyword, new_name, old_name); + } + case YP_SYMBOL_NODE: + case YP_INTERPOLATED_SYMBOL_NODE: { + if (!YP_NODE_TYPE_P(old_name, YP_SYMBOL_NODE) && !YP_NODE_TYPE_P(old_name, YP_INTERPOLATED_SYMBOL_NODE)) { + yp_diagnostic_list_append(&parser->error_list, 
old_name->location.start, old_name->location.end, YP_ERR_ALIAS_ARGUMENT); + } + } + /* fallthrough */ + default: + return (yp_node_t *) yp_alias_method_node_create(parser, &keyword, new_name, old_name); + } + } + case YP_TOKEN_KEYWORD_CASE: { + parser_lex(parser); + yp_token_t case_keyword = parser->previous; + yp_node_t *predicate = NULL; + + if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); + predicate = NULL; + } else if (match3(parser, YP_TOKEN_KEYWORD_WHEN, YP_TOKEN_KEYWORD_IN, YP_TOKEN_KEYWORD_END)) { + predicate = NULL; + } else if (!token_begins_expression_p(parser->current.type)) { + predicate = NULL; + } else { + predicate = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_CASE_EXPRESSION_AFTER_CASE); + while (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)); + } + + if (accept1(parser, YP_TOKEN_KEYWORD_END)) { + yp_diagnostic_list_append(&parser->error_list, case_keyword.start, case_keyword.end, YP_ERR_CASE_MISSING_CONDITIONS); + return (yp_node_t *) yp_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous); + } + + // At this point we can create a case node, though we don't yet know if it + // is a case-in or case-when node. + yp_token_t end_keyword = not_provided(parser); + yp_case_node_t *case_node = yp_case_node_create(parser, &case_keyword, predicate, NULL, &end_keyword); + + if (match1(parser, YP_TOKEN_KEYWORD_WHEN)) { + // At this point we've seen a when keyword, so we know this is a + // case-when node. We will continue to parse the when nodes until we hit + // the end of the list. 
+ while (accept1(parser, YP_TOKEN_KEYWORD_WHEN)) { + yp_token_t when_keyword = parser->previous; + yp_when_node_t *when_node = yp_when_node_create(parser, &when_keyword); + + do { + if (accept1(parser, YP_TOKEN_USTAR)) { + yp_token_t operator = parser->previous; + yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_EXPECT_EXPRESSION_AFTER_STAR); + + yp_splat_node_t *splat_node = yp_splat_node_create(parser, &operator, expression); + yp_when_node_conditions_append(when_node, (yp_node_t *) splat_node); + + if (YP_NODE_TYPE_P(expression, YP_MISSING_NODE)) break; + } else { + yp_node_t *condition = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_CASE_EXPRESSION_AFTER_WHEN); + yp_when_node_conditions_append(when_node, condition); + + if (YP_NODE_TYPE_P(condition, YP_MISSING_NODE)) break; + } + } while (accept1(parser, YP_TOKEN_COMMA)); + + if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + accept1(parser, YP_TOKEN_KEYWORD_THEN); + } else { + expect1(parser, YP_TOKEN_KEYWORD_THEN, YP_ERR_EXPECT_WHEN_DELIMITER); + } + + if (!match3(parser, YP_TOKEN_KEYWORD_WHEN, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_END)) { + yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_CASE_WHEN); + if (statements != NULL) { + yp_when_node_statements_set(when_node, statements); + } + } + + yp_case_node_condition_append(case_node, (yp_node_t *) when_node); + } + } else { + // At this point we expect that we're parsing a case-in node. We will + // continue to parse the in nodes until we hit the end of the list. 
+ while (match1(parser, YP_TOKEN_KEYWORD_IN)) { + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = true; + + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + parser->command_start = false; + parser_lex(parser); + + yp_token_t in_keyword = parser->previous; + yp_node_t *pattern = parse_pattern(parser, true, YP_ERR_PATTERN_EXPRESSION_AFTER_IN); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + + // Since we're in the top-level of the case-in node we need to check + // for guard clauses in the form of `if` or `unless` statements. + if (accept1(parser, YP_TOKEN_KEYWORD_IF_MODIFIER)) { + yp_token_t keyword = parser->previous; + yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_CONDITIONAL_IF_PREDICATE); + pattern = (yp_node_t *) yp_if_node_modifier_create(parser, pattern, &keyword, predicate); + } else if (accept1(parser, YP_TOKEN_KEYWORD_UNLESS_MODIFIER)) { + yp_token_t keyword = parser->previous; + yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_CONDITIONAL_UNLESS_PREDICATE); + pattern = (yp_node_t *) yp_unless_node_modifier_create(parser, pattern, &keyword, predicate); + } + + // Now we need to check for the terminator of the in node's pattern. + // It can be a newline or semicolon optionally followed by a `then` + // keyword. + yp_token_t then_keyword; + if (accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON)) { + if (accept1(parser, YP_TOKEN_KEYWORD_THEN)) { + then_keyword = parser->previous; + } else { + then_keyword = not_provided(parser); + } + } else { + expect1(parser, YP_TOKEN_KEYWORD_THEN, YP_ERR_EXPECT_WHEN_DELIMITER); + then_keyword = parser->previous; + } + + // Now we can actually parse the statements associated with the in + // node. 
+ yp_statements_node_t *statements; + if (match3(parser, YP_TOKEN_KEYWORD_IN, YP_TOKEN_KEYWORD_ELSE, YP_TOKEN_KEYWORD_END)) { + statements = NULL; + } else { + statements = parse_statements(parser, YP_CONTEXT_CASE_IN); + } + + // Now that we have the full pattern and statements, we can create the + // node and attach it to the case node. + yp_node_t *condition = (yp_node_t *) yp_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword); + yp_case_node_condition_append(case_node, condition); + } + } + + // If we didn't parse any conditions (in or when) then we need to + // indicate that we have an error. + if (case_node->conditions.size == 0) { + yp_diagnostic_list_append(&parser->error_list, case_keyword.start, case_keyword.end, YP_ERR_CASE_MISSING_CONDITIONS); + } + + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + if (accept1(parser, YP_TOKEN_KEYWORD_ELSE)) { + yp_token_t else_keyword = parser->previous; + yp_else_node_t *else_node; + + if (!match1(parser, YP_TOKEN_KEYWORD_END)) { + else_node = yp_else_node_create(parser, &else_keyword, parse_statements(parser, YP_CONTEXT_ELSE), &parser->current); + } else { + else_node = yp_else_node_create(parser, &else_keyword, NULL, &parser->current); + } + + yp_case_node_consequent_set(case_node, else_node); + } + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_CASE_TERM); + yp_case_node_end_keyword_loc_set(case_node, &parser->previous); + return (yp_node_t *) case_node; + } + case YP_TOKEN_KEYWORD_BEGIN: { + parser_lex(parser); + + yp_token_t begin_keyword = parser->previous; + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + yp_statements_node_t *begin_statements = NULL; + + if (!match3(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + begin_statements = parse_statements(parser, YP_CONTEXT_BEGIN); + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + } + + yp_begin_node_t 
*begin_node = yp_begin_node_create(parser, &begin_keyword, begin_statements); + parse_rescues(parser, begin_node); + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_BEGIN_TERM); + begin_node->base.location.end = parser->previous.end; + yp_begin_node_end_keyword_set(begin_node, &parser->previous); + + if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) { + yp_diagnostic_list_append( + &parser->error_list, + begin_node->else_clause->base.location.start, + begin_node->else_clause->base.location.end, + YP_ERR_BEGIN_LONELY_ELSE + ); + } + + return (yp_node_t *) begin_node; + } + case YP_TOKEN_KEYWORD_BEGIN_UPCASE: { + parser_lex(parser); + yp_token_t keyword = parser->previous; + + expect1(parser, YP_TOKEN_BRACE_LEFT, YP_ERR_BEGIN_UPCASE_BRACE); + yp_token_t opening = parser->previous; + yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_PREEXE); + + expect1(parser, YP_TOKEN_BRACE_RIGHT, YP_ERR_BEGIN_UPCASE_TERM); + yp_context_t context = parser->current_context->context; + if ((context != YP_CONTEXT_MAIN) && (context != YP_CONTEXT_PREEXE)) { + yp_diagnostic_list_append(&parser->error_list, keyword.start, keyword.end, YP_ERR_BEGIN_UPCASE_TOPLEVEL); + } + return (yp_node_t *) yp_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous); + } + case YP_TOKEN_KEYWORD_BREAK: + case YP_TOKEN_KEYWORD_NEXT: + case YP_TOKEN_KEYWORD_RETURN: { + parser_lex(parser); + + yp_token_t keyword = parser->previous; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + + if ( + token_begins_expression_p(parser->current.type) || + match2(parser, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR) + ) { + yp_binding_power_t binding_power = yp_binding_powers[parser->current.type].left; + + if (binding_power == YP_BINDING_POWER_UNSET || binding_power >= YP_BINDING_POWER_RANGE) { + parse_arguments(parser, &arguments, false, YP_TOKEN_EOF); + } + } + + switch (keyword.type) { + case YP_TOKEN_KEYWORD_BREAK: + return (yp_node_t *) 
yp_break_node_create(parser, &keyword, arguments.arguments); + case YP_TOKEN_KEYWORD_NEXT: + return (yp_node_t *) yp_next_node_create(parser, &keyword, arguments.arguments); + case YP_TOKEN_KEYWORD_RETURN: { + if ( + (parser->current_context->context == YP_CONTEXT_CLASS) || + (parser->current_context->context == YP_CONTEXT_MODULE) + ) { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_RETURN_INVALID); + } + return (yp_node_t *) yp_return_node_create(parser, &keyword, arguments.arguments); + } + default: + assert(false && "unreachable"); + return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end); + } + } + case YP_TOKEN_KEYWORD_SUPER: { + parser_lex(parser); + + yp_token_t keyword = parser->previous; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + parse_arguments_list(parser, &arguments, true); + + if (arguments.opening_loc.start == NULL && arguments.arguments == NULL) { + return (yp_node_t *) yp_forwarding_super_node_create(parser, &keyword, &arguments); + } + + return (yp_node_t *) yp_super_node_create(parser, &keyword, &arguments); + } + case YP_TOKEN_KEYWORD_YIELD: { + parser_lex(parser); + + yp_token_t keyword = parser->previous; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + parse_arguments_list(parser, &arguments, false); + + return (yp_node_t *) yp_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc); + } + case YP_TOKEN_KEYWORD_CLASS: { + parser_lex(parser); + yp_token_t class_keyword = parser->previous; + yp_do_loop_stack_push(parser, false); + + if (accept1(parser, YP_TOKEN_LESS_LESS)) { + yp_token_t operator = parser->previous; + yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_NOT, YP_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS); + + yp_parser_scope_push(parser, true); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + + yp_node_t *statements = NULL; + if (!match3(parser, 
YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_SCLASS); + yp_accepts_block_stack_pop(parser); + } + + if (match2(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || YP_NODE_TYPE_P(statements, YP_STATEMENTS_NODE)); + statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements); + } + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_CLASS_TERM); + + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + yp_do_loop_stack_pop(parser); + return (yp_node_t *) yp_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous); + } + + yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_CLASS_NAME); + yp_token_t name = parser->previous; + if (name.type != YP_TOKEN_CONSTANT) { + yp_diagnostic_list_append(&parser->error_list, name.start, name.end, YP_ERR_CLASS_NAME); + } + + yp_token_t inheritance_operator; + yp_node_t *superclass; + + if (match1(parser, YP_TOKEN_LESS)) { + inheritance_operator = parser->current; + lex_state_set(parser, YP_LEX_STATE_BEG); + + parser->command_start = true; + parser_lex(parser); + + superclass = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_CLASS_SUPERCLASS); + } else { + inheritance_operator = not_provided(parser); + superclass = NULL; + } + + yp_parser_scope_push(parser, true); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + yp_node_t *statements = NULL; + + if (!match3(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_CLASS); + yp_accepts_block_stack_pop(parser); + } + + if (match2(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + 
assert(statements == NULL || YP_NODE_TYPE_P(statements, YP_STATEMENTS_NODE)); + statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements); + } + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_CLASS_TERM); + + if (context_def_p(parser)) { + yp_diagnostic_list_append(&parser->error_list, class_keyword.start, class_keyword.end, YP_ERR_CLASS_IN_METHOD); + } + + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + yp_do_loop_stack_pop(parser); + + if (!YP_NODE_TYPE_P(constant_path, YP_CONSTANT_PATH_NODE) && !(YP_NODE_TYPE_P(constant_path, YP_CONSTANT_READ_NODE))) { + yp_diagnostic_list_append(&parser->error_list, constant_path->location.start, constant_path->location.end, YP_ERR_CLASS_NAME); + } + + return (yp_node_t *) yp_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous); + } + case YP_TOKEN_KEYWORD_DEF: { + yp_token_t def_keyword = parser->current; + + yp_node_t *receiver = NULL; + yp_token_t operator = not_provided(parser); + yp_token_t name = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = def_keyword.end, .end = def_keyword.end }; + + context_push(parser, YP_CONTEXT_DEF_PARAMS); + parser_lex(parser); + + switch (parser->current.type) { + case YP_CASE_OPERATOR: + yp_parser_scope_push(parser, true); + lex_state_set(parser, YP_LEX_STATE_ENDFN); + parser_lex(parser); + name = parser->previous; + break; + case YP_TOKEN_IDENTIFIER: { + parser_lex(parser); + + if (match2(parser, YP_TOKEN_DOT, YP_TOKEN_COLON_COLON)) { + receiver = parse_variable_call(parser); + + yp_parser_scope_push(parser, true); + lex_state_set(parser, YP_LEX_STATE_FNAME); + parser_lex(parser); + + operator = parser->previous; + name = parse_method_definition_name(parser); + } else { + yp_parser_scope_push(parser, true); + name = parser->previous; + } + + break; + } + case YP_TOKEN_CONSTANT: + case YP_TOKEN_INSTANCE_VARIABLE: + case 
YP_TOKEN_CLASS_VARIABLE: + case YP_TOKEN_GLOBAL_VARIABLE: + case YP_TOKEN_KEYWORD_NIL: + case YP_TOKEN_KEYWORD_SELF: + case YP_TOKEN_KEYWORD_TRUE: + case YP_TOKEN_KEYWORD_FALSE: + case YP_TOKEN_KEYWORD___FILE__: + case YP_TOKEN_KEYWORD___LINE__: + case YP_TOKEN_KEYWORD___ENCODING__: { + yp_parser_scope_push(parser, true); + parser_lex(parser); + yp_token_t identifier = parser->previous; + + if (match2(parser, YP_TOKEN_DOT, YP_TOKEN_COLON_COLON)) { + lex_state_set(parser, YP_LEX_STATE_FNAME); + parser_lex(parser); + operator = parser->previous; + + switch (identifier.type) { + case YP_TOKEN_CONSTANT: + receiver = (yp_node_t *) yp_constant_read_node_create(parser, &identifier); + break; + case YP_TOKEN_INSTANCE_VARIABLE: + receiver = (yp_node_t *) yp_instance_variable_read_node_create(parser, &identifier); + break; + case YP_TOKEN_CLASS_VARIABLE: + receiver = (yp_node_t *) yp_class_variable_read_node_create(parser, &identifier); + break; + case YP_TOKEN_GLOBAL_VARIABLE: + receiver = (yp_node_t *) yp_global_variable_read_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD_NIL: + receiver = (yp_node_t *) yp_nil_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD_SELF: + receiver = (yp_node_t *) yp_self_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD_TRUE: + receiver = (yp_node_t *) yp_true_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD_FALSE: + receiver = (yp_node_t *)yp_false_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD___FILE__: + receiver = (yp_node_t *) yp_source_file_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD___LINE__: + receiver = (yp_node_t *) yp_source_line_node_create(parser, &identifier); + break; + case YP_TOKEN_KEYWORD___ENCODING__: + receiver = (yp_node_t *) yp_source_encoding_node_create(parser, &identifier); + break; + default: + break; + } + + name = parse_method_definition_name(parser); + } else { + name = identifier; + } + break; + } + 
case YP_TOKEN_PARENTHESIS_LEFT: { + parser_lex(parser); + yp_token_t lparen = parser->previous; + yp_node_t *expression = parse_expression(parser, YP_BINDING_POWER_STATEMENT, YP_ERR_DEF_RECEIVER); + + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN); + yp_token_t rparen = parser->previous; + + lex_state_set(parser, YP_LEX_STATE_FNAME); + expect2(parser, YP_TOKEN_DOT, YP_TOKEN_COLON_COLON, YP_ERR_DEF_RECEIVER_TERM); + + operator = parser->previous; + receiver = (yp_node_t *) yp_parentheses_node_create(parser, &lparen, expression, &rparen); + + yp_parser_scope_push(parser, true); + name = parse_method_definition_name(parser); + break; + } + default: + yp_parser_scope_push(parser, true); + name = parse_method_definition_name(parser); + break; + } + + // If, after all that, we were unable to find a method name, add an + // error to the error list. + if (name.type == YP_TOKEN_MISSING) { + yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_DEF_NAME); + } + + yp_token_t lparen; + yp_token_t rparen; + yp_parameters_node_t *params; + + switch (parser->current.type) { + case YP_TOKEN_PARENTHESIS_LEFT: { + parser_lex(parser); + lparen = parser->previous; + + if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + params = NULL; + } else { + params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, true, false, true); + } + + lex_state_set(parser, YP_LEX_STATE_BEG); + parser->command_start = true; + + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_DEF_PARAMS_TERM_PAREN); + rparen = parser->previous; + break; + } + case YP_CASE_PARAMETER: { + // If we're about to lex a label, we need to add the label + // state to make sure the next newline is ignored. 
+ if (parser->current.type == YP_TOKEN_LABEL) { + lex_state_set(parser, parser->lex_state | YP_LEX_STATE_LABEL); + } + + lparen = not_provided(parser); + rparen = not_provided(parser); + params = parse_parameters(parser, YP_BINDING_POWER_DEFINED, false, false, true); + break; + } + default: { + lparen = not_provided(parser); + rparen = not_provided(parser); + params = NULL; + break; + } + } + + context_pop(parser); + yp_node_t *statements = NULL; + yp_token_t equal; + yp_token_t end_keyword; + + if (accept1(parser, YP_TOKEN_EQUAL)) { + if (token_is_setter_name(&name)) { + yp_diagnostic_list_append(&parser->error_list, name.start, name.end, YP_ERR_DEF_ENDLESS_SETTER); + } + equal = parser->previous; + + context_push(parser, YP_CONTEXT_DEF); + statements = (yp_node_t *) yp_statements_node_create(parser); + + yp_node_t *statement = parse_expression(parser, YP_BINDING_POWER_DEFINED + 1, YP_ERR_DEF_ENDLESS); + + if (accept1(parser, YP_TOKEN_KEYWORD_RESCUE_MODIFIER)) { + yp_token_t rescue_keyword = parser->previous; + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_RESCUE_MODIFIER_VALUE); + yp_rescue_modifier_node_t *rescue_node = yp_rescue_modifier_node_create(parser, statement, &rescue_keyword, value); + statement = (yp_node_t *)rescue_node; + } + + yp_statements_node_body_append((yp_statements_node_t *) statements, statement); + context_pop(parser); + end_keyword = not_provided(parser); + } else { + equal = not_provided(parser); + + if (lparen.type == YP_TOKEN_NOT_PROVIDED) { + lex_state_set(parser, YP_LEX_STATE_BEG); + parser->command_start = true; + expect2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_ERR_DEF_PARAMS_TERM); + } else { + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + } + + yp_accepts_block_stack_push(parser, true); + yp_do_loop_stack_push(parser, false); + + if (!match3(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = 
(yp_node_t *) parse_statements(parser, YP_CONTEXT_DEF); + yp_accepts_block_stack_pop(parser); + } + + if (match2(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || YP_NODE_TYPE_P(statements, YP_STATEMENTS_NODE)); + statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements); + } + + yp_accepts_block_stack_pop(parser); + yp_do_loop_stack_pop(parser); + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_DEF_TERM); + end_keyword = parser->previous; + } + + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + + return (yp_node_t *) yp_def_node_create( + parser, + &name, + receiver, + params, + statements, + &locals, + &def_keyword, + &operator, + &lparen, + &rparen, + &equal, + &end_keyword + ); + } + case YP_TOKEN_KEYWORD_DEFINED: { + parser_lex(parser); + yp_token_t keyword = parser->previous; + + yp_token_t lparen; + yp_token_t rparen; + yp_node_t *expression; + + if (accept1(parser, YP_TOKEN_PARENTHESIS_LEFT)) { + lparen = parser->previous; + expression = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_DEFINED_EXPRESSION); + + if (parser->recovering) { + rparen = not_provided(parser); + } else { + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN); + rparen = parser->previous; + } + } else { + lparen = not_provided(parser); + rparen = not_provided(parser); + expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_DEFINED_EXPRESSION); + } + + return (yp_node_t *) yp_defined_node_create( + parser, + &lparen, + expression, + &rparen, + &YP_LOCATION_TOKEN_VALUE(&keyword) + ); + } + case YP_TOKEN_KEYWORD_END_UPCASE: { + parser_lex(parser); + yp_token_t keyword = parser->previous; + + expect1(parser, YP_TOKEN_BRACE_LEFT, YP_ERR_END_UPCASE_BRACE); + yp_token_t opening = parser->previous; + yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_POSTEXE); + + expect1(parser, YP_TOKEN_BRACE_RIGHT, 
YP_ERR_END_UPCASE_TERM); + return (yp_node_t *) yp_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous); + } + case YP_TOKEN_KEYWORD_FALSE: + parser_lex(parser); + return (yp_node_t *)yp_false_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_FOR: { + parser_lex(parser); + yp_token_t for_keyword = parser->previous; + yp_node_t *index; + + // First, parse out the first index expression. + if (accept1(parser, YP_TOKEN_USTAR)) { + yp_token_t star_operator = parser->previous; + yp_node_t *name = NULL; + + if (token_begins_expression_p(parser->current.type)) { + name = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_EXPECT_EXPRESSION_AFTER_STAR); + } + + index = (yp_node_t *) yp_splat_node_create(parser, &star_operator, name); + } else if (token_begins_expression_p(parser->current.type)) { + index = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_EXPECT_EXPRESSION_AFTER_COMMA); + } else { + yp_diagnostic_list_append(&parser->error_list, for_keyword.start, for_keyword.end, YP_ERR_FOR_INDEX); + index = (yp_node_t *) yp_missing_node_create(parser, for_keyword.start, for_keyword.end); + } + + // Now, if there are multiple index expressions, parse them out. 
+ if (match1(parser, YP_TOKEN_COMMA)) { + index = parse_targets(parser, index, YP_BINDING_POWER_INDEX); + } else { + index = parse_target(parser, index); + } + + yp_do_loop_stack_push(parser, true); + + expect1(parser, YP_TOKEN_KEYWORD_IN, YP_ERR_FOR_IN); + yp_token_t in_keyword = parser->previous; + + yp_node_t *collection = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_FOR_COLLECTION); + yp_do_loop_stack_pop(parser); + + yp_token_t do_keyword; + if (accept1(parser, YP_TOKEN_KEYWORD_DO_LOOP)) { + do_keyword = parser->previous; + } else { + do_keyword = not_provided(parser); + } + + accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE); + yp_statements_node_t *statements = NULL; + + if (!accept1(parser, YP_TOKEN_KEYWORD_END)) { + statements = parse_statements(parser, YP_CONTEXT_FOR); + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_FOR_TERM); + } + + return (yp_node_t *) yp_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous); + } + case YP_TOKEN_KEYWORD_IF: + parser_lex(parser); + return parse_conditional(parser, YP_CONTEXT_IF); + case YP_TOKEN_KEYWORD_UNDEF: { + parser_lex(parser); + yp_undef_node_t *undef = yp_undef_node_create(parser, &parser->previous); + yp_node_t *name = parse_undef_argument(parser); + + if (YP_NODE_TYPE_P(name, YP_MISSING_NODE)) { + yp_node_destroy(parser, name); + } else { + yp_undef_node_append(undef, name); + + while (match1(parser, YP_TOKEN_COMMA)) { + lex_state_set(parser, YP_LEX_STATE_FNAME | YP_LEX_STATE_FITEM); + parser_lex(parser); + name = parse_undef_argument(parser); + + if (YP_NODE_TYPE_P(name, YP_MISSING_NODE)) { + yp_node_destroy(parser, name); + break; + } + + yp_undef_node_append(undef, name); + } + } + + return (yp_node_t *) undef; + } + case YP_TOKEN_KEYWORD_NOT: { + parser_lex(parser); + + yp_token_t message = parser->previous; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + yp_node_t *receiver = NULL; + + accept1(parser, YP_TOKEN_NEWLINE); + + 
if (accept1(parser, YP_TOKEN_PARENTHESIS_LEFT)) { + arguments.opening_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + + if (accept1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + arguments.closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + } else { + receiver = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_NOT_EXPRESSION); + yp_conditional_predicate(receiver); + + if (!parser->recovering) { + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN); + arguments.closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + } + } + } else { + receiver = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_NOT_EXPRESSION); + yp_conditional_predicate(receiver); + } + + return (yp_node_t *) yp_call_node_not_create(parser, receiver, &message, &arguments); + } + case YP_TOKEN_KEYWORD_UNLESS: + parser_lex(parser); + return parse_conditional(parser, YP_CONTEXT_UNLESS); + case YP_TOKEN_KEYWORD_MODULE: { + parser_lex(parser); + + yp_token_t module_keyword = parser->previous; + yp_node_t *constant_path = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_MODULE_NAME); + yp_token_t name; + + // If we can recover from a syntax error that occurred while parsing + // the name of the module, then we'll handle that here. 
+ if (YP_NODE_TYPE_P(constant_path, YP_MISSING_NODE)) { + yp_token_t missing = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + return (yp_node_t *) yp_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing); + } + + while (accept1(parser, YP_TOKEN_COLON_COLON)) { + yp_token_t double_colon = parser->previous; + + expect1(parser, YP_TOKEN_CONSTANT, YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + yp_node_t *constant = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous); + + constant_path = (yp_node_t *) yp_constant_path_node_create(parser, constant_path, &double_colon, constant); + } + + // Here we retrieve the name of the module. If it wasn't a constant, + // then it's possible that `module foo` was passed, which is a + // syntax error. We handle that here as well. + name = parser->previous; + if (name.type != YP_TOKEN_CONSTANT) { + yp_diagnostic_list_append(&parser->error_list, name.start, name.end, YP_ERR_MODULE_NAME); + } + + yp_parser_scope_push(parser, true); + accept2(parser, YP_TOKEN_SEMICOLON, YP_TOKEN_NEWLINE); + yp_node_t *statements = NULL; + + if (!match3(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = (yp_node_t *) parse_statements(parser, YP_CONTEXT_MODULE); + yp_accepts_block_stack_pop(parser); + } + + if (match2(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + assert(statements == NULL || YP_NODE_TYPE_P(statements, YP_STATEMENTS_NODE)); + statements = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) statements); + } + + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_MODULE_TERM); + + if (context_def_p(parser)) { + yp_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, 
YP_ERR_MODULE_IN_METHOD); + } + + return (yp_node_t *) yp_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous); + } + case YP_TOKEN_KEYWORD_NIL: + parser_lex(parser); + return (yp_node_t *) yp_nil_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_REDO: + parser_lex(parser); + return (yp_node_t *) yp_redo_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_RETRY: + parser_lex(parser); + return (yp_node_t *) yp_retry_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_SELF: + parser_lex(parser); + return (yp_node_t *) yp_self_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_TRUE: + parser_lex(parser); + return (yp_node_t *) yp_true_node_create(parser, &parser->previous); + case YP_TOKEN_KEYWORD_UNTIL: { + yp_do_loop_stack_push(parser, true); + parser_lex(parser); + yp_token_t keyword = parser->previous; + + yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_CONDITIONAL_UNTIL_PREDICATE); + yp_do_loop_stack_pop(parser); + + expect3(parser, YP_TOKEN_KEYWORD_DO_LOOP, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_ERR_CONDITIONAL_UNTIL_PREDICATE); + yp_statements_node_t *statements = NULL; + + if (!accept1(parser, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = parse_statements(parser, YP_CONTEXT_UNTIL); + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_UNTIL_TERM); + } + + return (yp_node_t *) yp_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0); + } + case YP_TOKEN_KEYWORD_WHILE: { + yp_do_loop_stack_push(parser, true); + parser_lex(parser); + yp_token_t keyword = parser->previous; + + yp_node_t *predicate = parse_expression(parser, YP_BINDING_POWER_COMPOSITION, YP_ERR_CONDITIONAL_WHILE_PREDICATE); + yp_do_loop_stack_pop(parser); + + expect3(parser, YP_TOKEN_KEYWORD_DO_LOOP, 
YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON, YP_ERR_CONDITIONAL_WHILE_PREDICATE); + yp_statements_node_t *statements = NULL; + + if (!accept1(parser, YP_TOKEN_KEYWORD_END)) { + yp_accepts_block_stack_push(parser, true); + statements = parse_statements(parser, YP_CONTEXT_WHILE); + yp_accepts_block_stack_pop(parser); + accept2(parser, YP_TOKEN_NEWLINE, YP_TOKEN_SEMICOLON); + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_WHILE_TERM); + } + + return (yp_node_t *) yp_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0); + } + case YP_TOKEN_PERCENT_LOWER_I: { + parser_lex(parser); + yp_array_node_t *array = yp_array_node_create(parser, &parser->previous); + + while (!match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + accept1(parser, YP_TOKEN_WORDS_SEP); + if (match1(parser, YP_TOKEN_STRING_END)) break; + + expect1(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_LIST_I_LOWER_ELEMENT); + + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + + yp_node_t *symbol = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_MINIMAL); + yp_array_node_elements_append(array, symbol); + } + + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_LIST_I_LOWER_TERM); + yp_array_node_close_set(array, &parser->previous); + + return (yp_node_t *) array; + } + case YP_TOKEN_PERCENT_UPPER_I: { + parser_lex(parser); + yp_array_node_t *array = yp_array_node_create(parser, &parser->previous); + + // This is the current node that we are parsing that will be added to the + // list of elements. + yp_node_t *current = NULL; + + while (!match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + switch (parser->current.type) { + case YP_TOKEN_WORDS_SEP: { + if (current == NULL) { + // If we hit a separator before we have any content, then we don't + // need to do anything. 
+ } else { + // If we hit a separator after we've hit content, then we need to + // append that content to the list and reset the current node. + yp_array_node_elements_append(array, current); + current = NULL; + } + + parser_lex(parser); + break; + } + case YP_TOKEN_STRING_CONTENT: { + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + + if (current == NULL) { + // If we hit content and the current node is NULL, then this is + // the first string content we've seen. In that case we're going + // to create a new string node and set that to the current. + parser_lex(parser); + current = (yp_node_t *) yp_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); + } else if (YP_NODE_TYPE_P(current, YP_INTERPOLATED_SYMBOL_NODE)) { + // If we hit string content and the current node is an + // interpolated string, then we need to append the string content + // to the list of child nodes. + yp_node_t *part = parse_string_part(parser); + yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part); + } else if (YP_NODE_TYPE_P(current, YP_SYMBOL_NODE)) { + // If we hit string content and the current node is a string node, + // then we need to convert the current node into an interpolated + // string and add the string content to the list of child nodes. 
+ yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_interpolated_symbol_node_t *interpolated = + yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + yp_interpolated_symbol_node_append(interpolated, current); + + yp_node_t *part = parse_string_part(parser); + yp_interpolated_symbol_node_append(interpolated, part); + current = (yp_node_t *) interpolated; + } else { + assert(false && "unreachable"); + } + + break; + } + case YP_TOKEN_EMBVAR: { + bool start_location_set = false; + if (current == NULL) { + // If we hit an embedded variable and the current node is NULL, + // then this is the start of a new string. We'll set the current + // node to a new interpolated string. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + current = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + } else if (YP_NODE_TYPE_P(current, YP_SYMBOL_NODE)) { + // If we hit an embedded variable and the current node is a string + // node, then we'll convert the current into an interpolated + // string and add the string node to the list of parts. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + + current = (yp_node_t *) yp_symbol_node_to_string_node(parser, (yp_symbol_node_t *) current); + yp_interpolated_symbol_node_append(interpolated, current); + interpolated->base.location.start = current->location.start; + start_location_set = true; + current = (yp_node_t *) interpolated; + } else { + // If we hit an embedded variable and the current node is an + // interpolated string, then we'll just add the embedded variable. 
+ } + + yp_node_t *part = parse_string_part(parser); + yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part); + if (!start_location_set) { + current->location.start = part->location.start; + } + break; + } + case YP_TOKEN_EMBEXPR_BEGIN: { + bool start_location_set = false; + if (current == NULL) { + // If we hit an embedded expression and the current node is NULL, + // then this is the start of a new string. We'll set the current + // node to a new interpolated string. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + current = (yp_node_t *) yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + } else if (YP_NODE_TYPE_P(current, YP_SYMBOL_NODE)) { + // If we hit an embedded expression and the current node is a + // string node, then we'll convert the current into an + // interpolated string and add the string node to the list of + // parts. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_interpolated_symbol_node_t *interpolated = yp_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + + current = (yp_node_t *) yp_symbol_node_to_string_node(parser, (yp_symbol_node_t *) current); + yp_interpolated_symbol_node_append(interpolated, current); + interpolated->base.location.start = current->location.start; + start_location_set = true; + current = (yp_node_t *) interpolated; + } else if (YP_NODE_TYPE_P(current, YP_INTERPOLATED_SYMBOL_NODE)) { + // If we hit an embedded expression and the current node is an + // interpolated string, then we'll just continue on. 
+ } else { + assert(false && "unreachable"); + } + + yp_node_t *part = parse_string_part(parser); + yp_interpolated_symbol_node_append((yp_interpolated_symbol_node_t *) current, part); + if (!start_location_set) { + current->location.start = part->location.start; + } + break; + } + default: + expect1(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_LIST_I_UPPER_ELEMENT); + parser_lex(parser); + break; + } + } + + // If we have a current node, then we need to append it to the list. + if (current) { + yp_array_node_elements_append(array, current); + } + + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_LIST_I_UPPER_TERM); + yp_array_node_close_set(array, &parser->previous); + + return (yp_node_t *) array; + } + case YP_TOKEN_PERCENT_LOWER_W: { + parser_lex(parser); + yp_array_node_t *array = yp_array_node_create(parser, &parser->previous); + + // skip all leading whitespaces + accept1(parser, YP_TOKEN_WORDS_SEP); + + while (!match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + accept1(parser, YP_TOKEN_WORDS_SEP); + if (match1(parser, YP_TOKEN_STRING_END)) break; + + expect1(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_LIST_W_LOWER_ELEMENT); + + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_node_t *string = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_WHITESPACE); + yp_array_node_elements_append(array, string); + } + + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_LIST_W_LOWER_TERM); + yp_array_node_close_set(array, &parser->previous); + + return (yp_node_t *) array; + } + case YP_TOKEN_PERCENT_UPPER_W: { + parser_lex(parser); + yp_array_node_t *array = yp_array_node_create(parser, &parser->previous); + + // This is the current node that we are parsing that will be added to the + // list of elements. 
+ yp_node_t *current = NULL; + + while (!match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + switch (parser->current.type) { + case YP_TOKEN_WORDS_SEP: { + if (current == NULL) { + // If we hit a separator before we have any content, then we don't + // need to do anything. + } else { + // If we hit a separator after we've hit content, then we need to + // append that content to the list and reset the current node. + yp_array_node_elements_append(array, current); + current = NULL; + } + + parser_lex(parser); + break; + } + case YP_TOKEN_STRING_CONTENT: { + if (current == NULL) { + // If we hit content and the current node is NULL, then this is + // the first string content we've seen. In that case we're going + // to create a new string node and set that to the current. + current = parse_string_part(parser); + } else if (YP_NODE_TYPE_P(current, YP_INTERPOLATED_STRING_NODE)) { + // If we hit string content and the current node is an + // interpolated string, then we need to append the string content + // to the list of child nodes. + yp_node_t *part = parse_string_part(parser); + yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part); + } else if (YP_NODE_TYPE_P(current, YP_STRING_NODE)) { + // If we hit string content and the current node is a string node, + // then we need to convert the current node into an interpolated + // string and add the string content to the list of child nodes. 
+ yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_interpolated_string_node_t *interpolated = + yp_interpolated_string_node_create(parser, &opening, NULL, &closing); + yp_interpolated_string_node_append(interpolated, current); + + yp_node_t *part = parse_string_part(parser); + yp_interpolated_string_node_append(interpolated, part); + current = (yp_node_t *) interpolated; + } else { + assert(false && "unreachable"); + } + + break; + } + case YP_TOKEN_EMBVAR: { + if (current == NULL) { + // If we hit an embedded variable and the current node is NULL, + // then this is the start of a new string. We'll set the current + // node to a new interpolated string. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + current = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, NULL, &closing); + } else if (YP_NODE_TYPE_P(current, YP_STRING_NODE)) { + // If we hit an embedded variable and the current node is a string + // node, then we'll convert the current into an interpolated + // string and add the string node to the list of parts. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_interpolated_string_node_t *interpolated = yp_interpolated_string_node_create(parser, &opening, NULL, &closing); + yp_interpolated_string_node_append(interpolated, current); + current = (yp_node_t *) interpolated; + } else { + // If we hit an embedded variable and the current node is an + // interpolated string, then we'll just add the embedded variable. + } + + yp_node_t *part = parse_string_part(parser); + yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part); + break; + } + case YP_TOKEN_EMBEXPR_BEGIN: { + if (current == NULL) { + // If we hit an embedded expression and the current node is NULL, + // then this is the start of a new string. We'll set the current + // node to a new interpolated string. 
+ yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + current = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening, NULL, &closing); + } else if (YP_NODE_TYPE_P(current, YP_STRING_NODE)) { + // If we hit an embedded expression and the current node is a + // string node, then we'll convert the current into an + // interpolated string and add the string node to the list of + // parts. + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_interpolated_string_node_t *interpolated = yp_interpolated_string_node_create(parser, &opening, NULL, &closing); + yp_interpolated_string_node_append(interpolated, current); + current = (yp_node_t *) interpolated; + } else if (YP_NODE_TYPE_P(current, YP_INTERPOLATED_STRING_NODE)) { + // If we hit an embedded expression and the current node is an + // interpolated string, then we'll just continue on. + } else { + assert(false && "unreachable"); + } + + yp_node_t *part = parse_string_part(parser); + yp_interpolated_string_node_append((yp_interpolated_string_node_t *) current, part); + break; + } + default: + expect1(parser, YP_TOKEN_STRING_CONTENT, YP_ERR_LIST_W_UPPER_ELEMENT); + parser_lex(parser); + break; + } + } + + // If we have a current node, then we need to append it to the list. + if (current) { + yp_array_node_elements_append(array, current); + } + + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_LIST_W_UPPER_TERM); + yp_array_node_close_set(array, &parser->previous); + + return (yp_node_t *) array; + } + case YP_TOKEN_REGEXP_BEGIN: { + yp_token_t opening = parser->current; + parser_lex(parser); + + if (match1(parser, YP_TOKEN_REGEXP_END)) { + // If we get here, then we have an end immediately after a start. In + // that case we'll create an empty content token and return an + // uninterpolated regular expression. 
+ yp_token_t content = (yp_token_t) { + .type = YP_TOKEN_STRING_CONTENT, + .start = parser->previous.end, + .end = parser->previous.end + }; + + parser_lex(parser); + return (yp_node_t *) yp_regular_expression_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } + + yp_interpolated_regular_expression_node_t *node; + + if (match1(parser, YP_TOKEN_STRING_CONTENT)) { + // In this case we've hit string content so we know the regular + // expression at least has something in it. We'll need to check if the + // following token is the end (in which case we can return a plain + // regular expression) or if it's not then it has interpolation. + yp_token_t content = parser->current; + parser_lex(parser); + + // If we hit an end, then we can create a regular expression node + // without interpolation, which can be represented more succinctly and + // more easily compiled. + if (accept1(parser, YP_TOKEN_REGEXP_END)) { + return (yp_node_t *) yp_regular_expression_node_create_and_unescape(parser, &opening, &content, &parser->previous, YP_UNESCAPE_ALL); + } + + // If we get here, then we have interpolation so we'll need to create + // a regular expression node with interpolation. + node = yp_interpolated_regular_expression_node_create(parser, &opening); + + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); + yp_interpolated_regular_expression_node_append(node, part); + } else { + // If the first part of the body of the regular expression is not a + // string content, then we have interpolation and we need to create an + // interpolated regular expression node. + node = yp_interpolated_regular_expression_node_create(parser, &opening); + } + + // Now that we're here and we have interpolation, we'll parse all of the + // parts into the list. 
+ while (!match2(parser, YP_TOKEN_REGEXP_END, YP_TOKEN_EOF)) { + yp_node_t *part = parse_string_part(parser); + if (part != NULL) { + yp_interpolated_regular_expression_node_append(node, part); + } + } + + expect1(parser, YP_TOKEN_REGEXP_END, YP_ERR_REGEXP_TERM); + yp_interpolated_regular_expression_node_closing_set(node, &parser->previous); + + return (yp_node_t *) node; + } + case YP_TOKEN_BACKTICK: + case YP_TOKEN_PERCENT_LOWER_X: { + parser_lex(parser); + yp_token_t opening = parser->previous; + + // When we get here, we don't know if this string is going to have + // interpolation or not, even though it is allowed. Still, we want to be + // able to return a string node without interpolation if we can since + // it'll be faster. + if (match1(parser, YP_TOKEN_STRING_END)) { + // If we get here, then we have an end immediately after a start. In + // that case we'll create an empty content token and return an + // uninterpolated string. + yp_token_t content = (yp_token_t) { + .type = YP_TOKEN_STRING_CONTENT, + .start = parser->previous.end, + .end = parser->previous.end + }; + + parser_lex(parser); + return (yp_node_t *) yp_xstring_node_create(parser, &opening, &content, &parser->previous); + } + + yp_interpolated_x_string_node_t *node; + + if (match1(parser, YP_TOKEN_STRING_CONTENT)) { + // In this case we've hit string content so we know the string at least + // has something in it. We'll need to check if the following token is + // the end (in which case we can return a plain string) or if it's not + // then it has interpolation. + yp_token_t content = parser->current; + parser_lex(parser); + + if (accept1(parser, YP_TOKEN_STRING_END)) { + return (yp_node_t *) yp_xstring_node_create_and_unescape(parser, &opening, &content, &parser->previous); + } + + // If we get here, then we have interpolation so we'll need to create + // a string node with interpolation. 
+ node = yp_interpolated_xstring_node_create(parser, &opening, &opening); + + yp_token_t opening = not_provided(parser); + yp_token_t closing = not_provided(parser); + yp_node_t *part = (yp_node_t *) yp_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, YP_UNESCAPE_ALL); + yp_interpolated_xstring_node_append(node, part); + } else { + // If the first part of the body of the string is not a string content, + // then we have interpolation and we need to create an interpolated + // string node. + node = yp_interpolated_xstring_node_create(parser, &opening, &opening); + } + + while (!match2(parser, YP_TOKEN_STRING_END, YP_TOKEN_EOF)) { + yp_node_t *part = parse_string_part(parser); + if (part != NULL) { + yp_interpolated_xstring_node_append(node, part); + } + } + + expect1(parser, YP_TOKEN_STRING_END, YP_ERR_XSTRING_TERM); + yp_interpolated_xstring_node_closing_set(node, &parser->previous); + return (yp_node_t *) node; + } + case YP_TOKEN_USTAR: { + parser_lex(parser); + + // * operators at the beginning of expressions are only valid in the + // context of a multiple assignment. We enforce that here. We'll + // still lex past it though and create a missing node place. 
+ if (binding_power != YP_BINDING_POWER_STATEMENT) { + return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end); + } + + yp_token_t operator = parser->previous; + yp_node_t *name = NULL; + + if (token_begins_expression_p(parser->current.type)) { + name = parse_expression(parser, YP_BINDING_POWER_INDEX, YP_ERR_EXPECT_EXPRESSION_AFTER_STAR); + } + + yp_node_t *splat = (yp_node_t *) yp_splat_node_create(parser, &operator, name); + + if (match1(parser, YP_TOKEN_COMMA)) { + return parse_targets_validate(parser, splat, YP_BINDING_POWER_INDEX); + } else { + return parse_target_validate(parser, splat); + } + } + case YP_TOKEN_BANG: { + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, YP_ERR_UNARY_RECEIVER_BANG); + yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "!"); + + yp_conditional_predicate(receiver); + return (yp_node_t *) node; + } + case YP_TOKEN_TILDE: { + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, YP_ERR_UNARY_RECEIVER_TILDE); + yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "~"); + + return (yp_node_t *) node; + } + case YP_TOKEN_UMINUS: { + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, YP_ERR_UNARY_RECEIVER_MINUS); + yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "-@"); + + return (yp_node_t *) node; + } + case YP_TOKEN_UMINUS_NUM: { + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *node = parse_expression(parser, yp_binding_powers[parser->previous.type].right, YP_ERR_UNARY_RECEIVER_MINUS); + + if (accept1(parser, YP_TOKEN_STAR_STAR)) { + yp_token_t 
exponent_operator = parser->previous; + yp_node_t *exponent = parse_expression(parser, yp_binding_powers[exponent_operator.type].right, YP_ERR_EXPECT_ARGUMENT); + node = (yp_node_t *) yp_call_node_binary_create(parser, node, &exponent_operator, exponent); + node = (yp_node_t *) yp_call_node_unary_create(parser, &operator, node, "-@"); + } else { + switch (YP_NODE_TYPE(node)) { + case YP_INTEGER_NODE: + case YP_FLOAT_NODE: + case YP_RATIONAL_NODE: + case YP_IMAGINARY_NODE: + parse_negative_numeric(node); + break; + default: + node = (yp_node_t *) yp_call_node_unary_create(parser, &operator, node, "-@"); + break; + } + } + + return node; + } + case YP_TOKEN_MINUS_GREATER: { + int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting; + parser->lambda_enclosure_nesting = parser->enclosure_nesting; + + yp_accepts_block_stack_push(parser, true); + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_parser_scope_push(parser, false); + yp_block_parameters_node_t *params; + + switch (parser->current.type) { + case YP_TOKEN_PARENTHESIS_LEFT: { + parser->current_scope->explicit_params = true; + yp_token_t opening = parser->current; + parser_lex(parser); + + if (match1(parser, YP_TOKEN_PARENTHESIS_RIGHT)) { + params = yp_block_parameters_node_create(parser, NULL, &opening); + } else { + params = parse_block_parameters(parser, false, &opening, true); + } + + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_PARENTHESIS_RIGHT, YP_ERR_EXPECT_RPAREN); + + yp_block_parameters_node_closing_set(params, &parser->previous); + break; + } + case YP_CASE_PARAMETER: { + parser->current_scope->explicit_params = true; + yp_accepts_block_stack_push(parser, false); + yp_token_t opening = not_provided(parser); + params = parse_block_parameters(parser, false, &opening, true); + yp_accepts_block_stack_pop(parser); + break; + } + default: { + params = NULL; + break; + } + } + + yp_token_t opening; + yp_node_t *body = NULL; + 
parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting; + + if (accept1(parser, YP_TOKEN_LAMBDA_BEGIN)) { + opening = parser->previous; + + if (!accept1(parser, YP_TOKEN_BRACE_RIGHT)) { + body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_BRACES); + expect1(parser, YP_TOKEN_BRACE_RIGHT, YP_ERR_LAMBDA_TERM_BRACE); + } + } else { + expect1(parser, YP_TOKEN_KEYWORD_DO, YP_ERR_LAMBDA_OPEN); + opening = parser->previous; + + if (!match3(parser, YP_TOKEN_KEYWORD_END, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + yp_accepts_block_stack_push(parser, true); + body = (yp_node_t *) parse_statements(parser, YP_CONTEXT_LAMBDA_DO_END); + yp_accepts_block_stack_pop(parser); + } + + if (match2(parser, YP_TOKEN_KEYWORD_RESCUE, YP_TOKEN_KEYWORD_ENSURE)) { + assert(body == NULL || YP_NODE_TYPE_P(body, YP_STATEMENTS_NODE)); + body = (yp_node_t *) parse_rescues_as_begin(parser, (yp_statements_node_t *) body); + } + + expect1(parser, YP_TOKEN_KEYWORD_END, YP_ERR_LAMBDA_TERM_END); + } + + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + yp_accepts_block_stack_pop(parser); + return (yp_node_t *) yp_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, params, body); + } + case YP_TOKEN_UPLUS: { + parser_lex(parser); + + yp_token_t operator = parser->previous; + yp_node_t *receiver = parse_expression(parser, yp_binding_powers[parser->previous.type].right, YP_ERR_UNARY_RECEIVER_PLUS); + yp_call_node_t *node = yp_call_node_unary_create(parser, &operator, receiver, "+@"); + + return (yp_node_t *) node; + } + case YP_TOKEN_STRING_BEGIN: + return parse_strings(parser); + case YP_TOKEN_SYMBOL_BEGIN: { + yp_lex_mode_t lex_mode = *parser->lex_modes.current; + parser_lex(parser); + + return parse_symbol(parser, &lex_mode, YP_LEX_STATE_END); + } + default: + if (context_recoverable(parser, &parser->current)) { + parser->recovering = true; + } + + return (yp_node_t *) yp_missing_node_create(parser, 
parser->previous.start, parser->previous.end); + } +} + +static inline yp_node_t * +parse_assignment_value(yp_parser_t *parser, yp_binding_power_t previous_binding_power, yp_binding_power_t binding_power, yp_diagnostic_id_t diag_id) { + yp_node_t *value = parse_starred_expression(parser, binding_power, diag_id); + + if (previous_binding_power == YP_BINDING_POWER_STATEMENT && (YP_NODE_TYPE_P(value, YP_SPLAT_NODE) || match1(parser, YP_TOKEN_COMMA))) { + yp_token_t opening = not_provided(parser); + yp_array_node_t *array = yp_array_node_create(parser, &opening); + + yp_array_node_elements_append(array, value); + value = (yp_node_t *) array; + + while (accept1(parser, YP_TOKEN_COMMA)) { + yp_node_t *element = parse_starred_expression(parser, binding_power, YP_ERR_ARRAY_ELEMENT); + yp_array_node_elements_append(array, element); + if (YP_NODE_TYPE_P(element, YP_MISSING_NODE)) break; + } + } + + return value; +} + +// Ensures a call node that is about to become a call operator node does not +// have a block attached. If it does, then we'll need to add an error message +// and destroy the block. Ideally we would keep the node around so that +// consumers would still have access to it, but we don't have a great structure +// for that at the moment. 
+static void +parse_call_operator_write_block(yp_parser_t *parser, yp_call_node_t *call_node, const yp_token_t *operator) { + if (call_node->block != NULL) { + yp_diagnostic_list_append(&parser->error_list, operator->start, operator->end, YP_ERR_OPERATOR_WRITE_BLOCK); + yp_node_destroy(parser, (yp_node_t *) call_node->block); + call_node->block = NULL; + } +} + +static inline yp_node_t * +parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t previous_binding_power, yp_binding_power_t binding_power) { + yp_token_t token = parser->current; + + switch (token.type) { + case YP_TOKEN_EQUAL: { + switch (YP_NODE_TYPE(node)) { + case YP_CALL_NODE: { + // If we have no arguments to the call node and we need this + // to be a target then this is either a method call or a + // local variable write. This _must_ happen before the value + // is parsed because it could be referenced in the value. + yp_call_node_t *call_node = (yp_call_node_t *) node; + if (yp_call_node_variable_call_p(call_node)) { + yp_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end); + } + } + /* fallthrough */ + case YP_CASE_WRITABLE: { + parser_lex(parser); + yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_EQUAL); + return parse_write(parser, node, &token, value); + } + case YP_SPLAT_NODE: { + yp_splat_node_t *splat_node = (yp_splat_node_t *) node; + + switch (YP_NODE_TYPE(splat_node->expression)) { + case YP_CASE_WRITABLE: + parser_lex(parser); + yp_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_EQUAL); + return parse_write(parser, (yp_node_t *) splat_node, &token, value); + default: + break; + } + } + /* fallthrough */ + default: + parser_lex(parser); + + // In this case we have an = sign, but we don't know what it's for. We + // need to treat it as an error. 
For now, we'll mark it as an error + // and just skip right past it. + yp_diagnostic_list_append(&parser->error_list, token.start, token.end, YP_ERR_EXPECT_EXPRESSION_AFTER_EQUAL); + return node; + } + } + case YP_TOKEN_AMPERSAND_AMPERSAND_EQUAL: { + switch (YP_NODE_TYPE(node)) { + case YP_BACK_REFERENCE_READ_NODE: + case YP_NUMBERED_REFERENCE_READ_NODE: + yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_WRITE_TARGET_READONLY); + /* fallthrough */ + case YP_GLOBAL_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + yp_node_t *result = (yp_node_t *) yp_global_variable_and_write_node_create(parser, node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_CLASS_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + yp_node_t *result = (yp_node_t *) yp_class_variable_and_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_CONSTANT_PATH_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + return (yp_node_t *) yp_constant_path_and_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value); + } + case YP_CONSTANT_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + yp_node_t *result = (yp_node_t *) yp_constant_and_write_node_create(parser, (yp_constant_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_INSTANCE_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + yp_node_t 
*result = (yp_node_t *) yp_instance_variable_and_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_LOCAL_VARIABLE_READ_NODE: { + yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node; + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth); + + yp_node_destroy(parser, node); + return result; + } + case YP_CALL_NODE: { + // If we have a vcall (a method with no arguments and no + // receiver that could have been a local variable) then we + // will transform it into a local variable write. + if (yp_call_node_variable_call_p((yp_call_node_t *) node)) { + yp_location_t message_loc = ((yp_call_node_t *) node)->message_loc; + yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end); + + if (token_is_numbered_parameter(message_loc.start, message_loc.end)) { + yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, YP_ERR_PARAMETER_NUMBERED_RESERVED); + } + + parser_lex(parser); + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + yp_node_t *result = (yp_node_t *) yp_local_variable_and_write_node_create(parser, node, &token, value, constant_id, 0); + + yp_node_destroy(parser, node); + return result; + } + + parser_lex(parser); + node = parse_target(parser, node); + + assert(YP_NODE_TYPE_P(node, YP_CALL_NODE)); + parse_call_operator_write_block(parser, (yp_call_node_t *) node, &token); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + return (yp_node_t *) yp_call_and_write_node_create(parser, (yp_call_node_t *) node, &token, value); + } + case YP_MULTI_WRITE_NODE: { + 
parser_lex(parser); + yp_diagnostic_list_append(&parser->error_list, token.start, token.end, YP_ERR_AMPAMPEQ_MULTI_ASSIGN); + return node; + } + default: + parser_lex(parser); + + // In this case we have an &&= sign, but we don't know what it's for. + // We need to treat it as an error. For now, we'll mark it as an error + // and just skip right past it. + yp_diagnostic_list_append(&parser->error_list, token.start, token.end, YP_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ); + return node; + } + } + case YP_TOKEN_PIPE_PIPE_EQUAL: { + switch (YP_NODE_TYPE(node)) { + case YP_BACK_REFERENCE_READ_NODE: + case YP_NUMBERED_REFERENCE_READ_NODE: + yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_WRITE_TARGET_READONLY); + /* fallthrough */ + case YP_GLOBAL_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + yp_node_t *result = (yp_node_t *) yp_global_variable_or_write_node_create(parser, node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_CLASS_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + yp_node_t *result = (yp_node_t *) yp_class_variable_or_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_CONSTANT_PATH_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + return (yp_node_t *) yp_constant_path_or_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value); + } + case YP_CONSTANT_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + yp_node_t *result = (yp_node_t *) yp_constant_or_write_node_create(parser, 
(yp_constant_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_INSTANCE_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + yp_node_t *result = (yp_node_t *) yp_instance_variable_or_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_LOCAL_VARIABLE_READ_NODE: { + yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node; + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth); + + yp_node_destroy(parser, node); + return result; + } + case YP_CALL_NODE: { + // If we have a vcall (a method with no arguments and no + // receiver that could have been a local variable) then we + // will transform it into a local variable write. 
+ if (yp_call_node_variable_call_p((yp_call_node_t *) node)) { + yp_location_t message_loc = ((yp_call_node_t *) node)->message_loc; + yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end); + + if (token_is_numbered_parameter(message_loc.start, message_loc.end)) { + yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, YP_ERR_PARAMETER_NUMBERED_RESERVED); + } + + parser_lex(parser); + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + yp_node_t *result = (yp_node_t *) yp_local_variable_or_write_node_create(parser, node, &token, value, constant_id, 0); + + yp_node_destroy(parser, node); + return result; + } + + parser_lex(parser); + node = parse_target(parser, node); + + assert(YP_NODE_TYPE_P(node, YP_CALL_NODE)); + parse_call_operator_write_block(parser, (yp_call_node_t *) node, &token); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + return (yp_node_t *) yp_call_or_write_node_create(parser, (yp_call_node_t *) node, &token, value); + } + case YP_MULTI_WRITE_NODE: { + parser_lex(parser); + yp_diagnostic_list_append(&parser->error_list, token.start, token.end, YP_ERR_PIPEPIPEEQ_MULTI_ASSIGN); + return node; + } + default: + parser_lex(parser); + + // In this case we have an ||= sign, but we don't know what it's for. + // We need to treat it as an error. For now, we'll mark it as an error + // and just skip right past it. 
+ yp_diagnostic_list_append(&parser->error_list, token.start, token.end, YP_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ); + return node; + } + } + case YP_TOKEN_AMPERSAND_EQUAL: + case YP_TOKEN_CARET_EQUAL: + case YP_TOKEN_GREATER_GREATER_EQUAL: + case YP_TOKEN_LESS_LESS_EQUAL: + case YP_TOKEN_MINUS_EQUAL: + case YP_TOKEN_PERCENT_EQUAL: + case YP_TOKEN_PIPE_EQUAL: + case YP_TOKEN_PLUS_EQUAL: + case YP_TOKEN_SLASH_EQUAL: + case YP_TOKEN_STAR_EQUAL: + case YP_TOKEN_STAR_STAR_EQUAL: { + switch (YP_NODE_TYPE(node)) { + case YP_BACK_REFERENCE_READ_NODE: + case YP_NUMBERED_REFERENCE_READ_NODE: + yp_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, YP_ERR_WRITE_TARGET_READONLY); + /* fallthrough */ + case YP_GLOBAL_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + yp_node_t *result = (yp_node_t *) yp_global_variable_operator_write_node_create(parser, node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_CLASS_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + yp_node_t *result = (yp_node_t *) yp_class_variable_operator_write_node_create(parser, (yp_class_variable_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_CONSTANT_PATH_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return (yp_node_t *) yp_constant_path_operator_write_node_create(parser, (yp_constant_path_node_t *) node, &token, value); + } + case YP_CONSTANT_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + yp_node_t *result = (yp_node_t *) yp_constant_operator_write_node_create(parser, (yp_constant_read_node_t *) node, 
&token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_INSTANCE_VARIABLE_READ_NODE: { + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + yp_node_t *result = (yp_node_t *) yp_instance_variable_operator_write_node_create(parser, (yp_instance_variable_read_node_t *) node, &token, value); + + yp_node_destroy(parser, node); + return result; + } + case YP_LOCAL_VARIABLE_READ_NODE: { + yp_local_variable_read_node_t *cast = (yp_local_variable_read_node_t *) node; + parser_lex(parser); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth); + + yp_node_destroy(parser, node); + return result; + } + case YP_CALL_NODE: { + // If we have a vcall (a method with no arguments and no + // receiver that could have been a local variable) then we + // will transform it into a local variable write. 
+ if (yp_call_node_variable_call_p((yp_call_node_t *) node)) { + yp_location_t message_loc = ((yp_call_node_t *) node)->message_loc; + yp_constant_id_t constant_id = yp_parser_local_add_location(parser, message_loc.start, message_loc.end); + + if (token_is_numbered_parameter(message_loc.start, message_loc.end)) { + yp_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, YP_ERR_PARAMETER_NUMBERED_RESERVED); + } + + parser_lex(parser); + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + yp_node_t *result = (yp_node_t *) yp_local_variable_operator_write_node_create(parser, node, &token, value, constant_id, 0); + + yp_node_destroy(parser, node); + return result; + } + + parser_lex(parser); + node = parse_target(parser, node); + + assert(YP_NODE_TYPE_P(node, YP_CALL_NODE)); + parse_call_operator_write_block(parser, (yp_call_node_t *) node, &token); + + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return (yp_node_t *) yp_call_operator_write_node_create(parser, (yp_call_node_t *) node, &token, value); + } + case YP_MULTI_WRITE_NODE: { + parser_lex(parser); + yp_diagnostic_list_append(&parser->error_list, token.start, token.end, YP_ERR_OPERATOR_MULTI_ASSIGN); + return node; + } + default: + parser_lex(parser); + + // In this case we have an operator but we don't know what it's for. + // We need to treat it as an error. For now, we'll mark it as an error + // and just skip right past it. 
+ yp_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return node; + } + } + case YP_TOKEN_AMPERSAND_AMPERSAND: + case YP_TOKEN_KEYWORD_AND: { + parser_lex(parser); + + yp_node_t *right = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return (yp_node_t *) yp_and_node_create(parser, node, &token, right); + } + case YP_TOKEN_KEYWORD_OR: + case YP_TOKEN_PIPE_PIPE: { + parser_lex(parser); + + yp_node_t *right = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return (yp_node_t *) yp_or_node_create(parser, node, &token, right); + } + case YP_TOKEN_EQUAL_TILDE: { + // Note that we _must_ parse the value before adding the local + // variables in order to properly mirror the behavior of Ruby. For + // example, + // + // /(?<foo>bar)/ =~ foo + // + // In this case, `foo` should be a method call and not a local yet. + parser_lex(parser); + yp_node_t *argument = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + + // By default, we're going to create a call node and then return it. + yp_call_node_t *call = yp_call_node_binary_create(parser, node, &token, argument); + yp_node_t *result = (yp_node_t *) call; + + // If the receiver of this =~ is a regular expression node, then we + // need to introduce local variables for it based on its named + // capture groups. 
+ if (YP_NODE_TYPE_P(node, YP_REGULAR_EXPRESSION_NODE)) { + yp_string_list_t named_captures; + yp_string_list_init(&named_captures); + + const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc; + if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) { + yp_match_write_node_t *match = yp_match_write_node_create(parser, call); + + for (size_t index = 0; index < named_captures.length; index++) { + yp_string_t *name = &named_captures.strings[index]; + assert(name->type == YP_STRING_SHARED); + + yp_constant_id_t local = yp_parser_local_add_location(parser, name->source, name->source + name->length); + yp_constant_id_list_append(&match->locals, local); + } + + result = (yp_node_t *) match; + } + + yp_string_list_free(&named_captures); + } + + return result; + } + case YP_TOKEN_UAMPERSAND: + case YP_TOKEN_USTAR: + case YP_TOKEN_USTAR_STAR: + // The only times this will occur are when we are in an error state, + // but we'll put them in here so that errors can propagate. 
+ case YP_TOKEN_BANG_EQUAL: + case YP_TOKEN_BANG_TILDE: + case YP_TOKEN_EQUAL_EQUAL: + case YP_TOKEN_EQUAL_EQUAL_EQUAL: + case YP_TOKEN_LESS_EQUAL_GREATER: + case YP_TOKEN_GREATER: + case YP_TOKEN_GREATER_EQUAL: + case YP_TOKEN_LESS: + case YP_TOKEN_LESS_EQUAL: + case YP_TOKEN_CARET: + case YP_TOKEN_PIPE: + case YP_TOKEN_AMPERSAND: + case YP_TOKEN_GREATER_GREATER: + case YP_TOKEN_LESS_LESS: + case YP_TOKEN_MINUS: + case YP_TOKEN_PLUS: + case YP_TOKEN_PERCENT: + case YP_TOKEN_SLASH: + case YP_TOKEN_STAR: + case YP_TOKEN_STAR_STAR: { + parser_lex(parser); + + yp_node_t *argument = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + return (yp_node_t *) yp_call_node_binary_create(parser, node, &token, argument); + } + case YP_TOKEN_AMPERSAND_DOT: + case YP_TOKEN_DOT: { + parser_lex(parser); + yp_token_t operator = parser->previous; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + + // This if statement handles the foo.() syntax. + if (match1(parser, YP_TOKEN_PARENTHESIS_LEFT)) { + parse_arguments_list(parser, &arguments, true); + return (yp_node_t *) yp_call_node_shorthand_create(parser, node, &operator, &arguments); + } + + yp_token_t message; + + switch (parser->current.type) { + case YP_CASE_OPERATOR: + case YP_CASE_KEYWORD: + case YP_TOKEN_CONSTANT: + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: { + parser_lex(parser); + message = parser->previous; + break; + } + default: { + yp_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, YP_ERR_DEF_NAME); + message = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + } + } + + parse_arguments_list(parser, &arguments, true); + yp_call_node_t *call = yp_call_node_call_create(parser, node, &operator, &message, &arguments); + + if ( + (previous_binding_power == YP_BINDING_POWER_STATEMENT) && + arguments.arguments == NULL && + arguments.opening_loc.start == NULL && + match1(parser, 
YP_TOKEN_COMMA) + ) { + return parse_targets_validate(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX); + } else { + return (yp_node_t *) call; + } + } + case YP_TOKEN_DOT_DOT: + case YP_TOKEN_DOT_DOT_DOT: { + parser_lex(parser); + + yp_node_t *right = NULL; + if (token_begins_expression_p(parser->current.type)) { + right = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); + } + + return (yp_node_t *) yp_range_node_create(parser, node, &token, right); + } + case YP_TOKEN_KEYWORD_IF_MODIFIER: { + yp_token_t keyword = parser->current; + parser_lex(parser); + + yp_node_t *predicate = parse_expression(parser, binding_power, YP_ERR_CONDITIONAL_IF_PREDICATE); + return (yp_node_t *) yp_if_node_modifier_create(parser, node, &keyword, predicate); + } + case YP_TOKEN_KEYWORD_UNLESS_MODIFIER: { + yp_token_t keyword = parser->current; + parser_lex(parser); + + yp_node_t *predicate = parse_expression(parser, binding_power, YP_ERR_CONDITIONAL_UNLESS_PREDICATE); + return (yp_node_t *) yp_unless_node_modifier_create(parser, node, &keyword, predicate); + } + case YP_TOKEN_KEYWORD_UNTIL_MODIFIER: { + parser_lex(parser); + yp_statements_node_t *statements = yp_statements_node_create(parser); + yp_statements_node_body_append(statements, node); + + yp_node_t *predicate = parse_expression(parser, binding_power, YP_ERR_CONDITIONAL_UNTIL_PREDICATE); + return (yp_node_t *) yp_until_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_BEGIN_NODE) ? YP_LOOP_FLAGS_BEGIN_MODIFIER : 0); + } + case YP_TOKEN_KEYWORD_WHILE_MODIFIER: { + parser_lex(parser); + yp_statements_node_t *statements = yp_statements_node_create(parser); + yp_statements_node_body_append(statements, node); + + yp_node_t *predicate = parse_expression(parser, binding_power, YP_ERR_CONDITIONAL_WHILE_PREDICATE); + return (yp_node_t *) yp_while_node_modifier_create(parser, &token, predicate, statements, YP_NODE_TYPE_P(node, YP_BEGIN_NODE) ? 
YP_LOOP_FLAGS_BEGIN_MODIFIER : 0); + } + case YP_TOKEN_QUESTION_MARK: { + parser_lex(parser); + yp_node_t *true_expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_TERNARY_EXPRESSION_TRUE); + + if (parser->recovering) { + // If parsing the true expression of this ternary resulted in a syntax + // error that we can recover from, then we're going to put missing nodes + // and tokens into the remaining places. We want to be sure to do this + // before the `expect` function call to make sure it doesn't + // accidentally move past a ':' token that occurs after the syntax + // error. + yp_token_t colon = (yp_token_t) { .type = YP_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end }; + yp_node_t *false_expression = (yp_node_t *) yp_missing_node_create(parser, colon.start, colon.end); + + return (yp_node_t *) yp_if_node_ternary_create(parser, node, true_expression, &colon, false_expression); + } + + accept1(parser, YP_TOKEN_NEWLINE); + expect1(parser, YP_TOKEN_COLON, YP_ERR_TERNARY_COLON); + + yp_token_t colon = parser->previous; + yp_node_t *false_expression = parse_expression(parser, YP_BINDING_POWER_DEFINED, YP_ERR_TERNARY_EXPRESSION_FALSE); + + return (yp_node_t *) yp_if_node_ternary_create(parser, node, true_expression, &colon, false_expression); + } + case YP_TOKEN_COLON_COLON: { + parser_lex(parser); + yp_token_t delimiter = parser->previous; + + switch (parser->current.type) { + case YP_TOKEN_CONSTANT: { + parser_lex(parser); + yp_node_t *path; + + if ( + (parser->current.type == YP_TOKEN_PARENTHESIS_LEFT) || + (token_begins_expression_p(parser->current.type) || match3(parser, YP_TOKEN_UAMPERSAND, YP_TOKEN_USTAR, YP_TOKEN_USTAR_STAR)) + ) { + // If we have a constant immediately following a '::' operator, then + // this can either be a constant path or a method call, depending on + // what follows the constant. + // + // If we have parentheses, then this is a method call. That would + // look like Foo::Bar(). 
+ yp_token_t message = parser->previous; + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + + parse_arguments_list(parser, &arguments, true); + path = (yp_node_t *) yp_call_node_call_create(parser, node, &delimiter, &message, &arguments); + } else { + // Otherwise, this is a constant path. That would look like Foo::Bar. + yp_node_t *child = (yp_node_t *) yp_constant_read_node_create(parser, &parser->previous); + path = (yp_node_t *)yp_constant_path_node_create(parser, node, &delimiter, child); + } + + // If this is followed by a comma then it is a multiple assignment. + if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + return parse_targets_validate(parser, path, YP_BINDING_POWER_INDEX); + } + + return path; + } + case YP_CASE_OPERATOR: + case YP_CASE_KEYWORD: + case YP_TOKEN_IDENTIFIER: + case YP_TOKEN_METHOD_NAME: { + parser_lex(parser); + yp_token_t message = parser->previous; + + // If we have an identifier following a '::' operator, then it is for + // sure a method call. + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + parse_arguments_list(parser, &arguments, true); + yp_call_node_t *call = yp_call_node_call_create(parser, node, &delimiter, &message, &arguments); + + // If this is followed by a comma then it is a multiple assignment. + if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + return parse_targets_validate(parser, (yp_node_t *) call, YP_BINDING_POWER_INDEX); + } + + return (yp_node_t *) call; + } + case YP_TOKEN_PARENTHESIS_LEFT: { + // If we have a parenthesis following a '::' operator, then it is the + // method call shorthand. That would look like Foo::(bar). 
+ yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + parse_arguments_list(parser, &arguments, true); + + return (yp_node_t *) yp_call_node_shorthand_create(parser, node, &delimiter, &arguments); + } + default: { + yp_diagnostic_list_append(&parser->error_list, delimiter.start, delimiter.end, YP_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT); + yp_node_t *child = (yp_node_t *) yp_missing_node_create(parser, delimiter.start, delimiter.end); + return (yp_node_t *)yp_constant_path_node_create(parser, node, &delimiter, child); + } + } + } + case YP_TOKEN_KEYWORD_RESCUE_MODIFIER: { + parser_lex(parser); + accept1(parser, YP_TOKEN_NEWLINE); + yp_node_t *value = parse_expression(parser, binding_power, YP_ERR_RESCUE_MODIFIER_VALUE); + + return (yp_node_t *) yp_rescue_modifier_node_create(parser, node, &token, value); + } + case YP_TOKEN_BRACKET_LEFT: { + parser_lex(parser); + + yp_arguments_t arguments = YP_EMPTY_ARGUMENTS; + arguments.opening_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + + if (!accept1(parser, YP_TOKEN_BRACKET_RIGHT)) { + yp_accepts_block_stack_push(parser, true); + parse_arguments(parser, &arguments, false, YP_TOKEN_BRACKET_RIGHT); + yp_accepts_block_stack_pop(parser); + expect1(parser, YP_TOKEN_BRACKET_RIGHT, YP_ERR_EXPECT_RBRACKET); + } + + arguments.closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->previous); + + // If we have a comma after the closing bracket then this is a multiple + // assignment and we should parse the targets. + if (previous_binding_power == YP_BINDING_POWER_STATEMENT && match1(parser, YP_TOKEN_COMMA)) { + yp_call_node_t *aref = yp_call_node_aref_create(parser, node, &arguments); + return parse_targets_validate(parser, (yp_node_t *) aref, YP_BINDING_POWER_INDEX); + } + + // If we're at the end of the arguments, we can now check if there is a + // block node that starts with a {. If there is, then we can parse it and + // add it to the arguments. 
+ yp_block_node_t *block = NULL; + if (accept1(parser, YP_TOKEN_BRACE_LEFT)) { + block = parse_block(parser); + yp_arguments_validate_block(parser, &arguments, block); + } else if (yp_accepts_block_stack_p(parser) && accept1(parser, YP_TOKEN_KEYWORD_DO)) { + block = parse_block(parser); + } + + if (block != NULL) { + if (arguments.block != NULL) { + yp_diagnostic_list_append(&parser->error_list, block->base.location.start, block->base.location.end, YP_ERR_ARGUMENT_AFTER_BLOCK); + if (arguments.arguments == NULL) { + arguments.arguments = yp_arguments_node_create(parser); + } + yp_arguments_node_arguments_append(arguments.arguments, arguments.block); + } + + arguments.block = (yp_node_t *) block; + } + + return (yp_node_t *) yp_call_node_aref_create(parser, node, &arguments); + } + case YP_TOKEN_KEYWORD_IN: { + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = true; + + yp_token_t operator = parser->current; + parser->command_start = false; + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + + parser_lex(parser); + + yp_node_t *pattern = parse_pattern(parser, true, YP_ERR_PATTERN_EXPRESSION_AFTER_IN); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + + return (yp_node_t *) yp_match_predicate_node_create(parser, node, pattern, &operator); + } + case YP_TOKEN_EQUAL_GREATER: { + bool previous_pattern_matching_newlines = parser->pattern_matching_newlines; + parser->pattern_matching_newlines = true; + + yp_token_t operator = parser->current; + parser->command_start = false; + lex_state_set(parser, YP_LEX_STATE_BEG | YP_LEX_STATE_LABEL); + + parser_lex(parser); + + yp_node_t *pattern = parse_pattern(parser, true, YP_ERR_PATTERN_EXPRESSION_AFTER_HROCKET); + parser->pattern_matching_newlines = previous_pattern_matching_newlines; + + return (yp_node_t *) yp_match_required_node_create(parser, node, pattern, &operator); + } + default: + assert(false && "unreachable"); + 
return NULL; + } +} + +// Parse an expression at the given point of the parser using the given binding +// power to parse subsequent chains. If this function finds a syntax error, it +// will append the error message to the parser's error list. +// +// Consumers of this function should always check parser->recovering to +// determine if they need to perform additional cleanup. +static yp_node_t * +parse_expression(yp_parser_t *parser, yp_binding_power_t binding_power, yp_diagnostic_id_t diag_id) { + yp_token_t recovery = parser->previous; + yp_node_t *node = parse_expression_prefix(parser, binding_power); + + // If we found a syntax error, then the type of node returned by + // parse_expression_prefix is going to be a missing node. In that case we need + // to add the error message to the parser's error list. + if (YP_NODE_TYPE_P(node, YP_MISSING_NODE)) { + yp_diagnostic_list_append(&parser->error_list, recovery.end, recovery.end, diag_id); + return node; + } + + // Otherwise we'll look and see if the next token can be parsed as an infix + // operator. If it can, then we'll parse it using parse_expression_infix. + yp_binding_powers_t current_binding_powers; + while ( + current_binding_powers = yp_binding_powers[parser->current.type], + binding_power <= current_binding_powers.left && + current_binding_powers.binary + ) { + node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right); + } + + return node; +} + +static yp_node_t * +parse_program(yp_parser_t *parser) { + yp_parser_scope_push(parser, !parser->current_scope); + parser_lex(parser); + + yp_statements_node_t *statements = parse_statements(parser, YP_CONTEXT_MAIN); + if (!statements) { + statements = yp_statements_node_create(parser); + } + yp_constant_id_list_t locals = parser->current_scope->locals; + yp_parser_scope_pop(parser); + + // If this is an empty file, then we're still going to parse all of the + // statements in order to gather up all of the comments and such. 
Here we'll + // correct the location information. + if (yp_statements_node_body_length(statements) == 0) { + yp_statements_node_location_set(statements, parser->start, parser->start); + } + + return (yp_node_t *) yp_program_node_create(parser, &locals, statements); +} + +// Read a 32-bit unsigned integer from a pointer. This function is used to read +// the metadata that is passed into the parser from the Ruby implementation. It +// handles aligned and unaligned reads. +static uint32_t +yp_metadata_read_u32(const char *ptr) { + if (((uintptr_t) ptr) % sizeof(uint32_t) == 0) { + return *((uint32_t *) ptr); + } else { + uint32_t value; + memcpy(&value, ptr, sizeof(uint32_t)); + return value; + } +} + +// Process any additional metadata being passed into a call to the parser via +// the yp_parse_serialize function. Since the source of these calls will be from +// Ruby implementation internals we assume it is from a trusted source. +// +// Currently, this is only passing in variable scoping surrounding an eval, but +// eventually it will be extended to hold any additional metadata. This data +// is serialized to reduce the calling complexity for a foreign function call +// vs a foreign runtime making a bindable in-memory version of a C structure. +// +// metadata is assumed to be a valid pointer pointing to well-formed data. 
The +// format is described below: +// +// ```text +// [ +// filepath_size: uint32_t, +// filepath: char*, +// scopes_count: uint32_t, +// [ +// locals_count: uint32_t, +// [local_size: uint32_t, local: char*]* +// ]* +// ] +// ``` +void +yp_parser_metadata(yp_parser_t *parser, const char *metadata) { + uint32_t filepath_size = yp_metadata_read_u32(metadata); + metadata += 4; + + if (filepath_size) { + yp_string_t filepath_string; + yp_string_constant_init(&filepath_string, metadata, filepath_size); + + parser->filepath_string = filepath_string; + metadata += filepath_size; + } + + uint32_t scopes_count = yp_metadata_read_u32(metadata); + metadata += 4; + + for (size_t scope_index = 0; scope_index < scopes_count; scope_index++) { + uint32_t locals_count = yp_metadata_read_u32(metadata); + metadata += 4; + + yp_parser_scope_push(parser, scope_index == 0); + + for (size_t local_index = 0; local_index < locals_count; local_index++) { + uint32_t local_size = yp_metadata_read_u32(metadata); + metadata += 4; + + uint8_t *constant = malloc(local_size); + memcpy(constant, metadata, local_size); + + yp_parser_local_add_owned(parser, constant, (size_t) local_size); + metadata += local_size; + } + } +} + +/******************************************************************************/ +/* External functions */ +/******************************************************************************/ + +// Initialize a parser with the given start and end pointers. 
+YP_EXPORTED_FUNCTION void +yp_parser_init(yp_parser_t *parser, const uint8_t *source, size_t size, const char *filepath) { + assert(source != NULL); + + // Set filepath to the file that was passed + if (!filepath) filepath = ""; + yp_string_t filepath_string; + yp_string_constant_init(&filepath_string, filepath, strlen(filepath)); + + *parser = (yp_parser_t) { + .lex_state = YP_LEX_STATE_BEG, + .enclosure_nesting = 0, + .lambda_enclosure_nesting = -1, + .brace_nesting = 0, + .do_loop_stack = YP_STATE_STACK_EMPTY, + .accepts_block_stack = YP_STATE_STACK_EMPTY, + .lex_modes = { + .index = 0, + .stack = {{ .mode = YP_LEX_DEFAULT }}, + .current = &parser->lex_modes.stack[0], + }, + .start = source, + .end = source + size, + .previous = { .type = YP_TOKEN_EOF, .start = source, .end = source }, + .current = { .type = YP_TOKEN_EOF, .start = source, .end = source }, + .next_start = NULL, + .heredoc_end = NULL, + .comment_list = YP_LIST_EMPTY, + .warning_list = YP_LIST_EMPTY, + .error_list = YP_LIST_EMPTY, + .current_scope = NULL, + .current_context = NULL, + .encoding = yp_encoding_utf_8, + .encoding_changed_callback = NULL, + .encoding_decode_callback = NULL, + .encoding_comment_start = source, + .lex_callback = NULL, + .filepath_string = filepath_string, + .constant_pool = YP_CONSTANT_POOL_EMPTY, + .newline_list = YP_NEWLINE_LIST_EMPTY, + .integer_base = 0, + .command_start = true, + .recovering = false, + .encoding_changed = false, + .pattern_matching_newlines = false, + .in_keyword_arg = false, + .semantic_token_seen = false, + .frozen_string_literal = false + }; + + yp_accepts_block_stack_push(parser, true); + + // Initialize the constant pool. We're going to completely guess as to the + // number of constants that we'll need based on the size of the input. The + // ratio we chose here is actually less arbitrary than you might think. + // + // We took ~50K Ruby files and measured the size of the file versus the + // number of constants that were found in those files. 
Then we found the + // average and standard deviation of the ratios of constants/bytesize. Then + // we added 1.34 standard deviations to the average to get a ratio that + // would fit 75% of the files (for a two-tailed distribution). This works + // because there was about a 0.77 correlation and the distribution was + // roughly normal. + // + // This ratio will need to change if we add more constants to the constant + // pool for another node type. + uint32_t constant_size = ((uint32_t) size) / 95; + yp_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size); + + // Initialize the newline list. Similar to the constant pool, we're going to + // guess at the number of newlines that we'll need based on the size of the + // input. + size_t newline_size = size / 22; + yp_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size); + + // Skip past the UTF-8 BOM if it exists. + if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) { + parser->current.end += 3; + parser->encoding_comment_start += 3; + } + + // If the first two bytes of the source are a shebang, then we'll indicate + // that the encoding comment is at the end of the shebang. + if (peek(parser) == '#' && peek_offset(parser, 1) == '!') { + const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size); + if (encoding_comment_start) { + parser->encoding_comment_start = encoding_comment_start + 1; + } + } +} + +// Register a callback that will be called whenever YARP changes the encoding it +// is using to parse based on the magic comment. +YP_EXPORTED_FUNCTION void +yp_parser_register_encoding_changed_callback(yp_parser_t *parser, yp_encoding_changed_callback_t callback) { + parser->encoding_changed_callback = callback; +} + +// Register a callback that will be called when YARP encounters a magic comment +// with an encoding referenced that it doesn't understand. 
The callback should +// return NULL if it also doesn't understand the encoding or it should return a +// pointer to a yp_encoding_t struct that contains the functions necessary to +// parse identifiers. +YP_EXPORTED_FUNCTION void +yp_parser_register_encoding_decode_callback(yp_parser_t *parser, yp_encoding_decode_callback_t callback) { + parser->encoding_decode_callback = callback; +} + +// Free all of the memory associated with the comment list. +static inline void +yp_comment_list_free(yp_list_t *list) { + yp_list_node_t *node, *next; + + for (node = list->head; node != NULL; node = next) { + next = node->next; + + yp_comment_t *comment = (yp_comment_t *) node; + free(comment); + } +} + +// Free any memory associated with the given parser. +YP_EXPORTED_FUNCTION void +yp_parser_free(yp_parser_t *parser) { + yp_string_free(&parser->filepath_string); + yp_diagnostic_list_free(&parser->error_list); + yp_diagnostic_list_free(&parser->warning_list); + yp_comment_list_free(&parser->comment_list); + yp_constant_pool_free(&parser->constant_pool); + yp_newline_list_free(&parser->newline_list); + + while (parser->current_scope != NULL) { + // Normally, popping the scope doesn't free the locals since it is + // assumed that ownership has transferred to the AST. However if we have + // scopes while we're freeing the parser, it's likely they came from + // eval scopes and we need to free them explicitly here. + yp_constant_id_list_free(&parser->current_scope->locals); + yp_parser_scope_pop(parser); + } + + while (parser->lex_modes.index >= YP_LEX_STACK_SIZE) { + lex_mode_pop(parser); + } +} + +// Parse the Ruby source associated with the given parser and return the tree. 
+YP_EXPORTED_FUNCTION yp_node_t * +yp_parse(yp_parser_t *parser) { + return parse_program(parser); +} + +YP_EXPORTED_FUNCTION void +yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) { + yp_buffer_append_str(buffer, "YARP", 4); + yp_buffer_append_u8(buffer, YP_VERSION_MAJOR); + yp_buffer_append_u8(buffer, YP_VERSION_MINOR); + yp_buffer_append_u8(buffer, YP_VERSION_PATCH); + yp_buffer_append_u8(buffer, YP_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0); + + yp_serialize_content(parser, node, buffer); + yp_buffer_append_str(buffer, "\0", 1); +} + +// Parse and serialize the AST represented by the given source to the given +// buffer. +YP_EXPORTED_FUNCTION void +yp_parse_serialize(const uint8_t *source, size_t size, yp_buffer_t *buffer, const char *metadata) { + yp_parser_t parser; + yp_parser_init(&parser, source, size, NULL); + if (metadata) yp_parser_metadata(&parser, metadata); + + yp_node_t *node = yp_parse(&parser); + yp_serialize(&parser, node, buffer); + + yp_node_destroy(&parser, node); + yp_parser_free(&parser); +} + +#undef YP_LOCATION_NULL_VALUE +#undef YP_LOCATION_TOKEN_VALUE +#undef YP_LOCATION_NODE_VALUE +#undef YP_LOCATION_NODE_BASE_VALUE +#undef YP_CASE_KEYWORD +#undef YP_CASE_OPERATOR +#undef YP_CASE_WRITABLE |