summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <[email protected]>2024-02-23 10:36:59 -0500
committerKevin Newton <[email protected]>2024-02-23 13:25:31 -0500
commitd1ce989829927a4215952983babadfd1df1b505f (patch)
tree83020fb1034046d58493328a235e5d77e8aa9e4a
parent73dd3ce03e3a0a58a157607385418d7a3724752c (diff)
[ruby/prism] Duplicated hash keys
https://github.com/ruby/prism/commit/3e10c46c14
-rw-r--r--prism/diagnostic.c1
-rw-r--r--prism/diagnostic.h1
-rw-r--r--prism/node.h18
-rw-r--r--prism/prism.c73
-rw-r--r--prism/prism.h1
-rw-r--r--prism/static_literals.c215
-rw-r--r--prism/static_literals.h109
-rw-r--r--prism/templates/src/node.c.erb49
-rw-r--r--prism/util/pm_integer.c23
-rw-r--r--prism/util/pm_integer.h11
-rw-r--r--test/prism/newline_test.rb2
-rw-r--r--test/prism/static_literals_test.rb68
12 files changed, 545 insertions, 26 deletions
diff --git a/prism/diagnostic.c b/prism/diagnostic.c
index 2040387d80..1003ecd339 100644
--- a/prism/diagnostic.c
+++ b/prism/diagnostic.c
@@ -306,6 +306,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_LEN] = {
[PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS] = { "ambiguous first argument; put parentheses or a space even after `+` operator", PM_WARNING_LEVEL_VERBOSE },
[PM_WARN_AMBIGUOUS_PREFIX_STAR] = { "ambiguous `*` has been interpreted as an argument prefix", PM_WARNING_LEVEL_VERBOSE },
[PM_WARN_AMBIGUOUS_SLASH] = { "ambiguous `/`; wrap regexp in parentheses or add a space after `/` operator", PM_WARNING_LEVEL_VERBOSE },
+ [PM_WARN_DUPLICATED_HASH_KEY] = { "key %.*s is duplicated and overwritten on line %" PRIi32, PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_EQUAL_IN_CONDITIONAL] = { "found `= literal' in conditional, should be ==", PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_END_IN_METHOD] = { "END in method; use at_exit", PM_WARNING_LEVEL_DEFAULT },
[PM_WARN_FLOAT_OUT_OF_RANGE] = { "Float %.*s%s out of range", PM_WARNING_LEVEL_VERBOSE }
diff --git a/prism/diagnostic.h b/prism/diagnostic.h
index 7419c0e791..85f15db65c 100644
--- a/prism/diagnostic.h
+++ b/prism/diagnostic.h
@@ -306,6 +306,7 @@ typedef enum {
PM_WARN_AMBIGUOUS_SLASH,
PM_WARN_EQUAL_IN_CONDITIONAL,
PM_WARN_END_IN_METHOD,
+ PM_WARN_DUPLICATED_HASH_KEY,
PM_WARN_FLOAT_OUT_OF_RANGE,
// This is the number of diagnostic codes.
diff --git a/prism/node.h b/prism/node.h
index 8d1b6a599a..9c37c9decc 100644
--- a/prism/node.h
+++ b/prism/node.h
@@ -11,6 +11,17 @@
#include "prism/util/pm_buffer.h"
/**
+ * Attempts to grow the node list to the next size. If there is already
+ * capacity in the list, this function does nothing. Otherwise it reallocates
+ * the list to be twice as large as it was before. If the reallocation fails,
+ * this function returns false, otherwise it returns true.
+ *
+ * @param list The list to grow.
+ * @return True if the list was successfully grown, false otherwise.
+ */
+bool pm_node_list_grow(pm_node_list_t *list);
+
+/**
* Append a new node onto the end of the node list.
*
* @param list The list to append to.
@@ -19,6 +30,13 @@
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node);
/**
+ * Free the internal memory associated with the given node list.
+ *
+ * @param list The list to free.
+ */
+void pm_node_list_free(pm_node_list_t *list);
+
+/**
* Deallocate a node and all of its children.
*
* @param parser The parser that owns the node.
diff --git a/prism/prism.c b/prism/prism.c
index 8bfc6ed6d1..fe1ba8d971 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -11677,10 +11677,31 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
}
/**
+ * Record a hash key in the given set of static literals. When the set reports
+ * that an equal key was already present, append a duplicated-hash-key warning.
+ * The warning spans the earlier key, quotes its source text, and names the
+ * line on which the key passed in here starts.
+ */
+static void
+pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+    const pm_node_t *previous = pm_static_literals_add(parser, literals, node);
+    if (previous == NULL) return;
+
+    // The format string uses %.*s, so the length of the key's source text has
+    // to be passed as an int ahead of the pointer to its first byte.
+    int key_length = (int) (previous->location.end - previous->location.start);
+
+    pm_diagnostic_list_append_format(
+        &parser->warning_list,
+        previous->location.start,
+        previous->location.end,
+        PM_WARN_DUPLICATED_HASH_KEY,
+        key_length,
+        previous->location.start,
+        pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line
+    );
+}
+
+/**
* Parse all of the elements of a hash. returns true if a double splat was found.
*/
static bool
-parse_assocs(pm_parser_t *parser, pm_node_t *node) {
+parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
bool contains_keyword_splat = false;
@@ -11709,6 +11730,8 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
parser_lex(parser);
pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
+ pm_hash_key_static_literals_add(parser, literals, key);
+
pm_token_t operator = not_provided(parser);
pm_node_t *value = NULL;
@@ -11738,8 +11761,16 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
}
default: {
pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_KEY);
- pm_token_t operator;
+ // Hash keys that are strings are automatically frozen. We will
+ // mark that here.
+ if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
+ pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
+ }
+
+ pm_hash_key_static_literals_add(parser, literals, key);
+
+ pm_token_t operator;
if (pm_symbol_node_label_p(key)) {
operator = not_provided(parser);
} else {
@@ -11773,6 +11804,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
// Otherwise by default we will exit out of this loop.
break;
}
+
return contains_keyword_splat;
}
@@ -11830,12 +11862,17 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
argument = (pm_node_t *) hash;
- bool contains_keyword_splat = parse_assocs(parser, (pm_node_t *) hash);
- parsed_bare_hash = true;
+ pm_static_literals_t literals = { 0 };
+ bool contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) hash);
+
parse_arguments_append(parser, arguments, argument);
if (contains_keyword_splat) {
pm_node_flag_set((pm_node_t *)arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT);
}
+
+ pm_static_literals_free(&literals);
+ parsed_bare_hash = true;
+
break;
}
case PM_TOKEN_UAMPERSAND: {
@@ -11925,10 +11962,14 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
+ // Create the set of static literals for this hash.
+ pm_static_literals_t literals = { 0 };
+ pm_hash_key_static_literals_add(parser, &literals, argument);
+
// Finish parsing the one we are part way through
pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_HASH_VALUE);
-
argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
+
pm_keyword_hash_node_elements_append(bare_hash, argument);
argument = (pm_node_t *) bare_hash;
@@ -11937,9 +11978,10 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
token_begins_expression_p(parser->current.type) ||
match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
)) {
- contains_keyword_splat = parse_assocs(parser, (pm_node_t *) bare_hash);
+ contains_keyword_splat = parse_assocs(parser, &literals, (pm_node_t *) bare_hash);
}
+ pm_static_literals_free(&literals);
parsed_bare_hash = true;
} else if (accept1(parser, PM_TOKEN_KEYWORD_IN)) {
// TODO: Could we solve this with binding powers instead?
@@ -14661,13 +14703,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
}
- pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
- element = (pm_node_t *)hash;
+ element = (pm_node_t *) pm_keyword_hash_node_create(parser);
+ pm_static_literals_t literals = { 0 };
if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
- parse_assocs(parser, (pm_node_t *) hash);
+ parse_assocs(parser, &literals, element);
}
+ pm_static_literals_free(&literals);
parsed_bare_hash = true;
} else {
element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_ARRAY_EXPRESSION);
@@ -14678,6 +14721,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
}
pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
+ pm_static_literals_t literals = { 0 };
+ pm_hash_key_static_literals_add(parser, &literals, element);
pm_token_t operator;
if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
@@ -14690,11 +14735,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
pm_keyword_hash_node_elements_append(hash, assoc);
- element = (pm_node_t *)hash;
+ element = (pm_node_t *) hash;
if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
- parse_assocs(parser, (pm_node_t *) hash);
+ parse_assocs(parser, &literals, element);
}
+ pm_static_literals_free(&literals);
parsed_bare_hash = true;
}
}
@@ -14840,10 +14886,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
case PM_TOKEN_BRACE_LEFT: {
pm_accepts_block_stack_push(parser, true);
parser_lex(parser);
+
pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
+ pm_static_literals_t literals = { 0 };
if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
- parse_assocs(parser, (pm_node_t *) node);
+ parse_assocs(parser, &literals, (pm_node_t *) node);
accept1(parser, PM_TOKEN_NEWLINE);
}
@@ -14851,6 +14899,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
pm_hash_node_closing_loc_set(node, &parser->previous);
+ pm_static_literals_free(&literals);
return (pm_node_t *) node;
}
case PM_TOKEN_CHARACTER_LITERAL: {
diff --git a/prism/prism.h b/prism/prism.h
index e24dbf5cad..7d9b96fa82 100644
--- a/prism/prism.h
+++ b/prism/prism.h
@@ -21,6 +21,7 @@
#include "prism/parser.h"
#include "prism/prettyprint.h"
#include "prism/regexp.h"
+#include "prism/static_literals.h"
#include "prism/version.h"
#include <assert.h>
diff --git a/prism/static_literals.c b/prism/static_literals.c
new file mode 100644
index 0000000000..0fab4e98a3
--- /dev/null
+++ b/prism/static_literals.c
@@ -0,0 +1,215 @@
+#include "prism/static_literals.h"
+
+/**
+ * Insert a node into the given sorted list. If a node that compares equal is
+ * already present, it is replaced by the new node and the previous node is
+ * returned. Otherwise the node is inserted at its sorted position and NULL is
+ * returned. NULL is also returned, without inserting anything, when the list
+ * could not be grown to make room for the new node.
+ */
+static pm_node_t *
+pm_node_list_insert(const pm_parser_t *parser, pm_node_list_t *list, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) {
+    // TODO: This would be much more efficient with a binary search.
+    size_t index = 0;
+    while (index < list->size) {
+        int order = compare(parser, list->nodes[index], node);
+
+        // If we find a match, then replace the node and return the old one.
+        if (order == 0) {
+            pm_node_t *replaced = list->nodes[index];
+            list->nodes[index] = node;
+            return replaced;
+        }
+
+        // The first element that sorts after the node marks the insertion
+        // point.
+        if (order > 0) break;
+        index++;
+    }
+
+    // Shifting the tail requires room for one more element. If the list could
+    // not be grown, bail out instead of writing past the allocation.
+    if (!pm_node_list_grow(list)) return NULL;
+    memmove(&list->nodes[index + 1], &list->nodes[index], (list->size - index) * sizeof(pm_node_t *));
+
+    list->nodes[index] = node;
+    list->size++;
+
+    return NULL;
+}
+
+/**
+ * Compare two values that can be compared with a simple numeric comparison.
+ * Expands to -1, 0, or 1. Each argument may be evaluated more than once, so it
+ * must not have side effects.
+ */
+#define PM_NUMERIC_COMPARISON(left, right) (((left) < (right)) ? -1 : (((left) > (right)) ? 1 : 0))
+
+/**
+ * Return the integer value of the given node as an int64_t. A SourceLineNode
+ * yields the line number of its start location. An IntegerNode whose value has
+ * a non-zero length saturates to INT64_MIN or INT64_MAX depending on its sign;
+ * otherwise the value of its head word is returned with the sign applied.
+ */
+static int64_t
+pm_int64_value(const pm_parser_t *parser, const pm_node_t *node) {
+    if (PM_NODE_TYPE_P(node, PM_INTEGER_NODE)) {
+        const pm_integer_t *number = &((const pm_integer_node_t *) node)->value;
+
+        if (number->length > 0) {
+            return number->negative ? INT64_MIN : INT64_MAX;
+        }
+
+        int64_t magnitude = (int64_t) number->head.value;
+        if (number->negative) return -magnitude;
+        return magnitude;
+    }
+
+    if (PM_NODE_TYPE_P(node, PM_SOURCE_LINE_NODE)) {
+        return (int64_t) pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line).line;
+    }
+
+    assert(false && "unreachable");
+    return 0;
+}
+
+/**
+ * A comparison function for comparing two IntegerNode or SourceLineNode
+ * instances. Two IntegerNode instances are compared through
+ * pm_integer_compare; as soon as a SourceLineNode is involved, both sides are
+ * reduced to int64_t values and compared numerically.
+ */
+static int
+pm_compare_integer_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    if (!PM_NODE_TYPE_P(left, PM_SOURCE_LINE_NODE) && !PM_NODE_TYPE_P(right, PM_SOURCE_LINE_NODE)) {
+        return pm_integer_compare(
+            &((const pm_integer_node_t *) left)->value,
+            &((const pm_integer_node_t *) right)->value
+        );
+    }
+
+    int64_t lhs = pm_int64_value(parser, left);
+    int64_t rhs = pm_int64_value(parser, right);
+    return PM_NUMERIC_COMPARISON(lhs, rhs);
+}
+
+/**
+ * A comparison function for comparing two FloatNode instances by the double
+ * value stored on each node. Returns -1, 0, or 1.
+ */
+static int
+pm_compare_float_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    const double lhs = ((const pm_float_node_t *) left)->value;
+    const double rhs = ((const pm_float_node_t *) right)->value;
+
+    if (lhs < rhs) return -1;
+    if (lhs > rhs) return 1;
+    return 0;
+}
+
+/**
+ * A comparison function for comparing two nodes that have attached numbers.
+ * Nodes of different types are ordered by their node type. ImaginaryNode and
+ * RationalNode instances are unwrapped to their numeric child and compared
+ * again; IntegerNode and FloatNode instances are compared by value.
+ */
+static int
+pm_compare_number_nodes(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    for (;;) {
+        if (PM_NODE_TYPE(left) != PM_NODE_TYPE(right)) {
+            return PM_NUMERIC_COMPARISON(PM_NODE_TYPE(left), PM_NODE_TYPE(right));
+        }
+
+        switch (PM_NODE_TYPE(left)) {
+            case PM_IMAGINARY_NODE:
+                // Descend into the wrapped numerics and compare those.
+                left = ((const pm_imaginary_node_t *) left)->numeric;
+                right = ((const pm_imaginary_node_t *) right)->numeric;
+                continue;
+            case PM_RATIONAL_NODE:
+                left = ((const pm_rational_node_t *) left)->numeric;
+                right = ((const pm_rational_node_t *) right)->numeric;
+                continue;
+            case PM_INTEGER_NODE:
+                return pm_compare_integer_nodes(parser, left, right);
+            case PM_FLOAT_NODE:
+                return pm_compare_float_nodes(parser, left, right);
+            default:
+                assert(false && "unreachable");
+                return 0;
+        }
+    }
+}
+
+/**
+ * Return a pointer to the string value of the given node: the unescaped
+ * contents of a StringNode or SymbolNode, or the filepath of a SourceFileNode.
+ */
+static const pm_string_t *
+pm_string_value(const pm_node_t *node) {
+    if (PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
+        return &((const pm_string_node_t *) node)->unescaped;
+    }
+
+    if (PM_NODE_TYPE_P(node, PM_SOURCE_FILE_NODE)) {
+        return &((const pm_source_file_node_t *) node)->filepath;
+    }
+
+    if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE)) {
+        return &((const pm_symbol_node_t *) node)->unescaped;
+    }
+
+    assert(false && "unreachable");
+    return NULL;
+}
+
+/**
+ * A comparison function for comparing two nodes that have attached strings.
+ * The string of each node is looked up with pm_string_value and the two are
+ * handed to pm_string_compare.
+ */
+static int
+pm_compare_string_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    return pm_string_compare(pm_string_value(left), pm_string_value(right));
+}
+
+/**
+ * A comparison function for comparing two RegularExpressionNode instances.
+ * The unescaped sources are compared first; the node flags only break the tie
+ * when the sources are equal.
+ */
+static int
+pm_compare_regular_expression_nodes(PRISM_ATTRIBUTE_UNUSED const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right) {
+    const pm_regular_expression_node_t *lhs = (const pm_regular_expression_node_t *) left;
+    const pm_regular_expression_node_t *rhs = (const pm_regular_expression_node_t *) right;
+
+    int order = pm_string_compare(&lhs->unescaped, &rhs->unescaped);
+    if (order == 0) {
+        order = PM_NUMERIC_COMPARISON(lhs->base.flags, rhs->base.flags);
+    }
+
+    return order;
+}
+
+#undef PM_NUMERIC_COMPARISON
+
+/**
+ * Add a node to the set of static literals.
+ *
+ * Nodes that are not flagged as static literals, and node types that are not
+ * tracked by the set, are ignored and NULL is returned. Otherwise the node is
+ * stored in the bucket that matches its type. If an equal node was already
+ * stored, the new node takes its place and the previous node is returned;
+ * if not, NULL is returned.
+ */
+pm_node_t *
+pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
+    if (!PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return NULL;
+
+    switch (PM_NODE_TYPE(node)) {
+        case PM_INTEGER_NODE:
+        case PM_SOURCE_LINE_NODE:
+            return pm_node_list_insert(parser, &literals->integer_nodes, node, pm_compare_integer_nodes);
+        case PM_FLOAT_NODE:
+            return pm_node_list_insert(parser, &literals->float_nodes, node, pm_compare_float_nodes);
+        case PM_RATIONAL_NODE:
+            return pm_node_list_insert(parser, &literals->rational_nodes, node, pm_compare_number_nodes);
+        case PM_IMAGINARY_NODE:
+            // Imaginary literals have their own bucket in the set. A rational
+            // and an imaginary node never compare equal, so keeping them in
+            // separate lists does not change which keys count as duplicates.
+            return pm_node_list_insert(parser, &literals->imaginary_nodes, node, pm_compare_number_nodes);
+        case PM_STRING_NODE:
+        case PM_SOURCE_FILE_NODE:
+            return pm_node_list_insert(parser, &literals->string_nodes, node, pm_compare_string_nodes);
+        case PM_REGULAR_EXPRESSION_NODE:
+            return pm_node_list_insert(parser, &literals->regexp_nodes, node, pm_compare_regular_expression_nodes);
+        case PM_SYMBOL_NODE:
+            return pm_node_list_insert(parser, &literals->symbol_nodes, node, pm_compare_string_nodes);
+        // The remaining literals carry no value, so the set only remembers
+        // the most recent node of each type.
+        case PM_TRUE_NODE: {
+            pm_node_t *duplicated = literals->true_node;
+            literals->true_node = node;
+            return duplicated;
+        }
+        case PM_FALSE_NODE: {
+            pm_node_t *duplicated = literals->false_node;
+            literals->false_node = node;
+            return duplicated;
+        }
+        case PM_NIL_NODE: {
+            pm_node_t *duplicated = literals->nil_node;
+            literals->nil_node = node;
+            return duplicated;
+        }
+        case PM_SOURCE_ENCODING_NODE: {
+            pm_node_t *duplicated = literals->source_encoding_node;
+            literals->source_encoding_node = node;
+            return duplicated;
+        }
+        default:
+            return NULL;
+    }
+}
+
+/**
+ * Free the internal memory associated with the given static literals set by
+ * releasing each of its node lists in turn.
+ */
+void
+pm_static_literals_free(pm_static_literals_t *literals) {
+    pm_node_list_t *lists[] = {
+        &literals->integer_nodes,
+        &literals->float_nodes,
+        &literals->rational_nodes,
+        &literals->imaginary_nodes,
+        &literals->string_nodes,
+        &literals->regexp_nodes,
+        &literals->symbol_nodes
+    };
+
+    for (size_t index = 0; index < sizeof(lists) / sizeof(lists[0]); index++) {
+        pm_node_list_free(lists[index]);
+    }
+}
diff --git a/prism/static_literals.h b/prism/static_literals.h
new file mode 100644
index 0000000000..837d355985
--- /dev/null
+++ b/prism/static_literals.h
@@ -0,0 +1,109 @@
+/**
+ * @file static_literals.h
+ *
+ * A set of static literal nodes that can be checked for duplicates.
+ */
+#ifndef PRISM_STATIC_LITERALS_H
+#define PRISM_STATIC_LITERALS_H
+
+#include "prism/defines.h"
+#include "prism/ast.h"
+#include "prism/node.h"
+#include "prism/parser.h"
+
+#include <assert.h>
+#include <stdbool.h>
+
+/**
+ * Certain sets of nodes (hash keys and when clauses) check for duplicate nodes
+ * to alert the user of potential issues. To do this, we keep a set of the nodes
+ * that have been seen so far, and compare whenever we find a new node.
+ *
+ * We bucket the nodes based on their type to minimize the number of comparisons
+ * that need to be performed. The list buckets are kept in sorted order by the
+ * insertion routine in static_literals.c, which currently walks a list
+ * linearly to find the insertion point (a binary search is noted there as a
+ * TODO).
+ */
+typedef struct {
+ /**
+ * This is the set of IntegerNode and SourceLineNode instances. We store
+ * them in a sorted list so that duplicates are found while locating the
+ * insertion point.
+ */
+ pm_node_list_t integer_nodes;
+
+ /**
+ * This is the set of FloatNode instances. We store them in a sorted list so
+ * that duplicates are found while locating the insertion point.
+ */
+ pm_node_list_t float_nodes;
+
+ /**
+ * This is the set of RationalNode instances. Like the other lists, it is
+ * maintained by sorted insertion.
+ */
+ pm_node_list_t rational_nodes;
+
+ /**
+ * This list is intended for ImaginaryNode instances. It is released by
+ * pm_static_literals_free. NOTE(review): confirm which list
+ * pm_static_literals_add routes ImaginaryNode instances into.
+ */
+ pm_node_list_t imaginary_nodes;
+
+ /**
+ * This is the set of StringNode and SourceFileNode instances. We store them
+ * in a sorted list so that duplicates are found while locating the
+ * insertion point.
+ */
+ pm_node_list_t string_nodes;
+
+ /**
+ * This is the set of RegularExpressionNode instances. We store them in a
+ * sorted list so that duplicates are found while locating the insertion
+ * point.
+ */
+ pm_node_list_t regexp_nodes;
+
+ /**
+ * This is the set of SymbolNode instances. We store them in a sorted list
+ * so that duplicates are found while locating the insertion point.
+ */
+ pm_node_list_t symbol_nodes;
+
+ /**
+ * A pointer to the last TrueNode instance that was inserted, or NULL.
+ */
+ pm_node_t *true_node;
+
+ /**
+ * A pointer to the last FalseNode instance that was inserted, or NULL.
+ */
+ pm_node_t *false_node;
+
+ /**
+ * A pointer to the last NilNode instance that was inserted, or NULL.
+ */
+ pm_node_t *nil_node;
+
+ /**
+ * A pointer to the last SourceEncodingNode instance that was inserted, or
+ * NULL.
+ */
+ pm_node_t *source_encoding_node;
+} pm_static_literals_t;
+
+/**
+ * Add a node to the set of static literals.
+ *
+ * @param parser The parser that created the node.
+ * @param literals The set of static literals to add the node to.
+ * @param node The node to add to the set.
+ * @return A pointer to the node that is being overwritten, if there is one.
+ */
+pm_node_t * pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node);
+
+/**
+ * Free the internal memory associated with the given static literals set.
+ *
+ * @param literals The set of static literals to free.
+ */
+void pm_static_literals_free(pm_static_literals_t *literals);
+
+#endif
diff --git a/prism/templates/src/node.c.erb b/prism/templates/src/node.c.erb
index 89c73451e8..554645c3e7 100644
--- a/prism/templates/src/node.c.erb
+++ b/prism/templates/src/node.c.erb
@@ -17,32 +17,55 @@ pm_node_list_memsize(pm_node_list_t *node_list, pm_memsize_t *memsize) {
}
/**
- * Append a new node onto the end of the node list.
+ * Attempts to grow the node list to the next size. If there is already
+ * capacity in the list, this function does nothing. Otherwise it reallocates
+ * the list to be twice as large as it was before. If the reallocation fails,
+ * this function returns false, otherwise it returns true.
*/
-void
-pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
+bool
+pm_node_list_grow(pm_node_list_t *list) {
if (list->size == list->capacity) {
list->capacity = list->capacity == 0 ? 4 : list->capacity * 2;
list->nodes = (pm_node_t **) realloc(list->nodes, sizeof(pm_node_t *) * list->capacity);
+ return list->nodes != NULL;
+ }
+ return true;
+}
+
+/**
+ * Append a new node onto the end of the node list. If the list cannot be
+ * grown to make room, the node is not appended.
+ */
+void
+pm_node_list_append(pm_node_list_t *list, pm_node_t *node) {
+    if (!pm_node_list_grow(list)) return;
+
+    list->nodes[list->size] = node;
+    list->size++;
+}
+
+/**
+ * Free the internal memory associated with the given node list.
+ */
+void
+pm_node_list_free(pm_node_list_t *list) {
+ if (list->capacity > 0) {
+ free(list->nodes);
+ *list = (pm_node_list_t) { 0 };
}
- list->nodes[list->size++] = node;
}
PRISM_EXPORTED_FUNCTION void
pm_node_destroy(pm_parser_t *parser, pm_node_t *node);
/**
- * Deallocate the inner memory of a list of nodes. The parser argument is not
- * used, but is here for the future possibility of pre-allocating memory pools.
+ * Destroy the nodes that are contained within the given node list.
*/
static void
-pm_node_list_free(pm_parser_t *parser, pm_node_list_t *list) {
- if (list->capacity > 0) {
- for (size_t index = 0; index < list->size; index++) {
- pm_node_destroy(parser, list->nodes[index]);
- }
- free(list->nodes);
+pm_node_list_destroy(pm_parser_t *parser, pm_node_list_t *list) {
+ for (size_t index = 0; index < list->size; index++) {
+ pm_node_destroy(parser, list->nodes[index]);
}
+
+ pm_node_list_free(list);
}
/**
@@ -71,7 +94,7 @@ pm_node_destroy(pm_parser_t *parser, pm_node_t *node) {
<%- when Prism::StringField -%>
pm_string_free(&cast-><%= field.name %>);
<%- when Prism::NodeListField -%>
- pm_node_list_free(parser, &cast-><%= field.name %>);
+ pm_node_list_destroy(parser, &cast-><%= field.name %>);
<%- when Prism::ConstantListField -%>
pm_constant_id_list_free(&cast-><%= field.name %>);
<%- when Prism::IntegerField -%>
diff --git a/prism/util/pm_integer.c b/prism/util/pm_integer.c
index f08078356a..720dd60872 100644
--- a/prism/util/pm_integer.c
+++ b/prism/util/pm_integer.c
@@ -153,6 +153,29 @@ pm_integer_memsize(const pm_integer_t *integer) {
}
/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ */
+int
+pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right) {
+ if (left->length < right->length) return -1;
+ if (left->length > right->length) return 1;
+
+ for (
+ const pm_integer_word_t *left_word = &left->head, *right_word = &right->head;
+ left_word != NULL && right_word != NULL;
+ left_word = left_word->next, right_word = right_word->next
+ ) {
+ if (left_word->value < right_word->value) return -1;
+ if (left_word->value > right_word->value) return 1;
+ }
+
+ return 0;
+
+}
+
+/**
* Recursively destroy the linked list of an integer.
*/
static void
diff --git a/prism/util/pm_integer.h b/prism/util/pm_integer.h
index 5525adabb0..a80db633bb 100644
--- a/prism/util/pm_integer.h
+++ b/prism/util/pm_integer.h
@@ -94,6 +94,17 @@ PRISM_EXPORTED_FUNCTION void pm_integer_parse(pm_integer_t *integer, pm_integer_
size_t pm_integer_memsize(const pm_integer_t *integer);
/**
+ * Compare two integers. This function returns -1 if the left integer is less
+ * than the right integer, 0 if they are equal, and 1 if the left integer is
+ * greater than the right integer.
+ *
+ * @param left The left integer to compare.
+ * @param right The right integer to compare.
+ * @return The result of the comparison.
+ */
+int pm_integer_compare(const pm_integer_t *left, const pm_integer_t *right);
+
+/**
* Free the internal memory of an integer. This memory will only be allocated if
* the integer exceeds the size of a single node in the linked list.
*
diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb
index eea69ec5e1..e9975b346e 100644
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@@ -7,7 +7,7 @@ return unless defined?(RubyVM::InstructionSequence)
module Prism
class NewlineTest < TestCase
base = File.expand_path("../", __FILE__)
- filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb errors_test.rb parser_test.rb unescape_test.rb]
+ filepaths = Dir["*.rb", base: base] - %w[encoding_test.rb errors_test.rb parser_test.rb static_literals_test.rb unescape_test.rb]
filepaths.each do |relative|
define_method("test_newline_flags_#{relative}") do
diff --git a/test/prism/static_literals_test.rb b/test/prism/static_literals_test.rb
new file mode 100644
index 0000000000..a35bb1cd49
--- /dev/null
+++ b/test/prism/static_literals_test.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+ class StaticLiteralsTest < TestCase
+ def test_static_literals
+ assert_warning("1")
+ assert_warning("0xA", "10")
+ assert_warning("0o10", "8")
+ assert_warning("0b10", "2")
+ assert_warning("1_000")
+ assert_warning((2**32).to_s(10), "0x#{(2**32).to_s(16)}")
+ assert_warning((2**64).to_s(10), "0x#{(2**64).to_s(16)}")
+
+ assert_warning("__LINE__", "2")
+ assert_warning("3", "__LINE__")
+
+ assert_warning("1.0")
+ assert_warning("1e2", "100.0")
+
+ assert_warning("1r")
+ assert_warning("1.0r")
+
+ assert_warning("1i")
+ assert_warning("1.0i")
+
+ assert_warning("1ri")
+ assert_warning("1.0ri")
+
+ assert_warning("\"#{__FILE__}\"")
+ assert_warning("\"foo\"")
+ assert_warning("\"#{__FILE__}\"", "__FILE__")
+
+ assert_warning("/foo/")
+ refute_warning("/foo/", "/foo/i")
+
+ assert_warning(":foo")
+ assert_warning("%s[foo]")
+
+ assert_warning("true")
+ assert_warning("false")
+ assert_warning("nil")
+ assert_warning("__ENCODING__")
+ end
+
+ private
+
+ def parse_warning(left, right)
+ source = <<~RUBY
+ {
+ #{left} => 1,
+ #{right} => 2
+ }
+ RUBY
+
+ Prism.parse(source, filepath: __FILE__).warnings.first
+ end
+
+ def assert_warning(left, right = left)
+ assert_match %r{key #{Regexp.escape(left)} .+ line 3}, parse_warning(left, right)&.message
+ end
+
+ def refute_warning(left, right)
+ assert_nil parse_warning(left, right)
+ end
+ end
+end