diff options
author | Kevin Newton <[email protected]> | 2024-02-28 09:32:59 -0500 |
---|---|---|
committer | git <[email protected]> | 2024-02-28 15:36:15 +0000 |
commit | 8b556d39d279c871469f6de665191dfbd502d564 (patch) | |
tree | f5eb409dd5a8ec0b7ec2b1fdaa794a16da8c3339 /prism/static_literals.c | |
parent | eb6eb1d4e8572fffd7bce6789eb8e87669293eef (diff) |
[ruby/prism] Refactor static literals into hash
https://github.com/ruby/prism/commit/62382d3967
Diffstat (limited to 'prism/static_literals.c')
-rw-r--r-- | prism/static_literals.c | 182 |
1 files changed, 141 insertions, 41 deletions
diff --git a/prism/static_literals.c b/prism/static_literals.c index 5fba5d790e..74bdf6ead0 100644 --- a/prism/static_literals.c +++ b/prism/static_literals.c @@ -1,40 +1,141 @@ #include "prism/static_literals.h" -/** - * Insert a node into the given sorted list. This will return false if the node - * was not already in the list, and true if it was. - */ -static pm_node_t * -pm_node_list_insert(const pm_parser_t *parser, pm_node_list_t *list, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) { - size_t low = 0; - size_t high = list->size; - - while (low < high) { - size_t mid = (low + high) / 2; - int result = compare(parser, list->nodes[mid], node); - - // If we find a match, then replace the old node with the new one and - // return the old one. - if (result == 0) { - pm_node_t *result = list->nodes[mid]; - list->nodes[mid] = node; - return result; +static inline uint32_t +murmur_scramble(uint32_t value) { + value *= 0xcc9e2d51; + value = (value << 15) | (value >> 17); + value *= 0x1b873593; + return value; +} + +static uint32_t +murmur_hash(const uint8_t *key, size_t length) { + uint32_t h = 0x9747b28c; + uint32_t k; + + /* Read in groups of 4. */ + for (size_t i = length >> 2; i; i--) { + // Here is a source of differing results across endiannesses. + // A swap here has no effects on hash properties though. + memcpy(&k, key, sizeof(uint32_t)); + key += sizeof(uint32_t); + h ^= murmur_scramble(k); + h = (h << 13) | (h >> 19); + h = h * 5 + 0xe6546b64; + } + + /* Read the rest. */ + k = 0; + for (size_t i = length & 3; i; i--) { + k <<= 8; + k |= key[i - 1]; + } + + // A swap is *not* necessary here because the preceding loop already + // places the low bytes in the low places according to whatever endianness + // we use. Swaps only apply when the memory is copied in a chunk. + h ^= murmur_scramble(k); + + /* Finalize. */ + h ^= length; + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +static uint32_t +node_hash(const pm_parser_t *parser, const pm_node_t *node) { + switch (PM_NODE_TYPE(node)) { + case PM_INTEGER_NODE: { + const pm_integer_t *integer = &((const pm_integer_node_t *) node)->value; + const uint32_t *value = &integer->head.value; + + uint32_t hash = murmur_hash((const uint8_t *) value, sizeof(uint32_t)); + for (const pm_integer_word_t *word = integer->head.next; word != NULL; word = word->next) { + value = &word->value; + hash ^= murmur_hash((const uint8_t *) value, sizeof(uint32_t)); + } + + return hash; + } + case PM_SOURCE_LINE_NODE: { + const pm_line_column_t line_column = pm_newline_list_line_column(&parser->newline_list, node->location.start, parser->start_line); + const int32_t *value = &line_column.line; + return murmur_hash((const uint8_t *) value, sizeof(int32_t)); } + case PM_FLOAT_NODE: { + const double *value = &((const pm_float_node_t *) node)->value; + return murmur_hash((const uint8_t *) value, sizeof(double)); + } + case PM_RATIONAL_NODE: { + const pm_node_t *numeric = ((const pm_rational_node_t *) node)->numeric; + return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type); + } + case PM_IMAGINARY_NODE: { + const pm_node_t *numeric = ((const pm_imaginary_node_t *) node)->numeric; + return node_hash(parser, numeric) ^ murmur_scramble((uint32_t) node->type); + } + case PM_STRING_NODE: { + const pm_string_t *value = &((const pm_string_node_t *) node)->unescaped; + return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags); + } + case PM_SOURCE_FILE_NODE: { + const pm_string_t *value = &((const pm_source_file_node_t *) node)->filepath; + return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags); + } + case PM_REGULAR_EXPRESSION_NODE: { + const pm_string_t *value = &((const pm_regular_expression_node_t *) node)->unescaped; + return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags); + } + case PM_SYMBOL_NODE: { + const pm_string_t *value = &((const pm_symbol_node_t *) node)->unescaped; + return murmur_hash(pm_string_source(value), pm_string_length(value) * sizeof(uint8_t)) ^ murmur_scramble((uint32_t) node->flags); + } + default: + assert(false && "unreachable"); + return 0; + } +} + +static pm_node_t * +pm_node_hash_insert(const pm_parser_t *parser, pm_node_hash_t *hash, pm_node_t *node, int (*compare)(const pm_parser_t *parser, const pm_node_t *left, const pm_node_t *right)) { + if (hash->size * 2 >= hash->capacity) { + size_t new_capacity = hash->capacity == 0 ? 4 : hash->capacity * 2; + pm_node_t **new_nodes = calloc(new_capacity, sizeof(pm_node_t *)); + if (new_nodes == NULL) return NULL; + + for (size_t i = 0; i < hash->capacity; i++) { + pm_node_t *node = hash->nodes[i]; - if (result < 0) { - low = mid + 1; - } else { - high = mid; + if (node != NULL) { + size_t index = node_hash(parser, node) % new_capacity; + new_nodes[index] = node; + } } + + hash->nodes = new_nodes; + hash->capacity = new_capacity; + } + + size_t index = node_hash(parser, node) % hash->capacity; + while (hash->nodes[index] != NULL) { + if (compare(parser, hash->nodes[index], node) == 0) break; + index = (index + 1) % hash->capacity; } - pm_node_list_grow(list); - memmove(&list->nodes[low + 1], &list->nodes[low], (list->size - low) * sizeof(pm_node_t *)); + pm_node_t *result = hash->nodes[index]; + if (result == NULL) hash->size++; - list->nodes[low] = node; - list->size++; + hash->nodes[index] = node; + return result; +} - return NULL; +static void +pm_node_hash_free(pm_node_hash_t *hash) { + if (hash->capacity > 0) free(hash->nodes); } /** @@ -168,19 +269,19 @@ pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals switch (PM_NODE_TYPE(node)) { case PM_INTEGER_NODE: case PM_SOURCE_LINE_NODE: - return pm_node_list_insert(parser, &literals->integer_nodes, node, pm_compare_integer_nodes); + return pm_node_hash_insert(parser, &literals->integer_nodes, node, pm_compare_integer_nodes); case PM_FLOAT_NODE: - return pm_node_list_insert(parser, &literals->float_nodes, node, pm_compare_float_nodes); + return pm_node_hash_insert(parser, &literals->float_nodes, node, pm_compare_float_nodes); case PM_RATIONAL_NODE: case PM_IMAGINARY_NODE: - return pm_node_list_insert(parser, &literals->rational_nodes, node, pm_compare_number_nodes); + return pm_node_hash_insert(parser, &literals->number_nodes, node, pm_compare_number_nodes); case PM_STRING_NODE: case PM_SOURCE_FILE_NODE: - return pm_node_list_insert(parser, &literals->string_nodes, node, pm_compare_string_nodes); + return pm_node_hash_insert(parser, &literals->string_nodes, node, pm_compare_string_nodes); case PM_REGULAR_EXPRESSION_NODE: - return pm_node_list_insert(parser, &literals->regexp_nodes, node, pm_compare_regular_expression_nodes); + return pm_node_hash_insert(parser, &literals->regexp_nodes, node, pm_compare_regular_expression_nodes); case PM_SYMBOL_NODE: - return pm_node_list_insert(parser, &literals->symbol_nodes, node, pm_compare_string_nodes); + return pm_node_hash_insert(parser, &literals->symbol_nodes, node, pm_compare_string_nodes); case PM_TRUE_NODE: { pm_node_t *duplicated = literals->true_node; literals->true_node = node; @@ -211,11 +312,10 @@ pm_static_literals_add(const pm_parser_t *parser, pm_static_literals_t *literals */ void pm_static_literals_free(pm_static_literals_t *literals) { - pm_node_list_free(&literals->integer_nodes); - pm_node_list_free(&literals->float_nodes); - pm_node_list_free(&literals->rational_nodes); - pm_node_list_free(&literals->imaginary_nodes); - pm_node_list_free(&literals->string_nodes); - pm_node_list_free(&literals->regexp_nodes); - pm_node_list_free(&literals->symbol_nodes); + pm_node_hash_free(&literals->integer_nodes); + pm_node_hash_free(&literals->float_nodes); + pm_node_hash_free(&literals->number_nodes); + pm_node_hash_free(&literals->string_nodes); + pm_node_hash_free(&literals->regexp_nodes); + pm_node_hash_free(&literals->symbol_nodes); } |