summaryrefslogtreecommitdiff
path: root/yarp/extension.c
diff options
context:
space:
mode:
authorJemma Issroff <[email protected]>2023-06-20 11:53:02 -0400
committerTakashi Kokubun <[email protected]>2023-06-21 11:25:39 -0700
commitcc7f765f2c12a9ba050b0d95f9d85f3923c8d944 (patch)
tree5b5c60c1950240900dc749773083324a0e39748a /yarp/extension.c
parent08478fefca827276d68e33f2e6a5940c85957a51 (diff)
[Feature #19741] Sync all files in yarp
This commit is the initial sync of all files from ruby/yarp into ruby/ruby. Notably, it does the following:
* Sync all ruby/yarp/lib/ files to ruby/ruby/lib/yarp
* Sync all ruby/yarp/src/ files to ruby/ruby/yarp/
* Sync all ruby/yarp/test/ files to ruby/ruby/test/yarp
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/7964
Diffstat (limited to 'yarp/extension.c')
-rw-r--r--yarp/extension.c556
1 files changed, 556 insertions, 0 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
new file mode 100644
index 0000000000..2cec9814b1
--- /dev/null
+++ b/yarp/extension.c
@@ -0,0 +1,556 @@
+#include "extension.h"
+
+// Handles to the YARP module and the classes nested under it. They are
+// assigned in Init_yarp.
+VALUE rb_cYARP;
+VALUE rb_cYARPToken;
+VALUE rb_cYARPLocation;
+
+// Handles to the classes that this file instantiates when it builds the result
+// of a lex or a parse. They are also assigned in Init_yarp.
+VALUE rb_cYARPComment;
+VALUE rb_cYARPParseError;
+VALUE rb_cYARPParseWarning;
+VALUE rb_cYARPParseResult;
+
+// Represents a source of Ruby code. It can either be coming from a file or a
+// string. If it's a file, the contents are either mmap'd or read into a
+// malloc'd buffer, depending on the platform. If it's a string, it just points
+// to the contents of the string.
+typedef struct {
+ // Whether the contents came from a file or from a Ruby string.
+ enum { SOURCE_FILE, SOURCE_STRING } type;
+ // Pointer to the first byte of the source code.
+ const char *source;
+ // Number of bytes available at source.
+ size_t size;
+} source_t;
+
+// Read the file indicated by the filepath parameter and load its contents and
+// size into the given source_t.
+//
+// Returns 0 on success and 1 on failure. On failure a message is printed to
+// stderr and every resource acquired along the way (file handle, descriptor,
+// buffer) has already been released, so the caller must not unload the source.
+// On success the caller owns the contents and must release them with
+// source_file_unload.
+//
+// StringValueCStr may raise if filepath is not a valid path string. It is
+// always evaluated before any resource is acquired, so nothing leaks.
+static int
+source_file_load(source_t *source, VALUE filepath) {
+    source->type = SOURCE_FILE;
+
+#ifdef _WIN32
+    HANDLE file = CreateFile(
+        StringValueCStr(filepath),
+        GENERIC_READ,
+        0,
+        NULL,
+        OPEN_EXISTING,
+        FILE_ATTRIBUTE_NORMAL,
+        NULL
+    );
+    if (file == INVALID_HANDLE_VALUE) {
+        perror("Invalid handle for file");
+        return 1;
+    }
+
+    DWORD file_size = GetFileSize(file, NULL);
+    if (file_size == INVALID_FILE_SIZE) {
+        perror("GetFileSize failed");
+        CloseHandle(file);
+        return 1;
+    }
+
+    // malloc(0) is allowed to return NULL, so always request at least one byte
+    // in order to be able to tell an empty file apart from a failed allocation.
+    void *contents = malloc(file_size > 0 ? file_size : 1);
+    if (contents == NULL) {
+        perror("malloc");
+        CloseHandle(file);
+        return 1;
+    }
+
+    DWORD bytes_read = 0;
+    BOOL success = ReadFile(file, contents, file_size, &bytes_read, NULL);
+    CloseHandle(file);
+
+    if (!success || bytes_read != file_size) {
+        perror("ReadFile failed");
+        free(contents);
+        return 1;
+    }
+
+    source->source = contents;
+    source->size = (size_t) file_size;
+    return 0;
+#else
+    // Open the file for reading
+    int fd = open(StringValueCStr(filepath), O_RDONLY);
+    if (fd == -1) {
+        perror("open");
+        return 1;
+    }
+
+    // Stat the file to get the file size
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        // Report the error before closing, since close can overwrite errno.
+        perror("fstat");
+        close(fd);
+        return 1;
+    }
+
+    source->size = (size_t) sb.st_size;
+
+#ifdef HAVE_MMAP
+    // A zero-length mapping is an error, so represent an empty file with a
+    // static empty string instead.
+    if (source->size == 0) {
+        close(fd);
+        source->source = "";
+        return 0;
+    }
+
+    // mmap the file descriptor to virtually get the contents
+    void *mapped = mmap(NULL, source->size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (mapped == MAP_FAILED) {
+        perror("Map failed");
+        close(fd);
+        return 1;
+    }
+
+    source->source = mapped;
+#else
+    // malloc(0) is allowed to return NULL, so always request at least one byte
+    // in order to be able to tell an empty file apart from a failed allocation.
+    char *contents = malloc(source->size > 0 ? source->size : 1);
+    if (contents == NULL) {
+        close(fd);
+        return 1;
+    }
+
+    ssize_t read_size = read(fd, contents, source->size);
+    if (read_size < 0 || (size_t) read_size != source->size) {
+        perror("Read size is incorrect");
+        free(contents);
+        close(fd);
+        return 1;
+    }
+
+    source->source = contents;
+#endif
+
+    // The descriptor is no longer needed once the contents are mapped or read.
+    close(fd);
+    return 0;
+#endif
+}
+
+// Point the given source_t at the bytes of the given Ruby string. Nothing is
+// copied, so the source only borrows the string's buffer.
+static void
+source_string_load(source_t *source, VALUE string) {
+    source->type = SOURCE_STRING;
+    source->source = RSTRING_PTR(string);
+    source->size = RSTRING_LEN(string);
+}
+
+// Free any resources associated with the given source_t. This must only be
+// called on a source that was successfully loaded by source_file_load.
+static void
+source_file_unload(source_t *source) {
+#if !defined(_WIN32) && defined(HAVE_MMAP)
+    // An empty file is never mapped (its source points at a static empty
+    // string), so there is nothing to unmap in that case.
+    if (source->size > 0) {
+        munmap((void *) source->source, source->size);
+    }
+#else
+    free((void *) source->source);
+#endif
+}
+
+// Parse the given source and serialize the resulting AST into a Ruby string.
+//
+// filepath is handed to the parser as-is and may be NULL. Raises NoMemoryError
+// if the serialization buffer cannot be allocated.
+static VALUE
+dump_source(source_t *source, const char *filepath) {
+    yp_parser_t parser;
+    yp_parser_init(&parser, source->source, source->size, filepath);
+
+    yp_node_t *node = yp_parse(&parser);
+
+    yp_buffer_t buffer;
+    if (!yp_buffer_init(&buffer)) {
+        // rb_raise does not return, so the node and the parser have to be
+        // released before raising or they would be leaked.
+        yp_node_destroy(&parser, node);
+        yp_parser_free(&parser);
+        rb_raise(rb_eNoMemError, "failed to allocate memory");
+    }
+
+    yp_serialize(&parser, node, &buffer);
+    VALUE dumped = rb_str_new(buffer.value, buffer.length);
+
+    yp_node_destroy(&parser, node);
+    yp_buffer_free(&buffer);
+    yp_parser_free(&parser);
+
+    return dumped;
+}
+
+// YARP.dump(string, filepath): serialize the AST of the given string. The
+// filepath may be nil, in which case the parser is given a NULL filepath.
+static VALUE
+dump(VALUE self, VALUE string, VALUE filepath) {
+    source_t source;
+    source_string_load(&source, string);
+
+    char *filepath_cstr = NIL_P(filepath) ? NULL : StringValueCStr(filepath);
+    return dump_source(&source, filepath_cstr);
+}
+
+// YARP.dump_file(filepath): serialize the AST of the file at the given path.
+// Returns nil if the file could not be loaded.
+static VALUE
+dump_file(VALUE self, VALUE filepath) {
+    source_t source;
+    int status = source_file_load(&source, filepath);
+    if (status != 0) {
+        return Qnil;
+    }
+
+    const char *path = StringValueCStr(filepath);
+    VALUE dumped = dump_source(&source, path);
+
+    source_file_unload(&source);
+    return dumped;
+}
+
+// Walk the parser's comment list and build a Ruby array holding one
+// rb_cYARPComment instance per comment. Each instance is created from a type
+// symbol (:inline, :embdoc, or :__END__) and a location whose start and end are
+// offsets from the start of the source.
+static VALUE
+parser_comments(yp_parser_t *parser) {
+    VALUE comments = rb_ary_new();
+
+    yp_comment_t *current = (yp_comment_t *) parser->comment_list.head;
+    while (current != NULL) {
+        // Unknown comment types are reported as inline comments.
+        const char *type_name;
+        switch (current->type) {
+            case YP_COMMENT_EMBDOC:
+                type_name = "embdoc";
+                break;
+            case YP_COMMENT___END__:
+                type_name = "__END__";
+                break;
+            case YP_COMMENT_INLINE:
+            default:
+                type_name = "inline";
+                break;
+        }
+        VALUE type = ID2SYM(rb_intern(type_name));
+
+        VALUE location_argv[2];
+        location_argv[0] = LONG2FIX(current->start - parser->start);
+        location_argv[1] = LONG2FIX(current->end - parser->start);
+
+        VALUE comment_argv[2];
+        comment_argv[0] = type;
+        comment_argv[1] = rb_class_new_instance(2, location_argv, rb_cYARPLocation);
+
+        rb_ary_push(comments, rb_class_new_instance(2, comment_argv, rb_cYARPComment));
+
+        current = (yp_comment_t *) current->node.next;
+    }
+
+    return comments;
+}
+
+// Extract the errors out of the parser into an array.
+static VALUE
+parser_errors(yp_parser_t *parser, rb_encoding *encoding) {
+ VALUE errors = rb_ary_new();
+ yp_diagnostic_t *error;
+
+ for (error = (yp_diagnostic_t *) parser->error_list.head; error != NULL; error = (yp_diagnostic_t *) error->node.next) {
+ VALUE location_argv[] = {
+ LONG2FIX(error->start - parser->start),
+ LONG2FIX(error->end - parser->start)
+ };
+
+ VALUE error_argv[] = {
+ rb_enc_str_new_cstr(error->message, encoding),
+ rb_class_new_instance(2, location_argv, rb_cYARPLocation)
+ };
+
+ rb_ary_push(errors, rb_class_new_instance(2, error_argv, rb_cYARPParseError));
+ }
+
+ return errors;
+}
+
+// Walk the parser's warning list and build a Ruby array holding one
+// rb_cYARPParseWarning instance per diagnostic. Each instance is created from
+// the message (as a string in the given encoding) and a location whose start
+// and end are offsets from the start of the source.
+static VALUE
+parser_warnings(yp_parser_t *parser, rb_encoding *encoding) {
+    VALUE warnings = rb_ary_new();
+
+    yp_diagnostic_t *current = (yp_diagnostic_t *) parser->warning_list.head;
+    while (current != NULL) {
+        VALUE location_argv[2];
+        location_argv[0] = LONG2FIX(current->start - parser->start);
+        location_argv[1] = LONG2FIX(current->end - parser->start);
+
+        VALUE warning_argv[2];
+        warning_argv[0] = rb_enc_str_new_cstr(current->message, encoding);
+        warning_argv[1] = rb_class_new_instance(2, location_argv, rb_cYARPLocation);
+
+        rb_ary_push(warnings, rb_class_new_instance(2, warning_argv, rb_cYARPParseWarning));
+
+        current = (yp_diagnostic_t *) current->node.next;
+    }
+
+    return warnings;
+}
+
+// State shared between lex_source and the callbacks it registers on the parser
+// while lexing.
+typedef struct {
+ // Ruby array that lex_token appends a [token, lex_state] pair to.
+ VALUE tokens;
+ // Encoding used when building tokens. It is replaced by
+ // lex_encoding_changed_callback when the parser's encoding changes.
+ rb_encoding *encoding;
+} lex_data_t;
+
+// Lex callback: append a [token, lex_state] pair for the given token to the
+// tokens array of the lex_data_t registered on the parser. The lex data is read
+// through the parser's lex callback rather than through the data parameter.
+static void
+lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
+    lex_data_t *state = (lex_data_t *) parser->lex_callback->data;
+
+    VALUE pair = rb_ary_new_capa(2);
+
+    VALUE token_value = yp_token_new(parser, token, state->encoding);
+    rb_ary_push(pair, token_value);
+
+    VALUE lex_state = INT2FIX(parser->lex_state);
+    rb_ary_push(pair, lex_state);
+
+    rb_ary_push(state->tokens, pair);
+}
+
+// Encoding-changed callback: look up the Ruby encoding matching the parser's
+// current encoding name and store it in the lex_data_t registered on the
+// parser.
+static void
+lex_encoding_changed_callback(yp_parser_t *parser) {
+    lex_data_t *state = (lex_data_t *) parser->lex_callback->data;
+    rb_encoding *found = rb_enc_find(parser->encoding.name);
+    state->encoding = found;
+}
+
+// Lex the given source and return an rb_cYARPParseResult instance built from
+// the collected [token, lex_state] pairs, the comments, the errors, and the
+// warnings. The encoding starts out as UTF-8 and follows the parser through the
+// encoding-changed callback.
+static VALUE
+lex_source(source_t *source, char *filepath) {
+    yp_parser_t parser;
+    yp_parser_init(&parser, source->source, source->size, filepath);
+    yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
+
+    lex_data_t state = {
+        .tokens = rb_ary_new(),
+        .encoding = rb_utf8_encoding()
+    };
+
+    // Both callbacks find the lex data through parser.lex_callback->data.
+    yp_lex_callback_t callback = {
+        .data = (void *) &state,
+        .callback = lex_token,
+    };
+    parser.lex_callback = &callback;
+
+    yp_node_t *node = yp_parse(&parser);
+
+    VALUE result_argv[4];
+    result_argv[0] = state.tokens;
+    result_argv[1] = parser_comments(&parser);
+    result_argv[2] = parser_errors(&parser, state.encoding);
+    result_argv[3] = parser_warnings(&parser, state.encoding);
+
+    VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
+
+    yp_node_destroy(&parser, node);
+    yp_parser_free(&parser);
+
+    return result;
+}
+
+// YARP.lex(string, filepath): lex the given string and return the result of
+// lex_source. The filepath may be nil, in which case the parser is given a NULL
+// filepath.
+static VALUE
+lex(VALUE self, VALUE string, VALUE filepath) {
+    source_t source;
+    source_string_load(&source, string);
+
+    // nil is not a zero VALUE, so it has to be tested with NIL_P. A plain
+    // truthiness check would hand nil to StringValueCStr, which raises.
+    char *filepath_char = NULL;
+    if (!NIL_P(filepath)) {
+        filepath_char = StringValueCStr(filepath);
+    }
+
+    return lex_source(&source, filepath_char);
+}
+
+// YARP.lex_file(filepath): lex the file at the given path and return the result
+// of lex_source. Returns nil if the file could not be loaded.
+static VALUE
+lex_file(VALUE self, VALUE filepath) {
+    source_t source;
+    int status = source_file_load(&source, filepath);
+    if (status != 0) {
+        return Qnil;
+    }
+
+    char *path = StringValueCStr(filepath);
+    VALUE lexed = lex_source(&source, path);
+
+    source_file_unload(&source);
+    return lexed;
+}
+
+// Parse the given source and return an rb_cYARPParseResult instance built from
+// the Ruby AST, the comments, the errors, and the warnings. Strings are created
+// in the Ruby encoding matching the encoding name the parser ended up with.
+static VALUE
+parse_source(source_t *source, char *filepath) {
+    yp_parser_t parser;
+    yp_parser_init(&parser, source->source, source->size, filepath);
+
+    yp_node_t *node = yp_parse(&parser);
+    rb_encoding *encoding = rb_enc_find(parser.encoding.name);
+
+    VALUE result_argv[4];
+    result_argv[0] = yp_ast_new(&parser, node, encoding);
+    result_argv[1] = parser_comments(&parser);
+    result_argv[2] = parser_errors(&parser, encoding);
+    result_argv[3] = parser_warnings(&parser, encoding);
+
+    VALUE result = rb_class_new_instance(4, result_argv, rb_cYARPParseResult);
+
+    yp_node_destroy(&parser, node);
+    yp_parser_free(&parser);
+
+    return result;
+}
+
+// YARP._parse(string, filepath): parse the given string and return the result
+// of parse_source. The filepath may be nil, in which case the parser is given a
+// NULL filepath.
+//
+// When YARP_DEBUG_MODE_BUILD is defined, the parser is run against a heap copy
+// of the string's bytes that is exactly as long as the string. In that mode
+// NoMemoryError is raised if the copy cannot be allocated.
+static VALUE
+parse(VALUE self, VALUE string, VALUE filepath) {
+    source_t source;
+    source_string_load(&source, string);
+
+    // Resolve the filepath before allocating anything: StringValueCStr can
+    // raise, and raising after the allocation below would leak the copy.
+    char *filepath_cstr = NIL_P(filepath) ? NULL : StringValueCStr(filepath);
+
+#ifdef YARP_DEBUG_MODE_BUILD
+    char *copy = malloc(source.size);
+    // malloc(0) is allowed to return NULL, so NULL is only an error when there
+    // are bytes to copy.
+    if (copy == NULL && source.size > 0) {
+        rb_raise(rb_eNoMemError, "failed to allocate memory");
+    }
+    if (source.size > 0) {
+        memcpy(copy, source.source, source.size);
+    }
+    source.source = copy;
+#endif
+
+    VALUE value = parse_source(&source, filepath_cstr);
+
+#ifdef YARP_DEBUG_MODE_BUILD
+    free(copy);
+#endif
+
+    return value;
+}
+
+// YARP.parse_file(filepath): parse the file at the given path and return the
+// result of parse_source. Returns nil if the file could not be loaded.
+static VALUE
+parse_file(VALUE self, VALUE rb_filepath) {
+    source_t source;
+    int status = source_file_load(&source, rb_filepath);
+    if (status != 0) return Qnil;
+
+    char *path = StringValueCStr(rb_filepath);
+    VALUE parsed = parse_source(&source, path);
+
+    source_file_unload(&source);
+    return parsed;
+}
+
+// YARP.named_captures(source): return an array with the names of the named
+// capture groups found in the given regular expression source, or nil if
+// yp_regexp_named_capture_group_names reports failure.
+static VALUE
+named_captures(VALUE self, VALUE rb_source) {
+    yp_string_list_t captures;
+    yp_string_list_init(&captures);
+
+    VALUE result = Qnil;
+
+    if (yp_regexp_named_capture_group_names(RSTRING_PTR(rb_source), RSTRING_LEN(rb_source), &captures)) {
+        result = rb_ary_new();
+
+        for (size_t position = 0; position < captures.length; position++) {
+            const yp_string_t *capture = &captures.strings[position];
+            VALUE name = rb_str_new(yp_string_source(capture), yp_string_length(capture));
+            rb_ary_push(result, name);
+        }
+    }
+
+    // The list is released on both the success and the failure path.
+    yp_string_list_free(&captures);
+    return result;
+}
+
+// Unescape the given Ruby string using the given unescape type. Returns the
+// unescaped contents as a new Ruby string, or nil if unescaping recorded any
+// error.
+static VALUE
+unescape(VALUE source, yp_unescape_type_t unescape_type) {
+    yp_list_t errors;
+    yp_list_init(&errors);
+
+    yp_string_t unescaped;
+    yp_unescape_manipulate_string(RSTRING_PTR(source), RSTRING_LEN(source), &unescaped, unescape_type, &errors);
+
+    VALUE result = yp_list_empty_p(&errors)
+        ? rb_str_new(yp_string_source(&unescaped), yp_string_length(&unescaped))
+        : Qnil;
+
+    yp_string_free(&unescaped);
+    yp_list_free(&errors);
+
+    return result;
+}
+
+// YARP.unescape_none(source): unescape the source with YP_UNESCAPE_NONE.
+static VALUE
+unescape_none(VALUE self, VALUE source) {
+    VALUE unescaped = unescape(source, YP_UNESCAPE_NONE);
+    return unescaped;
+}
+
+// YARP.unescape_minimal(source): unescape the source with YP_UNESCAPE_MINIMAL.
+static VALUE
+unescape_minimal(VALUE self, VALUE source) {
+    VALUE unescaped = unescape(source, YP_UNESCAPE_MINIMAL);
+    return unescaped;
+}
+
+// YARP.unescape_all(source): unescape the source with YP_UNESCAPE_ALL.
+static VALUE
+unescape_all(VALUE self, VALUE source) {
+    VALUE unescaped = unescape(source, YP_UNESCAPE_ALL);
+    return unescaped;
+}
+
+// YARP.memsize(string): parse the given source string and return a hash of
+// information about its memory usage, with the following keys:
+//
+//   :length     - the length of the source string in bytes
+//   :memsize    - the memsize reported by yp_node_memsize for the AST
+//   :node_count - the node count reported by yp_node_memsize for the AST
+static VALUE
+memsize(VALUE self, VALUE string) {
+    yp_parser_t parser;
+    size_t length = RSTRING_LEN(string);
+    yp_parser_init(&parser, RSTRING_PTR(string), length, NULL);
+
+    yp_node_t *node = yp_parse(&parser);
+    yp_memsize_t sizes;
+    yp_node_memsize(node, &sizes);
+
+    yp_node_destroy(&parser, node);
+    yp_parser_free(&parser);
+
+    // SIZET2NUM is used rather than INT2FIX because INT2FIX does no range
+    // checking, so a value too large for a Fixnum would be silently mangled.
+    // NOTE(review): assumes the yp_memsize_t fields are unsigned sizes, confirm
+    // against its declaration.
+    VALUE result = rb_hash_new();
+    rb_hash_aset(result, ID2SYM(rb_intern("length")), SIZET2NUM(length));
+    rb_hash_aset(result, ID2SYM(rb_intern("memsize")), SIZET2NUM(sizes.memsize));
+    rb_hash_aset(result, ID2SYM(rb_intern("node_count")), SIZET2NUM(sizes.node_count));
+    return result;
+}
+
+// YARP.compile(string): parse the given source string and return whatever
+// yp_compile produces for the resulting AST.
+static VALUE
+compile(VALUE self, VALUE string) {
+    size_t source_length = RSTRING_LEN(string);
+
+    yp_parser_t parser;
+    yp_parser_init(&parser, RSTRING_PTR(string), source_length, NULL);
+
+    yp_node_t *root = yp_parse(&parser);
+    VALUE compiled = yp_compile(root);
+
+    yp_node_destroy(&parser, root);
+    yp_parser_free(&parser);
+
+    return compiled;
+}
+
+// YARP.profile_file(filepath): parse the file at the given path and throw the
+// result away. Always returns nil, whether or not the file could be loaded.
+static VALUE
+profile_file(VALUE self, VALUE filepath) {
+    source_t source;
+    if (source_file_load(&source, filepath) != 0) return Qnil;
+
+    yp_parser_t parser;
+    yp_parser_init(&parser, source.source, source.size, StringValueCStr(filepath));
+
+    yp_node_t *node = yp_parse(&parser);
+    yp_node_destroy(&parser, node);
+    yp_parser_free(&parser);
+
+    // Release the file contents, otherwise every call would leak the mapping
+    // or the buffer acquired by source_file_load.
+    source_file_unload(&source);
+
+    return Qnil;
+}
+
+// YARP.newlines(string): parse the given source string and return a Ruby array
+// of integers holding every offset that the parser recorded in its newline
+// list while parsing.
+//
+// It accepts a string as its only argument and returns an array of integers.
+static VALUE
+newlines(VALUE self, VALUE string) {
+    size_t source_length = RSTRING_LEN(string);
+
+    yp_parser_t parser;
+    yp_parser_init(&parser, RSTRING_PTR(string), source_length, NULL);
+
+    // Only the newline list is of interest, so the AST is dropped right away.
+    yp_node_t *root = yp_parse(&parser);
+    yp_node_destroy(&parser, root);
+
+    VALUE offsets = rb_ary_new_capa(parser.newline_list.size);
+
+    size_t position = 0;
+    while (position < parser.newline_list.size) {
+        rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[position]));
+        position++;
+    }
+
+    yp_parser_free(&parser);
+    return offsets;
+}
+
+// Entry point of the extension. It defines the YARP module, the classes nested
+// under it, the VERSION constant, and the singleton methods implemented in this
+// file, then calls Init_yarp_pack.
+RUBY_FUNC_EXPORTED void
+Init_yarp(void) {
+ // Refuse to load if the YARP library reports a different version than the
+ // one this extension expects.
+ if (strcmp(yp_version(), EXPECTED_YARP_VERSION) != 0) {
+ rb_raise(rb_eRuntimeError, "The YARP library version (%s) does not match the expected version (%s)", yp_version(),
+ EXPECTED_YARP_VERSION);
+ }
+
+ // The module has to be defined first, since every class below is nested
+ // under it.
+ rb_cYARP = rb_define_module("YARP");
+ rb_cYARPToken = rb_define_class_under(rb_cYARP, "Token", rb_cObject);
+ rb_cYARPLocation = rb_define_class_under(rb_cYARP, "Location", rb_cObject);
+
+ rb_cYARPComment = rb_define_class_under(rb_cYARP, "Comment", rb_cObject);
+ rb_cYARPParseError = rb_define_class_under(rb_cYARP, "ParseError", rb_cObject);
+ rb_cYARPParseWarning = rb_define_class_under(rb_cYARP, "ParseWarning", rb_cObject);
+ rb_cYARPParseResult = rb_define_class_under(rb_cYARP, "ParseResult", rb_cObject);
+
+ // YARP::VERSION is a "major.minor.patch" string.
+ rb_define_const(rb_cYARP, "VERSION", rb_sprintf("%d.%d.%d", YP_VERSION_MAJOR, YP_VERSION_MINOR, YP_VERSION_PATCH));
+
+ // The last argument of each call is the number of arguments that the Ruby
+ // method accepts.
+ rb_define_singleton_method(rb_cYARP, "dump", dump, 2);
+ rb_define_singleton_method(rb_cYARP, "dump_file", dump_file, 1);
+
+ rb_define_singleton_method(rb_cYARP, "lex", lex, 2);
+ rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
+
+ // Note that parse is exposed to Ruby under the name _parse.
+ rb_define_singleton_method(rb_cYARP, "_parse", parse, 2);
+ rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
+
+ rb_define_singleton_method(rb_cYARP, "named_captures", named_captures, 1);
+
+ rb_define_singleton_method(rb_cYARP, "unescape_none", unescape_none, 1);
+ rb_define_singleton_method(rb_cYARP, "unescape_minimal", unescape_minimal, 1);
+ rb_define_singleton_method(rb_cYARP, "unescape_all", unescape_all, 1);
+
+ rb_define_singleton_method(rb_cYARP, "memsize", memsize, 1);
+
+ rb_define_singleton_method(rb_cYARP, "compile", compile, 1);
+
+ rb_define_singleton_method(rb_cYARP, "profile_file", profile_file, 1);
+
+ rb_define_singleton_method(rb_cYARP, "newlines", newlines, 1);
+
+ Init_yarp_pack();
+}