diff options
author | Benoit Daloze <[email protected]> | 2023-07-29 16:49:54 +0200 |
---|---|---|
committer | Takashi Kokubun <[email protected]> | 2023-08-16 17:47:32 -0700 |
commit | e712bc9b937c7b9c2993f0d3289f64bb81c70970 (patch) | |
tree | 459415ad2c907e956912dd037e6952d124e5574d /yarp/extension.c | |
parent | 2ccaaaa1017fa411134648bbaa6fa8f8b875e16d (diff) |
[ruby/yarp] Move efficient file reading using demand paging to librubyparser
* So it can be reused by the Fiddle backend, etc., and not just by the C extension.
* Add YP_STRING_MAPPED to provide a consistent interface through yp_string_t.
That way yp_string_free() can be used just as it is for other string types.
* Fix handling of empty files when neither HAVE_MMAP nor _WIN32 is defined (!HAVE_MMAP && !_WIN32).
https://github.com/ruby/yarp/commit/e40bc35801
Notes:
Merged: https://github.com/ruby/ruby/pull/8226
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- | yarp/extension.c | 192 |
1 files changed, 29 insertions, 163 deletions
diff --git a/yarp/extension.c b/yarp/extension.c index 36e9941d64..a189b1a876 100644 --- a/yarp/extension.c +++ b/yarp/extension.c @@ -14,14 +14,6 @@ VALUE rb_cYARPParseResult; /* IO of Ruby code */ /******************************************************************************/ -// Represents an input of Ruby code. It can either be coming from a file or a -// string. If it's a file, we'll use demand paging to read the contents of the -// file into a string. If it's already a string, we'll reference it directly. -typedef struct { - const char *source; - size_t size; -} input_t; - // Check if the given filepath is a string. If it's nil, then return NULL. If // it's not a string, then raise a type error. Otherwise return the filepath as // a C string. @@ -41,142 +33,15 @@ check_filepath(VALUE filepath) { return StringValueCStr(filepath); } -// Read the file indicated by the filepath parameter into source and load its -// contents and size into the given input_t. -// -// We want to use demand paging as much as possible in order to avoid having to -// read the entire file into memory (which could be detrimental to performance -// for large files). This means that if we're on windows we'll use -// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use -// `mmap`, and on other POSIX systems we'll use `read`. -static int -input_load_filepath(input_t *input, const char *filepath) { -#ifdef _WIN32 - // Open the file for reading. - HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - - if (file == INVALID_HANDLE_VALUE) { - perror("CreateFile failed"); - return 1; - } - - // Get the file size. - DWORD file_size = GetFileSize(file, NULL); - if (file_size == INVALID_FILE_SIZE) { - CloseHandle(file); - perror("GetFileSize failed"); - return 1; - } - - // If the file is empty, then we don't need to do anything else, we'll set - // the source to a constant empty string and return. 
- if (!file_size) { - CloseHandle(file); - input->size = 0; - input->source = ""; - return 0; - } - - // Create a mapping of the file. - HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); - if (mapping == NULL) { - CloseHandle(file); - perror("CreateFileMapping failed"); - return 1; - } - - // Map the file into memory. - input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0); - CloseHandle(mapping); - CloseHandle(file); - - if (input->source == NULL) { - perror("MapViewOfFile failed"); - return 1; - } - - // Set the size of the source. - input->size = (size_t) file_size; - return 0; -#else - // Open the file for reading - int fd = open(filepath, O_RDONLY); - if (fd == -1) { - perror("open"); - return 1; - } - - // Stat the file to get the file size - struct stat sb; - if (fstat(fd, &sb) == -1) { - close(fd); - perror("fstat"); - return 1; - } - - // mmap the file descriptor to virtually get the contents - input->size = sb.st_size; - -#ifdef HAVE_MMAP - if (!input->size) { - close(fd); - input->source = ""; - return 0; - } - - const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0); - if (result == MAP_FAILED) { - perror("Map failed"); - return 1; - } else { - input->source = result; - } -#else - input->source = malloc(input->size); - if (input->source == NULL) return 1; - - ssize_t read_size = read(fd, (void *) input->source, input->size); - if (read_size < 0 || (size_t)read_size != input->size) { - perror("Read size is incorrect"); - free((void *) input->source); - return 1; - } -#endif - - close(fd); - return 0; -#endif -} - -// Load the contents and size of the given string into the given input_t. +// Load the contents and size of the given string into the given yp_string_t. static void -input_load_string(input_t *input, VALUE string) { +input_load_string(yp_string_t *input, VALUE string) { // Check if the string is a string. If it's not, then raise a type error. 
if (!RB_TYPE_P(string, T_STRING)) { rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string)); } - input->source = RSTRING_PTR(string); - input->size = RSTRING_LEN(string); -} - -// Free any resources associated with the given input_t. This is the corollary -// function to source_file_load. It will unmap the file if it was mapped, or -// free the memory if it was allocated. -static void -input_unload_filepath(input_t *input) { - // We don't need to free anything with 0 sized files because we handle that - // with a constant string instead. - if (!input->size) return; - void *memory = (void *) input->source; - -#if defined(_WIN32) - UnmapViewOfFile(memory); -#elif defined(HAVE_MMAP) - munmap(memory, input->size); -#else - free(memory); -#endif + yp_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string)); } /******************************************************************************/ @@ -185,14 +50,14 @@ input_unload_filepath(input_t *input) { // Dump the AST corresponding to the given input to a string. static VALUE -dump_input(input_t *input, const char *filepath) { +dump_input(yp_string_t *input, const char *filepath) { yp_buffer_t buffer; if (!yp_buffer_init(&buffer)) { rb_raise(rb_eNoMemError, "failed to allocate memory"); } yp_parser_t parser; - yp_parser_init(&parser, input->source, input->size, filepath); + yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath); yp_node_t *node = yp_parse(&parser, false); yp_serialize(&parser, node, &buffer); @@ -212,7 +77,7 @@ dump(int argc, VALUE *argv, VALUE self) { VALUE filepath; rb_scan_args(argc, argv, "11", &string, &filepath); - input_t input; + yp_string_t input; input_load_string(&input, string); return dump_input(&input, check_filepath(filepath)); } @@ -220,13 +85,13 @@ dump(int argc, VALUE *argv, VALUE self) { // Dump the AST corresponding to the given file to a string. 
static VALUE dump_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; VALUE value = dump_input(&input, checked); - input_unload_filepath(&input); + yp_string_free(&input); return value; } @@ -356,13 +221,13 @@ lex_encoding_changed_callback(yp_parser_t *parser) { // Return an array of tokens corresponding to the given source. static VALUE -lex_input(input_t *input, const char *filepath) { +lex_input(yp_string_t *input, const char *filepath) { yp_parser_t parser; - yp_parser_init(&parser, input->source, input->size, filepath); + yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath); yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback); VALUE offsets = rb_ary_new(); - VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets }; + VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets }; VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource); lex_data_t lex_data = { @@ -410,7 +275,7 @@ lex(int argc, VALUE *argv, VALUE self) { VALUE filepath; rb_scan_args(argc, argv, "11", &string, &filepath); - input_t input; + yp_string_t input; input_load_string(&input, string); return lex_input(&input, check_filepath(filepath)); } @@ -418,13 +283,13 @@ lex(int argc, VALUE *argv, VALUE self) { // Return an array of tokens corresponding to the given file. 
static VALUE lex_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; VALUE value = lex_input(&input, checked); - input_unload_filepath(&input); + yp_string_free(&input); return value; } @@ -435,9 +300,9 @@ lex_file(VALUE self, VALUE filepath) { // Parse the given input and return a ParseResult instance. static VALUE -parse_input(input_t *input, const char *filepath) { +parse_input(yp_string_t *input, const char *filepath) { yp_parser_t parser; - yp_parser_init(&parser, input->source, input->size, filepath); + yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath); yp_node_t *node = yp_parse(&parser, false); rb_encoding *encoding = rb_enc_find(parser.encoding.name); @@ -466,13 +331,14 @@ parse(int argc, VALUE *argv, VALUE self) { VALUE filepath; rb_scan_args(argc, argv, "11", &string, &filepath); - input_t input; + yp_string_t input; input_load_string(&input, string); #ifdef YARP_DEBUG_MODE_BUILD - char* dup = malloc(input.size); - memcpy(dup, input.source, input.size); - input.source = dup; + size_t length = yp_string_length(&input); + char* dup = malloc(length); + memcpy(dup, yp_string_source(&input), length); + yp_string_constant_init(&input, dup, length); #endif VALUE value = parse_input(&input, check_filepath(filepath)); @@ -487,13 +353,13 @@ parse(int argc, VALUE *argv, VALUE self) { // Parse the given file and return a ParseResult instance. 
static VALUE parse_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; VALUE value = parse_input(&input, checked); - input_unload_filepath(&input); + yp_string_free(&input); return value; } @@ -586,13 +452,13 @@ memsize(VALUE self, VALUE string) { // parser for memory and speed. static VALUE profile_file(VALUE self, VALUE filepath) { - input_t input; + yp_string_t input; const char *checked = check_filepath(filepath); - if (input_load_filepath(&input, checked) != 0) return Qnil; + if (!yp_string_mapped_init(&input, checked)) return Qnil; yp_parser_t parser; - yp_parser_init(&parser, input.source, input.size, checked); + yp_parser_init(&parser, yp_string_source(&input), yp_string_length(&input), checked); yp_node_t *node = yp_parse(&parser, false); yp_node_destroy(&parser, node); |