summary | refs | log | tree | commit | diff
path: root/yarp/extension.c
diff options
context:
space:
mode:
author: Benoit Daloze <[email protected]> 2023-07-29 16:49:54 +0200
committer: Takashi Kokubun <[email protected]> 2023-08-16 17:47:32 -0700
commit: e712bc9b937c7b9c2993f0d3289f64bb81c70970 (patch)
tree: 459415ad2c907e956912dd037e6952d124e5574d /yarp/extension.c
parent: 2ccaaaa1017fa411134648bbaa6fa8f8b875e16d (diff)
[ruby/yarp] Move efficient file reading using demand paging to librubyparser
* So it can be reused by the Fiddle backend, etc. and not just the C extension.
* Add YP_STRING_MAPPED to use a consistent interface for yp_string_t. That way yp_string_free() can be used like for other string types.
* Fix handling of empty file for !HAVE_MMAP && !_WIN32

https://github.com/ruby/yarp/commit/e40bc35801
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/8226
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- yarp/extension.c 192
1 files changed, 29 insertions, 163 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
index 36e9941d64..a189b1a876 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -14,14 +14,6 @@ VALUE rb_cYARPParseResult;
/* IO of Ruby code */
/******************************************************************************/
-// Represents an input of Ruby code. It can either be coming from a file or a
-// string. If it's a file, we'll use demand paging to read the contents of the
-// file into a string. If it's already a string, we'll reference it directly.
-typedef struct {
- const char *source;
- size_t size;
-} input_t;
-
// Check if the given filepath is a string. If it's nil, then return NULL. If
// it's not a string, then raise a type error. Otherwise return the filepath as
// a C string.
@@ -41,142 +33,15 @@ check_filepath(VALUE filepath) {
return StringValueCStr(filepath);
}
-// Read the file indicated by the filepath parameter into source and load its
-// contents and size into the given input_t.
-//
-// We want to use demand paging as much as possible in order to avoid having to
-// read the entire file into memory (which could be detrimental to performance
-// for large files). This means that if we're on windows we'll use
-// `MapViewOfFile`, on POSIX systems that have access to `mmap` we'll use
-// `mmap`, and on other POSIX systems we'll use `read`.
-static int
-input_load_filepath(input_t *input, const char *filepath) {
-#ifdef _WIN32
- // Open the file for reading.
- HANDLE file = CreateFile(filepath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-
- if (file == INVALID_HANDLE_VALUE) {
- perror("CreateFile failed");
- return 1;
- }
-
- // Get the file size.
- DWORD file_size = GetFileSize(file, NULL);
- if (file_size == INVALID_FILE_SIZE) {
- CloseHandle(file);
- perror("GetFileSize failed");
- return 1;
- }
-
- // If the file is empty, then we don't need to do anything else, we'll set
- // the source to a constant empty string and return.
- if (!file_size) {
- CloseHandle(file);
- input->size = 0;
- input->source = "";
- return 0;
- }
-
- // Create a mapping of the file.
- HANDLE mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL);
- if (mapping == NULL) {
- CloseHandle(file);
- perror("CreateFileMapping failed");
- return 1;
- }
-
- // Map the file into memory.
- input->source = (const char *) MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0);
- CloseHandle(mapping);
- CloseHandle(file);
-
- if (input->source == NULL) {
- perror("MapViewOfFile failed");
- return 1;
- }
-
- // Set the size of the source.
- input->size = (size_t) file_size;
- return 0;
-#else
- // Open the file for reading
- int fd = open(filepath, O_RDONLY);
- if (fd == -1) {
- perror("open");
- return 1;
- }
-
- // Stat the file to get the file size
- struct stat sb;
- if (fstat(fd, &sb) == -1) {
- close(fd);
- perror("fstat");
- return 1;
- }
-
- // mmap the file descriptor to virtually get the contents
- input->size = sb.st_size;
-
-#ifdef HAVE_MMAP
- if (!input->size) {
- close(fd);
- input->source = "";
- return 0;
- }
-
- const char *result = mmap(NULL, input->size, PROT_READ, MAP_PRIVATE, fd, 0);
- if (result == MAP_FAILED) {
- perror("Map failed");
- return 1;
- } else {
- input->source = result;
- }
-#else
- input->source = malloc(input->size);
- if (input->source == NULL) return 1;
-
- ssize_t read_size = read(fd, (void *) input->source, input->size);
- if (read_size < 0 || (size_t)read_size != input->size) {
- perror("Read size is incorrect");
- free((void *) input->source);
- return 1;
- }
-#endif
-
- close(fd);
- return 0;
-#endif
-}
-
-// Load the contents and size of the given string into the given input_t.
+// Load the contents and size of the given string into the given yp_string_t.
static void
-input_load_string(input_t *input, VALUE string) {
+input_load_string(yp_string_t *input, VALUE string) {
// Check if the string is a string. If it's not, then raise a type error.
if (!RB_TYPE_P(string, T_STRING)) {
rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(string));
}
- input->source = RSTRING_PTR(string);
- input->size = RSTRING_LEN(string);
-}
-
-// Free any resources associated with the given input_t. This is the corollary
-// function to source_file_load. It will unmap the file if it was mapped, or
-// free the memory if it was allocated.
-static void
-input_unload_filepath(input_t *input) {
- // We don't need to free anything with 0 sized files because we handle that
- // with a constant string instead.
- if (!input->size) return;
- void *memory = (void *) input->source;
-
-#if defined(_WIN32)
- UnmapViewOfFile(memory);
-#elif defined(HAVE_MMAP)
- munmap(memory, input->size);
-#else
- free(memory);
-#endif
+ yp_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string));
}
/******************************************************************************/
@@ -185,14 +50,14 @@ input_unload_filepath(input_t *input) {
// Dump the AST corresponding to the given input to a string.
static VALUE
-dump_input(input_t *input, const char *filepath) {
+dump_input(yp_string_t *input, const char *filepath) {
yp_buffer_t buffer;
if (!yp_buffer_init(&buffer)) {
rb_raise(rb_eNoMemError, "failed to allocate memory");
}
yp_parser_t parser;
- yp_parser_init(&parser, input->source, input->size, filepath);
+ yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
yp_node_t *node = yp_parse(&parser, false);
yp_serialize(&parser, node, &buffer);
@@ -212,7 +77,7 @@ dump(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
- input_t input;
+ yp_string_t input;
input_load_string(&input, string);
return dump_input(&input, check_filepath(filepath));
}
@@ -220,13 +85,13 @@ dump(int argc, VALUE *argv, VALUE self) {
// Dump the AST corresponding to the given file to a string.
static VALUE
dump_file(VALUE self, VALUE filepath) {
- input_t input;
+ yp_string_t input;
const char *checked = check_filepath(filepath);
- if (input_load_filepath(&input, checked) != 0) return Qnil;
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
VALUE value = dump_input(&input, checked);
- input_unload_filepath(&input);
+ yp_string_free(&input);
return value;
}
@@ -356,13 +221,13 @@ lex_encoding_changed_callback(yp_parser_t *parser) {
// Return an array of tokens corresponding to the given source.
static VALUE
-lex_input(input_t *input, const char *filepath) {
+lex_input(yp_string_t *input, const char *filepath) {
yp_parser_t parser;
- yp_parser_init(&parser, input->source, input->size, filepath);
+ yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
VALUE offsets = rb_ary_new();
- VALUE source_argv[] = { rb_str_new(input->source, input->size), offsets };
+ VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
lex_data_t lex_data = {
@@ -410,7 +275,7 @@ lex(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
- input_t input;
+ yp_string_t input;
input_load_string(&input, string);
return lex_input(&input, check_filepath(filepath));
}
@@ -418,13 +283,13 @@ lex(int argc, VALUE *argv, VALUE self) {
// Return an array of tokens corresponding to the given file.
static VALUE
lex_file(VALUE self, VALUE filepath) {
- input_t input;
+ yp_string_t input;
const char *checked = check_filepath(filepath);
- if (input_load_filepath(&input, checked) != 0) return Qnil;
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
VALUE value = lex_input(&input, checked);
- input_unload_filepath(&input);
+ yp_string_free(&input);
return value;
}
@@ -435,9 +300,9 @@ lex_file(VALUE self, VALUE filepath) {
// Parse the given input and return a ParseResult instance.
static VALUE
-parse_input(input_t *input, const char *filepath) {
+parse_input(yp_string_t *input, const char *filepath) {
yp_parser_t parser;
- yp_parser_init(&parser, input->source, input->size, filepath);
+ yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
yp_node_t *node = yp_parse(&parser, false);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
@@ -466,13 +331,14 @@ parse(int argc, VALUE *argv, VALUE self) {
VALUE filepath;
rb_scan_args(argc, argv, "11", &string, &filepath);
- input_t input;
+ yp_string_t input;
input_load_string(&input, string);
#ifdef YARP_DEBUG_MODE_BUILD
- char* dup = malloc(input.size);
- memcpy(dup, input.source, input.size);
- input.source = dup;
+ size_t length = yp_string_length(&input);
+ char* dup = malloc(length);
+ memcpy(dup, yp_string_source(&input), length);
+ yp_string_constant_init(&input, dup, length);
#endif
VALUE value = parse_input(&input, check_filepath(filepath));
@@ -487,13 +353,13 @@ parse(int argc, VALUE *argv, VALUE self) {
// Parse the given file and return a ParseResult instance.
static VALUE
parse_file(VALUE self, VALUE filepath) {
- input_t input;
+ yp_string_t input;
const char *checked = check_filepath(filepath);
- if (input_load_filepath(&input, checked) != 0) return Qnil;
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
VALUE value = parse_input(&input, checked);
- input_unload_filepath(&input);
+ yp_string_free(&input);
return value;
}
@@ -586,13 +452,13 @@ memsize(VALUE self, VALUE string) {
// parser for memory and speed.
static VALUE
profile_file(VALUE self, VALUE filepath) {
- input_t input;
+ yp_string_t input;
const char *checked = check_filepath(filepath);
- if (input_load_filepath(&input, checked) != 0) return Qnil;
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
yp_parser_t parser;
- yp_parser_init(&parser, input.source, input.size, checked);
+ yp_parser_init(&parser, yp_string_source(&input), yp_string_length(&input), checked);
yp_node_t *node = yp_parse(&parser, false);
yp_node_destroy(&parser, node);