summary | refs | log | tree | commit | diff
path: root/prism/prism.c
diff options
context:
space:
mode:
Diffstat (limited to 'prism/prism.c')
-rw-r--r-- prism/prism.c 110
1 files changed, 91 insertions, 19 deletions
diff --git a/prism/prism.c b/prism/prism.c
index 5389cac9f6..e33d3e1d3c 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -500,6 +500,9 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
/** True if the -p command line option was given. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)
+/**
+ * True if the -x command line option was given. pm_parser_init reads this flag
+ * to decide whether it should search for a shebang line that contains "ruby"
+ * and begin parsing from that line.
+ */
+#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
+
/******************************************************************************/
/* Diagnostic-related functions */
/******************************************************************************/
@@ -19123,6 +19126,38 @@ parse_program(pm_parser_t *parser) {
/******************************************************************************/
/**
+ * A vendored version of strnstr that is used to find a substring within a
+ * string with a given length. This function is used to search for the Ruby
+ * engine name within a shebang when the -x option is passed to Ruby.
+ *
+ * The only modification that we made here is that we don't do NUL byte checks
+ * because we know the little parameter will not have a NUL byte and we allow
+ * the big parameter to have them.
+ *
+ * Only the first big_length bytes of big are ever examined. A candidate
+ * position is compared only while at least strlen(little) bytes remain, so the
+ * comparison never reads past big + big_length and a match can never extend
+ * beyond the given length.
+ *
+ * Returns a pointer to the first occurrence of little within the first
+ * big_length bytes of big, or NULL if there is no such occurrence.
+ */
+static const char *
+pm_strnstr(const char *big, const char *little, size_t big_length) {
+    size_t little_length = strlen(little);
+
+    // Stop as soon as the bytes left in big cannot hold all of little; without
+    // this bound memcmp would read beyond the end of the searched range.
+    for (const char *big_end = big + big_length; big < big_end && (size_t) (big_end - big) >= little_length; big++) {
+        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
+    }
+
+    return NULL;
+}
+
+/**
+ * Potentially warn the user that a shebang line ends with a carriage return
+ * followed by a newline, as that can cause problems on some platforms.
+ *
+ * The check inspects the final two bytes of the range [start, start + length),
+ * so the warning is only added when the range is longer than two bytes and
+ * ends in "\r\n". The warning covers the whole range.
+ *
+ * NOTE(review): the call sites visible in pm_parser_init compute length up to
+ * but not including the newline, in which case the last byte of the range is
+ * never '\n' -- confirm whether the newline should be included by callers.
+ */
+static void
+pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
+    // Too short to hold anything besides the line ending itself.
+    if (length <= 2) return;
+
+    const uint8_t *end = start + length;
+    if (end[-1] != '\n' || end[-2] != '\r') return;
+
+    pm_parser_warn(parser, start, end, PM_WARN_SHEBANG_CARRIAGE_RETURN);
+}
+
+/**
* Initialize a parser with the given start and end pointers.
*/
PRISM_EXPORTED_FUNCTION void
@@ -19208,22 +19243,6 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
// line option
parser->start_line = options->line;
- // offset option
- if (options->offset != 0) {
- const uint8_t *cursor = parser->start;
- const uint8_t *offset = cursor + options->offset;
-
- const uint8_t *newline = NULL;
- while ((newline = next_newline(cursor, parser->end - cursor)) != NULL) {
- if (newline > offset) break;
- pm_newline_list_append(&parser->newline_list, newline);
- cursor = newline + 1;
- }
-
- parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = offset, .end = offset };
- parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = offset, .end = offset };
- }
-
// encoding option
size_t encoding_length = pm_string_length(&options->encoding);
if (encoding_length > 0) {
@@ -19277,12 +19296,65 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
}
}
+ // If the -x command line flag is set, or the first shebang of the file does
+ // not include "ruby", then we'll search for a shebang that does include
+ // "ruby" and start parsing from there.
+ bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
+
// If the first two bytes of the source are a shebang, then we'll indicate
// that the encoding comment is at the end of the shebang.
if (peek(parser) == '#' && peek_offset(parser, 1) == '!') {
- const uint8_t *encoding_comment_start = next_newline(source, (ptrdiff_t) size);
- if (encoding_comment_start) {
- parser->encoding_comment_start = encoding_comment_start + 1;
+ const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
+ size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
+
+ if (pm_strnstr((const char *) parser->start, "ruby", length) != NULL) {
+ pm_parser_warn_shebang_carriage_return(parser, parser->start, length);
+ if (newline != NULL) parser->encoding_comment_start = newline + 1;
+ search_shebang = false;
+ } else {
+ search_shebang = true;
+ }
+ }
+
+ // Here we're going to find the first shebang that includes "ruby" and start
+ // parsing from there.
+ if (search_shebang) {
+ bool found = false;
+
+ // This is going to point to the start of each line as we check it.
+ // We'll maintain a moving window looking at each line as they come.
+ const uint8_t *cursor = parser->start;
+
+ // The newline pointer points to the end of the current line that we're
+ // considering. If it is NULL, then we're at the end of the file.
+ const uint8_t *newline = next_newline(cursor, parser->end - cursor);
+
+ while (newline != NULL) {
+ pm_newline_list_append(&parser->newline_list, newline);
+
+ cursor = newline + 1;
+ newline = next_newline(cursor, parser->end - cursor);
+
+ size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
+ if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
+ if (parser->newline_list.size == 1) {
+ pm_parser_warn_shebang_carriage_return(parser, cursor, length);
+ }
+
+ if (pm_strnstr((const char *) cursor, "ruby", length) != NULL) {
+ found = true;
+ parser->encoding_comment_start = newline + 1;
+ break;
+ }
+ }
+ }
+
+ if (found) {
+ parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
+ parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
+ } else {
+ pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
+ pm_newline_list_clear(&parser->newline_list);
}
}
}