Skip to content

Fix #69181: READ_CSV|DROP_NEW_LINE drops newlines within fields #7618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions ext/spl/spl_directory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1887,7 +1887,7 @@ static zend_result spl_filesystem_object_cast(zend_object *readobj, zval *writeo
}
/* }}} */

static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add) /* {{{ */
static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add, bool csv) /* {{{ */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One thing I was thinking was instead of a csv bool arg, to pass the SPL flags as an argument and spl_filesystem_file_read_csv() to add the SPL_FILE_OBJECT_READ_CSV flag when calling this function. Not sure if this is cleaner tho

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I guess we could even temporarily add the SPL_FILE_OBJECT_READ_CSV flag. But I'm afraid it's somewhat messy either way.

{
char *buf;
size_t line_len = 0;
Expand Down Expand Up @@ -1917,7 +1917,7 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo
intern->u.file.current_line = estrdup("");
intern->u.file.current_line_len = 0;
} else {
if (SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) {
if (!csv && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) {
if (line_len > 0 && buf[line_len - 1] == '\n') {
line_len--;
if (line_len > 0 && buf[line_len - 1] == '\r') {
Expand All @@ -1935,20 +1935,30 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo
return SUCCESS;
} /* }}} */

static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent)
static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent, bool csv)
{
zend_long line_add = (intern->u.file.current_line) ? 1 : 0;
return spl_filesystem_file_read_ex(intern, silent, line_add);
return spl_filesystem_file_read_ex(intern, silent, line_add, csv);
}

static bool is_line_empty(spl_filesystem_object *intern)
{
char *current_line = intern->u.file.current_line;
size_t current_line_len = intern->u.file.current_line_len;
return current_line_len == 0
|| ((SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_READ_CSV) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)
&& ((current_line_len == 1 && current_line[0] == '\n')
|| (current_line_len == 2 && current_line[0] == '\r' && current_line[1] == '\n'))));
}

static zend_result spl_filesystem_file_read_csv(spl_filesystem_object *intern, char delimiter, char enclosure, int escape, zval *return_value) /* {{{ */
{
do {
zend_result ret = spl_filesystem_file_read(intern, /* silent */ true);
zend_result ret = spl_filesystem_file_read(intern, /* silent */ true, /* csv */ true);
if (ret != SUCCESS) {
return ret;
}
} while (!intern->u.file.current_line_len && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY));
} while (is_line_empty(intern) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY));

size_t buf_len = intern->u.file.current_line_len;
char *buf = estrndup(intern->u.file.current_line, buf_len);
Expand Down Expand Up @@ -2006,7 +2016,7 @@ static zend_result spl_filesystem_file_read_line_ex(zval * this_ptr, spl_filesys
zval_ptr_dtor(&retval);
return SUCCESS;
} else {
return spl_filesystem_file_read(intern, /* silent */ true);
return spl_filesystem_file_read(intern, /* silent */ true, /* csv */ false);
}
} /* }}} */

Expand Down Expand Up @@ -2214,7 +2224,7 @@ PHP_METHOD(SplFileObject, fgets)

CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern);

if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1) == FAILURE) {
if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1, /* csv */ false) == FAILURE) {
RETURN_THROWS();
}
RETURN_STRINGL(intern->u.file.current_line, intern->u.file.current_line_len);
Expand Down Expand Up @@ -2644,7 +2654,7 @@ PHP_METHOD(SplFileObject, fscanf)
CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern);

/* Get next line */
if (spl_filesystem_file_read(intern, /* silent */ false) == FAILURE) {
if (spl_filesystem_file_read(intern, /* silent */ false, /* csv */ false) == FAILURE) {
RETURN_THROWS();
}

Expand Down
70 changes: 70 additions & 0 deletions ext/spl/tests/bug69181.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
--TEST--
Bug #69181 (READ_CSV|DROP_NEW_LINE drops newlines within fields)
--FILE--
<?php
$filename = __DIR__ . "/bug69181.csv";
$csv = <<<CSV
"foo\n\nbar\nbaz",qux

"foo\nbar\nbaz",qux
CSV;

file_put_contents($filename, $csv);

$file = new SplFileObject($filename);
$file->setFlags(SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE | SplFileObject::READ_CSV);
var_dump(iterator_to_array($file));

echo "\n====\n\n";

$file->rewind();
while (($record = $file->fgetcsv())) {
var_dump($record);
}
?>
--EXPECT--
array(2) {
[0]=>
array(2) {
[0]=>
string(12) "foo

bar
baz"
[1]=>
string(3) "qux"
}
[2]=>
array(2) {
[0]=>
string(11) "foo
bar
baz"
[1]=>
string(3) "qux"
}
}

====

array(2) {
[0]=>
string(12) "foo

bar
baz"
[1]=>
string(3) "qux"
}
array(2) {
[0]=>
string(11) "foo
bar
baz"
[1]=>
string(3) "qux"
}
--CLEAN--
<?php
@unlink(__DIR__ . "/bug69181.csv");
?>