summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- doc/src/sgml/libpq.sgml 5
-rw-r--r-- doc/src/sgml/protocol.sgml 5
-rw-r--r-- doc/src/sgml/ref/copy.sgml 36
-rw-r--r-- doc/src/sgml/ref/psql-ref.sgml 7
-rw-r--r-- src/backend/commands/copyfromparse.c 80
-rw-r--r-- src/backend/commands/copyto.c 7
-rw-r--r-- src/bin/psql/copy.c 27
-rw-r--r-- src/test/regress/expected/copy.out 18
-rw-r--r-- src/test/regress/sql/copy.sql 12
9 files changed, 105 insertions, 92 deletions
diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 783e8e750bb..4a727d44997 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -7381,8 +7381,9 @@ int PQputline(PGconn *conn,
<literal>\.</literal> as a final line to indicate to the server that it had
finished sending <command>COPY</command> data. While this still works, it is deprecated and the
special meaning of <literal>\.</literal> can be expected to be removed in a
- future release. It is sufficient to call <xref linkend="libpq-PQendcopy"/> after
- having sent the actual data.
+ future release. (It already will misbehave in <literal>CSV</literal>
+ mode.) It is sufficient to call <xref linkend="libpq-PQendcopy"/>
+ after having sent the actual data.
</para>
</note>
</listitem>
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index 11b64567797..2d2481bb8b8 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -7606,8 +7606,9 @@ psql "dbname=postgres replication=database" -c "IDENTIFY_SYSTEM;"
is a well-defined way to recover from errors during <command>COPY</command>. The special
<quote><literal>\.</literal></quote> last line is not needed anymore, and is not sent
during <command>COPY OUT</command>.
- (It is still recognized as a terminator during <command>COPY IN</command>, but its use is
- deprecated and will eventually be removed.) Binary <command>COPY</command> is supported.
+ (It is still recognized as a terminator during text-mode <command>COPY
+ IN</command>, but not in CSV mode. The text-mode behavior is
+ deprecated and may eventually be removed.) Binary <command>COPY</command> is supported.
The CopyInResponse and CopyOutResponse messages include fields indicating
the number of columns and the format of each column.
</para>
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 1518af8a045..fdbd20bc50b 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -646,11 +646,16 @@ COPY <replaceable class="parameter">count</replaceable>
</para>
<para>
- End of data can be represented by a single line containing just
+ End of data can be represented by a line containing just
backslash-period (<literal>\.</literal>). An end-of-data marker is
not necessary when reading from a file, since the end of file
- serves perfectly well; it is needed only when copying data to or from
- client applications using pre-3.0 client protocol.
+ serves perfectly well; in that context this provision exists only for
+ backward compatibility. However, <application>psql</application>
+ uses <literal>\.</literal> to terminate a <literal>COPY FROM
+ STDIN</literal> operation (that is, reading
+ in-line <command>COPY</command> data in a SQL script). In that
+ context the rule is needed to be able to end the operation before the
+ end of the script.
</para>
<para>
@@ -811,18 +816,27 @@ COPY <replaceable class="parameter">count</replaceable>
<para>
Because backslash is not a special character in the <literal>CSV</literal>
- format, <literal>\.</literal>, the end-of-data marker, could also appear
- as a data value. To avoid any misinterpretation, a <literal>\.</literal>
- data value appearing as a lone entry on a line is automatically
- quoted on output, and on input, if quoted, is not interpreted as the
- end-of-data marker. If you are loading a file created by another
- application that has a single unquoted column and might have a
- value of <literal>\.</literal>, you might need to quote that value in the
- input file.
+ format, the end-of-data marker used in text mode (<literal>\.</literal>)
+ is not normally treated as special when reading <literal>CSV</literal>
+ data. An exception is that <application>psql</application> will terminate
+ a <literal>COPY FROM STDIN</literal> operation (that is, reading
+ in-line <command>COPY</command> data in a SQL script) at a line containing
+ only <literal>\.</literal>, whether it is text or <literal>CSV</literal>
+ mode.
</para>
<note>
<para>
+ <productname>PostgreSQL</productname> versions before v18 always
+ recognized unquoted <literal>\.</literal> as an end-of-data marker,
+ even when reading from a separate file. For compatibility with older
+ versions, <command>COPY TO</command> will quote <literal>\.</literal>
+ when it's alone on a line, even though this is no longer necessary.
+ </para>
+ </note>
+
+ <note>
+ <para>
In <literal>CSV</literal> format, all characters are significant. A quoted value
surrounded by white space, or any characters other than
<literal>DELIMITER</literal>, will include those characters. This can cause
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 3fd9959ed16..b825ca96a23 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1135,7 +1135,8 @@ SELECT $1 \parse stmt1
<para>
For <literal>\copy ... from stdin</literal>, data rows are read from the same
- source that issued the command, continuing until <literal>\.</literal>
+ source that issued the command, continuing until a line containing
+ only <literal>\.</literal>
is read or the stream reaches <acronym>EOF</acronym>. This option is useful
for populating tables in-line within an SQL script file.
For <literal>\copy ... to stdout</literal>, output is sent to the same place
@@ -1179,10 +1180,6 @@ SELECT $1 \parse stmt1
destination, because all data must pass through the client/server
connection. For large amounts of data the <acronym>SQL</acronym>
command might be preferable.
- Also, because of this pass-through method, <literal>\copy
- ... from</literal> in <acronym>CSV</acronym> mode will erroneously
- treat a <literal>\.</literal> data value alone on a line as an
- end-of-input marker.
</para>
</tip>
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 97a4c387a30..a280efe23f9 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -136,14 +136,6 @@ if (1) \
} \
} else ((void) 0)
-/* Undo any read-ahead and jump out of the block. */
-#define NO_END_OF_COPY_GOTO \
-if (1) \
-{ \
- input_buf_ptr = prev_raw_ptr + 1; \
- goto not_end_of_copy; \
-} else ((void) 0)
-
/* NOTE: there's a copy of this in copyto.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
@@ -1182,7 +1174,6 @@ CopyReadLineText(CopyFromState cstate)
bool result = false;
/* CSV variables */
- bool first_char_in_line = true;
bool in_quote = false,
last_was_esc = false;
char quotec = '\0';
@@ -1268,12 +1259,12 @@ CopyReadLineText(CopyFromState cstate)
if (cstate->opts.csv_mode)
{
/*
- * If character is '\\' or '\r', we may need to look ahead below.
- * Force fetch of the next character if we don't already have it.
- * We need to do this before changing CSV state, in case one of
- * these characters is also the quote or escape character.
+ * If character is '\r', we may need to look ahead below. Force
+ * fetch of the next character if we don't already have it. We
+ * need to do this before changing CSV state, in case '\r' is also
+ * the quote or escape character.
*/
- if (c == '\\' || c == '\r')
+ if (c == '\r')
{
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
}
@@ -1377,10 +1368,10 @@ CopyReadLineText(CopyFromState cstate)
}
/*
- * In CSV mode, we only recognize \. alone on a line. This is because
- * \. is a valid CSV data value.
+ * Process backslash, except in CSV mode where backslash is a normal
+ * character.
*/
- if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
+ if (c == '\\' && !cstate->opts.csv_mode)
{
char c2;
@@ -1398,12 +1389,6 @@ CopyReadLineText(CopyFromState cstate)
if (c2 == '.')
{
input_buf_ptr++; /* consume the '.' */
-
- /*
- * Note: if we loop back for more data here, it does not
- * matter that the CSV state change checks are re-executed; we
- * will come back here with no important state changed.
- */
if (cstate->eol_type == EOL_CRNL)
{
/* Get the next character */
@@ -1412,23 +1397,13 @@ CopyReadLineText(CopyFromState cstate)
c2 = copy_input_buf[input_buf_ptr++];
if (c2 == '\n')
- {
- if (!cstate->opts.csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("end-of-copy marker does not match previous newline style")));
- else
- NO_END_OF_COPY_GOTO;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker does not match previous newline style")));
else if (c2 != '\r')
- {
- if (!cstate->opts.csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("end-of-copy marker corrupt")));
- else
- NO_END_OF_COPY_GOTO;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
}
/* Get the next character */
@@ -1437,14 +1412,9 @@ CopyReadLineText(CopyFromState cstate)
c2 = copy_input_buf[input_buf_ptr++];
if (c2 != '\r' && c2 != '\n')
- {
- if (!cstate->opts.csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("end-of-copy marker corrupt")));
- else
- NO_END_OF_COPY_GOTO;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
(cstate->eol_type == EOL_CRNL && c2 != '\n') ||
@@ -1467,7 +1437,7 @@ CopyReadLineText(CopyFromState cstate)
result = true; /* report EOF */
break;
}
- else if (!cstate->opts.csv_mode)
+ else
{
/*
* If we are here, it means we found a backslash followed by
@@ -1475,23 +1445,11 @@ CopyReadLineText(CopyFromState cstate)
* after a backslash is special, so we skip over that second
* character too. If we didn't do that \\. would be
* considered an end-of-copy marker, while in non-CSV mode it is a
- * literal backslash followed by a period. In CSV mode,
- * backslashes are not special, so we want to process the
- * character after the backslash just like a normal character,
- * so we don't increment in those cases.
+ * literal backslash followed by a period.
*/
input_buf_ptr++;
}
}
-
- /*
- * This label is for CSV cases where \. appears at the start of a
- * line, but there is more text after it, meaning it was a data value.
- * We are more strict for \. in CSV mode because \. could be a data
- * value, while in non-CSV mode, \. cannot be a data value.
- */
-not_end_of_copy:
- first_char_in_line = false;
} /* end of outer loop */
/*
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 91de442f434..463083e645d 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -1160,8 +1160,11 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
if (!use_quote)
{
/*
- * Because '\.' can be a data value, quote it if it appears alone on a
- * line so it is not interpreted as the end-of-data marker.
+ * Quote '\.' if it appears alone on a line, so that it will not be
+ * interpreted as an end-of-data marker. (PG 18 and up will not
+ * interpret '\.' in CSV that way, except in embedded-in-SQL data; but
+ * we want the data to be loadable by older versions too. Also, this
+ * avoids breaking clients that are still using PQgetline().)
*/
if (single_attr && strcmp(ptr, "\\.") == 0)
use_quote = true;
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index 961ae326949..e020e4d665d 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -620,20 +620,29 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool isbinary, PGresult **res)
/* current line is done? */
if (buf[buflen - 1] == '\n')
{
- /* check for EOF marker, but not on a partial line */
- if (at_line_begin)
+ /*
+ * When at the beginning of the line and the data is
+ * inlined, check for EOF marker. If the marker is found,
+ * we must stop at this point. If not, the \. line can be
+ * sent to the server, and we let it decide whether it's
+ * an EOF or not depending on the format: in TEXT mode, \.
+ * will be interpreted as an EOF, in CSV, it will not.
+ */
+ if (at_line_begin && copystream == pset.cur_cmd_source)
{
- /*
- * This code erroneously assumes '\.' on a line alone
- * inside a quoted CSV string terminates the \copy.
- * https://www.postgresql.org/message-id/[email protected]
- *
- * https://www.postgresql.org/message-id/[email protected]
- */
if ((linelen == 3 && memcmp(fgresult, "\\.\n", 3) == 0) ||
(linelen == 4 && memcmp(fgresult, "\\.\r\n", 4) == 0))
{
copydone = true;
+
+ /*
+ * Remove the EOF marker from the data sent. In
+ * CSV mode, the EOF marker must be removed,
+ * otherwise it would be interpreted by the server
+ * as valid data.
+ */
+ *fgresult = '\0';
+ buflen -= linelen;
}
}
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 44114089a6d..174fe056033 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -32,6 +32,24 @@ select * from copytest except select * from copytest2;
-------+------+--------
(0 rows)
+--- test unquoted \. as data inside CSV
+-- do not use copy out to export the data, as it would quote \.
+\o :filename
+\qecho line1
+\qecho '\\.'
+\qecho line2
+\o
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+ test
+-------
+ \.
+ line1
+ line2
+(3 rows)
+
-- test header line feature
create temp table copytest3 (
c1 int,
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index e2dd24cb351..8ed7922ab49 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -38,6 +38,18 @@ copy copytest2 from :'filename' csv quote '''' escape E'\\';
select * from copytest except select * from copytest2;
+--- test unquoted \. as data inside CSV
+-- do not use copy out to export the data, as it would quote \.
+\o :filename
+\qecho line1
+\qecho '\\.'
+\qecho line2
+\o
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+
-- test header line feature