diff options
author | Fujii Masao | 2024-10-03 06:57:32 +0000 |
---|---|---|
committer | Fujii Masao | 2024-10-03 06:57:32 +0000 |
commit | a1c4c8a9e1e3a53996dafa1f4ee6d4f7de2c58b2 (patch) | |
tree | c434f3b85d9091c7512e3b891525d6f9744b653f | |
parent | e7834a1a251d4a28245377f383ff20a657ba8262 (diff) |
file_fdw: Add on_error and log_verbosity options to file_fdw.
In v17, the on_error and log_verbosity options were introduced for
the COPY command. This commit extends support for these options
to file_fdw.
Setting on_error = 'ignore' for a file_fdw foreign table allows users
to query it without errors, even when the input file contains
malformed rows, by skipping the problematic rows.
Both on_error and log_verbosity options apply to SELECT and ANALYZE
operations on file_fdw foreign tables.
Author: Atsushi Torikoshi
Reviewed-by: Masahiko Sawada, Fujii Masao
Discussion: https://postgr.es/m/ab59dad10490ea3734cf022b16c24cfd@oss.nttdata.com
-rw-r--r-- | contrib/file_fdw/expected/file_fdw.out | 19 | ||||
-rw-r--r-- | contrib/file_fdw/file_fdw.c | 107 | ||||
-rw-r--r-- | contrib/file_fdw/sql/file_fdw.sql | 7 | ||||
-rw-r--r-- | doc/src/sgml/file-fdw.sgml | 23 |
4 files changed, 140 insertions, 16 deletions
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out index 86c148a86ba..593fdc782e3 100644 --- a/contrib/file_fdw/expected/file_fdw.out +++ b/contrib/file_fdw/expected/file_fdw.out @@ -206,6 +206,25 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; SELECT * FROM agg_bad; -- ERROR ERROR: invalid input syntax for type real: "aaa" CONTEXT: COPY agg_bad, line 3, column b: "aaa" +-- on_error and log_verbosity tests +ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore'); +SELECT * FROM agg_bad; +NOTICE: 1 row was skipped due to data type incompatibility + a | b +-----+-------- + 100 | 99.097 + 42 | 324.78 +(2 rows) + +ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent'); +SELECT * FROM agg_bad; + a | b +-----+-------- + 100 | 99.097 + 42 | 324.78 +(2 rows) + +ANALYZE agg_bad; -- misc query tests \t on SELECT explain_filter('EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv'); diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index d16821f8e1b..043204c3e7e 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -22,6 +22,7 @@ #include "catalog/pg_authid.h" #include "catalog/pg_foreign_table.h" #include "commands/copy.h" +#include "commands/copyfrom_internal.h" #include "commands/defrem.h" #include "commands/explain.h" #include "commands/vacuum.h" @@ -74,6 +75,8 @@ static const struct FileFdwOption valid_options[] = { {"null", ForeignTableRelationId}, {"default", ForeignTableRelationId}, {"encoding", ForeignTableRelationId}, + {"on_error", ForeignTableRelationId}, + {"log_verbosity", ForeignTableRelationId}, {"force_not_null", AttributeRelationId}, {"force_null", AttributeRelationId}, @@ -723,38 +726,74 @@ fileIterateForeignScan(ForeignScanState *node) FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; EState *estate = CreateExecutorState(); ExprContext *econtext; - MemoryContext oldcontext; + MemoryContext oldcontext = CurrentMemoryContext; TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; - bool found; + CopyFromState cstate = festate->cstate; ErrorContextCallback errcallback; /* Set up callback to identify error line number. */ errcallback.callback = CopyFromErrorCallback; - errcallback.arg = (void *) festate->cstate; + errcallback.arg = (void *) cstate; errcallback.previous = error_context_stack; error_context_stack = &errcallback; /* - * The protocol for loading a virtual tuple into a slot is first - * ExecClearTuple, then fill the values/isnull arrays, then - * ExecStoreVirtualTuple. If we don't find another row in the file, we - * just skip the last step, leaving the slot empty as required. - * * We pass ExprContext because there might be a use of the DEFAULT option * in COPY FROM, so we may need to evaluate default expressions. */ - ExecClearTuple(slot); econtext = GetPerTupleExprContext(estate); +retry: + /* * DEFAULT expressions need to be evaluated in a per-tuple context, so * switch in case we are doing that. */ - oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); - found = NextCopyFrom(festate->cstate, econtext, - slot->tts_values, slot->tts_isnull); - if (found) + MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); + + /* + * The protocol for loading a virtual tuple into a slot is first + * ExecClearTuple, then fill the values/isnull arrays, then + * ExecStoreVirtualTuple. If we don't find another row in the file, we + * just skip the last step, leaving the slot empty as required. + * + */ + ExecClearTuple(slot); + + if (NextCopyFrom(cstate, econtext, slot->tts_values, slot->tts_isnull)) + { + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && + cstate->escontext->error_occurred) + { + /* + * Soft error occurred, skip this tuple and just make + * ErrorSaveContext ready for the next NextCopyFrom. Since we + * don't set details_wanted and error_data is not to be filled, + * just resetting error_occurred is enough. + */ + cstate->escontext->error_occurred = false; + + /* Switch back to original memory context */ + MemoryContextSwitchTo(oldcontext); + + /* + * Make sure we are interruptible while repeatedly calling + * NextCopyFrom() until no soft error occurs. + */ + CHECK_FOR_INTERRUPTS(); + + /* + * Reset the per-tuple exprcontext, to clean-up after expression + * evaluations etc. + */ + ResetPerTupleExprContext(estate); + + /* Repeat NextCopyFrom() until no soft error occurs */ + goto retry; + } + ExecStoreVirtualTuple(slot); + } /* Switch back to original memory context */ MemoryContextSwitchTo(oldcontext); @@ -796,8 +835,19 @@ fileEndForeignScan(ForeignScanState *node) FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state; /* if festate is NULL, we are in EXPLAIN; nothing to do */ - if (festate) - EndCopyFrom(festate->cstate); + if (!festate) + return; + + if (festate->cstate->opts.on_error == COPY_ON_ERROR_IGNORE && + festate->cstate->num_errors > 0 && + festate->cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT) + ereport(NOTICE, + errmsg_plural("%llu row was skipped due to data type incompatibility", + "%llu rows were skipped due to data type incompatibility", + (unsigned long long) festate->cstate->num_errors, + (unsigned long long) festate->cstate->num_errors)); + + EndCopyFrom(festate->cstate); } /* @@ -1113,7 +1163,8 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel, * which must have at least targrows entries. * The actual number of rows selected is returned as the function result. * We also count the total number of rows in the file and return it into - * *totalrows. Note that *totaldeadrows is always set to 0. + * *totalrows. Rows skipped due to on_error = 'ignore' are not included + * in this count. Note that *totaldeadrows is always set to 0. * * Note that the returned list of rows is not always in order by physical * position in the file. Therefore, correlation estimates derived later @@ -1191,6 +1242,21 @@ file_acquire_sample_rows(Relation onerel, int elevel, if (!found) break; + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && + cstate->escontext->error_occurred) + { + /* + * Soft error occurred, skip this tuple and just make + * ErrorSaveContext ready for the next NextCopyFrom. Since we + * don't set details_wanted and error_data is not to be filled, + * just resetting error_occurred is enough. + */ + cstate->escontext->error_occurred = false; + + /* Repeat NextCopyFrom() until no soft error occurs */ + continue; + } + /* * The first targrows sample rows are simply copied into the * reservoir. Then we start replacing tuples in the sample until we @@ -1236,6 +1302,15 @@ file_acquire_sample_rows(Relation onerel, int elevel, /* Clean up. */ MemoryContextDelete(tupcontext); + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && + cstate->num_errors > 0 && + cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT) + ereport(NOTICE, + errmsg_plural("%llu row was skipped due to data type incompatibility", + "%llu rows were skipped due to data type incompatibility", + (unsigned long long) cstate->num_errors, + (unsigned long long) cstate->num_errors)); + EndCopyFrom(cstate); pfree(values); diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql index f0548e14e18..edd77c5cd20 100644 --- a/contrib/file_fdw/sql/file_fdw.sql +++ b/contrib/file_fdw/sql/file_fdw.sql @@ -150,6 +150,13 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a; -- error context report tests SELECT * FROM agg_bad; -- ERROR +-- on_error and log_verbosity tests +ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore'); +SELECT * FROM agg_bad; +ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent'); +SELECT * FROM agg_bad; +ANALYZE agg_bad; + -- misc query tests \t on SELECT explain_filter('EXPLAIN (VERBOSE, COSTS FALSE) SELECT * FROM agg_csv'); diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index f2f2af9a596..bb3579b0777 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -126,6 +126,29 @@ </listitem> </varlistentry> + <varlistentry> + <term><literal>on_error</literal></term> + + <listitem> + <para> + Specifies how to behave when encountering an error converting a column's + input value into its data type, + the same as <command>COPY</command>'s <literal>ON_ERROR</literal> option. + </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><literal>log_verbosity</literal></term> + + <listitem> + <para> + Specifies the amount of messages emitted by <literal>file_fdw</literal>, + the same as <command>COPY</command>'s <literal>LOG_VERBOSITY</literal> option. + </para> + </listitem> + </varlistentry> + </variablelist> <para> |