diff options
author | Daniel Gustafsson | 2024-03-19 12:32:50 +0000 |
---|---|---|
committer | Daniel Gustafsson | 2024-03-19 12:32:50 +0000 |
commit | 347758b1206364e3bec5ad6cd649b4ba9fe1be7b (patch) | |
tree | cd5dbdc7d8f9257a7c64e6930b34e4df00dde152 /src/bin/pg_upgrade/version.c | |
parent | 5577a71fb0cc16bb7f5e3c40b89eb77460eac724 (diff) |
pg_upgrade: run all data type checks per connection
The checks for data type usage were each connecting to all databases
in the cluster and running their query. On clusters which have a lot
of databases this can become unnecessarily expensive. This moves the
checks to run in a single connection instead to minimize setup and
teardown overhead.
Reviewed-by: Nathan Bossart <[email protected]>
Reviewed-by: Justin Pryzby <[email protected]>
Reviewed-by: Peter Eisentraut <[email protected]>
Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/bin/pg_upgrade/version.c')
-rw-r--r-- | src/bin/pg_upgrade/version.c | 265 |
1 file changed, 9 insertions, 256 deletions
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 9dc1399f368..2de6dffccda 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,235 +9,23 @@ #include "postgres_fe.h" -#include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" - /* - * check_for_data_types_usage() - * Detect whether there are any stored columns depending on given type(s) - * - * If so, write a report to the given file name, and return true. - * - * base_query should be a SELECT yielding a single column named "oid", - * containing the pg_type OIDs of one or more types that are known to have - * inconsistent on-disk representations across server versions. - * - * We check for the type(s) in tables, matviews, and indexes, but not views; - * there's no storage involved in a view. + * version_hook functions for check_for_data_types_usage in order to determine + * whether a data type check should be executed for the cluster in question or + * not. */ bool -check_for_data_types_usage(ClusterInfo *cluster, - const char *base_query, - const char *output_path) +jsonb_9_4_check_applicable(ClusterInfo *cluster) { - bool found = false; - FILE *script = NULL; - int dbnum; + /* JSONB changed its storage format during 9.4 beta */ + if (GET_MAJOR_VERSION(cluster->major_version) == 904 && + cluster->controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER) + return true; - for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - { - DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; - PGconn *conn = connectToServer(cluster, active_db->db_name); - PQExpBufferData querybuf; - PGresult *res; - bool db_used = false; - int ntups; - int rowno; - int i_nspname, - i_relname, - i_attname; - - /* - * The type(s) of interest might be wrapped in a domain, array, - * composite, or range, and these container types can be nested (to - * varying extents depending on server version, but that's not of - * concern here). 
To handle all these cases we need a recursive CTE. - */ - initPQExpBuffer(&querybuf); - appendPQExpBuffer(&querybuf, - "WITH RECURSIVE oids AS ( " - /* start with the type(s) returned by base_query */ - " %s " - " UNION ALL " - " SELECT * FROM ( " - /* inner WITH because we can only reference the CTE once */ - " WITH x AS (SELECT oid FROM oids) " - /* domains on any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' " - " UNION ALL " - /* arrays over any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' " - " UNION ALL " - /* composite types containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x " - " WHERE t.typtype = 'c' AND " - " t.oid = c.reltype AND " - " c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid = x.oid " - " UNION ALL " - /* ranges containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x " - " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid" - " ) foo " - ") " - /* now look for stored columns of any such type */ - "SELECT n.nspname, c.relname, a.attname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n, " - " pg_catalog.pg_attribute a " - "WHERE c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid IN (SELECT oid FROM oids) AND " - " c.relkind IN (" - CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) ", " - CppAsString2(RELKIND_INDEX) ") AND " - " c.relnamespace = n.oid AND " - /* exclude possible orphaned temp tables */ - " n.nspname !~ '^pg_temp_' AND " - " n.nspname !~ '^pg_toast_temp_' AND " - /* exclude system catalogs, too */ - " n.nspname NOT IN ('pg_catalog', 'information_schema')", - base_query); - - res = executeQueryOrDie(conn, "%s", querybuf.data); - - ntups = PQntuples(res); - i_nspname = 
PQfnumber(res, "nspname"); - i_relname = PQfnumber(res, "relname"); - i_attname = PQfnumber(res, "attname"); - for (rowno = 0; rowno < ntups; rowno++) - { - found = true; - if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) - pg_fatal("could not open file \"%s\": %m", output_path); - if (!db_used) - { - fprintf(script, "In database: %s\n", active_db->db_name); - db_used = true; - } - fprintf(script, " %s.%s.%s\n", - PQgetvalue(res, rowno, i_nspname), - PQgetvalue(res, rowno, i_relname), - PQgetvalue(res, rowno, i_attname)); - } - - PQclear(res); - - termPQExpBuffer(&querybuf); - - PQfinish(conn); - } - - if (script) - fclose(script); - - return found; -} - -/* - * check_for_data_type_usage() - * Detect whether there are any stored columns depending on the given type - * - * If so, write a report to the given file name, and return true. - * - * type_name should be a fully qualified type name. This is just a - * trivial wrapper around check_for_data_types_usage() to convert a - * type name into a base query. - */ -bool -check_for_data_type_usage(ClusterInfo *cluster, - const char *type_name, - const char *output_path) -{ - bool found; - char *base_query; - - base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid", - type_name); - - found = check_for_data_types_usage(cluster, base_query, output_path); - - free(base_query); - - return found; -} - - -/* - * old_9_3_check_for_line_data_type_usage() - * 9.3 -> 9.4 - * Fully implement the 'line' data type in 9.4, which previously returned - * "not enabled" by default and was only functionally enabled with a - * compile-time switch; as of 9.4 "line" has a different on-disk - * representation format. 
- */ -void -old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"line\" data type"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_line.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.line", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"line\" data type in user tables.\n" - "This data type changed its internal and input/output format\n" - "between your old and new versions so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - - -/* - * old_9_6_check_for_unknown_data_type_usage() - * 9.6 -> 10 - * It's no longer allowed to create tables or views with "unknown"-type - * columns. We do not complain about views with such columns, because - * they should get silently converted to "text" columns during the DDL - * dump and reload; it seems unlikely to be worth making users do that - * by hand. However, if there's a table with such a column, the DDL - * reload will fail, so we should pre-detect that rather than failing - * mid-upgrade. Worse, if there's a matview with such a column, the - * DDL reload will silently change it to "text" which won't match the - * on-disk storage (which is like "cstring"). So we *must* reject that. 
- */ -void -old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"unknown\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_unknown.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.unknown", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"unknown\" data type in user tables.\n" - "This data type is no longer allowed in tables, so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); + return false; } /* @@ -352,41 +140,6 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) } /* - * old_11_check_for_sql_identifier_data_type_usage() - * 11 -> 12 - * In 12, the sql_identifier data type was switched from name to varchar, - * which does affect the storage (name is by-ref, but not varlena). This - * means user tables using sql_identifier for columns are broken because - * the on-disk format is different. - */ -void -old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"sql_identifier\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_sql_identifier.txt"); - - if (check_for_data_type_usage(cluster, "information_schema.sql_identifier", - output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables.\n" - "The on-disk format for this data type has changed, so this\n" - "cluster cannot currently be upgraded. 
You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - - -/* * report_extension_updates() * Report extensions that should be updated. */ |