diff options
author | Daniel Gustafsson | 2024-03-19 12:32:50 +0000 |
---|---|---|
committer | Daniel Gustafsson | 2024-03-19 12:32:50 +0000 |
commit | 347758b1206364e3bec5ad6cd649b4ba9fe1be7b (patch) | |
tree | cd5dbdc7d8f9257a7c64e6930b34e4df00dde152 /src/bin/pg_upgrade/version.c | |
parent | 5577a71fb0cc16bb7f5e3c40b89eb77460eac724 (diff) |
pg_upgrade: run all data type checks per connection
The checks for data type usage were each connecting to all databases
in the cluster and running their query. On clusters which have a lot
of databases this can become unnecessarily expensive. This moves the
checks to run in a single connection instead to minimize setup and
teardown overhead.
Reviewed-by: Nathan Bossart <[email protected]>
Reviewed-by: Justin Pryzby <[email protected]>
Reviewed-by: Peter Eisentraut <[email protected]>
Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/bin/pg_upgrade/version.c')
-rw-r--r-- | src/bin/pg_upgrade/version.c | 265 |
1 file changed, 9 insertions, 256 deletions
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 9dc1399f368..2de6dffccda 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,235 +9,23 @@ #include "postgres_fe.h" -#include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" - /* - * check_for_data_types_usage() - * Detect whether there are any stored columns depending on given type(s) - * - * If so, write a report to the given file name, and return true. - * - * base_query should be a SELECT yielding a single column named "oid", - * containing the pg_type OIDs of one or more types that are known to have - * inconsistent on-disk representations across server versions. - * - * We check for the type(s) in tables, matviews, and indexes, but not views; - * there's no storage involved in a view. + * version_hook functions for check_for_data_types_usage in order to determine + * whether a data type check should be executed for the cluster in question or + * not. */ bool -check_for_data_types_usage(ClusterInfo *cluster, - const char *base_query, - const char *output_path) +jsonb_9_4_check_applicable(ClusterInfo *cluster) { - bool found = false; - FILE *script = NULL; - int dbnum; + /* JSONB changed its storage format during 9.4 beta */ + if (GET_MAJOR_VERSION(cluster->major_version) == 904 && + cluster->controldata.cat_ver < JSONB_FORMAT_CHANGE_CAT_VER) + return true; - for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - { - DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; - PGconn *conn = connectToServer(cluster, active_db->db_name); - PQExpBufferData querybuf; - PGresult *res; - bool db_used = false; - int ntups; - int rowno; - int i_nspname, - i_relname, - i_attname; - - /* - * The type(s) of interest might be wrapped in a domain, array, - * composite, or range, and these container types can be nested (to - * varying extents depending on server version, but that's not of - * concern here). 
To handle all these cases we need a recursive CTE. - */ - initPQExpBuffer(&querybuf); - appendPQExpBuffer(&querybuf, - "WITH RECURSIVE oids AS ( " - /* start with the type(s) returned by base_query */ - " %s " - " UNION ALL " - " SELECT * FROM ( " - /* inner WITH because we can only reference the CTE once */ - " WITH x AS (SELECT oid FROM oids) " - /* domains on any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typbasetype = x.oid AND typtype = 'd' " - " UNION ALL " - /* arrays over any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, x WHERE typelem = x.oid AND typtype = 'b' " - " UNION ALL " - /* composite types containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_class c, pg_catalog.pg_attribute a, x " - " WHERE t.typtype = 'c' AND " - " t.oid = c.reltype AND " - " c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid = x.oid " - " UNION ALL " - /* ranges containing any type selected so far */ - " SELECT t.oid FROM pg_catalog.pg_type t, pg_catalog.pg_range r, x " - " WHERE t.typtype = 'r' AND r.rngtypid = t.oid AND r.rngsubtype = x.oid" - " ) foo " - ") " - /* now look for stored columns of any such type */ - "SELECT n.nspname, c.relname, a.attname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n, " - " pg_catalog.pg_attribute a " - "WHERE c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid IN (SELECT oid FROM oids) AND " - " c.relkind IN (" - CppAsString2(RELKIND_RELATION) ", " - CppAsString2(RELKIND_MATVIEW) ", " - CppAsString2(RELKIND_INDEX) ") AND " - " c.relnamespace = n.oid AND " - /* exclude possible orphaned temp tables */ - " n.nspname !~ '^pg_temp_' AND " - " n.nspname !~ '^pg_toast_temp_' AND " - /* exclude system catalogs, too */ - " n.nspname NOT IN ('pg_catalog', 'information_schema')", - base_query); - - res = executeQueryOrDie(conn, "%s", querybuf.data); - - ntups = PQntuples(res); - i_nspname = 
PQfnumber(res, "nspname"); - i_relname = PQfnumber(res, "relname"); - i_attname = PQfnumber(res, "attname"); - for (rowno = 0; rowno < ntups; rowno++) - { - found = true; - if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) - pg_fatal("could not open file \"%s\": %m", output_path); - if (!db_used) - { - fprintf(script, "In database: %s\n", active_db->db_name); - db_used = true; - } - fprintf(script, " %s.%s.%s\n", - PQgetvalue(res, rowno, i_nspname), - PQgetvalue(res, rowno, i_relname), - PQgetvalue(res, rowno, i_attname)); - } - - PQclear(res); - - termPQExpBuffer(&querybuf); - - PQfinish(conn); - } - - if (script) - fclose(script); - - return found; -} - -/* - * check_for_data_type_usage() - * Detect whether there are any stored columns depending on the given type - * - * If so, write a report to the given file name, and return true. - * - * type_name should be a fully qualified type name. This is just a - * trivial wrapper around check_for_data_types_usage() to convert a - * type name into a base query. - */ -bool -check_for_data_type_usage(ClusterInfo *cluster, - const char *type_name, - const char *output_path) -{ - bool found; - char *base_query; - - base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid", - type_name); - - found = check_for_data_types_usage(cluster, base_query, output_path); - - free(base_query); - - return found; -} - - -/* - * old_9_3_check_for_line_data_type_usage() - * 9.3 -> 9.4 - * Fully implement the 'line' data type in 9.4, which previously returned - * "not enabled" by default and was only functionally enabled with a - * compile-time switch; as of 9.4 "line" has a different on-disk - * representation format. 
- */ -void -old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for incompatible \"line\" data type"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_line.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.line", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"line\" data type in user tables.\n" - "This data type changed its internal and input/output format\n" - "between your old and new versions so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - - -/* - * old_9_6_check_for_unknown_data_type_usage() - * 9.6 -> 10 - * It's no longer allowed to create tables or views with "unknown"-type - * columns. We do not complain about views with such columns, because - * they should get silently converted to "text" columns during the DDL - * dump and reload; it seems unlikely to be worth making users do that - * by hand. However, if there's a table with such a column, the DDL - * reload will fail, so we should pre-detect that rather than failing - * mid-upgrade. Worse, if there's a matview with such a column, the - * DDL reload will silently change it to "text" which won't match the - * on-disk storage (which is like "cstring"). So we *must* reject that. 
- */ -void -old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"unknown\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_unknown.txt"); - - if (check_for_data_type_usage(cluster, "pg_catalog.unknown", output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"unknown\" data type in user tables.\n" - "This data type is no longer allowed in tables, so this\n" - "cluster cannot currently be upgraded. You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); + return false; } /* @@ -352,41 +140,6 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) } /* - * old_11_check_for_sql_identifier_data_type_usage() - * 11 -> 12 - * In 12, the sql_identifier data type was switched from name to varchar, - * which does affect the storage (name is by-ref, but not varlena). This - * means user tables using sql_identifier for columns are broken because - * the on-disk format is different. - */ -void -old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster) -{ - char output_path[MAXPGPATH]; - - prep_status("Checking for invalid \"sql_identifier\" user columns"); - - snprintf(output_path, sizeof(output_path), "%s/%s", - log_opts.basedir, - "tables_using_sql_identifier.txt"); - - if (check_for_data_type_usage(cluster, "information_schema.sql_identifier", - output_path)) - { - pg_log(PG_REPORT, "fatal"); - pg_fatal("Your installation contains the \"sql_identifier\" data type in user tables.\n" - "The on-disk format for this data type has changed, so this\n" - "cluster cannot currently be upgraded. 
You can\n" - "drop the problem columns and restart the upgrade.\n" - "A list of the problem columns is in the file:\n" - " %s", output_path); - } - else - check_ok(); -} - - -/* * report_extension_updates() * Report extensions that should be updated. */ |