Skip to content

Commit 61dc533

Browse files
jeff-davisCommitfest Bot
authored and
Commitfest Bot
committed
Add pg_upgrade check for Unicode-dependent relations.
1 parent 5f2540d commit 61dc533

File tree

1 file changed

+180
-0
lines changed

1 file changed

+180
-0
lines changed

src/bin/pg_upgrade/check.c

+180
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "catalog/pg_class_d.h"
1414
#include "fe_utils/string_utils.h"
1515
#include "pg_upgrade.h"
16+
#include "common/unicode_version.h"
1617

1718
static void check_new_cluster_is_empty(void);
1819
static void check_is_install_user(ClusterInfo *cluster);
@@ -25,6 +26,7 @@ static void check_for_tables_with_oids(ClusterInfo *cluster);
2526
static void check_for_pg_role_prefix(ClusterInfo *cluster);
2627
static void check_for_new_tablespace_dir(void);
2728
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
29+
static void check_for_unicode_update(ClusterInfo *cluster);
2830
static void check_new_cluster_logical_replication_slots(void);
2931
static void check_new_cluster_subscription_configuration(void);
3032
static void check_old_cluster_for_valid_slots(void);
@@ -633,6 +635,12 @@ check_and_dump_old_cluster(void)
633635

634636
check_for_data_types_usage(&old_cluster);
635637

638+
/*
639+
* Unicode updates can affect some objects that use expressions with
640+
* functions dependent on Unicode.
641+
*/
642+
check_for_unicode_update(&old_cluster);
643+
636644
/*
637645
* PG 14 changed the function signature of encoding conversion functions.
638646
* Conversions from older versions cannot be upgraded automatically
@@ -1754,6 +1762,178 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
17541762
check_ok();
17551763
}
17561764

1765+
/*
1766+
* Callback function for processing results of query for
1767+
* check_for_unicode_update()'s UpgradeTask. If the query returned any rows
1768+
* (i.e., the check failed), write the details to the report file.
1769+
*/
1770+
static void
1771+
process_unicode_update(DbInfo *dbinfo, PGresult *res, void *arg)
1772+
{
1773+
UpgradeTaskReport *report = (UpgradeTaskReport *) arg;
1774+
int ntups = PQntuples(res);
1775+
int i_reloid = PQfnumber(res, "reloid");
1776+
int i_nspname = PQfnumber(res, "nspname");
1777+
int i_relname = PQfnumber(res, "relname");
1778+
1779+
if (ntups == 0)
1780+
return;
1781+
1782+
if (report->file == NULL &&
1783+
(report->file = fopen_priv(report->path, "w")) == NULL)
1784+
pg_fatal("could not open file \"%s\": %m", report->path);
1785+
1786+
fprintf(report->file, "In database: %s\n", dbinfo->db_name);
1787+
1788+
for (int rowno = 0; rowno < ntups; rowno++)
1789+
fprintf(report->file, " (oid=%s) %s.%s\n",
1790+
PQgetvalue(res, rowno, i_reloid),
1791+
PQgetvalue(res, rowno, i_nspname),
1792+
PQgetvalue(res, rowno, i_relname));
1793+
}
1794+
1795+
/*
1796+
* Check if the Unicode version built into Postgres changed between the old
1797+
* cluster and the new cluster.
1798+
*/
1799+
static bool
1800+
unicode_version_changed(ClusterInfo *cluster)
1801+
{
1802+
PGconn *conn_template1 = connectToServer(cluster, "template1");
1803+
PGresult *res;
1804+
char *old_unicode_version;
1805+
bool unicode_updated;
1806+
1807+
res = executeQueryOrDie(conn_template1, "SELECT unicode_version()");
1808+
old_unicode_version = PQgetvalue(res, 0, 0);
1809+
unicode_updated = (strcmp(old_unicode_version, PG_UNICODE_VERSION) != 0);
1810+
1811+
PQclear(res);
1812+
PQfinish(conn_template1);
1813+
1814+
return unicode_updated;
1815+
}
1816+
1817+
/*
1818+
* check_for_unicode_update()
1819+
*
1820+
* Check if the version of Unicode in the old server and the new server
1821+
* differ. If so, check for indexes, partitioned tables, or constraints that
1822+
* use expressions with functions dependent on Unicode behavior.
1823+
*/
1824+
static void
1825+
check_for_unicode_update(ClusterInfo *cluster)
1826+
{
1827+
UpgradeTaskReport report;
1828+
UpgradeTask *task = upgrade_task_create();
1829+
const char *query;
1830+
1831+
/*
1832+
* The builtin provider did not exist prior to version 17. While there are
1833+
* still problems that could potentially be caught from earlier versions,
1834+
* such as an index on NORMALIZE(), we don't check for that here.
1835+
*/
1836+
if (GET_MAJOR_VERSION(cluster->major_version) < 1700)
1837+
return;
1838+
1839+
prep_status("Checking for objects affected by Unicode update");
1840+
1841+
if (!unicode_version_changed(cluster))
1842+
{
1843+
check_ok();
1844+
return;
1845+
}
1846+
1847+
report.file = NULL;
1848+
snprintf(report.path, sizeof(report.path), "%s/%s",
1849+
log_opts.basedir,
1850+
"unicode_dependent_rels.txt");
1851+
1852+
query =
1853+
/* collations that use built-in Unicode for character semantics */
1854+
"WITH collations(collid) AS ( "
1855+
" SELECT oid FROM pg_collation "
1856+
" WHERE collprovider='b' AND colllocale IN ('C.UTF-8','PG_UNICODE_FAST') "
1857+
/* include default collation, if appropriate */
1858+
" UNION "
1859+
" SELECT 'pg_catalog.default'::regcollation FROM pg_database "
1860+
" WHERE datname = current_database() AND "
1861+
" datlocprovider='b' AND datlocale IN ('C.UTF-8','PG_UNICODE_FAST') "
1862+
"), "
1863+
/* functions that use built-in Unicode */
1864+
"functions(procid) AS ( "
1865+
" SELECT proc.oid FROM pg_proc proc "
1866+
" WHERE proname IN ('normalize','unicode_assigned','unicode_version','is_normalized') AND "
1867+
" pronamespace='pg_catalog'::regnamespace "
1868+
"), "
1869+
/* operators that use the input collation for character semantics */
1870+
"coll_operators(operid, procid, collid) AS ( "
1871+
" SELECT oper.oid, oper.oprcode, collid FROM pg_operator oper, collations "
1872+
" WHERE oprname IN ('~', '~*', '!~', '!~*', '~~*', '!~~*') AND "
1873+
" oprnamespace='pg_catalog'::regnamespace AND "
1874+
" oprright='text'::regtype "
1875+
"), "
1876+
/* functions that use the input collation for character semantics */
1877+
"coll_functions(procid, collid) AS ( "
1878+
" SELECT proc.oid, collid FROM pg_proc proc, collations "
1879+
" WHERE proname IN ('lower','initcap','upper') AND "
1880+
" pronamespace='pg_catalog'::regnamespace AND "
1881+
" proargtypes[0] = 'text'::regtype "
1882+
/* include functions behind the operators listed above */
1883+
" UNION "
1884+
" SELECT procid, collid FROM coll_operators "
1885+
"), "
1886+
1887+
/*
1888+
* Generate patterns to search a pg_node_tree for the above functions and
1889+
* operators.
1890+
*/
1891+
"patterns(p) AS ( "
1892+
" SELECT '{FUNCEXPR :funcid ' || procid::text || '[ }]' FROM functions "
1893+
" UNION "
1894+
" SELECT '{OPEXPR :opno ' || operid::text || ' (:\\w+ \\w+ )*' || "
1895+
" ':inputcollid ' || collid::text || '[ }]' FROM coll_operators "
1896+
" UNION "
1897+
" SELECT '{FUNCEXPR :funcid ' || procid::text || ' (:\\w+ \\w+ )*' || "
1898+
" ':inputcollid ' || collid::text || '[ }]' FROM coll_functions "
1899+
") "
1900+
1901+
/*
1902+
* Match the patterns against expressions used for relation contents.
1903+
*/
1904+
"SELECT reloid, relkind, nspname, relname "
1905+
" FROM ( "
1906+
" SELECT conrelid "
1907+
" FROM pg_constraint, patterns WHERE conbin::text ~ p "
1908+
" UNION "
1909+
" SELECT indexrelid "
1910+
" FROM pg_index, patterns WHERE indexprs::text ~ p OR indpred::text ~ p "
1911+
" UNION "
1912+
" SELECT partrelid "
1913+
" FROM pg_partitioned_table, patterns WHERE partexprs::text ~ p "
1914+
" ) s(reloid), pg_class c, pg_namespace n, pg_database d "
1915+
" WHERE s.reloid = c.oid AND c.relnamespace = n.oid AND "
1916+
" d.datname = current_database() AND "
1917+
" d.encoding = pg_char_to_encoding('UTF8');";
1918+
1919+
upgrade_task_add_step(task, query,
1920+
process_unicode_update,
1921+
true, &report);
1922+
upgrade_task_run(task, cluster);
1923+
upgrade_task_free(task);
1924+
1925+
if (report.file)
1926+
{
1927+
fclose(report.file);
1928+
report_status(PG_WARNING, "warning");
1929+
pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n"
1930+
"A list of potentially-affected relations is in the file:\n"
1931+
" %s", report.path);
1932+
}
1933+
else
1934+
check_ok();
1935+
}
1936+
17571937
/*
17581938
* check_new_cluster_logical_replication_slots()
17591939
*

0 commit comments

Comments
 (0)