summaryrefslogtreecommitdiff
path: root/src/backend/commands/tablespace.c
diff options
context:
space:
mode:
author Tom Lane 2004-06-18 06:14:31 +0000
committer Tom Lane 2004-06-18 06:14:31 +0000
commit 2467394ee1566e82d0314d12a0d1c0a5670a28c9 (patch)
tree 57b87b8c181a9c3eb0f33bf775a5f31b9de8b890 /src/backend/commands/tablespace.c
parent 474875f4438ea0d18f9f4170117bc407e6812515 (diff)
Tablespaces. Alternate database locations are dead, long live tablespaces.
There are various things left to do: contrib dbsize and oid2name modules need work, and so does the documentation. Also someone should think about COMMENT ON TABLESPACE and maybe RENAME TABLESPACE. Also initlocation is dead, it just doesn't know it yet. Gavin Sherry and Tom Lane.
Diffstat (limited to 'src/backend/commands/tablespace.c')
-rw-r--r-- src/backend/commands/tablespace.c 660
1 files changed, 660 insertions, 0 deletions
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
new file mode 100644
index 00000000000..d412389bb86
--- /dev/null
+++ b/src/backend/commands/tablespace.c
@@ -0,0 +1,660 @@
+/*-------------------------------------------------------------------------
+ *
+ * tablespace.c
+ * Commands to manipulate table spaces
+ *
+ *
+ * Tablespaces in PostgreSQL are designed to allow users to determine
+ * where the data file(s) for a given database object reside on the file
+ * system.
+ *
+ * A tablespace represents a directory on the file system. At tablespace
+ * creation time, the directory must be empty. To simplify things and
+ * remove the possibility of having file name conflicts, we isolate
+ * files within a tablespace into database-specific subdirectories.
+ *
+ * To support file access via the information given in RelFileNode, we
+ * maintain a symbolic-link map in $PGDATA/pg_tablespaces. The symlinks are
+ * named by tablespace OIDs and point to the actual tablespace directories.
+ * Thus the full path to an arbitrary file is
+ * $PGDATA/pg_tablespaces/spcoid/dboid/relfilenode
+ *
+ * There are two tablespaces created at initdb time: global (for shared
+ * tables) and default (for everything else). For backwards compatibility
+ * and to remain functional on platforms without symlinks, these tablespaces
+ * are accessed specially: they are respectively
+ * $PGDATA/global/relfilenode
+ * $PGDATA/base/dboid/relfilenode
+ *
+ * The implementation is designed to be backwards compatible. For this reason
+ * (and also as a feature unto itself) when a user creates an object without
+ * specifying a tablespace, we look at the object's parent and place
+ * the object in the parent's tablespace. The hierarchy is as follows:
+ * database > schema > table > index
+ *
+ * To allow CREATE DATABASE to give a new database a default tablespace
+ * that's different from the template database's default, we make the
+ * provision that a zero in pg_class.reltablespace means the database's
+ * default tablespace. Without this, CREATE DATABASE would have to go in
+ * and munge the system catalogs of the new database. This special meaning
+ * of zero also applies in pg_namespace.nsptablespace.
+ *
+ *
+ * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/commands/tablespace.c,v 1.1 2004/06/18 06:13:23 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "access/heapam.h"
+#include "catalog/catalog.h"
+#include "catalog/catname.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_tablespace.h"
+#include "commands/tablespace.h"
+#include "miscadmin.h"
+#include "storage/fd.h"
+#include "storage/smgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+
+static void set_short_version(const char *path);
+static bool directory_is_empty(const char *path);
+
+
+/*
+ * TablespaceCreateDbspace
+ *
+ * Make sure the per-database subdirectory for database dbNode exists inside
+ * tablespace spcNode.  Every database that uses a tablespace keeps its files
+ * in a subdirectory named after the database OID; the subdirectory is made
+ * the first time it is needed.  Finding it already present is the normal
+ * case and is accepted silently.
+ *
+ * Without symlink support the whole body is compiled out and this is a
+ * no-op; CREATE DATABASE is expected to create the default subdirectory
+ * for the database.
+ */
+void
+TablespaceCreateDbspace(Oid spcNode, Oid dbNode)
+{
+#ifdef HAVE_SYMLINK
+    struct stat statbuf;
+    char       *dbpath;
+
+    /* The global tablespace has no per-database subdirectories. */
+    if (spcNode == GLOBALTABLESPACE_OID)
+        return;
+
+    Assert(OidIsValid(spcNode));
+    Assert(OidIsValid(dbNode));
+
+    dbpath = GetDatabasePath(dbNode, spcNode);
+
+    if (stat(dbpath, &statbuf) == 0)
+    {
+        /* Something is already there; be paranoid about what it is. */
+        if (!S_ISDIR(statbuf.st_mode))
+            ereport(ERROR,
+                    (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                     errmsg("\"%s\" exists but is not a directory",
+                            dbpath)));
+    }
+    else if (errno != ENOENT)
+    {
+        /* stat failed for some reason other than "no such entry" */
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not stat directory \"%s\": %m", dbpath)));
+    }
+    else
+    {
+        Relation    pg_tablespace;
+
+        /*
+         * The subdirectory is missing.  Take ExclusiveLock on pg_tablespace
+         * so that no DROP TABLESPACE or other TablespaceCreateDbspace call
+         * runs concurrently.  Simple reads from pg_tablespace are OK.
+         */
+        pg_tablespace = heap_openr(TableSpaceRelationName, ExclusiveLock);
+
+        /*
+         * Look again: someone may have created the directory while we were
+         * waiting for the lock.  Only call mkdir if it is still not there.
+         */
+        if (stat(dbpath, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+        {
+            if (mkdir(dbpath, S_IRWXU) < 0)
+                ereport(ERROR,
+                        (errcode_for_file_access(),
+                         errmsg("could not create directory \"%s\": %m",
+                                dbpath)));
+        }
+
+        /* OK to drop the exclusive lock */
+        heap_close(pg_tablespace, ExclusiveLock);
+    }
+
+    pfree(dbpath);
+#endif   /* HAVE_SYMLINK */
+}
+
+/*
+ * Create a table space
+ *
+ * Only superusers can create a tablespace. This seems a reasonable restriction
+ * since we're determining the system layout and, anyway, we probably have
+ * root if we're doing this kind of activity
+ *
+ * stmt supplies the tablespace name, the directory location and, optionally,
+ * an owner name.  When no owner is given the current user becomes the owner.
+ *
+ * Order of operations: validate the location string, check the name is not
+ * already in use, insert the pg_tablespace row, chmod the directory to 0700,
+ * verify the directory is empty, write PG_VERSION into it, and finally
+ * create the symlink <DataDir>/pg_tablespaces/<oid> that points at it.
+ * Every failure is reported with ereport(ERROR).
+ *
+ * On platforms without symlinks the function only raises a
+ * "tablespaces are not supported on this platform" error.
+ */
+void
+CreateTableSpace(CreateTableSpaceStmt *stmt)
+{
+#ifdef HAVE_SYMLINK
+ Relation rel;
+ Datum values[Natts_pg_tablespace];
+ char nulls[Natts_pg_tablespace];
+ HeapTuple tuple;
+ Oid tablespaceoid;
+ char *location;
+ char *linkloc;
+ AclId ownerid;
+
+ /* validate the request before touching the catalog or the file system */
+
+ /*
+ * don't call this in a transaction block
+ * NOTE(review): presumably because the file-system changes made below
+ * cannot be undone by a transaction rollback -- confirm.
+ */
+ PreventTransactionChain((void *) stmt, "CREATE TABLESPACE");
+
+ /* Must be super user */
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to create tablespace \"%s\"",
+ stmt->tablespacename),
+ errhint("Must be superuser to create a tablespace.")));
+
+ /* However, the eventual owner of the tablespace need not be */
+ if (stmt->owner)
+ {
+ /* No need to check result, get_usesysid() does that */
+ ownerid = get_usesysid(stmt->owner);
+ }
+ else
+ ownerid = GetUserId();
+
+ /*
+ * Unix-ify the offered path, and strip any trailing slashes.  We work on
+ * a private copy so the string in the statement is left untouched.
+ */
+ location = pstrdup(stmt->location);
+ canonicalize_path(location);
+
+ /* disallow quotes, else CREATE DATABASE would be at risk */
+ if (strchr(location, '\''))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("tablespace location may not contain single quotes")));
+
+ /*
+ * Allowing relative paths seems risky
+ *
+ * this also helps us ensure that location is not empty or whitespace
+ */
+ if (!is_absolute_path(location))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("tablespace location must be an absolute path")));
+
+ /*
+ * Check that location isn't too long. Remember that we're going to append
+ * '/<dboid>/<relid>.<nnn>' (XXX but do we ever form the whole path
+ * explicitly? This may be overly conservative.)
+ *
+ * The subtracted terms are three separator characters and three numbers
+ * of up to 10 digits each.
+ */
+ if (strlen(location) >= (MAXPGPATH - 1 - 10 - 1 - 10 - 1 - 10))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("tablespace location \"%s\" is too long",
+ location)));
+
+ /*
+ * Check that there is no other tablespace by this name. (The
+ * unique index would catch this anyway, but might as well give
+ * a friendlier message.)
+ */
+ if (OidIsValid(get_tablespace_oid(stmt->tablespacename)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("tablespace \"%s\" already exists",
+ stmt->tablespacename)));
+
+ /*
+ * Insert tuple into pg_tablespace. The purpose of doing this first
+ * is to lock the proposed tablespace name against other would-be creators.
+ * The insertion will roll back if we find problems below.
+ */
+ rel = heap_openr(TableSpaceRelationName, RowExclusiveLock);
+
+ MemSet(nulls, ' ', Natts_pg_tablespace); /* presumably ' ' = not null in heap_formtuple's convention */
+
+ values[Anum_pg_tablespace_spcname - 1] =
+ DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
+ values[Anum_pg_tablespace_spcowner - 1] =
+ Int32GetDatum(ownerid);
+ values[Anum_pg_tablespace_spclocation - 1] =
+ DirectFunctionCall1(textin, CStringGetDatum(location));
+ nulls[Anum_pg_tablespace_spcacl - 1] = 'n'; /* presumably 'n' = null: no ACL stored initially */
+
+ tuple = heap_formtuple(rel->rd_att, values, nulls);
+
+ tablespaceoid = newoid();
+
+ HeapTupleSetOid(tuple, tablespaceoid);
+
+ simple_heap_insert(rel, tuple);
+
+ CatalogUpdateIndexes(rel, tuple);
+
+ heap_freetuple(tuple);
+
+ /*
+ * Attempt to coerce target directory to safe permissions. If this
+ * fails, it doesn't exist or has the wrong owner.
+ */
+ if (chmod(location, 0700) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not set permissions on directory \"%s\": %m",
+ location)));
+
+ /*
+ * Check the target directory is empty.
+ */
+ if (!directory_is_empty(location))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("directory \"%s\" is not empty",
+ location)));
+
+ /*
+ * Create the PG_VERSION file in the target directory. This has several
+ * purposes: to make sure we can write in the directory, to prevent
+ * someone from creating another tablespace pointing at the same
+ * directory (the emptiness check above will fail), and to label
+ * tablespace directories by PG version.
+ */
+ set_short_version(location);
+
+ /*
+ * All seems well, create the symlink
+ *
+ * The buffer holds DataDir, the 16 characters of "/pg_tablespaces/",
+ * up to 10 digits of OID, and the terminating NUL.
+ */
+ linkloc = (char *) palloc(strlen(DataDir) + 16 + 10 + 1);
+ sprintf(linkloc, "%s/pg_tablespaces/%u", DataDir, tablespaceoid);
+
+ if (symlink(location, linkloc) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create symbolic link \"%s\": %m",
+ linkloc)));
+
+ pfree(linkloc);
+ pfree(location);
+
+ heap_close(rel, RowExclusiveLock);
+
+#else /* !HAVE_SYMLINK */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+#endif /* HAVE_SYMLINK */
+}
+
+/*
+ * Drop a table space
+ *
+ * stmt->tablespacename names the tablespace to remove.  The caller must be
+ * the tablespace's owner or a superuser, and the two standard tablespaces
+ * (global and default) can never be dropped, not even by a superuser.
+ *
+ * Be careful to check that the tablespace is empty: every per-database
+ * subdirectory must be empty and removable before the PG_VERSION file,
+ * the symlink and finally the pg_tablespace row are removed.  Every
+ * failure is reported with ereport(ERROR).
+ *
+ * On platforms without symlinks the function only raises a
+ * "tablespaces are not supported on this platform" error.
+ */
+void
+DropTableSpace(DropTableSpaceStmt *stmt)
+{
+#ifdef HAVE_SYMLINK
+    char       *tablespacename = stmt->tablespacename;
+    HeapScanDesc scandesc;
+    Relation    rel;
+    HeapTuple   tuple;
+    ScanKeyData entry[1];
+    char       *location;
+    Oid         tablespaceoid;
+    DIR        *dirdesc;
+    struct dirent *de;
+    char       *subfile;
+
+    /* don't call this in a transaction block */
+    PreventTransactionChain((void *) stmt, "DROP TABLESPACE");
+
+    /*
+     * Acquire ExclusiveLock on pg_tablespace to ensure that no one else
+     * is trying to do DROP TABLESPACE or TablespaceCreateDbspace concurrently.
+     */
+    rel = heap_openr(TableSpaceRelationName, ExclusiveLock);
+
+    /*
+     * Find the target tuple
+     */
+    ScanKeyInit(&entry[0],
+                Anum_pg_tablespace_spcname,
+                BTEqualStrategyNumber, F_NAMEEQ,
+                CStringGetDatum(tablespacename));
+    scandesc = heap_beginscan(rel, SnapshotNow, 1, entry);
+    tuple = heap_getnext(scandesc, ForwardScanDirection);
+
+    if (!HeapTupleIsValid(tuple))
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("tablespace \"%s\" does not exist",
+                        tablespacename)));
+
+    tablespaceoid = HeapTupleGetOid(tuple);
+
+    /* Must be superuser or owner */
+    if (GetUserId() != ((Form_pg_tablespace) GETSTRUCT(tuple))->spcowner &&
+        !superuser())
+        aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE,
+                       tablespacename);
+
+    /* Disallow drop of the standard tablespaces, even by superuser */
+    if (tablespaceoid == GLOBALTABLESPACE_OID ||
+        tablespaceoid == DEFAULTTABLESPACE_OID)
+        aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_TABLESPACE,
+                       tablespacename);
+
+    /*
+     * Build the path of the tablespace's symlink.  The buffer holds
+     * DataDir, the 16 characters of "/pg_tablespaces/", up to 10 digits
+     * of OID, and the terminating NUL.
+     */
+    location = (char *) palloc(strlen(DataDir) + 16 + 10 + 1);
+    sprintf(location, "%s/pg_tablespaces/%u", DataDir, tablespaceoid);
+
+    /*
+     * Check if the tablespace still contains any files.  We try to rmdir
+     * each per-database directory we find in it.  rmdir failure implies
+     * there are still files in that subdirectory, so give up.  (We do not
+     * have to worry about undoing any already completed rmdirs, since
+     * the next attempt to use the tablespace from that database will simply
+     * recreate the subdirectory via TablespaceCreateDbspace.)
+     *
+     * Since we hold exclusive lock, no one else should be creating any
+     * fresh subdirectories in parallel.  It is possible that new files
+     * are being created within subdirectories, though, so the rmdir
+     * call could fail.  Worst consequence is a less friendly error message.
+     */
+    dirdesc = AllocateDir(location);
+    if (dirdesc == NULL)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not open directory \"%s\": %m",
+                        location)));
+
+    /*
+     * readdir() returns NULL both at end of directory and on error; the
+     * two are told apart by errno, which therefore must be zero going
+     * into every call.
+     */
+    errno = 0;
+    while ((de = readdir(dirdesc)) != NULL)
+    {
+        /* Note we ignore PG_VERSION for the nonce */
+        if (strcmp(de->d_name, ".") == 0 ||
+            strcmp(de->d_name, "..") == 0 ||
+            strcmp(de->d_name, "PG_VERSION") == 0)
+        {
+            errno = 0;
+            continue;
+        }
+
+        subfile = palloc(strlen(location) + 1 + strlen(de->d_name) + 1);
+        sprintf(subfile, "%s/%s", location, de->d_name);
+
+        /* This check is just to deliver a friendlier error message */
+        if (!directory_is_empty(subfile))
+            ereport(ERROR,
+                    (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                     errmsg("tablespace \"%s\" is not empty",
+                            tablespacename)));
+
+        /* Do the real deed */
+        if (rmdir(subfile) < 0)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not delete directory \"%s\": %m",
+                            subfile)));
+
+        pfree(subfile);
+
+        /*
+         * The calls above succeeded, but a successful library call is
+         * allowed to leave errno nonzero.  Clear it so that a stale value
+         * is not mistaken for a readdir() failure when the loop ends.
+         */
+        errno = 0;
+    }
+#ifdef WIN32
+    /* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+       not in released version */
+    if (GetLastError() == ERROR_NO_MORE_FILES)
+        errno = 0;
+#endif
+    if (errno)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not read directory \"%s\": %m",
+                        location)));
+    FreeDir(dirdesc);
+
+    /*
+     * Okay, try to unlink PG_VERSION and then remove the symlink.
+     * (11 is the length of "/PG_VERSION".)
+     */
+    subfile = palloc(strlen(location) + 11 + 1);
+    sprintf(subfile, "%s/PG_VERSION", location);
+
+    if (unlink(subfile) < 0)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not unlink file \"%s\": %m",
+                        subfile)));
+
+    if (unlink(location) < 0)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not unlink symbolic link \"%s\": %m",
+                        location)));
+
+    pfree(subfile);
+    pfree(location);
+
+    /*
+     * We have successfully destroyed the infrastructure ... there is
+     * now no way to roll back the DROP ... so proceed to remove the
+     * pg_tablespace tuple.
+     */
+    simple_heap_delete(rel, &tuple->t_self);
+
+    heap_endscan(scandesc);
+
+    heap_close(rel, ExclusiveLock);
+
+#else   /* !HAVE_SYMLINK */
+    ereport(ERROR,
+            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+             errmsg("tablespaces are not supported on this platform")));
+#endif   /* HAVE_SYMLINK */
+}
+
+
+/*
+ * set_short_version
+ *
+ * Write a PG_VERSION file into the directory "path".  The file holds one
+ * line: the leading "major.minor" part of the compiled-in PG_VERSION
+ * string, that is, everything before a second dot or before the first
+ * character that is neither a digit nor a dot.  (This should match what
+ * initdb.c does.)
+ *
+ * Failure to open or to finish writing the file is reported with
+ * ereport(ERROR).
+ */
+static void
+set_short_version(const char *path)
+{
+    char       *verstr;
+    char       *filename;
+    FILE       *fp;
+    bool        seen_dot = false;
+    int         len = 0;
+
+    /* Work on a private copy so it can be truncated in place. */
+    verstr = pstrdup(PG_VERSION);
+
+    while (verstr[len] != '\0')
+    {
+        char        ch = verstr[len];
+
+        if (ch == '.')
+        {
+            Assert(len != 0);
+            if (seen_dot)
+                break;          /* second dot ends the short version */
+            seen_dot = true;
+        }
+        else if (ch < '0' || ch > '9')
+            break;              /* gone past digits and dots */
+
+        len++;
+    }
+    Assert(len > 0 && verstr[len - 1] != '.' && seen_dot);
+    verstr[len] = '\0';
+
+    /* Build "<path>/PG_VERSION" and write the version line into it. */
+    filename = palloc(strlen(path) + strlen("/PG_VERSION") + 1);
+    sprintf(filename, "%s/PG_VERSION", path);
+
+    fp = AllocateFile(filename, PG_BINARY_W);
+    if (fp == NULL)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not write to file \"%s\": %m",
+                        filename)));
+
+    fprintf(fp, "%s\n", verstr);
+
+    /* A nonzero result from FreeFile is treated as a write failure. */
+    if (FreeFile(fp))
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not write to file \"%s\": %m",
+                        filename)));
+
+    pfree(filename);
+    pfree(verstr);
+}
+
+/*
+ * directory_is_empty
+ *
+ * Return true if the directory "path" holds no entries other than "." and
+ * "..", false as soon as any other entry is seen.  Failure to open or to
+ * read the directory is reported with ereport(ERROR).
+ */
+static bool
+directory_is_empty(const char *path)
+{
+    DIR        *dir;
+    struct dirent *ent;
+    bool        empty = true;
+
+    dir = AllocateDir(path);
+    if (dir == NULL)
+        ereport(ERROR,
+                (errcode_for_file_access(),
+                 errmsg("could not open directory \"%s\": %m",
+                        path)));
+
+    /* errno must be zero going into readdir to tell EOF from failure */
+    errno = 0;
+    while ((ent = readdir(dir)) != NULL)
+    {
+        if (strcmp(ent->d_name, ".") != 0 &&
+            strcmp(ent->d_name, "..") != 0)
+        {
+            /* found a real entry; no need to look any further */
+            empty = false;
+            break;
+        }
+        errno = 0;
+    }
+
+    /*
+     * Only when the scan ran to the end do we need to find out whether
+     * readdir stopped because of an error.
+     */
+    if (empty)
+    {
+#ifdef WIN32
+        /* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+           not in released version */
+        if (GetLastError() == ERROR_NO_MORE_FILES)
+            errno = 0;
+#endif
+        if (errno)
+            ereport(ERROR,
+                    (errcode_for_file_access(),
+                     errmsg("could not read directory \"%s\": %m",
+                            path)));
+    }
+
+    FreeDir(dir);
+    return empty;
+}
+
+/*
+ * get_tablespace_oid - map a tablespace name to its OID
+ *
+ * Scans pg_tablespace under AccessShareLock for a row whose spcname equals
+ * tablespacename.  Returns that row's OID, or InvalidOid when no row
+ * matches.
+ */
+Oid
+get_tablespace_oid(const char *tablespacename)
+{
+    Relation    pg_tablespace;
+    HeapScanDesc scan;
+    HeapTuple   spctup;
+    ScanKeyData key[1];
+    Oid         spcoid = InvalidOid;    /* answer if nothing is found */
+
+    pg_tablespace = heap_openr(TableSpaceRelationName, AccessShareLock);
+
+    /* Look for spcname = tablespacename */
+    ScanKeyInit(&key[0],
+                Anum_pg_tablespace_spcname,
+                BTEqualStrategyNumber, F_NAMEEQ,
+                CStringGetDatum(tablespacename));
+    scan = heap_beginscan(pg_tablespace, SnapshotNow, 1, key);
+
+    /* Only the first match is examined */
+    spctup = heap_getnext(scan, ForwardScanDirection);
+    if (HeapTupleIsValid(spctup))
+        spcoid = HeapTupleGetOid(spctup);
+
+    heap_endscan(scan);
+    heap_close(pg_tablespace, AccessShareLock);
+
+    return spcoid;
+}
+
+/*
+ * get_tablespace_name - map a tablespace OID to its name
+ *
+ * Scans pg_tablespace under AccessShareLock for the row whose OID equals
+ * spc_oid.  Returns a palloc'd copy of the row's spcname, or NULL when
+ * there is no such tablespace.
+ */
+char *
+get_tablespace_name(Oid spc_oid)
+{
+    Relation    pg_tablespace;
+    HeapScanDesc scan;
+    HeapTuple   spctup;
+    ScanKeyData key[1];
+    char       *spcname = NULL;     /* answer if nothing is found */
+
+    pg_tablespace = heap_openr(TableSpaceRelationName, AccessShareLock);
+
+    /* Look for oid = spc_oid */
+    ScanKeyInit(&key[0],
+                ObjectIdAttributeNumber,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(spc_oid));
+    scan = heap_beginscan(pg_tablespace, SnapshotNow, 1, key);
+
+    /* We assume that there can be at most one matching tuple */
+    spctup = heap_getnext(scan, ForwardScanDirection);
+    if (HeapTupleIsValid(spctup))
+    {
+        Form_pg_tablespace spcform = (Form_pg_tablespace) GETSTRUCT(spctup);
+
+        /* copy the name out before the scan is closed */
+        spcname = pstrdup(NameStr(spcform->spcname));
+    }
+
+    heap_endscan(scan);
+    heap_close(pg_tablespace, AccessShareLock);
+
+    return spcname;
+}