diff options
| author | Robert Haas | 2013-07-22 14:34:34 +0000 |
|---|---|---|
| committer | Robert Haas | 2013-07-22 15:09:10 +0000 |
| commit | f01d1ae3a104019d6d68aeff85c4816a275130b3 (patch) | |
| tree | a0fa034de5d28f5eb458bba87c6ed6e5a163bd37 /src/backend/utils | |
| parent | b3b10c39038c20457ef058c7f4e5589c28a84f1c (diff) | |
Add infrastructure for mapping relfilenodes to relation OIDs.
Future patches are expected to introduce logical replication that
works by decoding WAL. WAL contains relfilenodes rather than relation
OIDs, so this infrastructure will be needed to find the relation OID
based on WAL contents.
If logical replication does not make it into this release, we probably
should consider reverting this, since it will add some overhead to DDL
operations that create new relations. One additional index insert per
pg_class row is not a large overhead, but it's more than zero.
Another way of meeting the needs of logical replication would be to
the relation OID to WAL, but that would burden DML operations, not
only DDL.
Andres Freund, with some changes by me. Design review, in earlier
versions, by Álvaro Herrera.
Diffstat (limited to 'src/backend/utils')
| -rw-r--r-- | src/backend/utils/adt/dbsize.c | 28 | ||||
| -rw-r--r-- | src/backend/utils/cache/Makefile | 3 | ||||
| -rw-r--r-- | src/backend/utils/cache/inval.c | 2 | ||||
| -rw-r--r-- | src/backend/utils/cache/relfilenodemap.c | 247 | ||||
| -rw-r--r-- | src/backend/utils/cache/relmapper.c | 53 |
5 files changed, 331 insertions, 2 deletions
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 34482abee3e..21d1c946abe 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -28,6 +28,7 @@ #include "utils/builtins.h" #include "utils/numeric.h" #include "utils/rel.h" +#include "utils/relfilenodemap.h" #include "utils/relmapper.h" #include "utils/syscache.h" @@ -756,6 +757,33 @@ pg_relation_filenode(PG_FUNCTION_ARGS) } /* + * Get the relation via (reltablespace, relfilenode) + * + * This is expected to be used when somebody wants to match an individual file + * on the filesystem back to its table. Thats not trivially possible via + * pg_class because that doesn't contain the relfilenodes of shared and nailed + * tables. + * + * We don't fail but return NULL if we cannot find a mapping. + * + * Instead of knowing DEFAULTTABLESPACE_OID you can pass 0. + */ +Datum +pg_filenode_relation(PG_FUNCTION_ARGS) +{ + Oid reltablespace = PG_GETARG_OID(0); + Oid relfilenode = PG_GETARG_OID(1); + Oid heaprel = InvalidOid; + + heaprel = RelidByRelfilenode(reltablespace, relfilenode); + + if (!OidIsValid(heaprel)) + PG_RETURN_NULL(); + else + PG_RETURN_OID(heaprel); +} + +/* * Get the pathname (relative to $PGDATA) of a relation * * See comments for pg_relation_filenode. diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile index 32d722e34f6..a943f8ea4bc 100644 --- a/src/backend/utils/cache/Makefile +++ b/src/backend/utils/cache/Makefile @@ -13,6 +13,7 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \ - relmapper.o spccache.o syscache.o lsyscache.o typcache.o ts_cache.o + relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \ + typcache.o ts_cache.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 3356d0fe1e2..bfe7d787b7b 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -178,7 +178,7 @@ static int maxSharedInvalidMessagesArray; */ #define MAX_SYSCACHE_CALLBACKS 32 -#define MAX_RELCACHE_CALLBACKS 5 +#define MAX_RELCACHE_CALLBACKS 10 static struct SYSCACHECALLBACK { diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c new file mode 100644 index 00000000000..372cb33c003 --- /dev/null +++ b/src/backend/utils/cache/relfilenodemap.c @@ -0,0 +1,247 @@ +/*------------------------------------------------------------------------- + * + * relfilenodemap.c + * relfilenode to oid mapping cache. + * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/relfilenode.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_class.h" +#include "catalog/pg_tablespace.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/hsearch.h" +#include "utils/inval.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" +#include "utils/relfilenodemap.h" +#include "utils/relmapper.h" + +/* Hash table for informations about each relfilenode <-> oid pair */ +static HTAB *RelfilenodeMapHash = NULL; + +/* built first time through in InitializeRelfilenodeMap */ +ScanKeyData relfilenode_skey[2]; + +typedef struct +{ + Oid reltablespace; + Oid relfilenode; +} RelfilenodeMapKey; + +typedef struct +{ + RelfilenodeMapKey key; /* lookup key - must be first */ + Oid relid; /* pg_class.oid */ +} RelfilenodeMapEntry; + +/* + * RelfilenodeMapInvalidateCallback + * Flush mapping entries when pg_class is updated in a relevant fashion. + */ +static void +RelfilenodeMapInvalidateCallback(Datum arg, Oid relid) +{ + HASH_SEQ_STATUS status; + RelfilenodeMapEntry *entry; + + /* nothing to do if not active or deleted */ + if (RelfilenodeMapHash == NULL) + return; + + /* if relid is InvalidOid, we must invalidate the entire cache */ + if (relid == InvalidOid) + { + hash_destroy(RelfilenodeMapHash); + RelfilenodeMapHash = NULL; + return; + } + + hash_seq_init(&status, RelfilenodeMapHash); + while ((entry = (RelfilenodeMapEntry *) hash_seq_search(&status)) != NULL) + { + /* Same OID may occur in more than one tablespace. */ + if (entry->relid == relid) + { + if (hash_search(RelfilenodeMapHash, + (void *) &entry->key, + HASH_REMOVE, + NULL) == NULL) + elog(ERROR, "hash table corrupted"); + } + } +} + +/* + * RelfilenodeMapInvalidateCallback + * Initialize cache, either on first use or after a reset. + */ +static void +InitializeRelfilenodeMap(void) +{ + HASHCTL ctl; + static bool initial_init_done = false; + int i; + + /* Make sure we've initialized CacheMemoryContext. */ + if (CacheMemoryContext == NULL) + CreateCacheMemoryContext(); + + /* Initialize the hash table. */ + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(RelfilenodeMapKey); + ctl.entrysize = sizeof(RelfilenodeMapEntry); + ctl.hash = tag_hash; + ctl.hcxt = CacheMemoryContext; + + RelfilenodeMapHash = + hash_create("RelfilenodeMap cache", 1024, &ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + /* + * For complete resets we simply delete the entire hash, but there's no + * need to do the other stuff multiple times. Especially the initialization + * of the relcche invalidation should only be done once. + */ + if (initial_init_done) + return; + + /* build skey */ + MemSet(&relfilenode_skey, 0, sizeof(relfilenode_skey)); + + for (i = 0; i < 2; i++) + { + fmgr_info_cxt(F_OIDEQ, + &relfilenode_skey[i].sk_func, + CacheMemoryContext); + relfilenode_skey[i].sk_strategy = BTEqualStrategyNumber; + relfilenode_skey[i].sk_subtype = InvalidOid; + relfilenode_skey[i].sk_collation = InvalidOid; + } + + relfilenode_skey[0].sk_attno = Anum_pg_class_reltablespace; + relfilenode_skey[1].sk_attno = Anum_pg_class_relfilenode; + + /* Watch for invalidation events. */ + CacheRegisterRelcacheCallback(RelfilenodeMapInvalidateCallback, + (Datum) 0); + initial_init_done = true; +} + +/* + * Map a relation's (tablespace, filenode) to a relation's oid and cache the + * result. + * + * Returns InvalidOid if no relation matching the criteria could be found. + */ +Oid +RelidByRelfilenode(Oid reltablespace, Oid relfilenode) +{ + RelfilenodeMapKey key; + RelfilenodeMapEntry *entry; + bool found; + SysScanDesc scandesc; + Relation relation; + HeapTuple ntp; + ScanKeyData skey[2]; + + if (RelfilenodeMapHash == NULL) + InitializeRelfilenodeMap(); + + /* pg_class will show 0 when the value is actually MyDatabaseTableSpace */ + if (reltablespace == MyDatabaseTableSpace) + reltablespace = 0; + + MemSet(&key, 0, sizeof(key)); + key.reltablespace = reltablespace; + key.relfilenode = relfilenode; + + /* + * Check cache and enter entry if nothing could be found. Even if no target + * relation can be found later on we store the negative match and return a + * InvalidOid from cache. That's not really necessary for performance since + * querying invalid values isn't supposed to be a frequent thing, but the + * implementation is simpler this way. + */ + entry = hash_search(RelfilenodeMapHash, (void *) &key, HASH_ENTER, &found); + + if (found) + return entry->relid; + + /* ok, no previous cache entry, do it the hard way */ + + /* check shared tables */ + if (reltablespace == GLOBALTABLESPACE_OID) + { + entry->relid = RelationMapFilenodeToOid(relfilenode, true); + return entry->relid; + } + + /* check plain relations by looking in pg_class */ + relation = heap_open(RelationRelationId, AccessShareLock); + + /* copy scankey to local copy, it will be modified during the scan */ + memcpy(skey, relfilenode_skey, sizeof(skey)); + + /* set scan arguments */ + skey[0].sk_argument = ObjectIdGetDatum(reltablespace); + skey[1].sk_argument = ObjectIdGetDatum(relfilenode); + + scandesc = systable_beginscan(relation, + ClassTblspcRelfilenodeIndexId, + true, + NULL, + 2, + skey); + + found = false; + + while (HeapTupleIsValid(ntp = systable_getnext(scandesc))) + { + bool isnull; + + if (found) + elog(ERROR, + "unexpected duplicate for tablespace %u, relfilenode %u", + reltablespace, relfilenode); + found = true; + +#ifdef USE_ASSERT_CHECKING + if (assert_enabled) + { + Oid check; + check = fastgetattr(ntp, Anum_pg_class_reltablespace, + RelationGetDescr(relation), + &isnull); + Assert(!isnull && check == reltablespace); + + check = fastgetattr(ntp, Anum_pg_class_relfilenode, + RelationGetDescr(relation), + &isnull); + Assert(!isnull && check == relfilenode); + } +#endif + entry->relid = HeapTupleGetOid(ntp); + } + + systable_endscan(scandesc); + heap_close(relation, AccessShareLock); + + /* check for tables that are mapped but not shared */ + if (!found) + entry->relid = RelationMapFilenodeToOid(relfilenode, false); + + return entry->relid; +} diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 2c7d9f3287b..18f0342a7de 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -181,6 +181,59 @@ RelationMapOidToFilenode(Oid relationId, bool shared) } /* + * RelationMapFilenodeToOid + * + * Do the reverse of the normal direction of mapping done in + * RelationMapOidToFilenode. + * + * This is not supposed to be used during normal running but rather for + * information purposes when looking at the filesystem or xlog. + * + * Returns InvalidOid if the OID is not known; this can easily happen if the + * relfilenode doesn't pertain to a mapped relation. + */ +Oid +RelationMapFilenodeToOid(Oid filenode, bool shared) +{ + const RelMapFile *map; + int32 i; + + /* If there are active updates, believe those over the main maps */ + if (shared) + { + map = &active_shared_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + map = &shared_map; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + } + else + { + map = &active_local_updates; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + map = &local_map; + for (i = 0; i < map->num_mappings; i++) + { + if (filenode == map->mappings[i].mapfilenode) + return map->mappings[i].mapoid; + } + } + + return InvalidOid; +} + +/* * RelationMapUpdateMap * * Install a new relfilenode mapping for the specified relation. |
