diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index b257b02..6e63afe 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -32,6 +32,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
 	pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
 	pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
 	pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
+	pg_mv_statistic.h \
 	pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
 	pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
 	pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index a819952..bb82fe8 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -152,6 +152,18 @@ CREATE VIEW pg_indexes AS
          LEFT JOIN pg_tablespace T ON (T.oid = I.reltablespace)
     WHERE C.relkind IN ('r', 'm') AND I.relkind = 'i';
 
+CREATE VIEW pg_mv_stats AS
+    SELECT
+        N.nspname AS schemaname,
+        C.relname AS tablename,
+        S.stakeys AS attnums,
+        length(S.stamcv) AS mcvbytes,
+        pg_mv_stats_mvclist_info(S.stamcv) AS mcvinfo,
+        length(S.stahist) AS histbytes,
+        pg_mv_stats_histogram_info(S.stahist) AS histinfo
+    FROM (pg_mv_statistic S JOIN pg_class C ON (C.oid = S.starelid))
+        LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
+
 CREATE VIEW pg_stats AS
     SELECT
         nspname AS schemaname,
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 954e5a6..32e0d07 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -27,6 +27,7 @@
 #include "catalog/indexing.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "commands/dbcommands.h"
 #include "commands/tablecmds.h"
@@ -54,7 +55,11 @@
 #include "utils/syscache.h"
 #include "utils/timestamp.h"
 #include "utils/tqual.h"
+#include "utils/fmgroids.h"
+#include "utils/builtins.h"
 
+#include "utils/mvstats.h"
+#include "access/sysattr.h"
 
 /* Data structure for Algorithm S from Knuth 3.4.2 */
 typedef struct
@@ -111,6 +116,62 @@ static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
 static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
 
 
+/* multivariate statistics (histograms, MCV lists, association rules) */
+
+static void build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
+						   int natts, VacAttrStats **vacattrstats);
+static void update_mv_stats(Oid relid,
+							MVHistogram histogram, MCVList mcvlist);
+
+/* multivariate histograms */
+static MVHistogram build_mv_histogram(int numrows, HeapTuple *rows,
+									  int2vector *attrs,
+									  int attr_cnt, VacAttrStats **vacattrstats,
+									  int numrows_total);
+static MVBucket create_initial_mv_bucket(int numrows, HeapTuple *rows,
+										 int2vector *attrs, int natts,
+										 VacAttrStats **vacattrstats);
+static MVBucket select_bucket_to_partition(int nbuckets, MVBucket * buckets);
+static MVBucket partition_bucket(MVBucket bucket, int2vector *attrs,
+								 int natts, VacAttrStats **vacattrstats);
+static MVBucket copy_mv_bucket(MVBucket bucket, uint32 ndimensions);
+
+static void update_bucket_ndistinct(MVBucket bucket, int2vector *attrs,
+									VacAttrStats ** stats);
+static void update_dimension_ndistinct(MVBucket bucket, int dimension,
+									   int2vector *attrs,
+									   VacAttrStats ** stats,
+									   bool update_boundaries);
+/* multivariate MCV list */
+static MCVList build_mv_mcvlist(int numrows, HeapTuple *rows,
+								int2vector *attrs,
+								int natts, VacAttrStats **vacattrstats,
+								int *numrows_filtered);
+
+/* multivariate association rules */
+static void build_mv_associations(int numrows, HeapTuple *rows,
+								  int2vector *attrs,
+								  int natts, VacAttrStats **vacattrstats);
+
+/* serialization */
+static bytea * serialize_mv_histogram(MVHistogram histogram);
+static bytea * serialize_mv_mcvlist(MCVList mcvlist);
+
+/* comparators, used when constructing multivariate stats */
+static int compare_scalars_simple(const void *a, const void *b, void *arg);
+static int compare_scalars_partition(const void *a, const void *b, void *arg);
+static int compare_scalars_memcmp(const void *a, const void *b, void *arg);
+static int compare_scalars_memcmp_2(const void *a, const void *b);
+
+static VacAttrStats ** lookup_var_attr_stats(int2vector *attrs,
+								int natts, VacAttrStats **vacattrstats);
+
+/* some debugging methods */
+#ifdef MVSTATS_DEBUG
+static void print_mv_histogram_info(MVHistogram histogram);
+#endif
+
+
 /*
  *	analyze_rel() -- analyze one relation
  */
@@ -472,6 +533,13 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
 	 * all analyzable columns.  We use a lower bound of 100 rows to avoid
 	 * possible overflow in Vitter's algorithm.  (Note: that will also be the
 	 * target in the corner case where there are no analyzable columns.)
+	 *
+	 * FIXME This sample sizing is mostly OK when computing stats for
+	 *       individual columns, but when computing multivariate stats
+	 *       (histograms, MCV lists, ...) it's rather insufficient. For
+	 *       a small number of dimensions it works, but for complex
+	 *       stats it'd be nice to use a sample proportional to the
+	 *       table size (say, 0.5% - 1%) instead of a fixed size.
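+	 *
+	 *       For example, the default statistics target (100) samples
+	 *       30000 rows, so a 2D histogram with 8192 buckets gets fewer
+	 *       than 4 sample rows per bucket on average - hardly enough
+	 *       to estimate per-bucket frequencies reliably.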
 	 */
 	targrows = 100;
 	for (i = 0; i < attr_cnt; i++)
@@ -574,6 +642,9 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
 			update_attstats(RelationGetRelid(Irel[ind]), false,
 							thisdata->attr_cnt, thisdata->vacattrstats);
 		}
+
+		/* Build multivariate stats (if there are any). */
+		build_mv_stats(onerel, numrows, rows, attr_cnt, vacattrstats);
 	}
 
 	/*
@@ -2815,3 +2886,1985 @@ compare_mcvs(const void *a, const void *b)
 
 	return da - db;
 }
+
+/*
+ * Compute requested multivariate stats, using the rows sampled for the
+ * plain (single-column) stats.
+ *
+ * This fetches a list of stats from pg_mv_statistic, computes the stats
+ * and serializes them back into the catalog (as bytea values).
+ */
+static void
+build_mv_stats(Relation onerel, int numrows, HeapTuple *rows,
+			   int natts, VacAttrStats **vacattrstats)
+{
+	int i;
+	MVStats mvstats;
+	int		nmvstats;
+
+	/*
+	 * Fetch the MV stats defined on this table from pg_mv_statistic,
+	 * and then compute them (MCV lists and histograms for now).
+	 *
+	 * TODO move this to a separate method or something ...
+	 */
+	mvstats = list_mv_stats(RelationGetRelid(onerel), &nmvstats, false);
+
+	for (i = 0; i < nmvstats; i++)
+	{
+		MCVList		mcvlist   = NULL;
+		MVHistogram	histogram = NULL;
+		int numrows_filtered  = 0;
+
+		/* int2 vector of attnums the stats should be computed on */
+		int2vector * attrs = mvstats[i].stakeys;
+
+		/* check allowed number of dimensions */
+		Assert((attrs->dim1 >= 2) && (attrs->dim1 <= MVSTATS_MAX_DIMENSIONS));
+
+		/*
+		 * Analyze associations between pairs of columns.
+		 * 
+		 * FIXME store the identified associations back to pg_mv_statistic
+		 */
+		build_mv_associations(numrows, rows, attrs, natts, vacattrstats);
+
+		/* build the MCV list */
+		mcvlist = build_mv_mcvlist(numrows, rows, attrs, natts, vacattrstats, &numrows_filtered);
+
+		/*
+		 * Build a multivariate histogram on the columns.
+		 *
+		 * FIXME remove the rows used to build the MCV from the histogram.
+		 *       Another option might be subtracting the MCV selectivities
+		 *       from the histogram, but I'm not sure whether that works
+		 *       accurately (maybe it introduces additional errors).
+		 */
+		if (numrows_filtered > 0)
+			histogram = build_mv_histogram(numrows_filtered, rows, attrs, natts, vacattrstats, numrows);
+
+		/* store the histogram / MCV list in the catalog */
+		update_mv_stats(mvstats[i].mvoid, histogram, mcvlist);
+
+#ifdef MVSTATS_DEBUG
+		print_mv_histogram_info(histogram);
+#endif
+
+	}
+}
+
+/*
+ * Lookup the VacAttrStats info for the selected columns, with indexes
+ * matching the attrs vector (to make it easy to work with when
+ * computing multivariate stats).
+ */
+static VacAttrStats **
+lookup_var_attr_stats(int2vector *attrs, int natts, VacAttrStats **vacattrstats)
+{
+	int i, j;
+	int numattrs = attrs->dim1;
+	VacAttrStats **stats = (VacAttrStats**)palloc0(numattrs * sizeof(VacAttrStats*));
+
+	/* lookup VacAttrStats info for the requested columns (same attnum) */
+	for (i = 0; i < numattrs; i++)
+	{
+		stats[i] = NULL;
+		for (j = 0; j < natts; j++)
+		{
+			if (attrs->values[i] == vacattrstats[j]->tupattnum)
+			{
+				stats[i] = vacattrstats[j];
+				break;
+			}
+		}
+
+		/*
+		 * Check that we found the info, that the attnum matches, that
+		 * the requested 'lt' operator is available, and that the type
+		 * is passed by value.
+		 */
+		Assert(stats[i] != NULL);
+		Assert(stats[i]->tupattnum == attrs->values[i]);
+
+		/* FIXME This is a rather ugly way to check for 'ltopr' (which
+		 *       is defined for 'scalar' attributes).
+		 */
+		Assert(stats[i]->compute_stats == compute_scalar_stats);
+
+		/* TODO remove the 'pass by value' requirement */
+		Assert(stats[i]->attrtype->typbyval);
+	}
+
+	return stats;
+}
+
+/*
+ * TODO Add ndistinct estimation, probably the one described in "Towards
+ *      Estimation Error Guarantees for Distinct Values, PODS 2000,
+ *      p. 268-279" (the ones called GEE, or maybe AE).
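+ *
+ *      (For the record, the GEE estimator from that paper is roughly
+ *
+ *          D = sqrt(N/n) * f_1 + sum(f_j) for j >= 2
+ *
+ *      where n is the sample size, N the table size and f_j the number
+ *      of values observed exactly j times in the sample.)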
+ *
+ * TODO The "combined" ndistinct is more likely to scale with the number
+ *      of rows (in the table), because a single column behaving this
+ *      way is sufficient for such behavior.
+ */
+static MVBucket
+create_initial_mv_bucket(int numrows, HeapTuple *rows, int2vector *attrs,
+						 int natts, VacAttrStats **vacattrstats)
+{
+	int i;
+	int	numattrs = attrs->dim1;
+
+	/* info for the interesting attributes only */
+	VacAttrStats **stats = lookup_var_attr_stats(attrs, natts, vacattrstats);
+
+	/* resulting bucket */
+	MVBucket bucket = (MVBucket)palloc0(sizeof(MVBucketData));
+
+	Assert(numrows > 0);
+	Assert(rows != NULL);
+	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));
+
+	/* allocate the per-dimension arrays */
+	bucket->ndistincts = (uint32*)palloc0(numattrs * sizeof(uint32));
+	bucket->nullsonly = (bool*)palloc0(numattrs * sizeof(bool));
+
+	/* inclusiveness boundaries - lower/upper bounds */
+	bucket->min_inclusive = (bool*)palloc0(numattrs * sizeof(bool));
+	bucket->max_inclusive = (bool*)palloc0(numattrs * sizeof(bool));
+
+	/* lower/upper boundaries */
+	bucket->min = (Datum*)palloc0(numattrs * sizeof(Datum));
+	bucket->max = (Datum*)palloc0(numattrs * sizeof(Datum));
+
+	/*
+	 * All the sample rows fall into the initial bucket.
+	 * 
+	 * FIXME This is wrong (unless all columns are NOT NULL), because we
+	 *       skipped the NULL values.
+	 */
+	bucket->numrows = numrows;
+	bucket->ntuples = numrows;
+	bucket->rows = rows;
+
+	/*
+	 * Update the number of distinct combinations in the bucket (which
+	 * we use when selecting the bucket to partition), and then the
+	 * number of distinct values for each dimension (which we use when
+	 * choosing which dimension to split).
+	 */
+	update_bucket_ndistinct(bucket, attrs, stats);
+
+	for (i = 0; i < numattrs; i++)
+		update_dimension_ndistinct(bucket, i, attrs, stats, true);
+
+	/*
+	 * The initial bucket was not split at all, so we'll start with the
+	 * first dimension in the next round (index = 0).
+	 */
+	bucket->last_split_dimension = -1;
+
+	return bucket;
+}
+
+/*
+ * TODO Fix to handle arbitrarily-sized histograms (not just 2D ones)
+ *      and call the right output procedures (for the particular type).
+ *
+ * TODO This should somehow fetch info about the data types, and use
+ *      the appropriate output functions to print the boundary values.
+ *      Right now this prints the 8B value as an integer.
+ *
+ * TODO Also, provide a special function for 2D histogram, printing
+ *      a gnuplot script (with rectangles).
+ *
+ * TODO For string types (once supported) we can sort the strings first,
+ *      assign them a sequence of integers and use the original values
+ *      as labels.
+ */
+#ifdef MVSTATS_DEBUG
+static void
+print_mv_histogram_info(MVHistogram histogram)
+{
+	int i = 0;
+
+	elog(WARNING, "histogram nbuckets=%d", histogram->nbuckets);
+
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		MVBucket bucket = histogram->buckets[i];
+		elog(WARNING, "  bucket %d : ndistinct=%f ntuples=%d min=[%ld, %ld], max=[%ld, %ld] distinct=[%d,%d]",
+			i, bucket->ndistinct, bucket->numrows,
+			bucket->min[0], bucket->min[1], bucket->max[0], bucket->max[1],
+			bucket->ndistincts[0], bucket->ndistincts[1]);
+	}
+}
+#endif
+
+/*
+ * A very simple partitioning selection criteria - choose the bucket
+ * with the highest number of distinct values.
+ *
+ * Returns either pointer to the bucket selected to be partitioned,
+ * or NULL if there are no buckets that may be split (i.e. all buckets
+ * contain a single distinct value).
+ *
+ * TODO Consider other partitioning criteria (v-optimal, maxdiff etc.).
+ *
+ * TODO Allowing the bucket to degenerate to a single combination of
+ *      values makes it a rather strange MCV list. Maybe we should use
+ *      a higher lower boundary, or make the selection criteria more
+ *      complex (e.g. consider the number of rows in the bucket, etc.).
+ *
+ *      That however is different from buckets 'degenerated' only for
+ *      some dimensions (e.g. half of them), which is perfectly
+ *      appropriate for statistics on a combination of low and high
+ *      cardinality columns.
+ */
+static MVBucket
+select_bucket_to_partition(int nbuckets, MVBucket * buckets)
+{
+	int i;
+	int ndistinct = 1; /* if ndistinct=1, we can't split the bucket */
+	MVBucket bucket = NULL;
+
+	for (i = 0; i < nbuckets; i++)
+	{
+		/* if the ndistinct count is higher, use this bucket */
+		if (buckets[i]->ndistinct > ndistinct)
+		{
+			bucket = buckets[i];
+			ndistinct = buckets[i]->ndistinct;
+		}
+	}
+
+	/* may be NULL if there are no buckets with (ndistinct > 1) */
+	return bucket;
+}
+
+/*
+ * A simple bucket partitioning implementation - splits the dimensions in
+ * a round-robin manner (considering only those with ndistinct>1). That
+ * is, first dimension 0 is split, then 1, 2, ... until reaching the
+ * end of the attribute list, and then wrapping back to 0. Of course,
+ * dimensions with a single distinct value are skipped.
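+ * For example, with per-dimension distinct counts {4, 1, 8}, the splits
+ * alternate between dimensions 0 and 2 (dimension 1 has a single value
+ * and can't be split any further).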
+ *
+ * This is essentially what Muralikrishna/DeWitt described in their SIGMOD
+ * article (M. Muralikrishna, David J. DeWitt: Equi-Depth Histograms For
+ * Estimating Selectivity Factors For Multi-Dimensional Queries. SIGMOD
+ * Conference 1988: 28-36).
+ *
+ * There are multiple histogram options, centered around the partitioning
+ * criteria, specifying both how to choose a bucket and the dimension
+ * most in need of a split. For a nice summary and general overview, see
+ * "rK-Hist : an R-Tree based histogram for multi-dimensional selectivity
+ * estimation" thesis by J. A. Lopez, Concordia University, p.34-37 (and
+ * possibly p. 32-34 for explanation of the terms).
+ * 
+ * This splits the bucket by tweaking the existing one, and returning the
+ * new bucket (essentially shrinking the existing one in-place and returning
+ * the other "half" as a new bucket). The caller is responsible for adding
+ * the new bucket into the list of buckets.
+ *
+ * TODO It requires care to prevent splitting only one dimension and not
+ *      splitting another one at all (which might happen easily in case of
+ *      strongly dependent columns - e.g. y=x).
+ *
+ * TODO Should probably consider statistics target for the columns (e.g. to
+ *      split dimensions with higher statistics target more frequently).
+ */
+static MVBucket
+partition_bucket(MVBucket bucket, int2vector *attrs,
+				 int natts, VacAttrStats **vacattrstats)
+{
+	int i;
+	int dimension;
+	int numattrs = attrs->dim1;
+
+	Datum split_value;
+	MVBucket new_bucket;
+
+	/* needed for sort, when looking for the split value */
+	bool isNull;
+	int nvalues = 0;
+	StdAnalyzeData * mystats = NULL;
+	ScalarItem * values = (ScalarItem*)palloc0(bucket->numrows * sizeof(ScalarItem));
+	SortSupportData ssup;
+
+	/* looking for the split value */
+	int ndistinct = 1;	/* number of distinct values below current value */
+	int nrows = 1;		/* number of rows below current value */
+
+	/* needed when splitting the values */
+	HeapTuple * oldrows = bucket->rows;
+	int oldnrows = bucket->numrows;
+
+	/* info for the interesting attributes only */
+	VacAttrStats **stats = lookup_var_attr_stats(attrs, natts, vacattrstats);
+
+	/*
+	 * We can't split buckets with a single distinct value (this also
+	 * disqualifies NULL-only dimensions). Also, there have to be multiple
+	 * sample rows (otherwise there couldn't be multiple distinct values).
+	 */
+	Assert(bucket->ndistinct > 1);
+	Assert(bucket->numrows > 1);
+	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));
+
+	/*
+	 * Look for the next dimension to split, in a round robin manner.
+	 * We'll use the first one with (ndistinct > 1).
+	 *
+	 * If we happen to wrap all the way around, something clearly went
+	 * wrong - the Assert below catches that (which is also why we don't
+	 * update last_split_dimension directly in the loop).
+	 */
+	dimension = bucket->last_split_dimension;
+	while (true)
+	{
+		dimension = (dimension + 1) % numattrs;
+
+		if (bucket->ndistincts[dimension] > 1)
+			break;
+
+		/* if we're back at the previous split dimension, we'd loop forever */
+		Assert(dimension != bucket->last_split_dimension);
+	}
+
+	/* Remember the dimension for the next split of this bucket. */
+	bucket->last_split_dimension = dimension;
+
+	/*
+	 * Walk through the selected dimension, collect and sort the values
+	 * and then choose the value to use as the new boundary.
+	 */
+	mystats = (StdAnalyzeData *) stats[dimension]->extra_data;
+
+	/* initialize sort support, etc. */
+	memset(&ssup, 0, sizeof(ssup));
+	ssup.ssup_cxt = CurrentMemoryContext;
+
+	/* We always use the default collation for statistics */
+	ssup.ssup_collation = DEFAULT_COLLATION_OID;
+	ssup.ssup_nulls_first = false;
+
+	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
+
+	for (i = 0; i < bucket->numrows; i++)
+	{
+		/* remember the index of the sample row, to make the partitioning simpler */
+		values[nvalues].value = heap_getattr(bucket->rows[i], attrs->values[dimension],
+											 stats[dimension]->tupDesc, &isNull);
+		values[nvalues].tupno = i;
+
+		/* no NULL values allowed here (we don't do splits by null-only dimensions) */
+		Assert(!isNull);
+
+		nvalues++;
+	}
+
+	/* sort the array of (pass-by-value) datums */
+	qsort_arg((void *) values, nvalues, sizeof(ScalarItem),
+			  compare_scalars_partition, (void *) &ssup);
+
+	/*
+	 * We know there are bucket->ndistincts[dimension] distinct values
+	 * in this dimension, and we want to split it in half, so walk
+	 * through the array and stop once we see (ndistinct/2) values.
+	 *
+	 * We always choose the "next" value, i.e. (n/2+1)-th distinct value,
+	 * and use it as an exclusive upper boundary (and inclusive lower
+	 * boundary).
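+	 *
+	 * For example, with 5 distinct values in the dimension we stop
+	 * after seeing the 3rd one, which becomes the exclusive upper
+	 * bound of this bucket (and the inclusive lower bound of the
+	 * new one).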
+	 *
+	 * TODO Maybe we should use "average" of the two middle distinct
+	 *      values (at least for even distinct counts), but that would
+	 *      require being able to do an average (which does not work
+	 *      for non-arithmetic types).
+	 *
+	 * TODO Another option is to look for a split that'd give about
+	 *      50% tuples (not distinct values) in each partition. That
+	 *      might work better when there are a few very frequent
+	 *      values, and many rare ones.
+	 */
+	split_value = values[0].value;
+	for (i = 1; i < bucket->numrows; i++)
+	{
+		/* count distinct values */
+		if (values[i].value != values[i-1].value)
+			ndistinct += 1;
+
+		/* once we've seen half of the distinct values, use the current one */
+		if (ndistinct > bucket->ndistincts[dimension] / 2)
+		{
+			split_value = values[i].value;
+			break;
+		}
+
+		/* keep track how many rows belong to the first bucket */
+		nrows += 1;
+	}
+
+	Assert(nrows > 0);
+	Assert(nrows < bucket->numrows);
+
+	/* create the new bucket as an (incomplete) copy of the one being partitioned */
+	new_bucket = copy_mv_bucket(bucket, numattrs);
+
+	/*
+	 * Do the actual split of the chosen dimension, using the split value
+	 * as the (exclusive) upper bound for the existing bucket, and the
+	 * (inclusive) lower bound for the new one.
+	 */
+	bucket->max[dimension]     = split_value;
+	new_bucket->min[dimension] = split_value;
+
+	bucket->max_inclusive[dimension]		= false;
+	new_bucket->min_inclusive[dimension]	= true;
+
+	/*
+	 * Redistribute the sample tuples using the 'ScalarItem->tupno'
+	 * index. We know 'nrows' rows should remain in the original
+	 * bucket and the rest goes to the new one.
+	 */
+
+	bucket->rows     = (HeapTuple*)palloc0(nrows * sizeof(HeapTuple));
+	new_bucket->rows = (HeapTuple*)palloc0((oldnrows - nrows) * sizeof(HeapTuple));
+
+	bucket->numrows	 = nrows;
+	new_bucket->numrows = (oldnrows - nrows);
+
+	/*
+	 * The first nrows should go to the first bucket, the rest should
+	 * go to the new one. Use the tupno field to get the actual HeapTuple
+	 * row from the original array of sample rows.
+	 */
+	for (i = 0; i < nrows; i++)
+		memcpy(&bucket->rows[i], &oldrows[values[i].tupno], sizeof(HeapTuple));
+
+	for (i = nrows; i < oldnrows; i++)
+		memcpy(&new_bucket->rows[i-nrows], &oldrows[values[i].tupno], sizeof(HeapTuple));
+
+	/* update ndistinct values for the buckets (total and per dimension) */
+	update_bucket_ndistinct(bucket, attrs, stats);
+	update_bucket_ndistinct(new_bucket, attrs, stats);
+
+	/*
+	 * TODO We don't need to do this for the dimension we used for split,
+	 *      because we know how many distinct values went to each partition.
+	 */
+	for (i = 0; i < numattrs; i++)
+	{
+		update_dimension_ndistinct(bucket, i, attrs, stats, false);
+		update_dimension_ndistinct(new_bucket, i, attrs, stats, false);
+	}
+
+	pfree(oldrows);
+	pfree(values);
+
+	return new_bucket;
+}
+
+/*
+ * Copy a histogram bucket. The copy does not include the build-time
+ * data, i.e. sampled rows etc.
+ */
+static MVBucket
+copy_mv_bucket(MVBucket bucket, uint32 ndimensions)
+{
+	MVBucket new_bucket = (MVBucket)palloc0(sizeof(MVBucketData));
+
+	/*
+	 * Copy only the fields that will stay the same after the split;
+	 * the rest will be recomputed.
+	 */
+
+	new_bucket->last_split_dimension = bucket->last_split_dimension;
+
+	/* allocate the per-dimension arrays */
+	new_bucket->ndistincts = (uint32*)palloc0(ndimensions * sizeof(uint32));
+	new_bucket->nullsonly = (bool*)palloc0(ndimensions * sizeof(bool));
+
+	/* inclusiveness boundaries - lower/upper bounds */
+	new_bucket->min_inclusive = (bool*)palloc0(ndimensions * sizeof(bool));
+	new_bucket->max_inclusive = (bool*)palloc0(ndimensions * sizeof(bool));
+
+	/* lower/upper boundaries */
+	new_bucket->min = (Datum*)palloc0(ndimensions * sizeof(Datum));
+	new_bucket->max = (Datum*)palloc0(ndimensions * sizeof(Datum));
+
+	/* copy data */
+	memcpy(new_bucket->nullsonly, bucket->nullsonly, ndimensions * sizeof(bool));
+
+	memcpy(new_bucket->min_inclusive, bucket->min_inclusive, ndimensions*sizeof(bool));
+	memcpy(new_bucket->min, bucket->min, ndimensions*sizeof(Datum));
+
+	memcpy(new_bucket->max_inclusive, bucket->max_inclusive, ndimensions*sizeof(bool));
+	memcpy(new_bucket->max, bucket->max, ndimensions*sizeof(Datum));
+
+	return new_bucket;
+}
+
+/*
+ * Count the number of distinct values in the bucket. This just copies
+ * the Datum values into a simple array, and sorts them using a
+ * memcmp-based comparator. That means it only works for pass-by-value
+ * data types (assuming they don't use collations etc.).
+ *
+ * FIXME Make this work with all types (not just pass-by-value ones).
+ * 
+ * TODO This might evaluate and store the distinct counts for all
+ *      possible attribute combinations. The assumption is this might be
+ *      useful for estimating things like GROUP BY cardinalities (e.g.
+ *      in cases when some buckets contain a lot of low-frequency
+ *      combinations, and other buckets contain few high-frequency ones).
+ *
+ *      But it's unclear whether it's worth the price. Computing this
+ *      is actually quite cheap, because it may be evaluated at the very
+ *      end, when the buckets are rather small (so sorting it in 2^N ways
+ *      is not a big deal). Assuming the partitioning algorithm does not
+ *      use these values to do the decisions, of course (the current
+ *      algorithm does not).
+ *
+ *      The overhead with storing, fetching and parsing the data is more
+ *      concerning - adding 2^N values per bucket (even if it's just
+ *      a 1B or 2B value) would significantly bloat the histogram, and
+ *      thus the impact on optimizer. Which is not really desirable.
+ * 
+ * TODO This only updates the ndistinct for the sample (or bucket), but
+ *      we eventually need an estimate of the total number of distinct
+ *      values in the dataset. It's possible to either use the current
+ *      1D approach (i.e., if it's more than 10% of the sample, assume
+ *      it's proportional to the number of rows). Or it's possible to
+ *      implement the estimator suggested in the article, supposedly
+ *      giving 'optimal' estimates (w.r.t. probability of error).
+ */
+static void
+update_bucket_ndistinct(MVBucket bucket, int2vector *attrs, VacAttrStats ** stats)
+{
+	int i, j, idx = 0;
+	int numattrs = attrs->dim1;
+	Size len = sizeof(Datum) * numattrs;
+	bool isNull;
+
+	/*
+	 * We could collect this while walking through all the attributes
+	 * above (this way we have to call heap_getattr twice).
+	 */
+	Datum * values = palloc0(bucket->numrows * numattrs * sizeof(Datum));
+
+	for (j = 0; j < bucket->numrows; j++)
+		for (i = 0; i < numattrs; i++)
+			values[idx++] = heap_getattr(bucket->rows[j], attrs->values[i],
+										 stats[i]->tupDesc, &isNull);
+
+	qsort_arg((void *) values, bucket->numrows, sizeof(Datum) * numattrs,
+			  compare_scalars_memcmp, &len);
+
+	bucket->ndistinct = 1;
+
+	for (i = 1; i < bucket->numrows; i++)
+		if (memcmp(&values[i * numattrs], &values[(i-1) * numattrs], len) != 0)
+			bucket->ndistinct += 1;
+
+	pfree(values);
+}
+
+/*
+ * Count distinct values per bucket dimension.
+ *
+ * TODO Remove unnecessary parameters - don't pass in the whole arrays,
+ *      just the proper elements.
+ */
+static void
+update_dimension_ndistinct(MVBucket bucket, int dimension, int2vector *attrs,
+						   VacAttrStats ** stats, bool update_boundaries)
+{
+	int j;
+	int nvalues = 0;
+	bool isNull;
+	Datum * values = (Datum*)palloc0(bucket->numrows * sizeof(Datum));
+	SortSupportData ssup;
+
+	StdAnalyzeData * mystats = (StdAnalyzeData *) stats[dimension]->extra_data;
+
+	memset(&ssup, 0, sizeof(ssup));
+	ssup.ssup_cxt = CurrentMemoryContext;
+
+	/* We always use the default collation for statistics */
+	ssup.ssup_collation = DEFAULT_COLLATION_OID;
+	ssup.ssup_nulls_first = false;
+
+	PrepareSortSupportFromOrderingOp(mystats->ltopr, &ssup);
+
+	for (j = 0; j < bucket->numrows; j++)
+	{
+		values[nvalues] = heap_getattr(bucket->rows[j], attrs->values[dimension],
+									   stats[dimension]->tupDesc, &isNull);
+
+		/* ignore NULL values */
+		if (! isNull)
+			nvalues++;
+	}
+
+	/* there's always at least 1 distinct value (may be NULL) */
+	bucket->ndistincts[dimension] = 1;
+
+	/*
+	 * If there are only NULL values in the column, mark the dimension
+	 * as NULL-only and bail out.
+	 */
+	if (nvalues == 0)
+	{
+		pfree(values);
+		bucket->nullsonly[dimension] = true;
+		return;
+	}
+
+	/* sort the array of (pass-by-value) datums */
+	qsort_arg((void *) values, nvalues, sizeof(Datum),
+			  compare_scalars_simple, (void *) &ssup);
+
+	/*
+	 * Update min/max boundaries to the smallest bounding box. Generally, this
+	 * needs to be done only when constructing the initial bucket.
+	 */
+	if (update_boundaries)
+	{
+		/* store the min/max values */
+		bucket->min[dimension] = values[0];
+		bucket->min_inclusive[dimension] = true;
+
+		bucket->max[dimension] = values[nvalues-1];
+		bucket->max_inclusive[dimension] = true;
+	}
+
+	/*
+	 * Walk through the array and count distinct values by comparing
+	 * successive values.
+	 *
+	 * FIXME This only works for pass-by-value types (i.e. not VARCHARs etc.).
+	 */
+	for (j = 1; j < nvalues; j++)
+	{
+		if (values[j] != values[j-1])
+			bucket->ndistincts[dimension] += 1;
+	}
+
+	pfree(values);
+}
+
+/*
+ * Fetch list of MV stats defined on a table, without the actual data
+ * for histograms, MCV lists etc.
+ */
+MVStats
+list_mv_stats(Oid relid, int *nstats, bool built_only)
+{
+	Relation	indrel;
+	SysScanDesc indscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	MVStats		result;
+
+	/* start with 16 items, that should be enough for most cases */
+	int maxitems = 16;
+	result = (MVStats)palloc0(sizeof(MVStatsData) * maxitems);
+	*nstats = 0;
+
+	/* Prepare to scan pg_mv_statistic for entries having starelid = this rel. */
+	ScanKeyInit(&skey,
+				Anum_pg_mv_statistic_starelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(relid));
+
+	indrel = heap_open(MvStatisticRelationId, AccessShareLock);
+	indscan = systable_beginscan(indrel, MvStatisticRelidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+	{
+		Form_pg_mv_statistic stats = (Form_pg_mv_statistic) GETSTRUCT(htup);
+
+		/*
+		 * Skip statistics that were not computed yet (if the caller
+		 * requested only stats that were already built).
+		 */
+		if (built_only && (! (stats->hist_built || stats->mcv_built || stats->assoc_built)))
+			continue;
+
+		/* double the array size if needed */
+		if (*nstats == maxitems)
+		{
+			maxitems *= 2;
+			result = (MVStats)repalloc(result, sizeof(MVStatsData) * maxitems);
+		}
+
+		result[*nstats].mvoid = HeapTupleGetOid(htup);
+		result[*nstats].stakeys = buildint2vector(stats->stakeys.values, stats->stakeys.dim1);
+		result[*nstats].hist_built = stats->hist_built;
+		result[*nstats].mcv_built = stats->mcv_built;
+		result[*nstats].assoc_built = stats->assoc_built;
+		*nstats += 1;
+	}
+
+	systable_endscan(indscan);
+
+	heap_close(indrel, AccessShareLock);
+
+	/* TODO maybe save the list into relcache, as in RelationGetIndexList
+	 *      (which served as an inspiration for this one)? */
+
+	return result;
+}
+
+
+/*
+ * Serialize the MV histogram into a bytea value.
+ *
+ * The serialization first deduplicates the boundary values into a separate
+ * array, and uses 2B indexes when serializing the buckets. This saves
+ * a significant amount of space because each bucket split adds a single
+ * new boundary value, so e.g. with 4 attributes and 8191 splits (thus
+ * 8192 buckets), there are only ~8200 distinct boundary values.
+ *
+ * But as each bucket has 8 boundary values (4+4), that's ~64k Datums.
+ * That's roughly 65kB vs. 512kB, but we haven't included the indexes
+ * used to reference the boundary values. By using int16 indexes (which
+ * should be more than enough for all reasonable histogram sizes),
+ * this amounts to ~128kB (8192*8*2). So in total it's ~196kB vs. 512kB,
+ * i.e. more than 2x compression, which is nice.
+ *
+ * The implementation is simple - walk through the buckets, collect all
+ * the boundary values, keep only distinct values (in a sorted array)
+ * and then replace the values with indexes (using binary search).
+ *
+ * It's possible to either serialize/deserialize the histogram into
+ * a MVHistogram, or create a special structure working with this
+ * compressed structure (and keep MVBucket/MVHistogram only for the
+ * building phase). This might actually work better thanks to better
+ * CPU cache hit ratio, and simpler deserialization.
+ *
+ * This encoding will probably prevent automatic varlena compression,
+ * because first part of the serialized bytea will be an array of unique
+ * values (although sorted), and pglz decides whether to compress by
+ * trying to compress the first part (~1kB or so), which will compress
+ * poorly due to the lack of repetition.
+ *
+ * But in this case this is probably desirable - the data in general
+ * won't be really compressible (in addition to the 2x compression we
+ * got thanks to the encoding). In a sense the encoding scheme is
+ * actually a context-aware compression (usually compressing to ~30%).
+ * So this seems appropriate in this case.
+ *
+ * FIXME Make this work with arbitrary types.
+ *
+ * TODO Try to keep the compressed form, instead of deserializing it to
+ *      MVHistogram/MVBucket.
+ *
+ * TODO We might get a bit better compression by considering the actual
+ *      data type length. The current implementation treats all data as
+ *      8B values, but for INT it's actually 4B etc. OTOH this is only
+ *      related to the lookup table, and most of the space is occupied
+ *      by the buckets (with int16 indexes). And we don't have type info
+ *      at the moment, so it would be difficult (but we'll need it to
+ *      support all types, so maybe then).
+ */
+static bytea *
+serialize_mv_histogram(MVHistogram histogram)
+{
+	int i = 0, j = 0;
+
+	/* total size (histogram header + all buckets) */
+	Size	total_len;
+	char   *tmp = NULL;
+	bytea  *result = NULL;
+
+	/* we need to accumulate all boundary values (min/max) */
+	int idx = 0;
+	int max_values = histogram->nbuckets * histogram->ndimensions * 2;
+	Datum * values = (Datum*)palloc0(max_values * sizeof(Datum));
+	Size len = sizeof(Datum);
+
+	/* we'll collect unique boundary values into this */
+	int		ndistinct = 0;
+	Datum  *lookup = NULL;
+	uint16 *indexes = (uint16*)palloc0(sizeof(uint16) * histogram->ndimensions);
+
+	/*
+	 * Collect the boundary values first, sort them and generate a small
+	 * array with only distinct values.
+	 */
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		for (j = 0; j < histogram->ndimensions; j++)
+		{
+			values[idx++] = histogram->buckets[i]->min[j];
+			values[idx++] = histogram->buckets[i]->max[j];
+		}
+	}
+
+	/*
+	 * We've allocated just enough space for all boundary values, but
+	 * this may change once we start handling NULL values (as we'll
+	 * probably skip those).
+	 *
+	 * Also, we expect more than one boundary value at this moment.
+	 */
+	Assert(max_values == idx);
+	Assert(idx > 1);
+
+	/*
+	 * Sort the collected boundary values using a simple memcmp-based
+	 * comparator (this won't work for pass-by-reference types), and
+	 * then walk the data and count the distinct values.
+	 */
+	qsort((void *) values, idx, len, compare_scalars_memcmp_2);
+
+	ndistinct = 1;
+	for (i = 1; i < max_values; i++)
+		ndistinct += (values[i-1] != values[i]) ? 1 : 0;
+
+	/*
+	 * At this moment we can allocate the bytea value (and we'll collect
+	 * the boundary values directly into it).
+	 *
+	 * The bytea will be structured like this:
+	 *
+	 * - varlena header            : VARHDRSZ
+	 * - histogram header          : offsetof(MVHistogram,buckets)
+	 * - number of boundary values : sizeof(uint32)
+	 * - boundary values           : ndistinct * sizeof(Datum)
+	 * - buckets                   : nbuckets * BUCKET_SIZE_SERIALIZED
+	 *
+	 * We'll assume 2B indexes into the boundary values, because each
+	 * bucket 'split' introduces one boundary value. Moreover, multiple
+	 * splits may introduce the same value, so this should be enough for
+	 * at least 65k buckets (and likely more). That's more than enough
+	 * for reasonable histogram sizes.
+	 */
+
+	Assert(ndistinct <= 65536);
+
+	total_len = VARHDRSZ + offsetof(MVHistogramData, buckets) +
+				(sizeof(uint32) + ndistinct * sizeof(Datum)) +
+				histogram->nbuckets * BUCKET_SIZE_SERIALIZED(histogram->ndimensions);
+
+	result = (bytea*)palloc0(total_len);
+	tmp = VARDATA(result);
+
+	SET_VARSIZE(result, total_len);
+
+	/* copy the global histogram header */
+	memcpy(tmp, histogram, offsetof(MVHistogramData, buckets));
+	tmp += offsetof(MVHistogramData, buckets);
+
+	/*
+	 * Copy the number of distinct values, and then all the distinct
+	 * values currently stored in the 'values' array (sorted).
+	 */
+	memcpy(tmp, &ndistinct, sizeof(uint32));
+	tmp += sizeof(uint32);
+
+	lookup = (Datum*)tmp;
+
+	for (i = 0; i < max_values; i++)
+	{
+		/* skip values that are equal to the previous one */
+		if ((i > 0) && (values[i-1] == values[i]))
+			continue;
+
+		memcpy(tmp, &values[i], sizeof(Datum));
+		tmp += sizeof(Datum);
+	}
+
+	Assert(tmp - (char*)lookup == ndistinct * sizeof(Datum));
+
+	/*
+	 * Now serialize all the buckets - for each bucket first the fixed
+	 * header, then all the variable-length parts (per-dimension arrays
+	 * and boundary indexes).
+	 */
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		MVBucket	bucket = histogram->buckets[i];
+
+		/* write the common bucket header */
+		memcpy(tmp, bucket, offsetof(MVBucketData, ndistincts));
+		tmp += offsetof(MVBucketData, ndistincts);
+
+		/* per-dimension ndistincts / nullsonly */
+		memcpy(tmp, bucket->ndistincts, sizeof(uint32)*histogram->ndimensions);
+		tmp += sizeof(uint32)*histogram->ndimensions;
+
+		memcpy(tmp, bucket->nullsonly, sizeof(bool)*histogram->ndimensions);
+		tmp += sizeof(bool)*histogram->ndimensions;
+
+		memcpy(tmp, bucket->min_inclusive, sizeof(bool)*histogram->ndimensions);
+		tmp += sizeof(bool)*histogram->ndimensions;
+
+		memcpy(tmp, bucket->max_inclusive, sizeof(bool)*histogram->ndimensions);
+		tmp += sizeof(bool)*histogram->ndimensions;
+
+		/* and now translate the min (and then max) boundaries to indexes */
+		for (j = 0; j < histogram->ndimensions; j++)
+		{
+			Datum *v = (Datum*)bsearch(&bucket->min[j], lookup, ndistinct,
+									   sizeof(Datum), compare_scalars_memcmp_2);
+
+			Assert(v != NULL);
+			indexes[j] = (v - lookup);		/* Datum arithmetic (not char) */
+			Assert(indexes[j] < ndistinct);	/* we have to be within the array */
+		}
+
+		memcpy(tmp, indexes, sizeof(uint16)*histogram->ndimensions);
+		tmp += sizeof(uint16)*histogram->ndimensions;
+
+		for (j = 0; j < histogram->ndimensions; j++)
+		{
+			Datum *v = (Datum*)bsearch(&bucket->max[j], lookup, ndistinct,
+									   sizeof(Datum), compare_scalars_memcmp_2);
+			Assert(v != NULL);
+			indexes[j] = (v - lookup);		/* Datum arithmetic (not char) */
+			Assert(indexes[j] < ndistinct);	/* we have to be within the array */
+		}
+
+		memcpy(tmp, indexes, sizeof(uint16)*histogram->ndimensions);
+		tmp += sizeof(uint16)*histogram->ndimensions;
+	}
+
+	pfree(indexes);
+
+	return result;
+}
+
+/*
+ * The reverse of serialize_mv_histogram - this essentially expands the
+ * serialized form back into MVHistogram / MVBucket structures.
+ */
+MVHistogram
+deserialize_mv_histogram(bytea * data)
+{
+	int i = 0, j = 0;
+
+	Size	expected_length;
+	char   *tmp = NULL;
+	MVHistogram histogram;
+
+	uint32	nlookup;	/* size of the Datum lookup table */
+	Datum   *lookup = NULL;
+
+	if (data == NULL)
+		return NULL;
+
+	/* get pointer to the data part of the varlena */
+	tmp = VARDATA(data);
+
+	histogram = (MVHistogram)palloc0(sizeof(MVHistogramData));
+
+	/* copy the histogram header in place */
+	memcpy(histogram, tmp, offsetof(MVHistogramData, buckets));
+	tmp += offsetof(MVHistogramData, buckets);
+
+	if (histogram->magic != MVHIST_MAGIC)
+	{
+		pfree(histogram);
+		elog(WARNING, "not a MV Histogram (magic number mismatch)");
+		return NULL;
+	}
+
+	Assert(histogram->type == MVHIST_TYPE_BASIC);
+	Assert(histogram->nbuckets > 0);
+	Assert(histogram->nbuckets <= MVHIST_MAX_BUCKETS);
+	Assert(histogram->ndimensions > 0);
+	Assert(histogram->ndimensions <= MVSTATS_MAX_DIMENSIONS);
+
+	/* now, get the size of the lookup table */
+	memcpy(&nlookup, tmp, sizeof(uint32));
+	tmp += sizeof(uint32);
+	lookup = (Datum*)tmp;
+
+	/* skip to the first bucket */
+	tmp += sizeof(Datum) * nlookup;
+
+	/* check the total serialized length */
+	expected_length = offsetof(MVHistogramData, buckets) +
+			sizeof(uint32) + nlookup * sizeof(Datum) +
+			histogram->nbuckets * BUCKET_SIZE_SERIALIZED(histogram->ndimensions);
+
+	/* check serialized length */
+	if (VARSIZE_ANY_EXHDR(data) != expected_length)
+	{
+		elog(ERROR, "invalid MV histogram serialized size (expected %ld, got %ld)",
+			 expected_length, VARSIZE_ANY_EXHDR(data));
+		return NULL;
+	}
+
+	/* allocate bucket pointers */
+	histogram->buckets = (MVBucket*)palloc0(histogram->nbuckets * sizeof(MVBucket));
+
+	/* deserialize the buckets, one by one */
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		/* don't allocate space for the build-only fields */
+		MVBucket	bucket = (MVBucket)palloc0(offsetof(MVBucketData, rows));
+		uint16     *indexes = NULL;
+
+		/* read the common bucket header */
+		memcpy(bucket, tmp, offsetof(MVBucketData, ndistincts));
+		tmp += offsetof(MVBucketData, ndistincts);
+
+		/* per-dimension ndistincts / nullsonly */
+		bucket->ndistincts = (uint32*)palloc0(sizeof(uint32)*histogram->ndimensions);
+		memcpy(bucket->ndistincts, tmp, sizeof(uint32)*histogram->ndimensions);
+		tmp += sizeof(uint32)*histogram->ndimensions;
+
+		bucket->nullsonly = (bool*)palloc0(sizeof(bool)*histogram->ndimensions);
+		memcpy(bucket->nullsonly, tmp, sizeof(bool)*histogram->ndimensions);
+		tmp += sizeof(bool)*histogram->ndimensions;
+
+		bucket->min_inclusive = (bool*)palloc0(sizeof(bool)*histogram->ndimensions);
+		memcpy(bucket->min_inclusive, tmp, sizeof(bool)*histogram->ndimensions);
+		tmp += sizeof(bool)*histogram->ndimensions;
+
+		bucket->max_inclusive = (bool*)palloc0(sizeof(bool)*histogram->ndimensions);
+		memcpy(bucket->max_inclusive, tmp, sizeof(bool)*histogram->ndimensions);
+		tmp += sizeof(bool)*histogram->ndimensions;
+
+		/* translate the indexes back to Datum values */
+		bucket->min = (Datum*)palloc0(sizeof(Datum)*histogram->ndimensions);
+		bucket->max = (Datum*)palloc0(sizeof(Datum)*histogram->ndimensions);
+
+		indexes = (uint16*)tmp;
+		tmp += sizeof(uint16) * histogram->ndimensions;
+		for (j = 0; j < histogram->ndimensions; j++)
+			memcpy(&bucket->min[j], &lookup[indexes[j]], sizeof(Datum));
+
+		indexes = (uint16*)tmp;
+		tmp += sizeof(uint16) * histogram->ndimensions;
+		for (j = 0; j < histogram->ndimensions; j++)
+			memcpy(&bucket->max[j], &lookup[indexes[j]], sizeof(Datum));
+
+		histogram->buckets[i] = bucket;
+	}
+
+	return histogram;
+}
+
+/*
+ * Serialize MCV list into a bytea value.
+ *
+ * This does not use any kind of deduplication (compared to histogram
+ * serialization), as we don't expect the same efficiency here.
+ *
+ * This simply writes out the MCV header (number of items, ...) and then
+ * the Datum values for all attributes of each item, followed by the item
+ * frequency (as a double).
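+ *
+ * So the serialized value looks roughly like this (N items, D dimensions):
+ *
+ * - varlena header
+ * - MCV list header (magic, type, nitems, ndimensions, ...)
+ * - item 1 : D Datum values, frequency (double)
+ * - ...
+ * - item N : D Datum values, frequency (double)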
+ */
+static bytea *
+serialize_mv_mcvlist(MCVList mcvlist)
+{
+	int i;
+
+	/* the list header, plus (ndimensions Datums + a double) per item */
+	Size len = VARHDRSZ + offsetof(MCVListData, items) +
+			   mcvlist->nitems * (sizeof(Datum) * mcvlist->ndimensions + sizeof(double));
+
+	bytea * output = (bytea*)palloc0(len);
+
+	char * tmp = VARDATA(output);
+
+	SET_VARSIZE(output, len);
+
+	/* first, store the number of dimensions / items */
+	memcpy(tmp, mcvlist, offsetof(MCVListData, items));
+	tmp += offsetof(MCVListData, items);
+
+	/* now, walk through the items and store values + frequency for each MCV item */
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		memcpy(tmp, mcvlist->items[i]->values, mcvlist->ndimensions * sizeof(Datum));
+		tmp += mcvlist->ndimensions * sizeof(Datum);
+
+		memcpy(tmp, &mcvlist->items[i]->frequency, sizeof(double));
+		tmp += sizeof(double);
+	}
+
+	return output;
+}
+
+MCVList
+deserialize_mv_mcvlist(bytea *data)
+{
+	int		i;
+	Size	expected_size;
+	MCVList mcvlist;
+	char   *tmp;
+
+	if (data == NULL)
+		return NULL;
+
+	if (VARSIZE_ANY_EXHDR(data) < offsetof(MCVListData,items))
+		elog(ERROR, "invalid MCV Size %ld (expected at least %ld)",
+			 VARSIZE_ANY_EXHDR(data), offsetof(MCVListData,items));
+
+	/* read the MCV list header */
+	mcvlist = (MCVList)palloc0(sizeof(MCVListData));
+
+	/* initialize pointer to the data part (skip the varlena header) */
+	tmp = VARDATA(data);
+
+	/* get the header and perform basic sanity checks */
+	memcpy(mcvlist, tmp, offsetof(MCVListData,items));
+	tmp += offsetof(MCVListData,items);
+
+	if (mcvlist->magic != MVSTAT_MCV_MAGIC)
+		elog(ERROR, "invalid MCV magic %d (expected %dd)",
+			 mcvlist->magic, MVSTAT_MCV_MAGIC);
+
+	if (mcvlist->type != MVSTAT_MCV_TYPE_BASIC)
+		elog(ERROR, "invalid MCV type %d (expected %dd)",
+			 mcvlist->type, MVSTAT_MCV_TYPE_BASIC);
+
+	Assert(mcvlist->nitems > 0);
+	Assert((mcvlist->ndimensions >= 2) && (mcvlist->ndimensions <= MVSTATS_MAX_DIMENSIONS));
+
+	/* what bytea size do we expect for those parameters */
+	expected_size = offsetof(MCVListData,items) +
+					mcvlist->nitems * (sizeof(Datum) * mcvlist->ndimensions + sizeof(double));
+
+	if (VARSIZE_ANY_EXHDR(data) != expected_size)
+		elog(ERROR, "invalid MCV Size %ld (expected %ld)",
+			 VARSIZE_ANY_EXHDR(data), expected_size);
+
+	/* allocate space for the MCV items */
+	mcvlist->items = (MCVItem*)palloc0(sizeof(MCVItem) * mcvlist->nitems);
+
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		MCVItem item = (MCVItem)palloc0(offsetof(MCVItemData, values) +
+										mcvlist->ndimensions * sizeof(Datum));
+
+		memcpy(item->values, tmp, mcvlist->ndimensions * sizeof(Datum));
+		tmp += mcvlist->ndimensions * sizeof(Datum);
+
+		memcpy(&item->frequency, tmp, sizeof(double));
+		tmp += sizeof(double);
+
+		mcvlist->items[i] = item;
+	}
+
+	return mcvlist;
+}
+
+static void
+update_mv_stats(Oid mvoid, MVHistogram histogram, MCVList mcvlist)
+{
+	HeapTuple	stup,
+				oldtup;
+	Datum		values[Natts_pg_mv_statistic];
+	bool		nulls[Natts_pg_mv_statistic];
+	bool		replaces[Natts_pg_mv_statistic];
+
+	Relation	sd = heap_open(MvStatisticRelationId, RowExclusiveLock);
+
+	memset(nulls,    1, Natts_pg_mv_statistic * sizeof(bool));
+	memset(replaces, 0, Natts_pg_mv_statistic * sizeof(bool));
+	memset(values,   0, Natts_pg_mv_statistic * sizeof(Datum));
+
+	/*
+	 * Construct a new pg_mv_statistic tuple - replace only the histogram
+	 * and MCV list, depending on whether each was actually computed.
+	 */
+	if (histogram != NULL)
+	{
+		nulls[Anum_pg_mv_statistic_stahist - 1] = false;
+		values[Anum_pg_mv_statistic_stahist - 1]
+			= PointerGetDatum(serialize_mv_histogram(histogram));
+	}
+
+	if (mcvlist != NULL)
+	{
+		nulls[Anum_pg_mv_statistic_stamcv - 1] = false;
+		values[Anum_pg_mv_statistic_stamcv - 1]
+			= PointerGetDatum(serialize_mv_mcvlist(mcvlist));
+	}
+
+	/* always replace the value (either by bytea or NULL) */
+	replaces[Anum_pg_mv_statistic_stahist - 1] = true;
+	replaces[Anum_pg_mv_statistic_stamcv - 1] = true;
+
+	/* always change the availability flags */
+	nulls[Anum_pg_mv_statistic_hist_built - 1] = false;
+	nulls[Anum_pg_mv_statistic_mcv_built - 1] = false;
+
+	replaces[Anum_pg_mv_statistic_hist_built - 1] = true;
+	replaces[Anum_pg_mv_statistic_mcv_built - 1] = true;
+
+	values[Anum_pg_mv_statistic_hist_built - 1] = BoolGetDatum(histogram != NULL);
+	values[Anum_pg_mv_statistic_mcv_built - 1] = BoolGetDatum(mcvlist != NULL);
+
+	/* Is there already a pg_mv_statistic tuple for this statistics entry? */
+	oldtup = SearchSysCache1(MVSTATOID,
+							 ObjectIdGetDatum(mvoid));
+
+	if (HeapTupleIsValid(oldtup))
+	{
+		/* Yes, replace it */
+		stup = heap_modify_tuple(oldtup,
+								 RelationGetDescr(sd),
+								 values,
+								 nulls,
+								 replaces);
+		ReleaseSysCache(oldtup);
+		simple_heap_update(sd, &stup->t_self, stup);
+	}
+	else
+		elog(ERROR, "invalid pg_mv_statistic record (oid=%d)", mvoid);
+
+	/* update indexes too */
+	CatalogUpdateIndexes(sd, stup);
+
+	heap_freetuple(stup);
+
+	heap_close(sd, RowExclusiveLock);
+}
+
+
+/* SQL-callable functions for multivariate stats */
+
+Datum
+pg_mv_stats_histogram_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+	char	   *result;
+
+	MVHistogram hist = deserialize_mv_histogram(data);
+
+	result = palloc0(128);
+	snprintf(result, 128, "nbuckets=%d", hist->nbuckets);
+
+	PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+Datum
+pg_mv_stats_mvclist_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+	char	   *result;
+
+	MCVList	mcvlist = deserialize_mv_mcvlist(data);
+
+	result = palloc0(128);
+	snprintf(result, 128, "nitems=%d", mcvlist->nitems);
+
+	pfree(mcvlist);
+
+	PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+Datum
+pg_mv_stats_histogram_gnuplot(PG_FUNCTION_ARGS)
+{
+	int			i = 0;
+
+	/* FIXME handle the length properly, e.g. using StringInfo */
+	Size		len = 1024*1024;
+	char	   *buffer = palloc0(len);
+	char	   *str = buffer;
+	bytea	   *data = PG_GETARG_BYTEA_P(0);
+
+	MVHistogram hist = deserialize_mv_histogram(data);
+
+	for (i = 0; i < hist->nbuckets; i++)
+	{
+		str += snprintf(str, len - (str - buffer),
+						"set object %d rect from %ld,%ld to %ld,%ld lw 1\n",
+						(i+1),
+						hist->buckets[i]->min[0], hist->buckets[i]->min[1],
+						hist->buckets[i]->max[0], hist->buckets[i]->max[1]);
+	}
+
+	PG_RETURN_TEXT_P(cstring_to_text(buffer));
+}
+
+bytea *
+fetch_mv_histogram(Oid mvoid)
+{
+	Relation	indrel;
+	SysScanDesc indscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	bytea	   *stahist = NULL;
+
+	/* Prepare to scan pg_mv_statistic for the entry with oid = mvoid. */
+	ScanKeyInit(&skey,
+				ObjectIdAttributeNumber,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(mvoid));
+
+	indrel = heap_open(MvStatisticRelationId, AccessShareLock);
+	indscan = systable_beginscan(indrel, MvStatisticOidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+	{
+		bool isnull = false;
+		Datum hist  = SysCacheGetAttr(MVSTATOID, htup,
+								   Anum_pg_mv_statistic_stahist, &isnull);
+
+		Assert(!isnull);
+
+		stahist = DatumGetByteaP(hist);
+
+		break;
+	}
+
+	systable_endscan(indscan);
+
+	heap_close(indrel, AccessShareLock);
+
+	/* TODO maybe save the list into relcache, as in RelationGetIndexList
+	 *      (which served as an inspiration for this one)? */
+
+	return stahist;
+}
+
+bytea *
+fetch_mv_mcvlist(Oid mvoid)
+{
+	Relation	indrel;
+	SysScanDesc indscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	bytea	   *mcvlist = NULL;
+
+	/* Prepare to scan pg_mv_statistic for the entry with oid = mvoid. */
+	ScanKeyInit(&skey,
+				ObjectIdAttributeNumber,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(mvoid));
+
+	indrel = heap_open(MvStatisticRelationId, AccessShareLock);
+	indscan = systable_beginscan(indrel, MvStatisticOidIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+	{
+		bool isnull = false;
+		Datum tmp  = SysCacheGetAttr(MVSTATOID, htup,
+								   Anum_pg_mv_statistic_stamcv, &isnull);
+
+		Assert(!isnull);
+
+		mcvlist = DatumGetByteaP(tmp);
+
+		break;
+	}
+
+	systable_endscan(indscan);
+
+	heap_close(indrel, AccessShareLock);
+
+	/* TODO maybe save the list into relcache, as in RelationGetIndexList
+	 *      (which served as an inspiration for this one)? */
+
+	return mcvlist;
+}
+
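+/*
+ * Translate the attnum to an index (dimension) within the stats attnum
+ * vector. This relies on the keys in stakeys being sorted in ascending
+ * order.
+ */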
+int
+mv_get_index(AttrNumber varattno, int2vector * stakeys)
+{
+	int i, idx = 0;
+	for (i = 0; i < stakeys->dim1; i++)
+	{
+		if (stakeys->values[i] < varattno)
+			idx += 1;
+		else
+			break;
+	}
+	return idx;
+}
+
+/*
+ * Build a multivariate histogram. In short, this first creates a single
+ * bucket containing all the rows, and then repeatedly splits it, each
+ * time searching for the bucket / dimension most in need of a split.
+ *
+ * The current criterion is rather simple - it looks at the number of
+ * distinct values (combinations of column values for a bucket, column
+ * values for a dimension). This is somewhat naive, but seems to work
+ * quite well. See the discussion at select_bucket_to_partition and
+ * partition_bucket for more details about alternative algorithms.
+ *
+ * So the current algorithm looks like this:
+ *
+ *     while [not reaching maximum number of buckets]
+ *
+ *         choose bucket to partition (max distinct combinations)
+ *             if no bucket to partition
+ *                 terminate the algorithm
+ *
+ *         choose bucket dimension to partition (max distinct values)
+ *             split the bucket into two buckets
+ *
+ */
+static MVHistogram
+build_mv_histogram(int numrows, HeapTuple *rows, int2vector *attrs,
+				   int attr_cnt, VacAttrStats **vacattrstats,
+				   int numrows_total)
+{
+	int i;
+	int ndistinct;
+	int numattrs = attrs->dim1;
+	int *ndistincts = (int*)palloc0(sizeof(int) * numattrs);
+
+	MVHistogram histogram = (MVHistogram)palloc0(sizeof(MVHistogramData));
+
+	HeapTuple * rows_copy = (HeapTuple*)palloc0(numrows * sizeof(HeapTuple));
+	memcpy(rows_copy, rows, sizeof(HeapTuple) * numrows);
+
+	Assert((numattrs >= 2) && (numattrs <= MVSTATS_MAX_DIMENSIONS));
+
+	histogram->ndimensions = numattrs;
+
+	histogram->magic = MVHIST_MAGIC;
+	histogram->type  = MVHIST_TYPE_BASIC;
+	histogram->nbuckets = 1;
+
+	/* create max buckets (better than repalloc for short-lived objects) */
+	histogram->buckets = (MVBucket*)palloc0(MVHIST_MAX_BUCKETS * sizeof(MVBucket));
+
+	/* create the initial bucket, covering the whole sample set */
+	histogram->buckets[0] = create_initial_mv_bucket(numrows, rows_copy, attrs,
+													 attr_cnt, vacattrstats);
+
+	ndistinct = histogram->buckets[0]->ndistinct;
+
+	/* keep the global ndistinct values */
+	for (i = 0; i < numattrs; i++)
+		ndistincts[i] = histogram->buckets[0]->ndistincts[i];
+
+	while (histogram->nbuckets < MVHIST_MAX_BUCKETS)
+	{
+		MVBucket bucket = select_bucket_to_partition(histogram->nbuckets, histogram->buckets);
+
+		/* no more buckets to partition */
+		if (bucket == NULL)
+			break;
+
+		histogram->buckets[histogram->nbuckets] = partition_bucket(bucket, attrs,
+																   attr_cnt, vacattrstats);
+
+		histogram->nbuckets += 1;
+	}
+
+	/*
+	 * FIXME store the histogram in a catalog in a serialized form (simple for
+	 *       pass-by-value, more complicated for buckets on varlena types)
+	 */
+	for (i = 0; i < histogram->nbuckets; i++)
+	{
+		int d;
+		histogram->buckets[i]->ntuples = (histogram->buckets[i]->numrows * 1.0) / numrows_total;
+		histogram->buckets[i]->ndistinct = (histogram->buckets[i]->ndistinct * 1.0) / ndistinct;
+
+		for (d = 0; d < numattrs; d++)
+			histogram->buckets[i]->ndistincts[d] = (histogram->buckets[i]->ndistincts[d] * 1.0) / ndistincts[d];
+	}
+
+	pfree(ndistincts);
+
+	return histogram;
+}
+
+/*
+ * Mine associations between the columns, in the form (A => B).
+ *
+ * At the moment this only works for associations between two columns,
+ * but it might be useful to mine for rules involving multiple columns
+ * on the left side. That is rules [A,B] => C and so on. Handling
+ * multiple columns on the right side is not necessary, because such
+ * rules may be decomposed into a set of rules, one for each column.
+ * I.e. A => [B,C] is exactly the same as (A => B) & (A => C).
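+ * (For example, zip => [city, state] decomposes into (zip => city)
+ * and (zip => state).)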
+ *
+ * Those rules don't immediately identify redundant clauses, because the
+ * user may choose "incompatible conditions" (e.g. by using a zip code
+ * and a mismatching city) and so on. This should however be easy to
+ * identify from a histogram, because the conditions will match a bucket
+ * with low frequencies.
+ *
+ * The question is whether this can be useful when we have a histogram,
+ * because such incompatible conditions should result in not matching
+ * any buckets (or matching only buckets with low frequencies).
+ *
+ * The problem is that histograms only work like this when the sort
+ * order is compatible with the meaning of the data. We often use data
+ * types that support sorting (e.g. INT, BIGINT) as mere labels, where
+ * the sort order does not really make much sense. Sorting by ZIP code
+ * will order the cities quite randomly, and similarly for most
+ * surrogate primary / foreign keys. In such cases the histograms are
+ * pretty useless.
+ *
+ * So, a good approach might be testing the independence of the data
+ * (by building a contingency table) and building the MV histogram only
+ * when there's a dependency. For the 'label' data this should notice
+ * that the histogram is useless, so we won't build it (and we may use
+ * that as a sign supporting the association rule).
+ *
+ * Another option is to look at selectivity of A and B separately, and
+ * then use the minimum of those.
+ *
+ * TODO investigate using histogram and MCV list to confirm the
+ *      associative rule
+ *
+ * TODO investigate statistical testing of the distribution (to decide
+ *      whether it makes sense to build the histogram)
+ *
+ * TODO Using a min/max of selectivities would probably make more sense
+ *      for the associated columns.
+ */
+static void
+build_mv_associations(int numrows, HeapTuple *rows, int2vector *attrs,
+					  int natts, VacAttrStats **vacattrstats)
+{
+	int i;
+	bool isNull;
+	Size len = 2 * sizeof(Datum);	/* only simple associations a => b */
+	int numattrs = attrs->dim1;
+
+	/* TODO Maybe this should be somehow related to the number of
+	 *      distinct values in the two columns we're currently analyzing.
+	 *      Assuming the distribution is uniform, we can compute the
+	 *      average group size we'd expect to observe in the sample, and
+	 *      use that as the threshold. That seems better than a static
+	 *      value.
+	 */
+	int min_group_size = 10;
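+
+	/*
+	 * To illustrate the TODO above with made-up numbers: for a sample of
+	 * numrows = 30000 and ndistinct = 300 in the left-hand column, a
+	 * uniform distribution implies an average group of 30000 / 300 = 100
+	 * rows, which could replace the static threshold of 10.
+	 */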
+
+	/* dimension indexes we'll check for associations [a => b] */
+	int dima, dimb;
+
+	/*
+	 * Info for the interesting attributes only.
+	 *
+	 * TODO Compute this only once and pass it to all the methods
+	 *      that need it.
+	 */
+	VacAttrStats **stats = lookup_var_attr_stats(attrs, natts, vacattrstats);
+
+	/* We'll reuse the same array for all the combinations */
+	Datum * values = (Datum*)palloc0(numrows * 2 * sizeof(Datum));
+
+	Assert(numattrs >= 2);
+
+	for (dima = 0; dima < numattrs; dima++)
+	{
+		for (dimb = 0; dimb < numattrs; dimb++)
+		{
+			int supporting = 0;
+			int contradicting = 0;
+
+			Datum val_a, val_b;
+			int violations = 0;
+			int group_size = 0;
+
+			int supporting_rows = 0;
+
+			/* skip (dima==dimb) */
+			if (dima == dimb)
+				continue;
+
+			/*
+			 * FIXME Not sure if this handles NULL values properly (not sure
+			 *       how to do that). We assume that NULL means 0 for now,
+			 *       handling it just like any other value.
+			 */
+			for (i = 0; i < numrows; i++)
+			{
+				values[i*2]   = heap_getattr(rows[i], attrs->values[dima], stats[dima]->tupDesc, &isNull);
+				values[i*2+1] = heap_getattr(rows[i], attrs->values[dimb], stats[dimb]->tupDesc, &isNull);
+			}
+
+			qsort_arg((void *) values, numrows, sizeof(Datum) * 2, compare_scalars_memcmp, &len);
+
+			/*
+			 * Walk through the array, split it into rows according to
+			 * the A value, and count distinct values in the other one.
+			 * If there's a single B value for the whole group, we count
+			 * it as supporting the association, otherwise we count it
+			 * as contradicting.
+			 *
+			 * Furthermore, a group has to reach a minimum number of rows
+			 * to count as supporting; contradicting groups are counted
+			 * always.
+			 */
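+
+			/*
+			 * A made-up example of the counting, with min_group_size
+			 * lowered to 2: for the sorted (A,B) pairs
+			 *
+			 *   (1,1) (1,1) (1,1) (2,1) (2,2) (3,4) (3,4)
+			 *
+			 * the group A=1 has a single B value and 3 rows, so it
+			 * supports the rule; A=2 has two B values, so it contradicts;
+			 * A=3 supports again. The result is supporting=2,
+			 * contradicting=1 and supporting_rows=5.
+			 */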
+
+			/* start with values from the first row */
+			val_a = values[0];
+			val_b = values[1];
+			group_size  = 1;
+
+			for (i = 1; i < numrows; i++)
+			{
+				if (values[2*i] != val_a)	/* end of the group */
+				{
+					/*
+					 * If there are no contradicting rows, count it as
+					 * supporting (otherwise contradicting), but only if
+					 * the group is large enough.
+					 *
+					 * The requirement of a minimum group size makes it
+					 * impossible to identify [unique,unique] cases, but
+					 * that's probably a different case. This is more
+					 * about [zip => city] associations etc.
+					 */
+					supporting += ((violations == 0) && (group_size >= min_group_size)) ? 1 : 0;
+					contradicting += (violations != 0) ? 1 : 0;
+
+					supporting_rows += ((violations == 0) && (group_size >= min_group_size)) ? group_size : 0;
+
+					/* current values start a new group */
+					val_a = values[2*i];
+					val_b = values[2*i+1];
+					violations = 0;
+					group_size = 1;
+				}
+				else
+				{
+					if (values[2*i+1] != val_b)	/* mismatch of a B value */
+					{
+						val_b = values[2*i+1];
+						violations += 1;
+					}
+
+					group_size += 1;
+				}
+			}
+
+			/* FIXME handle the last group */
+			supporting += ((violations == 0) && (group_size >= min_group_size)) ? 1 : 0;
+			contradicting += (violations != 0) ? 1 : 0;
+			supporting_rows += ((violations == 0) && (group_size >= min_group_size)) ? group_size : 0;
+
+			/*
+			 * See if the number of rows supporting the association is at least
+			 * 10x the number of rows violating the hypothetical rule.
+			 *
+			 * TODO This is rather arbitrary limit - I guess it's possible to do
+			 *      some math to come up with a better rule (e.g. testing a hypothesis
+			 *      'this is due to randomness'). We can create a contingency table
+			 *      from the values and use it for testing. Possibly only when
+			 *      there are no contradicting rows?
+			 * 
+			 * TODO Also, if (a => b) and (b => a) at the same time, it pretty much
+			 *      means the columns have the same values (or one is a 'label'),
+			 *      making the conditions rather redundant. Although it's possible
+			 *      that the query uses incompatible combination of values.
+			 */
+			if (supporting_rows > (numrows - supporting_rows) * 10)
+			{
+				/*
+				 * FIXME The detected rule is currently not stored or used
+				 *       anywhere; re-enable the elog() below for debugging.
+				 *
+				 * elog(WARNING, "%d => %d : supporting=%d contradicting=%d",
+				 *      dima, dimb, supporting, contradicting);
+				 */
+			}
+
+		}
+	}
+
+	pfree(values);
+
+}
+
+/*
+ * Compute the list of most common items, where item is a combination of
+ * values for all the columns. For small number of distinct values, we
+ * may be able to represent the distribution pretty exactly, with
+ * per-item statistics.
+ *
+ * If we can represent the distribution using a MCV list only, it's great
+ * because that allows much better estimates (especially for equality).
+ * Such discrete distributions are also easier to combine (more
+ * efficient and more accurate) than when using histograms.
+ *
+ * FIXME This does not handle NULL values at the moment.
+ *
+ * TODO When computing equality selectivity (a=1 AND b=2), we can do that
+ *      pretty exactly assuming (a) we hit a MCV item and (b) the
+ *      histogram is built on those two columns only (i.e. there are no
+ *      other columns). In that case we can estimate the selectivity
+ *      using only the MCV.
+ *
+ *      When we don't hit a MCV item, we can use the frequency of the
+ *      least probable MCV item as upper bound of the selectivity
+ *      (otherwise it'd get into the MCV list). Again, this only works
+ *      when the histogram size matches the restricted columns.
+ *
+ *      When the histogram is larger (i.e. there are additional columns),
+ *      we can't be sure how the selectivity is distributed between the
+ *      MCV list and the histogram (we may get several MCV items matching
+ *      the conditions and several histogram buckets at the same time).
+ *
+ *      In this case we can probably clamp the selectivity by minimum of
+ *      selectivities for each condition. For example if we know the
+ *      number of distinct values for each column, we can use 1/ndistinct
+ *      as a per-column estimate. Or rather 1/ndistinct + selectivity
+ *      derived from the MCV list.
+ *
+ *      If there's no histogram (thus the distribution is approximated
+ *      only by the MCV list), the size of the stats (whether there are
+ *      some other columns, not referenced in the conditions) does not
+ *      matter. We can do pretty accurate estimation using the MCV.
+ *
+ * TODO Currently there's no logic to consider building only a MCV list
+ *      (and not building the histogram at all).
+ * 
+ * TODO For types that don't reasonably support ordering (either because
+ *      the type does not support that or when the user adds some option
+ *      to the ADD STATISTICS command - e.g. UNSORTED_STATS), building
+ *      the histogram may be pointless and inefficient. This is esp.
+ *      true for varlena types that may be quite large and a large MCV
+ *      list may be a better choice, because it makes equality estimates
+ *      more accurate. Due to the unsorted nature, range queries on those
+ *      attributes are rather useless anyway.
+ *
+ *      Another thing is that by restricting to MCV list and equality
+ *      conditions, we can use hash values instead of long varlena values.
+ *      The equality estimation will be very accurate.
+ *
+ *      This however complicates matching the columns to available
+ *      statistics, as it will require matching clauses (not columns) to
+ *      stats. And it may get quite complex - e.g. what if there are
+ *      multiple clauses, each compatible with different stats subset?
+ * 
+ * FIXME Create a special-purpose type for MCV items (instead of a plain
+ *       Datum array, which is very difficult to work with).
+ */
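+/*
+ * A small illustration with made-up numbers: if the MCV list built on
+ * columns (a, b) contains an item (1, 2) with frequency 0.15, the
+ * clause list (a = 1 AND b = 2) can be estimated as exactly 0.15; if we
+ * look for (1, 3) and it's not in the list, the selectivity must be
+ * below the frequency of the least common MCV item.
+ */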
+static MCVList
+build_mv_mcvlist(int numrows, HeapTuple *rows, int2vector *attrs,
+					  int natts, VacAttrStats **vacattrstats,
+					  int *numrows_filtered)
+{
+	int i, j, idx = 0;
+	int numattrs = attrs->dim1;
+	Size len = sizeof(Datum) * numattrs;
+	bool isNull;
+	int ndistinct = 0;
+	int mcv_threshold = 0;
+	int count = 0;
+	int nitems = 0;
+
+	MCVList	mcvlist = NULL;
+
+	VacAttrStats **stats = lookup_var_attr_stats(attrs, natts, vacattrstats);
+
+	/*
+	 * We could collect this while walking through all the attributes
+	 * above (as it is, we call heap_getattr twice for each value - once
+	 * here and once more when filtering rows matching the MCV items).
+	 *
+	 * TODO We're using Datum (8B), even for data types smaller than this
+	 *      (notably int4 and float4). Maybe we could save some space here,
+	 *      although it seems the bytea compression will handle it just fine.
+	 */
+	Datum * values = palloc0(numrows * numattrs * sizeof(Datum));
+
+	for (j = 0; j < numrows; j++)
+		for (i = 0; i < numattrs; i++)
+			values[idx++] = heap_getattr(rows[j], attrs->values[i], stats[i]->tupDesc, &isNull);
+
+	qsort_arg((void *) values, numrows, sizeof(Datum) * numattrs, compare_scalars_memcmp, &len);
+
+	/*
+	 * Count the number of distinct values - we need this to determine
+	 * the threshold (125% of the average frequency).
+	 */
+	ndistinct = 1;
+	for (i = 1; i < numrows; i++)
+		if (memcmp(&values[i * numattrs], &values[(i-1) * numattrs], len) != 0)
+			ndistinct += 1;
+
+	/*
+	 * Determine how many groups actually exceed the threshold, and then
+	 * walk the array again and collect them into an array.
+	 * 
+	 * TODO for now the threshold is the same as in the single-column
+	 * 		case (average + 25%), but maybe that's worth revisiting
+	 * 
+	 * TODO see if we can fit all the distinct values in the MCV list
+	 */
+	mcv_threshold = 1.25 * numrows / ndistinct;
+	mcv_threshold = (mcv_threshold < 4) ? 4  : mcv_threshold;
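+
+	/*
+	 * With made-up numbers: for numrows = 30000 sampled rows and
+	 * ndistinct = 1000 groups, the average group has 30 rows, so
+	 * mcv_threshold = 1.25 * 30000 / 1000 = 37 (after truncation to
+	 * int), well above the lower bound of 4.
+	 */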
+
+	/*
+	 * If there are fewer distinct values than the maximum list size,
+	 * store all items that appear at least twice in the sample.
+	 *
+	 * FIXME We can do this only if we believe we got all the distinct
+	 *       values of the table.
+	 */
+	if (ndistinct <= MVSTAT_MCVLIST_MAX_ITEMS)
+		mcv_threshold = 2;
+
+	count = 1;
+	for (i = 1; i <= numrows; i++)
+	{
+		/* last row or a new group */
+		if ((i == numrows) || (memcmp(&values[i * numattrs], &values[(i-1) * numattrs], len) != 0))
+		{
+			/* count the MCV item if exceeding the threshold */
+			if (count >= mcv_threshold)
+				nitems += 1;
+
+			count = 1;
+		}
+		else	/* same group, just increase the number of items */
+			count += 1;
+	}
+
+	/* by default we keep all the rows (even if there's no MCV list) */
+	*numrows_filtered = numrows;
+
+	/* we know the number of mcvitems, now collect them in a 2nd pass */
+	if (nitems > 0)
+	{
+		/* we need to store the frequency for each group, so (numattrs + 1) */
+		mcvlist = (MCVList)palloc0(sizeof(MCVListData));
+
+		mcvlist->magic = MVSTAT_MCV_MAGIC;
+		mcvlist->type = MVSTAT_MCV_TYPE_BASIC;
+		mcvlist->ndimensions = numattrs;
+		mcvlist->nitems = nitems;
+		mcvlist->items = (MCVItem*)palloc0(sizeof(MCVItem)*nitems);
+
+		/* now repeat the same loop as above, but this time copy the data
+		 * for items exceeding the threshold */
+		count = 1;
+		nitems = 0;
+		for (i = 1; i <= numrows; i++)
+		{
+
+			/* last row or a new group */
+			if ((i == numrows) || (memcmp(&values[i * numattrs], &values[(i-1) * numattrs], len) != 0))
+			{
+				/* count the MCV item if exceeding the threshold (and copy into the array) */
+				if (count >= mcv_threshold)
+				{
+					/* first, allocate the item (with the proper size of values) */
+					MCVItem item = (MCVItem)palloc0(offsetof(MCVItemData, values) +
+															  sizeof(Datum)*mcvlist->ndimensions);
+
+					/* then copy values from the _previous_ group */
+					memcpy(item->values, &values[(i-1)*numattrs], len);
+
+					/* and finally the group frequency */
+					item->frequency = (double)count / numrows;
+
+					mcvlist->items[nitems] = item;
+					nitems += 1;
+				}
+
+				count = 1;
+			}
+			else	/* same group, just increase the number of items */
+				count += 1;
+		}
+
+		/* make sure the loops are consistent */
+		Assert(nitems == mcvlist->nitems);
+
+		/*
+		 * Remove the rows matching the MCV items.
+		 *
+		 * FIXME This implementation is rather naive, effectively O(N^2).
+		 *       As the MCV list grows, the check will take longer and
+		 *       longer. And as the number of sampled rows increases (by
+		 *       increasing statistics target), it will take longer and
+		 *       longer. One option is to sort the MCV items first and
+		 *       then perform a binary search.
+		 */
+		if (nitems == ndistinct) /* all rows are covered by MCV items */
+			*numrows_filtered = 0;
+		else /* (nitems < ndistinct) && (nitems > 0) */
+		{
+			int nfiltered = 0;
+			HeapTuple *rows_filtered = (HeapTuple*)palloc0(sizeof(HeapTuple) * numrows);
+
+			/* walk through the tuples, compare the values to MCV items */
+			for (i = 0; i < numrows; i++)
+			{
+				bool	match = false;
+				Datum  *keys = (Datum*)palloc0(numattrs * sizeof(Datum));
+
+				/* collect the key values */
+				for (j = 0; j < numattrs; j++)
+					keys[j] = heap_getattr(rows[i], attrs->values[j], stats[j]->tupDesc, &isNull);
+
+				/* scan through the MCV list for matches */
+				for (j = 0; j < mcvlist->nitems; j++)
+					if (memcmp(keys, mcvlist->items[j]->values, sizeof(Datum)*numattrs) == 0)
+					{
+						match = true;
+						break;
+					}
+
+				/* if no match in the MCV list, copy the row into the filtered ones */
+				if (! match)
+					memcpy(&rows_filtered[nfiltered++], &rows[i], sizeof(HeapTuple));
+
+				pfree(keys);
+			}
+
+			/* replace the first part */
+			memcpy(rows, rows_filtered, sizeof(HeapTuple) * nfiltered);
+			*numrows_filtered = nfiltered;
+
+			pfree(rows_filtered);
+
+		}
+	}
+
+	pfree(values);
+
+	/*
+	 * TODO Single-dimensional MCV is stored sorted by frequency (descending).
+	 *      Maybe this should be stored like that too?
+	 */
+
+	return mcvlist;
+}
+
+/* multi-variate stats comparator */
+
+/*
+ * qsort_arg comparator for sorting Datums (MV stats)
+ *
+ * This does not maintain the tupnoLink array.
+ */
+static int
+compare_scalars_simple(const void *a, const void *b, void *arg)
+{
+	Datum		da = *(Datum*)a;
+	Datum		db = *(Datum*)b;
+	SortSupport ssup= (SortSupport) arg;
+
+	return ApplySortComparator(da, false, db, false, ssup);
+}
+
+/*
+ * qsort_arg comparator for sorting data when partitioning a MV bucket
+ */
+static int
+compare_scalars_partition(const void *a, const void *b, void *arg)
+{
+	Datum		da = ((ScalarItem*)a)->value;
+	Datum		db = ((ScalarItem*)b)->value;
+	SortSupport ssup= (SortSupport) arg;
+
+	return ApplySortComparator(da, false, db, false, ssup);
+}
+
+/*
+ * qsort_arg comparator for sorting Datum[] (row of Datums) when
+ * counting distinct values.
+ */
+static int
+compare_scalars_memcmp(const void *a, const void *b, void *arg)
+{
+	Size		len = *(Size*)arg;
+
+	return memcmp(a, b, len);
+}
+
+static int
+compare_scalars_memcmp_2(const void *a, const void *b)
+{
+	return memcmp(a, b, sizeof(Datum));
+}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 714a9f1..7f9e54f 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -35,6 +35,7 @@
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_inherits.h"
 #include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_rowsecurity.h"
@@ -91,7 +92,7 @@
 #include "utils/syscache.h"
 #include "utils/tqual.h"
 #include "utils/typcache.h"
-
+#include "utils/mvstats.h"
 
 /*
  * ON COMMIT action list
@@ -139,8 +140,9 @@ static List *on_commits = NIL;
 #define AT_PASS_ADD_COL			5		/* ADD COLUMN */
 #define AT_PASS_ADD_INDEX		6		/* ADD indexes */
 #define AT_PASS_ADD_CONSTR		7		/* ADD constraints, defaults */
-#define AT_PASS_MISC			8		/* other stuff */
-#define AT_NUM_PASSES			9
+#define AT_PASS_ADD_STATS		8		/* ADD statistics */
+#define AT_PASS_MISC			9		/* other stuff */
+#define AT_NUM_PASSES			10
 
 typedef struct AlteredTableInfo
 {
@@ -414,7 +416,8 @@ static void ATExecReplicaIdentity(Relation rel, ReplicaIdentityStmt *stmt, LOCKM
 static void ATExecGenericOptions(Relation rel, List *options);
 static void ATExecEnableRowSecurity(Relation rel);
 static void ATExecDisableRowSecurity(Relation rel);
-
+static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+								StatisticsDef *def, LOCKMODE lockmode);
 static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
 				   ForkNumber forkNum, char relpersistence);
 static const char *storage_name(char c);
@@ -2965,6 +2968,7 @@ AlterTableGetLockLevel(List *cmds)
 				 * updates.
 				 */
 			case AT_SetStatistics:		/* Uses MVCC in getTableAttrs() */
+			case AT_AddStatistics:		/* XXX not sure if the right level */
 			case AT_ClusterOn:	/* Uses MVCC in getIndexes() */
 			case AT_DropCluster:		/* Uses MVCC in getIndexes() */
 			case AT_SetOptions:	/* Uses MVCC in getTableAttrs() */
@@ -3112,6 +3116,7 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
 			pass = AT_PASS_ADD_CONSTR;
 			break;
 		case AT_SetStatistics:	/* ALTER COLUMN SET STATISTICS */
+		case AT_AddStatistics:	/* XXX maybe not the right place */
 			ATSimpleRecursion(wqueue, rel, cmd, recurse, lockmode);
 			/* Performs own permission checks */
 			ATPrepSetStatistics(rel, cmd->name, cmd->def, lockmode);
@@ -3407,6 +3412,9 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
 		case AT_SetStatistics:	/* ALTER COLUMN SET STATISTICS */
 			ATExecSetStatistics(rel, cmd->name, cmd->def, lockmode);
 			break;
+		case AT_AddStatistics:		/* ADD STATISTICS */
+			ATExecAddStatistics(tab, rel, (StatisticsDef *) cmd->def, lockmode);
+			break;
 		case AT_SetOptions:		/* ALTER COLUMN SET ( options ) */
 			ATExecSetOptions(rel, cmd->name, cmd->def, false, lockmode);
 			break;
@@ -11616,3 +11624,197 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid,
 
 	ReleaseSysCache(tuple);
 }
+
+/* used for sorting the attnums in ATExecAddStatistics */
+static int compare_int16(const void *a, const void *b)
+{
+	/* memcmp() would not produce numeric ordering on little-endian machines */
+	return *((const int16 *) a) - *((const int16 *) b);
+}
+
+/*
+ * Implements the ALTER TABLE ... ADD STATISTICS (options) ON (columns).
+ *
+ * The code is an unholy mix of pieces that really belong to other parts
+ * of the source tree.
+ *
+ * FIXME Check that the types are pass-by-value and support sort,
+ *       although maybe we can live without the sort (and only build
+ *       MCV list / association rules).
+ *
+ * FIXME This should probably check for duplicate stats (i.e. same
+ *       keys, same options). Although maybe it's useful to have
+ *       multiple stats on the same columns with different options
+ *       (say, a detailed MCV-only stats for some queries, histogram
+ *       for others, etc.)
+ */
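+/*
+ * Example invocation (a sketch - the exact grammar is defined by the
+ * parser changes elsewhere in this patch, and the table/column names
+ * are made up):
+ *
+ *     ALTER TABLE t ADD STATISTICS (mcv true, max_mcv_items 2048)
+ *         ON (a, b, c);
+ *
+ * This inserts a pg_mv_statistic row for table "t" with stakeys
+ * covering (a, b, c), mcv_enabled = true and mcv_max_items = 2048.
+ */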
+static void ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+						StatisticsDef *def, LOCKMODE lockmode)
+{
+	int			i, j;
+	ListCell   *l;
+	int16		attnums[INDEX_MAX_KEYS];
+	Oid 		atttypids[INDEX_MAX_KEYS];
+	int			numcols = 0;
+
+	Oid			mvstatoid;
+	HeapTuple	htup;
+	Datum		values[Natts_pg_mv_statistic];
+	bool		nulls[Natts_pg_mv_statistic];
+	int2vector *stakeys;
+	Relation	mvstatrel;
+
+	/* by default build everything */
+	bool 	build_histogram = true,
+			build_mcv = true,
+			build_associations = true;
+
+	/* build regular MCV (not hashed by default) */
+	bool	mcv_hashed = false;
+
+	int32 	max_buckets = -1,
+			max_mcv_items = -1;
+
+	Assert(IsA(def, StatisticsDef));
+
+	/* transform the column names to attnum values */
+
+	foreach(l, def->keys)
+	{
+		char	   *attname = strVal(lfirst(l));
+		HeapTuple	atttuple;
+
+		atttuple = SearchSysCacheAttName(RelationGetRelid(rel), attname);
+
+		if (!HeapTupleIsValid(atttuple))
+			ereport(ERROR,
+					(errcode(ERRCODE_UNDEFINED_COLUMN),
+					 errmsg("column \"%s\" referenced in statistics does not exist",
+							attname)));
+
+		/* more than MVSTATS_MAX_DIMENSIONS columns not allowed */
+		if (numcols >= MVSTATS_MAX_DIMENSIONS)
+			ereport(ERROR,
+					(errcode(ERRCODE_TOO_MANY_COLUMNS),
+					 errmsg("cannot have more than %d columns in statistics",
+							MVSTATS_MAX_DIMENSIONS)));
+
+		attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
+		atttypids[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->atttypid;
+		ReleaseSysCache(atttuple);
+		numcols++;
+	}
+
+	/*
+	 * Check the lower bound (at least 2 columns); the upper bound was
+	 * already checked in the loop.
+	 */
+	if (numcols < 2)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+				 errmsg("multivariate statistics require 2 or more columns")));
+
+	/* look for duplicate columns */
+	for (i = 0; i < numcols; i++)
+		for (j = i + 1; j < numcols; j++)
+			if (attnums[i] == attnums[j])
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_COLUMN),
+						 errmsg("duplicate column name in statistics definition")));
+
+	/* parse the statistics options */
+	foreach (l, def->options)
+	{
+		DefElem *opt = (DefElem*)lfirst(l);
+
+		if (strcmp(opt->defname, "histogram") == 0)
+			build_histogram = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "mcv") == 0)
+			build_mcv = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "mcv_hashed") == 0)
+			mcv_hashed = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "associations") == 0)
+			build_associations = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "max_buckets") == 0)
+		{
+			max_buckets = defGetInt32(opt);
+
+			/* TODO check that this is not used with 'histogram off' */
+
+			/* sanity check */
+			if (max_buckets < 1024)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("minimum number of buckets is 1024")));
+			else if (max_buckets > 32768) /* FIXME use the proper constant */
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("maximum number of buckets is 32768")));
+
+		}
+		else if (strcmp(opt->defname, "max_mcv_items") == 0)
+		{
+			max_mcv_items = defGetInt32(opt);
+
+			/* TODO check that this is not used with 'mcv off' */
+
+			/* sanity check */
+			if (max_mcv_items < 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("maximum number of MCV items must be non-negative")));
+			else if (max_mcv_items > 8192) /* FIXME use the proper constant */
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("maximum number of MCV items is 8192")));
+
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized STATISTICS option \"%s\"",
+							opt->defname)));
+	}
+
+	/* sort the attnums and build int2vector */
+	qsort(attnums, numcols, sizeof(int16), compare_int16);
+	stakeys = buildint2vector(attnums, numcols);
+
+	/*
+	 * Okay, let's create the pg_mv_statistic entry.
+	 */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	/* no stats collected yet, so just the keys */
+	values[Anum_pg_mv_statistic_starelid-1] = ObjectIdGetDatum(RelationGetRelid(rel));
+
+	values[Anum_pg_mv_statistic_stakeys -1] = PointerGetDatum(stakeys);
+	values[Anum_pg_mv_statistic_hist_enabled  -1] = BoolGetDatum(build_histogram);
+	values[Anum_pg_mv_statistic_mcv_enabled   -1] = BoolGetDatum(build_mcv);
+	values[Anum_pg_mv_statistic_mcv_hashed    -1] = BoolGetDatum(mcv_hashed);
+	values[Anum_pg_mv_statistic_assoc_enabled -1] = BoolGetDatum(build_associations);
+
+	values[Anum_pg_mv_statistic_hist_max_buckets -1] = Int32GetDatum(max_buckets);
+	values[Anum_pg_mv_statistic_mcv_max_items    -1] = Int32GetDatum(max_mcv_items);
+
+	nulls[Anum_pg_mv_statistic_staassoc -1] = true;
+	nulls[Anum_pg_mv_statistic_stamcv   -1] = true;
+	nulls[Anum_pg_mv_statistic_stahist  -1] = true;
+
+	/* insert the tuple into pg_mv_statistic */
+	mvstatrel = heap_open(MvStatisticRelationId, RowExclusiveLock);
+
+	htup = heap_form_tuple(mvstatrel->rd_att, values, nulls);
+
+	mvstatoid = simple_heap_insert(mvstatrel, htup);
+
+	CatalogUpdateIndexes(mvstatrel, htup);
+
+	heap_freetuple(htup);
+
+	heap_close(mvstatrel, RowExclusiveLock);
+
+	return;
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index e76b5b3..da35331 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3903,6 +3903,17 @@ _copyAlterPolicyStmt(const AlterPolicyStmt *from)
 	return newnode;
 }
 
+static StatisticsDef *
+_copyStatisticsDef(const StatisticsDef *from)
+{
+	StatisticsDef  *newnode = makeNode(StatisticsDef);
+
+	COPY_NODE_FIELD(keys);
+	COPY_NODE_FIELD(options);
+
+	return newnode;
+}
+
 /* ****************************************************************
  *					pg_list.h copy functions
  * ****************************************************************
@@ -4717,6 +4728,9 @@ copyObject(const void *from)
 		case T_CommonTableExpr:
 			retval = _copyCommonTableExpr(from);
 			break;
+		case T_StatisticsDef:
+			retval = _copyStatisticsDef(from);
+			break;
 		case T_PrivGrantee:
 			retval = _copyPrivGrantee(from);
 			break;
@@ -4729,7 +4743,6 @@ copyObject(const void *from)
 		case T_XmlSerialize:
 			retval = _copyXmlSerialize(from);
 			break;
-
 		default:
 			elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from));
 			retval = 0;			/* keep compiler quiet */
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 9b657fb..9c32735 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -24,6 +24,9 @@
 #include "utils/lsyscache.h"
 #include "utils/selfuncs.h"
 
+#include "utils/mvstats.h"
+#include "catalog/pg_collation.h"
+#include "utils/typcache.h"
 
 /*
  * Data structure for accumulating info about possible range-query
@@ -43,6 +46,23 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 			   bool varonleft, bool isLTsel, Selectivity s2);
 
 
+static bool is_mv_compatible(Node *clause, Oid varRelid, Index *varno,
+							 Bitmapset  **attnums);
+static Bitmapset  *collect_mv_attnums(PlannerInfo *root, List *clauses,
+									  Oid varRelid, Oid *relid);
+static int choose_mv_histogram(int nmvstats, MVStats mvstats,
+							   Bitmapset *attnums);
+static List *clauselist_mv_split(List *clauses, Oid varRelid,
+								 List **mvclauses, MVStats mvstats);
+
+static Selectivity clauselist_mv_selectivity(PlannerInfo *root,
+						List *clauses, MVStats mvstats);
+static Selectivity clauselist_mv_selectivity_mcvlist(PlannerInfo *root,
+									List *clauses, MVStats mvstats,
+									bool *fullmatch, Selectivity *lowsel);
+static Selectivity clauselist_mv_selectivity_histogram(PlannerInfo *root,
+									List *clauses, MVStats mvstats);
+
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
  ****************************************************************************/
@@ -100,14 +120,74 @@ clauselist_selectivity(PlannerInfo *root,
 	RangeQueryClause *rqlist = NULL;
 	ListCell   *l;
 
+	/* processing mv stats */
+	Oid			relid = InvalidOid;
+	int			nmvstats = 0;
+	MVStats		mvstats = NULL;
+
+	/* attributes in mv-compatible clauses */
+	Bitmapset  *mvattnums = NULL;
+
 	/*
-	 * If there's exactly one clause, then no use in trying to match up pairs,
-	 * so just go directly to clause_selectivity().
+	 * If there's exactly one clause, then no use in trying to match up
+	 * pairs, so just go directly to clause_selectivity().
 	 */
 	if (list_length(clauses) == 1)
 		return clause_selectivity(root, (Node *) linitial(clauses),
 								  varRelid, jointype, sjinfo);
 
+	/* collect attributes from mv-compatible clauses */
+	mvattnums = collect_mv_attnums(root, clauses, varRelid, &relid);
+
+	/*
+	 * If there are mv-compatible clauses, referencing at least two
+	 * columns (otherwise it makes no sense to use mv stats), fetch the
+	 * MV histograms for the relation (only the column keys, not the
+	 * histograms yet - we'll decide which histogram to use first).
+	 */
+	if (bms_num_members(mvattnums) >= 2)
+	{
+		/* clauses compatible with multi-variate stats */
+		List	*mvclauses = NIL;
+
+		/* fetch info from the catalog (not the serialized stats yet) */
+		mvstats = list_mv_stats(relid, &nmvstats, true);
+
+		/*
+		 * If there are candidate statistics, choose the histogram first.
+		 * At the moment we only use a single statistics object, the one
+		 * covering the most columns (using info from the previous step).
+		 * If there are multiple such histograms, we'll use the smallest
+		 * one (with the lowest number of dimensions).
+		 *
+		 * This may not be the optimal choice if the 'smaller' stats has
+		 * many fewer buckets than the rejected one (making it less
+		 * accurate).
+		 *
+		 * We may end up without multivariate statistics, if none of the
+		 * stats matches at least two columns from the clauses (in that
+		 * case we may just use the single dimensional stats).
+		 */
+		if (nmvstats > 0)
+		{
+			int idx = choose_mv_histogram(nmvstats, mvstats, mvattnums);
+
+			if (idx >= 0)	/* we have a matching stats */
+			{
+				MVStats mvstat = &mvstats[idx];
+
+				/* split the clauselist into regular and mv-clauses */
+				clauses = clauselist_mv_split(clauses, varRelid, &mvclauses, mvstat);
+
+				/* we've chosen the histogram to match the clauses */
+				Assert(mvclauses != NIL);
+
+				/* compute the multivariate stats */
+				s1 *= clauselist_mv_selectivity(root, mvclauses, mvstat);
+			}
+		}
+	}
+
 	/*
 	 * Initial scan over clauses.  Anything that doesn't look like a potential
 	 * rangequery clause gets multiplied into s1 and forgotten. Anything that
@@ -782,3 +862,1010 @@ clause_selectivity(PlannerInfo *root,
 
 	return s1;
 }
+
+/*
+ * Estimate selectivity for the list of MV-compatible clauses, using that
+ * particular histogram.
+ *
+ * When we hit a single bucket, we don't know what portion of it actually
+ * matches the clauses (e.g. equality), and we use 1/2 the bucket by
+ * default. However, the MV histograms are usually less detailed than
+ * the per-column ones, meaning the sum over the matched buckets is often
+ * quite high (thanks to combining a lot of "partially hit" buckets).
+ *
+ * There are several ways to improve this, usually with cases when it
+ * won't really help. Also, the more complex the process, the worse
+ * the failures (i.e. misestimates).
+ *
+ * (1) Use the MV histogram only as a way to combine multiple
+ *     per-column histograms, essentially rewriting
+ *
+ *       P(A & B) = P(A) * P(B|A)
+ *
+ *     where P(B|A) may be computed using a proper "slice" of the
+ *     histogram, by first selecting only buckets where A is true, and
+ *     then using the boundaries to 'restrict' the per-column histogram.
+ *
+ *     With more clauses, it gets more complicated, of course
+ *
+ *       P(A & B & C) = P(A & C) * P(B|A & C)
+ *                    = P(A) * P(C|A) * P(B|A & C)
+ *
+ *     and so on.
+ * 
+ *     Of course, the question is how well and efficiently we can
+ *     compute the conditional probabilities - whether this approach
+ *     can improve the estimates (instead of amplifying the errors).
+ *
+ *     Also, this does not eliminate the need for histogram on [A,B,C].
+ *
+ * (2) Use multiple smaller (and more accurate) histograms, and combine
+ *     them using a process similar to the above. E.g. by assuming that
+ *     B and C are independent, we can rewrite
+ *
+ *       P(B|A & C) = P(B|A)
+ * 
+ *     so we can rewrite the whole formula to
+ * 
+ *       P(A & B & C) = P(A) * P(C|A) * P(B|A)
+ * 
+ *     and we're OK with two 2D histograms [A,C] and [A,B].
+ *
+ *     It'd be nice to perform some sort of statistical test (Fisher
+ *     or another chi-squared test) to identify independent components
+ *     and automatically separate them into smaller histograms.
+ *
+ * (3) Using the estimated number of distinct values in a bucket to
+ *     decide the selectivity of equality in the bucket (instead of
+ *     blindly using 1/2 of the bucket, we may use 1/ndistinct).
+ *     Of course, if the ndistinct estimate is way off, or when the
+ *     distribution is not uniform (one distinct value gets many more
+ *     rows than the others), this will fail. Also, we currently don't
+ *     have the ndistinct estimate available at this moment (but it
+ *     shouldn't be difficult to compute, as ndistinct and ntuples
+ *     should be available).
+ *
+ * TODO Clamp the selectivity by min of the per-clause selectivities
+ *      (i.e. the selectivity of the most restrictive clause), because
+ *      that's the maximum we can ever get from ANDed list of clauses.
+ *      This may probably prevent issues with hitting too many buckets
+ *      and low precision histograms.
+ *
+ * TODO We may support some additional conditions, most importantly
+ *      those matching multiple columns (e.g. "a = b" or "a < b").
+ *      Ultimately we could track multi-table histograms for join
+ *      cardinality estimation.
+ *
+ * TODO Currently this is only estimating all clauses, or clauses
+ *      matching varRelid (when it's not 0). I'm not sure what the
+ *      purpose of varRelid is, but my assumption is that it's used for
+ *      join conditions and such. In that case we can use those clauses
+ *      to restrict the other (i.e. filter the histogram buckets first,
+ *      before estimating the other clauses). This is essentially equal
+ *      to computing P(A|B) where "B" are the clauses not matching the
+ *      varRelid.
+ * 
+ * TODO Further thoughts on processing equality clauses - maybe it'd be
+ *      better to look for stats (with MCV) covered by the equality
+ *      clauses, because then we have a chance to find an exact match
+ *      in the MCV list, which is pretty much the best we can do. We may
+ *      also look at the least frequent MCV item, and use it as an upper
+ *      boundary for the selectivity (had there been a more frequent
+ *      item, it'd be in the MCV list).
+ *
+ *      These conditions may then be used as a condition for the other
+ *      selectivities, i.e. we may estimate P(A,B) first, and then
+ *      compute P(C|A,B) from another histogram. This may be useful when
+ *      we can estimate P(A,B) accurately (e.g. because it's a complete
+ *      equality match evaluated on MCV list), and then compute the
+ *      conditional probability P(C|A,B), giving us the requested stats
+ *
+ *          P(A,B,C) = P(A,B) * P(C|A,B)
+ *
+ * TODO There are several options for 'sanity clamping' the estimates.
+ *
+ *      First, if we have selectivities for each condition, then
+ * 
+ *          P(A,B) <= MIN(P(A), P(B))
+ *
+ *      Because additional conditions (connected by AND) can only lower
+ *      the probability.
+ *
+ *      So we can do some basic sanity checks using the single-variate
+ *      stats (the ones we have right now).
+ *
+ *      Second, when we have multivariate stats with a MCV list, then
+ *
+ *      (a) if we have a full equality condition (one equality condition
+ *          on each column) and we found a match in the MCV list, this is
+ *          the selectivity (and it's supposed to be exact)
+ *
+ *      (b) if we have a full equality condition and we haven't found a
+ *          match in the MCV list, then the selectivity is below the
+ *          lowest selectivity in the MCV list
+ *
+ *      (c) if we have a equality condition (not full), we can still
+ *          search the MCV for matches and use the sum of probabilities
+ *          as a lower boundary for the histogram (if there are no
+ *          matches in the MCV list, then we have no boundary)
+ *
+ *      Third, if there are multiple multivariate stats for a set of
+ *      clauses, we may compute all of them and then somehow aggregate
+ *      them - e.g. by choosing the minimum, median or average. The
+ *      multi-variate stats are susceptible to overestimation (because
+ *      we take 50% of the bucket for partial matches). Some stats may
+ *      give better estimates than others, but it's very difficult to
+ *      determine in advance which one is the best (it depends
+ *      on the number of buckets, number of additional columns not
+ *      referenced in the clauses etc.) so we may compute all and then
+ *      choose a sane aggregation (minimum seems like a good approach).
+ *      Of course, this may result in longer / more expensive estimation
+ *      (CPU-wise), but it may be worth it.
+ *
+ *      There are ways to address this, though. First, it's possible to
+ *      add a GUC choosing whether to do a 'simple' (using a single
+ *      stats expected to give the best estimate) or a 'full' estimation
+ *      (combining the multiple estimates).
+ * 
+ *          multivariate_estimates = (simple|full)
+ *
+ *      Also, this might be enabled at a table level, by something like
+ * 
+ *          ALTER TABLE ... SET STATISTICS (simple|full)
+ * 
+ *      Which would make it possible to use this only for the tables
+ *      where the simple approach does not work.
+ * 
+ *      Also, there are ways to optimize this algorithmically. E.g. we
+ *      may try to get an estimate from a matching MCV list first, and
+ *      if we happen to get a "full equality match" we may stop computing
+ *      the estimates from other stats (for this condition) because
+ *      that's probably the best estimate we can really get.
+ *
+ * TODO When applying the clauses to the histogram/MCV list, we can do
+ *      that from the most selective clauses first, because that'll
+ *      eliminate the buckets/items sooner (so we'll be able to skip
+ *      them without inspection, which is more expensive).
+ */
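+/*
+ * To illustrate the decomposition above with made-up numbers: if
+ * P(a < 10) = 0.4 and, restricted to the buckets matching (a < 10), the
+ * conditional P(b = 5 | a < 10) comes out as 0.25, then
+ *
+ *     P(a < 10 AND b = 5) = 0.4 * 0.25 = 0.1
+ *
+ * whereas assuming independence (as the planner does without MV stats)
+ * would estimate 0.4 * P(b = 5), possibly off by orders of magnitude
+ * for correlated columns.
+ */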
+static Selectivity
+clauselist_mv_selectivity(PlannerInfo *root, List *clauses, MVStats mvstats)
+{
+	bool fullmatch = false;
+	Selectivity s1 = 0.0, s2 = 0.0;
+
+	/*
+	 * Lowest frequency in the MCV list (may be used as an upper bound
+	 * for full equality conditions that did not match any MCV item).
+	 */
+	Selectivity mcv_low = 0.0;
+
+	/* TODO Evaluate simple 1D selectivities, use the smallest one as
+	 *      an upper bound, product as lower bound, and sort the
+	 *      clauses in ascending order by selectivity (to optimize the
+	 *      MCV/histogram evaluation). 
+	 */
+
+	/* Evaluate the MCV first. */
+	s1 = clauselist_mv_selectivity_mcvlist(root, clauses, mvstats,
+										   &fullmatch, &mcv_low);
+
+	/*
+	 * If we got a full equality match on the MCV list, we're done (and
+	 * the estimate is pretty good).
+	 */
+	if (fullmatch && (s1 > 0.0))
+		return s1;
+
+	/* FIXME if (fullmatch) without matching MCV item, use the mcv_low
+	 *       selectivity as upper bound */
+
+	s2 = clauselist_mv_selectivity_histogram(root, clauses, mvstats);
+
+	/* TODO clamp to <= 1.0 (or more strictly, when possible) */
+	return s1 + s2;
+}
+
+/*
+ * Collect attributes from mv-compatible clauses.
+ */
+static Bitmapset *
+collect_mv_attnums(PlannerInfo *root, List *clauses, Oid varRelid, Oid *relid)
+{
+	Index		varno = 0;
+	Bitmapset  *attnums = NULL;
+	ListCell   *l;
+
+	/*
+	 * Walk through the clauses and identify the ones we can estimate
+	 * using multivariate stats, and remember the relid/columns. We'll
+	 * then cross-check if we have suitable stats, and only if needed
+	 * we'll split the clauses into multivariate and regular lists.
+	 *
+	 * For now we're only interested in RestrictInfo nodes with nested
+	 * OpExpr, using either a range or equality.
+	 */
+	foreach (l, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(l);
+
+		/* ignore the result for now - we only need the info */
+		is_mv_compatible(clause, varRelid, &varno, &attnums);
+	}
+
+	/*
+	 * If there are at least two attributes referenced by the clause(s),
+	 * fetch the relation info (and pass back the Oid of the relation).
+	 */
+	if (bms_num_members(attnums) > 1)
+	{
+		RelOptInfo *rel = find_base_rel(root, varno);
+		*relid = root->simple_rte_array[bms_singleton_member(rel->relids)]->relid;
+	}
+	else
+	{
+		if (attnums != NULL)
+			pfree(attnums);
+		attnums = NULL;
+		*relid = InvalidOid;
+	}
+
+	return attnums;
+}
+
+/*
+ * We're looking for a histogram matching at least 2 attributes, and we
+ * want the smallest such histogram wrt. the number of dimensions (to
+ * get efficient estimation and likely better precision). The precision
+ * depends on the total number of buckets too, but the lower the number
+ * of dimensions, the smaller (and more precise) the buckets can get.
+ */
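+/*
+ * For example (made-up stats): with clauses referencing columns (a, b)
+ * and statistics defined on (a, b) and on (a, b, c), both match two
+ * columns, so the two-dimensional (a, b) histogram wins as the one with
+ * fewer dimensions.
+ */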
+static int
+choose_mv_histogram(int nmvstats, MVStats mvstats, Bitmapset *attnums)
+{
+	int i, j;
+
+	int choice = -1;
+	int current_matches = 1;					/* goal #1: maximize */
+	int current_dims = (MVSTATS_MAX_DIMENSIONS+1);	/* goal #2: minimize */
+
+	for (i = 0; i < nmvstats; i++)
+	{
+		int matches = 0;	/* columns matching this histogram */
+
+		int2vector * attrs = mvstats[i].stakeys;
+		int	numattrs = mvstats[i].stakeys->dim1;
+
+		/* count columns covered by the histogram */
+		for (j = 0; j < numattrs; j++)
+			if (bms_is_member(attrs->values[j], attnums))
+				matches++;
+
+		/*
+		 * Use this histogram when it improves the number of matches or
+		 * when it keeps the number of matches and is smaller.
+		 */
+		if ((matches > current_matches) ||
+			((matches == current_matches) && (current_dims > numattrs)))
+		{
+			choice = i;
+			current_matches = matches;
+			current_dims = numattrs;
+		}
+	}
+
+	return choice;
+}
+
+/*
+ * This splits the clauses list into two parts - one containing clauses
+ * that will be evaluated using the chosen histogram, and the remaining
+ * clauses (either non-mvcompatible, or not related to the histogram).
+ */
+static List *
+clauselist_mv_split(List *clauses, Oid varRelid, List **mvclauses, MVStats mvstats)
+{
+	int i;
+	ListCell *l;
+	List	 *non_mvclauses = NIL;
+
+	/* FIXME is there a better way to get info on int2vector? */
+	int2vector * attrs = mvstats->stakeys;
+	int	numattrs = mvstats->stakeys->dim1;
+
+	/* erase the list of mv-compatible clauses */
+	*mvclauses = NIL;
+
+	foreach (l, clauses)
+	{
+		RestrictInfo *rinfo;
+		Node	   *clause = (Node *) lfirst(l);
+
+		/*
+		 * Only a RestrictInfo may be mv-compatible, so everything else
+		 * goes to the non-mv list directly.
+		 *
+		 * TODO Create a macro/function deciding whether a clause is
+		 *      mv-compatible (along the lines of is_opclause, for example).
+		 */
+		if (! IsA(clause, RestrictInfo))
+		{
+			non_mvclauses = lappend(non_mvclauses, clause);
+			continue;
+		}
+
+		rinfo = (RestrictInfo *) clause;
+		clause = (Node*)rinfo->clause;
+
+		/* Pseudoconstants go directly to the non-mv list too. */
+		if (rinfo->pseudoconstant)
+		{
+			non_mvclauses = lappend(non_mvclauses, rinfo);
+			continue;
+		}
+
+		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
+		{
+			OpExpr	   *expr = (OpExpr *) clause;
+			bool		varonleft = true;
+			bool		ok;
+			bool		match = false;
+
+			ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
+				(is_pseudo_constant_clause_relids(lsecond(expr->args),
+												rinfo->right_relids) ||
+				(varonleft = false,
+				is_pseudo_constant_clause_relids(linitial(expr->args),
+												rinfo->left_relids)));
+
+			if (ok)
+			{
+				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+
+				/*
+				 * Only consider this variable if (varRelid == 0) or when the
+				 * varno matches varRelid (see explanation at
+				 * clause_selectivity), and only for "<", ">" and "="
+				 * operators (decided by looking at the selectivity function,
+				 * as in the single-column case). The clause is used only if
+				 * the attribute is covered by the chosen statistics.
+				 */
+				if ((varRelid == 0) || (varRelid == var->varno))
+				{
+					switch (get_oprrest(expr->opno))
+					{
+						case F_SCALARLTSEL:
+						case F_SCALARGTSEL:
+						case F_EQSEL:
+							if (! IS_SPECIAL_VARNO(var->varno))	/* FIXME necessary here? */
+							{
+								for (i = 0; i < numattrs; i++)
+									if (attrs->values[i] == var->varattno)
+										match = true;
+							}
+							break;
+					}
+				}
+			}
+
+			/*
+			 * Clauses not matching the chosen statistics (or not really
+			 * mv-compatible) must be estimated the regular way, so make
+			 * sure every clause ends up on one of the two lists.
+			 */
+			if (match)
+				*mvclauses = lappend(*mvclauses, clause);
+			else
+				non_mvclauses = lappend(non_mvclauses, rinfo);
+
+			continue;
+		}
+
+		/* not an operator clause, so not mv-compatible */
+		non_mvclauses = lappend(non_mvclauses, rinfo);
+	}
+
+	/*
+	 * Return the remaining clauses, to be estimated the regular way
+	 * (they're incompatible with the chosen histogram, or with MV
+	 * stats in general).
+	 */
+	return non_mvclauses;
+}
+
+/*
+ * Determines whether the clause is compatible with multivariate stats,
+ * and if it is, returns some additional information - varno (index
+ * into simple_rte_array) and a bitmap of attributes. This is then
+ * used to fetch related multivariate statistics.
+ *
+ * At this moment we only support basic conditions of the form
+ * 
+ *     variable OP constant
+ *
+ * where OP is one of [=,<,<=,>=,>] (which is however determined by
+ * looking at the associated function for estimating selectivity, just
+ * like with the single-dimensional case).
+ */
+static bool
+is_mv_compatible(Node *clause, Oid varRelid, Index *varno, Bitmapset  **attnums)
+{
+	if (IsA(clause, RestrictInfo))
+	{
+		RestrictInfo *rinfo = (RestrictInfo *) clause;
+
+		/* Pseudoconstants are not really interesting here. */
+		if (rinfo->pseudoconstant)
+			return false;
+
+		/* get the actual clause from the RestrictInfo ... */
+		clause = (Node*)rinfo->clause;
+
+		/* is it 'variable op constant' ? */
+		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
+		{
+			OpExpr	   *expr = (OpExpr *) clause;
+			bool		varonleft = true;
+			bool		ok;
+
+			ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
+				(is_pseudo_constant_clause_relids(lsecond(expr->args),
+												rinfo->right_relids) ||
+				(varonleft = false,
+				is_pseudo_constant_clause_relids(linitial(expr->args),
+												rinfo->left_relids)));
+
+			if (ok)
+			{
+				Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+
+				/*
+				 * Only consider this variable if (varRelid == 0) or when the varno
+				 * matches varRelid (see explanation at clause_selectivity).
+				 */
+				if (! ((varRelid == 0) || (varRelid == var->varno)))
+					return false;
+
+				/* Also skip special varno values, and system attributes ... */
+				if ((IS_SPECIAL_VARNO(var->varno)) || (! AttrNumberIsForUserDefinedAttr(var->varattno)))
+					return false;
+
+				/*
+				 * If it's not a "<" or ">" or "=" operator, just ignore the
+				 * clause. Otherwise note the relid and attnum for the variable.
+				 * This uses the function for estimating selectivity, not the
+				 * operator directly (a bit awkward, but well ...).
+				 */
+				switch (get_oprrest(expr->opno))
+					{
+						case F_SCALARLTSEL:
+						case F_SCALARGTSEL:
+						case F_EQSEL:
+							*varno = var->varno;
+							*attnums = bms_add_member(*attnums, var->varattno);
+							return true;
+					}
+			}
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Estimate selectivity of clauses using a MCV list.
+ *
+ * If there's no MCV list for the stats, the function returns 0.0.
+ *
+ * While computing the estimate, the function checks whether all the
+ * columns were matched with an equality condition. If that's the case,
+ * it's assumed we can skip computing the estimate from histogram,
+ * because all the rows matching the condition are represented by the
+ * MCV item.
+ *
+ * The function also returns the frequency of the least frequent item
+ * on the MCV list, which may be useful for clamping the estimate from
+ * the histogram.
+ */
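+/*
+ * For instance (made-up numbers): for clauses (a = 1 AND b = 2) on
+ * stats over (a, b), finding the MCV item (1, 2) with frequency 0.15
+ * sets *fullmatch and the function returns 0.15. If no item matches,
+ * *lowsel holds the smallest frequency in the list, which is an upper
+ * bound for the true selectivity of a full equality match.
+ */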
+static Selectivity
+clauselist_mv_selectivity_mcvlist(PlannerInfo *root, List *clauses,
+								  MVStats mvstats, bool *fullmatch,
+								  Selectivity *lowsel)
+{
+	int i;
+	Selectivity s = 0.0;
+	ListCell * l;
+	char * mcvitems = NULL;
+	MCVList mcvlist = NULL;
+
+	Bitmapset *matches = NULL;	/* attributes with equality matches */
+
+	/* there's no MCV list yet */
+	if (! mvstats->mcv_built)
+		return 0.0;
+
+	mcvlist = deserialize_mv_mcvlist(fetch_mv_mcvlist(mvstats->mvoid));
+
+	Assert(mcvlist != NULL);
+	Assert (clauses != NIL);
+	Assert (list_length(clauses) >= 2);
+
+	mcvitems = palloc0(sizeof(char) * mcvlist->nitems);
+	memset(mcvitems, MVSTATS_MATCH_FULL, sizeof(char)*mcvlist->nitems);
+
+	/* no match here */
+	*lowsel = 1.0;
+
+	/* loop through the list of MV-compatible clauses and do the estimation */
+	foreach (l, clauses)
+	{
+		Node * clause = (Node*)lfirst(l);
+		OpExpr * expr = (OpExpr*)clause;
+		bool		varonleft = true;
+		bool		ok;
+
+		/* operator */
+		FmgrInfo	opproc;
+
+		fmgr_info(get_opcode(expr->opno), &opproc);
+
+		ok = (NumRelids(clause) == 1) &&
+			 (is_pseudo_constant_clause(lsecond(expr->args)) ||
+			 (varonleft = false,
+			  is_pseudo_constant_clause(linitial(expr->args))));
+
+		if (ok)
+		{
+			FmgrInfo	ltproc, gtproc;
+			Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+			Const * cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args);
+			bool isgt = (! varonleft);
+
+			/*
+			 * TODO Fetch only when really needed (probably for equality only)
+			 * TODO Technically either lt/gt is sufficient.
+			 * 
+			 * FIXME The code in analyze.c creates histograms only for types
+			 *       with enough ordering (by calling get_sort_group_operators).
+			 *       Is this the same assumption, i.e. are we certain that we
+			 *       get the ltproc/gtproc every time we ask? Or are there types
+			 *       where get_sort_group_operators returns ltopr and here we
+			 *       get nothing?
+			 */
+			TypeCacheEntry *typecache = lookup_type_cache(var->vartype, TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+
+			/* FIXME proper matching attribute to dimension */
+			int idx = mv_get_index(var->varattno, mvstats->stakeys);
+
+			fmgr_info(get_opcode(typecache->lt_opr), &ltproc);
+			fmgr_info(get_opcode(typecache->gt_opr), &gtproc);
+
+			/* process the MCV list first */
+			for (i = 0; i < mcvlist->nitems; i++)
+			{
+				bool tmp;
+				MCVItem item = mcvlist->items[i];
+
+				/* find the lowest selectivity in the MCV */
+				if (item->frequency < *lowsel)
+					*lowsel = item->frequency;
+
+				/* skip MCV items already ruled out */
+				if (mcvitems[i] == MVSTATS_MATCH_NONE)
+					continue;
+
+				/* TODO consider bsearch here (list is sorted by values)
+				 * TODO handle other operators too (LT, GT)
+				 * TODO identify "full match" when the clauses fully
+				 *      match the whole MCV list (so that checking the
+				 *      histogram is not needed)
+				 */
+				if (get_oprrest(expr->opno) == F_EQSEL)
+				{
+					/*
+					 * We don't care about isgt in equality, because it does not matter
+					 * whether it's (var = const) or (const = var).
+					 */
+					if (memcmp(&cst->constvalue, &item->values[idx], sizeof(Datum)) != 0)
+						mcvitems[i] = MVSTATS_MATCH_NONE;
+					else
+						matches = bms_add_member(matches, idx);
+				}
+				else if (get_oprrest(expr->opno) == F_SCALARLTSEL)	/* column < constant */
+				{
+
+					if (! isgt)	/* (var < const) */
+					{
+						/*
+						 * If the constant is below the item's value, the item
+						 * cannot match (var < const), so we can rule it out.
+						 */
+						tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+															 DEFAULT_COLLATION_OID,
+															 cst->constvalue,
+															 item->values[idx]));
+
+						if (tmp)
+						{
+							mcvitems[i] = MVSTATS_MATCH_NONE; /* no match */
+							continue;
+						}
+
+					}
+					else	/* (const < var) */
+					{
+						/*
+						 * If the item's value is below the constant, the item
+						 * cannot match (const < var), so we can rule it out.
+						 */
+						tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+															 DEFAULT_COLLATION_OID,
+															 item->values[idx],
+															 cst->constvalue));
+						if (tmp)
+						{
+							mcvitems[i] = MVSTATS_MATCH_NONE; /* no match */
+							continue;
+						}
+					}
+				}
+				else if (get_oprrest(expr->opno) == F_SCALARGTSEL)	/* column > constant */
+				{
+
+					if (! isgt)	/* (var > const) */
+					{
+						/*
+						 * If the constant is above the item's value, the item
+						 * cannot match (var > const), so we can rule it out.
+						 */
+						tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+															 DEFAULT_COLLATION_OID,
+															 cst->constvalue,
+															 item->values[idx]));
+						if (tmp)
+						{
+							mcvitems[i] = MVSTATS_MATCH_NONE; /* no match */
+							continue;
+						}
+
+					}
+					else /* (const > var) */
+					{
+						/*
+						 * If the item's value is above the constant, the item
+						 * cannot match (const > var), so we can rule it out.
+						 */
+						tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+															 DEFAULT_COLLATION_OID,
+															 item->values[idx],
+															 cst->constvalue));
+						if (tmp)
+						{
+							mcvitems[i] = MVSTATS_MATCH_NONE; /* no match */
+							continue;
+						}
+					}
+
+				} /* (get_oprrest(expr->opno) == F_SCALARGTSEL) */
+
+			}
+		}
+	}
+
+	for (i = 0; i < mcvlist->nitems; i++)
+	{
+		if (mcvitems[i] != MVSTATS_MATCH_NONE)
+			s += mcvlist->items[i]->frequency;
+	}
+
+	*fullmatch = (bms_num_members(matches) == mcvlist->ndimensions);
+
+	pfree(mcvitems);
+	pfree(mcvlist);
+
+	return s;
+}
+
+/*
+ * Estimate selectivity of clauses using a histogram.
+ *
+ * If there's no histogram for the stats, the function returns 0.0.
+ */
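+/*
+ * A sketch of the bucket classification (made-up numbers): for the
+ * clause (a < 10) and a bucket with min[a] = 20, the bucket is ruled
+ * out entirely (MVSTATS_MATCH_NONE); with min[a] = 0 and max[a] = 5 it
+ * stays a full match; with min[a] = 5 and max[a] = 15 it straddles the
+ * constant and becomes MVSTATS_MATCH_PARTIAL, counted as 1/2 of the
+ * bucket (see the notes at clauselist_mv_selectivity).
+ */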
+static Selectivity
+clauselist_mv_selectivity_histogram(PlannerInfo *root, List *clauses,
+									MVStats mvstats)
+{
+	int i;
+	Selectivity s = 0.0;
+	ListCell * l;
+	char *buckets = NULL;
+	MVHistogram mvhist = NULL;
+
+	/* there's no histogram */
+	if (! mvstats->hist_built)
+		return 0.0;
+
+	/* There may be no histogram in the stats (check hist_built flag) */
+	mvhist = deserialize_mv_histogram(fetch_mv_histogram(mvstats->mvoid));
+
+	Assert (mvhist != NULL);
+	Assert (clauses != NIL);
+	Assert (list_length(clauses) >= 2);
+
+	/*
+	 * Bitmap of bucket matches (none, partial, full). By default all
+	 * buckets fully match, and the clauses gradually eliminate them.
+	 */
+	buckets = palloc0(sizeof(char) * mvhist->nbuckets);
+	memset(buckets,  MVSTATS_MATCH_FULL, sizeof(char)*mvhist->nbuckets);
+
+	/* loop through the clauses and do the estimation */
+	foreach (l, clauses)
+	{
+		Node * clause = (Node*)lfirst(l);
+		OpExpr * expr = (OpExpr*)clause;
+		bool		varonleft = true;
+		bool		ok;
+
+		FmgrInfo	opproc;			/* operator */
+		fmgr_info(get_opcode(expr->opno), &opproc);
+
+		ok = (NumRelids(clause) == 1) &&
+			 (is_pseudo_constant_clause(lsecond(expr->args)) ||
+			 (varonleft = false,
+			  is_pseudo_constant_clause(linitial(expr->args))));
+
+		if (ok)
+		{
+			FmgrInfo	ltproc;
+			Var * var = (varonleft) ? linitial(expr->args) : lsecond(expr->args);
+			Const * cst = (varonleft) ? lsecond(expr->args) : linitial(expr->args);
+			bool isgt = (! varonleft);
+
+			/*
+			 * TODO Fetch only when really needed (probably for equality only)
+			 *
+			 * TODO Technically either lt/gt is sufficient.
+			 * 
+			 * FIXME The code in analyze.c creates histograms only for types
+			 *       with enough ordering (by calling get_sort_group_operators).
+			 *       Is this the same assumption, i.e. are we certain that we
+			 *       get the ltproc/gtproc every time we ask? Or are there types
+			 *       where get_sort_group_operators returns ltopr and here we
+			 *       get nothing?
+			 */
+			TypeCacheEntry *typecache
+				= lookup_type_cache(var->vartype, TYPECACHE_EQ_OPR | TYPECACHE_LT_OPR
+																   | TYPECACHE_GT_OPR);
+
+			/* lookup dimension for the attribute */
+			int idx = mv_get_index(var->varattno, mvstats->stakeys);
+
+			fmgr_info(get_opcode(typecache->lt_opr), &ltproc);
+
+			/*
+			 * Check this for all buckets that still have "true" in the bitmap
+			 * 
+			 * We already know the clauses use suitable operators (because that's
+			 * how we filtered them).
+			 */
+			for (i = 0; i < mvhist->nbuckets; i++)
+			{
+				bool tmp;
+				MVBucket bucket = mvhist->buckets[i];
+
+				/*
+				 * Skip buckets that were already eliminated - this is important
+				 * considering how we update the info (we only ever lower the
+				 * match level).
+				 */
+				if (buckets[i] == MVSTATS_MATCH_NONE)
+					continue;
+
+				/*
+				 * We only get here for "<", ">" and "=" operators (that's
+				 * how the clauses were filtered), so update the bucket
+				 * match level depending on how the constant compares to
+				 * the bucket boundaries.
+				 *
+				 * TODO I'm really unsure the handling of the 'isgt' flag
+				 *      (that is, clauses with reverse order of variable and
+				 *      constant) is correct. I wouldn't be surprised if
+				 *      there was some mixup. Using the lt/gt operators
+				 *      instead of messing with the opproc could make it
+				 *      simpler. It would however be using a different
+				 *      operator than the query, although it's not any
+				 *      shadier than using the selectivity function as is
+				 *      done currently.
+				 *
+				 * FIXME Once the min/max values are deduplicated, we can
+				 *       easily minimize the number of calls to the
+				 *       comparator (assuming we keep the deduplicated
+				 *       structure). See the note on compression at the
+				 *       MVBucket serialize/deserialize methods.
+				 */
+				switch (get_oprrest(expr->opno))
+				{
+					case F_SCALARLTSEL:	/* column < constant */
+
+						if (! isgt)	/* (var < const) */
+						{
+							/*
+							 * First check whether the constant is below the lower
+							 * boundary (in that case we can skip the bucket,
+							 * because there's no overlap).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 bucket->min[idx]));
+							if (tmp)
+							{
+								buckets[i] = MVSTATS_MATCH_NONE; /* no match */
+								continue;
+							}
+
+							/*
+							 * Now check whether the constant is below the upper
+							 * boundary (in that case only part of the bucket
+							 * matches, so it's a partial match).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 bucket->max[idx]));
+
+							if (tmp)
+								buckets[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+						}
+						else	/* (const < var) */
+						{
+							/*
+							 * First check whether the constant is above the upper
+							 * boundary (in that case we can skip the bucket,
+							 * because there's no overlap).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 bucket->max[idx],
+																 cst->constvalue));
+							if (tmp)
+							{
+								buckets[i] = MVSTATS_MATCH_NONE; /* no match */
+								continue;
+							}
+
+							/*
+							 * Now check whether the lower boundary is below the constant (in that
+							 * case it's a partial match).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 bucket->min[idx],
+																 cst->constvalue));
+
+							if (tmp)
+								buckets[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+						}
+						break;
+
+					case F_SCALARGTSEL:	/* column > constant */
+
+						if (! isgt)	/* (var > const) */
+						{
+							/*
+							 * First check whether the constant is above the upper
+							 * boundary (in that case we can skip the bucket,
+							 * because there's no overlap).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 bucket->max[idx]));
+							if (tmp)
+							{
+								buckets[i] = MVSTATS_MATCH_NONE; /* no match */
+								continue;
+							}
+
+							/*
+							 * Now check whether the lower boundary is below the constant (in that
+							 * case it's a partial match).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 cst->constvalue,
+																 bucket->min[idx]));
+
+							if (tmp)
+								buckets[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+						}
+						else /* (const > var) */
+						{
+							/*
+							 * First check whether the constant is below the lower boundary (in
+							 * that case we can skip the bucket, because there's no overlap).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 bucket->min[idx],
+																 cst->constvalue));
+							if (tmp)
+							{
+								buckets[i] = MVSTATS_MATCH_NONE; /* no match */
+								continue;
+							}
+
+							/*
+							 * Now check whether the upper boundary is above the
+							 * constant (in that case it's a partial match).
+							 */
+							tmp = DatumGetBool(FunctionCall2Coll(&opproc,
+																 DEFAULT_COLLATION_OID,
+																 bucket->max[idx],
+																 cst->constvalue));
+
+							if (tmp)
+								buckets[i] = MVSTATS_MATCH_PARTIAL; /* partial match */
+						}
+
+						break;
+
+					case F_EQSEL:
+
+						/*
+						 * We only check whether the value is within the bucket,
+						 * using the lt/gt operators fetched from the type cache.
+						 *
+						 * TODO We'll use the default 50% estimate, but that's
+						 *		probably way off if there are multiple distinct
+						 *		values. Consider tweaking this somehow, e.g. using
+						 *		only a part inversely proportional to the estimated
+						 *		number of distinct values in the bucket.
+						 *
+						 * TODO This does not handle inclusion flags at the moment,
+						 *		thus counting some buckets twice (when hitting the
+						 *		boundary).
+						 *
+						 * TODO An optimization: if min[idx] == max[idx], the bucket
+						 *		is effectively an MCV item and we can count it as a
+						 *		complete match (thus using 100% bucket selectivity
+						 *		and not just 50%).
+						 *
+						 * TODO Technically some buckets may "degenerate" into
+						 *		single-value buckets (not necessarily for all the
+						 *		dimensions) - maybe this is better than keeping a
+						 *		separate (multi-dimensional) MCV list. Update:
+						 *		Actually, that's unlikely to be better than a
+						 *		separate MCV list, for two reasons - first, it
+						 *		requires ~2x the space (because of storing both the
+						 *		lower and upper boundaries) and second because the
+						 *		buckets are ranges - depending on the partitioning
+						 *		algorithm it may not even degenerate into a
+						 *		(min=max) bucket. For example the current
+						 *		partitioning algorithm never does that.
+						 */
+						tmp = DatumGetBool(FunctionCall2Coll(&ltproc,
+															 DEFAULT_COLLATION_OID,
+															 cst->constvalue,
+															 bucket->min[idx]));
+
+						if (tmp)
+						{
+							buckets[i] = MVSTATS_MATCH_NONE;	/* constvalue < min */
+							continue;
+						}
+
+						tmp = DatumGetBool(FunctionCall2Coll(&ltproc,
+															 DEFAULT_COLLATION_OID,
+															 bucket->max[idx],
+															 cst->constvalue));
+
+						if (tmp)
+						{
+							buckets[i] = MVSTATS_MATCH_NONE;	/* constvalue > max */
+							continue;
+						}
+
+						/* partial match */
+						buckets[i] = MVSTATS_MATCH_PARTIAL;
+
+						break;
+				}
+			}
+		}
+	}
+
+	/* now, walk through the buckets and sum the selectivities */
+	for (i = 0; i < mvhist->nbuckets; i++)
+	{
+		if (buckets[i] == MVSTATS_MATCH_FULL)
+			s += mvhist->buckets[i]->ntuples;
+		else if (buckets[i] == MVSTATS_MATCH_PARTIAL)
+			s += 0.5 * mvhist->buckets[i]->ntuples;
+	}
+
+	pfree(buckets);
+
+	return s;
+}
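
To illustrate the weighting above (with invented numbers): given three
buckets with frequencies 0.2, 0.3 and 0.5, where the first matches
fully, the second partially and the third not at all, the function
returns

    s = 1.0 * 0.2 + 0.5 * 0.3 + 0.0 * 0.5 = 0.35

i.e. fully-matching buckets contribute their whole frequency and
partially-matching ones half of it (the 50% assumption discussed in the
F_EQSEL TODO above).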
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index bd180e7..d725ae0 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -366,6 +366,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 				create_generic_options alter_generic_options
 				relation_expr_list dostmt_opt_list
 
+%type <list>	OptStatsOptions
+%type <str>		stats_options_name
+%type <node>	stats_options_arg
+%type <defelt>	stats_options_elem
+%type <list>	stats_options_list
+
+
 %type <list>	opt_fdw_options fdw_options
 %type <defelt>	fdw_option
 
@@ -484,7 +491,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <keyword> unreserved_keyword type_func_name_keyword
 %type <keyword> col_name_keyword reserved_keyword
 
-%type <node>	TableConstraint TableLikeClause
+%type <node>	TableConstraint TableLikeClause TableStatistics
 %type <ival>	TableLikeOptionList TableLikeOption
 %type <list>	ColQualList
 %type <node>	ColConstraint ColConstraintElem ConstraintAttr
@@ -2312,6 +2319,14 @@ alter_table_cmd:
 					n->subtype = AT_DisableRowSecurity;
 					$$ = (Node *)n;
 				}
+			/* ALTER TABLE <name> ADD STATISTICS (options) ON (columns) ... */
+			| ADD_P TableStatistics
+				{
+					AlterTableCmd *n = makeNode(AlterTableCmd);
+					n->subtype = AT_AddStatistics;
+					n->def = $2;
+					$$ = (Node *)n;
+				}
 			| alter_generic_options
 				{
 					AlterTableCmd *n = makeNode(AlterTableCmd);
@@ -3382,6 +3397,56 @@ OptConsTableSpace:   USING INDEX TABLESPACE name	{ $$ = $4; }
 ExistingIndex:   USING INDEX index_name				{ $$ = $3; }
 		;
 
+/*****************************************************************************
+ *
+ *		QUERY :
+ *				ALTER TABLE relname ADD STATISTICS (options) ON (columns)
+ *
+ *****************************************************************************/
+
+TableStatistics:
+			STATISTICS OptStatsOptions ON '(' columnList ')'
+				{
+					StatisticsDef *n = makeNode(StatisticsDef);
+					n->keys  = $5;
+					n->options  = $2;
+					$$ = (Node *) n;
+				}
+		;
+
+OptStatsOptions:
+			'(' stats_options_list ')'		{ $$ = $2; }
+			| /*EMPTY*/						{ $$ = NIL; }
+		;
+
+stats_options_list:
+			stats_options_elem
+				{
+					$$ = list_make1($1);
+				}
+			| stats_options_list ',' stats_options_elem
+				{
+					$$ = lappend($1, $3);
+				}
+		;
+
+stats_options_elem:
+			stats_options_name stats_options_arg
+				{
+					$$ = makeDefElem($1, $2);
+				}
+		;
+
+stats_options_name:
+			NonReservedWord			{ $$ = $1; }
+		;
+
+stats_options_arg:
+			opt_boolean_or_string	{ $$ = (Node *) makeString($1); }
+			| NumericOnly			{ $$ = (Node *) $1; }
+			| /* EMPTY */			{ $$ = NULL; }
+		;
+
 
 /*****************************************************************************
  *
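
For reference, the grammar above accepts statements of the following
shape. The option names are parsed generically as DefElems, so the
names shown here are illustrative only - their validation happens
elsewhere:

    ALTER TABLE orders ADD STATISTICS ON (a, b);
    ALTER TABLE orders ADD STATISTICS (mcv true, histogram true) ON (a, b);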
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 94d951c..ec90773 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -43,6 +43,7 @@
 #include "catalog/pg_foreign_server.h"
 #include "catalog/pg_foreign_table.h"
 #include "catalog/pg_language.h"
+#include "catalog/pg_mv_statistic.h"
 #include "catalog/pg_namespace.h"
 #include "catalog/pg_opclass.h"
 #include "catalog/pg_operator.h"
@@ -499,6 +500,17 @@ static const struct cachedesc cacheinfo[] = {
 		},
 		4
 	},
+	{MvStatisticRelationId,		/* MVSTATOID */
+		MvStatisticOidIndexId,
+		1,
+		{
+			ObjectIdAttributeNumber,
+			0,
+			0,
+			0
+		},
+		128
+	},
 	{NamespaceRelationId,		/* NAMESPACENAME */
 		NamespaceNameIndexId,
 		1,
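
With the new MVSTATOID syscache entry, lookups by OID can follow the
usual syscache pattern; a minimal sketch (not part of the patch):

    HeapTuple	htup;
    Form_pg_mv_statistic stat;

    htup = SearchSysCache1(MVSTATOID, ObjectIdGetDatum(mvoid));
    if (!HeapTupleIsValid(htup))
        elog(ERROR, "cache lookup failed for mv statistic %u", mvoid);

    stat = (Form_pg_mv_statistic) GETSTRUCT(htup);
    /* ... inspect stat->hist_built, stat->mcv_built, etc. ... */
    ReleaseSysCache(htup);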
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 870692c..d2266c0 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -173,6 +173,11 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using
 DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops));
 #define LargeObjectMetadataOidIndexId	2996
 
+DECLARE_UNIQUE_INDEX(pg_mv_statistic_oid_index, 3259, on pg_mv_statistic using btree(oid oid_ops));
+#define MvStatisticOidIndexId  3259
+DECLARE_INDEX(pg_mv_statistic_relid_index, 3264, on pg_mv_statistic using btree(starelid oid_ops));
+#define MvStatisticRelidIndexId	3264
+
 DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops));
 #define NamespaceNameIndexId  2684
 DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops));
diff --git a/src/include/catalog/pg_mv_statistic.h b/src/include/catalog/pg_mv_statistic.h
new file mode 100644
index 0000000..d725957
--- /dev/null
+++ b/src/include/catalog/pg_mv_statistic.h
@@ -0,0 +1,89 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_mv_statistic.h
+ *	  definition of the system "multivariate statistic" relation (pg_mv_statistic)
+ *	  along with the relation's initial contents.
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/catalog/pg_mv_statistic.h
+ *
+ * NOTES
+ *	  the genbki.pl script reads this file and generates .bki
+ *	  information from the DATA() statements.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_MV_STATISTIC_H
+#define PG_MV_STATISTIC_H
+
+#include "catalog/genbki.h"
+
+/* ----------------
+ *		pg_mv_statistic definition.  cpp turns this into
+ *		typedef struct FormData_pg_mv_statistic
+ * ----------------
+ */
+#define MvStatisticRelationId  3260
+
+CATALOG(pg_mv_statistic,3260)
+{
+	/* These fields form the unique key for the entry: */
+	Oid			starelid;		/* relation containing attributes */
+
+	/* statistics requested to build */
+	bool		hist_enabled;		/* build histogram? */
+	bool		mcv_enabled;		/* build MCV list? */
+	bool		mcv_hashed;			/* build hashed MCV? */
+	bool		assoc_enabled;		/* analyze associations? */
+
+	/* histogram / MCV size */
+	int32		hist_max_buckets;	/* max buckets */
+	int32		mcv_max_items;		/* max MCV items */
+
+	/* statistics that are available (if requested) */
+	bool		hist_built;			/* histogram was built */
+	bool		mcv_built;			/* MCV list was built */
+	bool		assoc_built;		/* associations were built */
+
+	/* variable-length fields start here, but we allow direct access to stakeys */
+	int2vector	stakeys;			/* array of column keys */
+
+#ifdef CATALOG_VARLEN
+	bytea		staassoc;			/* association rules (serialized) */
+	bytea		stamcv;				/* MCV list (serialized) */
+	bytea		stahist;			/* MV histogram (serialized) */
+#endif
+
+} FormData_pg_mv_statistic;
+
+/* ----------------
+ *		Form_pg_mv_statistic corresponds to a pointer to a tuple with
+ *		the format of pg_mv_statistic relation.
+ * ----------------
+ */
+typedef FormData_pg_mv_statistic *Form_pg_mv_statistic;
+
+/* ----------------
+ *		compiler constants for pg_mv_statistic
+ * ----------------
+ */
+#define Natts_pg_mv_statistic					14
+#define Anum_pg_mv_statistic_starelid			1
+#define Anum_pg_mv_statistic_hist_enabled		2
+#define Anum_pg_mv_statistic_mcv_enabled		3
+#define Anum_pg_mv_statistic_mcv_hashed			4
+#define Anum_pg_mv_statistic_assoc_enabled		5
+#define Anum_pg_mv_statistic_hist_max_buckets	6
+#define Anum_pg_mv_statistic_mcv_max_items		7
+#define Anum_pg_mv_statistic_hist_built			8
+#define Anum_pg_mv_statistic_mcv_built			9
+#define Anum_pg_mv_statistic_assoc_built		10
+#define Anum_pg_mv_statistic_stakeys			11
+#define Anum_pg_mv_statistic_staassoc			12
+#define Anum_pg_mv_statistic_stamcv				13
+#define Anum_pg_mv_statistic_stahist			14
+
+#endif   /* PG_MV_STATISTIC_H */
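
With the catalog in place, the stats defined for a table can be
inspected directly; a sketch using only columns defined in this header:

    SELECT starelid::regclass AS rel, stakeys,
           hist_enabled, hist_built, mcv_enabled, mcv_built
      FROM pg_mv_statistic;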
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 497e652..c3c03b6 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2676,6 +2676,13 @@ DESCR("current user privilege on any column by rel name");
 DATA(insert OID = 3029 (  has_any_column_privilege	   PGNSP PGUID 12 10 0 0 0 f f f f t f s 2 0 16 "26 25" _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
 DESCR("current user privilege on any column by rel oid");
 
+DATA(insert OID = 3261 (  pg_mv_stats_histogram_info	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_histogram_info _null_ _null_ _null_ ));
+DESCR("multi-variate statistics: histogram info");
+DATA(insert OID = 3262 (  pg_mv_stats_mvclist_info	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_mvclist_info _null_ _null_ _null_ ));
+DESCR("multi-variate statistics: MCV list info");
+DATA(insert OID = 3263 (  pg_mv_stats_histogram_gnuplot	PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "17" _null_ _null_ _null_ _null_ pg_mv_stats_histogram_gnuplot _null_ _null_ _null_ ));
+DESCR("multi-variate statistics: 2D histogram gnuplot");
+
 DATA(insert OID = 1928 (  pg_stat_get_numscans			PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
 DATA(insert OID = 1929 (  pg_stat_get_tuples_returned	PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ ));
diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h
index a4af551..c7839c0 100644
--- a/src/include/catalog/toasting.h
+++ b/src/include/catalog/toasting.h
@@ -49,6 +49,7 @@ extern void BootstrapToastTable(char *relName,
 DECLARE_TOAST(pg_attrdef, 2830, 2831);
 DECLARE_TOAST(pg_constraint, 2832, 2833);
 DECLARE_TOAST(pg_description, 2834, 2835);
+DECLARE_TOAST(pg_mv_statistic, 3265, 3954);
 DECLARE_TOAST(pg_proc, 2836, 2837);
 DECLARE_TOAST(pg_rewrite, 2838, 2839);
 DECLARE_TOAST(pg_seclabel, 3598, 3599);
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index bc71fea..b916edd 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -413,6 +413,7 @@ typedef enum NodeTag
 	T_XmlSerialize,
 	T_WithClause,
 	T_CommonTableExpr,
+	T_StatisticsDef,
 
 	/*
 	 * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 3e4f815..c3e458a 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -543,6 +543,14 @@ typedef struct ColumnDef
 	int			location;		/* parse location, or -1 if none/unknown */
 } ColumnDef;
 
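+/*
+ * StatisticsDef - definition of multivariate statistics to build
+ *
+ * Carries the column list and the generic options list produced by the
+ * ALTER TABLE ... ADD STATISTICS (options) ON (columns) grammar rule.
+ */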
+typedef struct StatisticsDef
+{
+	NodeTag		type;
+	List	   *keys;			/* String nodes naming referenced column(s) */
+	List	   *options;		/* list of DefElem nodes */
+} StatisticsDef;
+
+
 /*
  * TableLikeClause - CREATE TABLE ( ... LIKE ... ) clause
  */
@@ -1338,7 +1346,8 @@ typedef enum AlterTableType
 	AT_ReplicaIdentity,			/* REPLICA IDENTITY */
 	AT_EnableRowSecurity,		/* ENABLE ROW SECURITY */
 	AT_DisableRowSecurity,		/* DISABLE ROW SECURITY */
-	AT_GenericOptions			/* OPTIONS (...) */
+	AT_GenericOptions,			/* OPTIONS (...) */
+	AT_AddStatistics			/* add statistics */
 } AlterTableType;
 
 typedef struct ReplicaIdentityStmt
diff --git a/src/include/utils/mvstats.h b/src/include/utils/mvstats.h
new file mode 100644
index 0000000..157891a
--- /dev/null
+++ b/src/include/utils/mvstats.h
@@ -0,0 +1,283 @@
+/*-------------------------------------------------------------------------
+ *
+ * mvstats.h
+ *	  Multivariate statistics and selectivity estimation functions.
+ *
+ *
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/mvstats.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef MVSTATS_H
+#define MVSTATS_H
+
+/*
+ * Multivariate statistics for planner/optimizer, implementing extensions
+ * of the single-column statistics:
+ * 
+ * - multivariate MCV list
+ * - multivariate histograms
+ *
+ * There is also experimental support for associative rules (values in
+ * one column implying values in other columns - e.g. a ZIP code implies
+ * the name of a city, etc.).
+ *
+ * The current implementation has various limitations:
+ *
+ *  (a) it supports only data types passed by value
+ *
+ *  (b) no support for NULL values
+ *
+ * Both (a) and (b) should be straightforward to fix; the necessary
+ * changes are usually described in comments at the related data
+ * structures or functions.
+ *
+ * The stats may be built only directly on columns, not on expressions.
+ * And there are usually some additional technical limits (e.g. number
+ * of columns in a histogram, etc.).
+ *
+ * Those limits serve mostly as sanity checks and while increasing them
+ * is possible (the implementation should not break), it's expected to
+ * lead either to very bad precision or expensive planning.
+ */
+
+/*
+ * Multivariate histograms
+ *
+ * Histograms are a collection of buckets, represented by n-dimensional
+ * rectangles. Each rectangle is delimited by an array of lower and
+ * upper boundaries, so that for the i-th attribute
+ *
+ *     min[i] <= value[i] <= max[i]
+ *
+ * Each bucket tracks frequency (fraction of tuples it contains),
+ * information about the inequalities, number of distinct values in
+ * each dimension (which is used when building the histogram) etc.
+ *
+ * The boundaries may be either inclusive or exclusive, or the whole
+ * dimension may be NULL.
+ *
+ * The buckets may overlap (assuming the build algorithm keeps the
+ * frequencies additive) or may not cover the whole space (i.e. allow
+ * gaps). This entirely depends on the algorithm used to build the
+ * histogram.
+ *
+ * The histograms are marked with a 'magic' constant, mostly to make
+ * sure the bytea really is a histogram in serialized form.
+ *
+ * We do expect to support multiple histogram types, with different
+ * features etc. The 'type' field is used to identify those types.
+ * Technically some histogram types might use completely different
+ * bucket representation, but that's not expected at the moment.
+ *
+ * TODO Add pointer to 'private' data, meant for private data for
+ *      other algorithms for building the histogram.
+ *
+ * TODO The current implementation does not handle NULL values (it's
+ *      somewhat prepared for that, but the algorithm building the
+ *      histogram ignores them). The idea is to build buckets with one
+ *      or more NULL-only dimensions - there'll be at most 2^ndimensions
+ *      such buckets, which for 8 attributes (current limit) is 256.
+ *      That's quite reasonable, considering we expect thousands of
+ *      buckets in total.
+ * 
+ * TODO This structure is used both when building the histogram, and
+ *      then when using it to compute estimates. That's why the last
+ *      few elements are not used once the histogram is built.
+ *
+ * TODO The limit on number of buckets is quite arbitrary, aiming for
+ *      sufficient accuracy while still being fast. Probably should be
+ *      replaced with a dynamic limit dependent on statistics target,
+ *      number of attributes (dimensions) and statistics target
+ *      associated with the attributes. Also, this needs to be related
+ *      to the number of sampled rows, by either clamping it to a
+ *      reasonable number (after seeing the number of rows) or using
+ *      it when computing the number of rows to sample. Something like
+ *      10 rows per bucket seems reasonable.
+ *
+ * TODO We may replace the bool arrays with a suitably large data type
+ *      (say, uint16 or uint32) and get rid of the allocations. It's
+ *      unlikely we'll ever support more than 32 columns as that'd
+ *      result in poor precision, huge histograms (splitting each
+ *      dimension once would mean 2^32 buckets), and very expensive
+ *      estimation. MCVItem already does it this way.
+ *
+ * TODO Actually the distinct stats (both for combination of all columns
+ *      and for combinations of various subsets of columns) should be
+ *      moved to a separate structure (next to histogram/MCV/...) to
+ *      make it useful even without a histogram computed etc.
+ */
+typedef struct MVBucketData {
+
+	/* Frequencies of this bucket. */
+	float	ntuples;	/* frequency of tuples */
+	float	ndistinct;	/* frequency of distinct values */
+
+	/*
+	 * Number of distinct values in each dimension. This is used when
+	 * building the histogram (and is not serialized/deserialized), but
+	 * it could be useful for estimating ndistinct for combinations of
+	 * columns.
+	 *
+	 * It would mean tracking 2^N values for each bucket, and even if
+	 * each value might be stored in a single byte, it's still a lot
+	 * of space (considering the expected number of buckets).
+	 *
+	 * TODO Consider tracking ndistincts for all attribute combinations.
+	 */
+	uint32 *ndistincts;
+
+	/*
+	 * Information about dimensions being NULL-only. Not yet used.
+	 */
+	bool   *nullsonly;
+
+	/* lower boundaries - values and information about the inequalities */
+	Datum  *min;
+	bool   *min_inclusive;
+
+	/* upper boundaries - values and information about the inequalities */
+	Datum  *max;
+	bool   *max_inclusive;
+
+	/*
+	 * Sample tuples falling into this bucket, index of the dimension
+	 * the bucket was split by in the last step.
+	 *
+	 * XXX These fields are needed only while building the histogram,
+	 *     and are not serialized at all.
+	 */
+	HeapTuple  *rows;
+	uint32		numrows;
+	int			last_split_dimension;
+
+} MVBucketData;
+
+typedef MVBucketData	*MVBucket;
+
+
+typedef struct MVHistogramData {
+
+	uint32		magic;			/* magic constant marker */
+	uint32		type;			/* type of histogram (BASIC) */
+	uint32 		nbuckets;		/* number of buckets (buckets array) */
+	uint32		ndimensions;	/* number of dimensions */
+
+	MVBucket   *buckets;		/* array of buckets */
+
+} MVHistogramData;
+
+typedef MVHistogramData *MVHistogram;
+
+
+/* used to flag stats serialized to bytea */
+#define MVHIST_MAGIC	0x7F8C5670		/* marks serialized bytea */
+#define MVHIST_TYPE_BASIC		1		/* basic histogram type */
+
+/* limits (mostly sanity check, may be relaxed in the future) */
+#define MVHIST_MAX_BUCKETS		16384	/* max number of buckets */
+
+/* bucket size in a serialized form */
+#define BUCKET_SIZE_SERIALIZED(ndims) \
+	(offsetof(MVBucketData, ndistincts) + \
+	(ndims) * (2 * sizeof(uint16) + sizeof(uint32) + 3 * sizeof(bool)))
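+
+/*
+ * XXX A reading of the macro above (not a format specification): per
+ *     dimension the serialized bucket stores two uint16 indexes for the
+ *     min/max boundary values (presumably into a deduplicated array of
+ *     boundary values), one uint32 ndistinct counter, and three bool
+ *     flags (the NULL-only flag plus min/max inclusiveness).
+ */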
+
+
+/*
+ * Multivariate MCV (most-common value) lists
+ *
+ * A straightforward extension of MCV items - i.e. a list (array) of
+ * combinations of attribute values, together with a frequency and
+ * null flags.
+ *
+ * This already uses the trick of using a uint32 as a NULL bitmap.
+ * 
+ * TODO Shouldn't the MCVItemData use plain pointer for values, instead
+ *      of the single-item array trick?
+ *
+ * TODO It's possible to build a special case of MCV list, storing not
+ *      the actual values but only 32/64-bit hash. This is only useful
+ *      for estimating equality clauses and for large varlena types.
+ */
+typedef struct MCVItemData {
+	double		frequency;	/* frequency of this combination */
+	uint32		nulls;		/* flags of NULL values (up to 32 columns) */
+	Datum		values[1];	/* variable-length (ndimensions) */
+} MCVItemData;
+
+typedef MCVItemData *MCVItem;
+
+/* multivariate MCV list - essentially an array of MCV items */
+typedef struct MCVListData {
+	uint32		magic;			/* magic constant marker */
+	uint32		type;			/* type of MCV list (BASIC) */
+	uint32		ndimensions;	/* number of dimensions */
+	uint32		nitems;			/* number of MCV items in the array */
+	MCVItem	   *items;			/* array of MCV items */
+} MCVListData;
+
+typedef MCVListData *MCVList;
+
+/* used to flag stats serialized to bytea */
+#define MVSTAT_MCV_MAGIC		0xE1A651C2	/* marks serialized bytea */
+#define MVSTAT_MCV_TYPE_BASIC	1			/* basic MCV list type */
+
+/* TODO consider increasing the limit, and/or using statistics target */
+#define MVSTAT_MCVLIST_MAX_ITEMS	1024	/* max items in MCV list */
+
+
+/*
+ * Basic info about the stats, used when choosing which stats to use
+ *
+ * TODO Add info about which statistics are available (histogram, MCV,
+ *      hashed MCV, associative rules).
+ */
+typedef struct MVStatsData {
+	Oid			mvoid;		/* OID of the stats in pg_mv_statistic */
+	int2vector *stakeys;	/* attnums for columns in the stats */
+	bool		hist_built;	/* histogram is already available */
+	bool		mcv_built;	/* MCV list is already available */
+	bool		assoc_built;	/* associative rules available */
+} MVStatsData;
+
+typedef struct MVStatsData *MVStats;
+
+
+/*
+ * Degree of how much MCV item / histogram bucket matches a clause.
+ * This is then considered when computing the selectivity.
+ */
+#define MVSTATS_MATCH_NONE		0		/* no match at all */
+#define MVSTATS_MATCH_PARTIAL	1		/* partial match */
+#define MVSTATS_MATCH_FULL		2		/* full match */
+
+
+#define MVSTATS_MAX_DIMENSIONS	8		/* max number of attributes */
+
+/*
+ * TODO Maybe fetching the histogram/MCV list separately is inefficient?
+ *      Consider adding a single `fetch_stats` method, fetching all
+ *      stats specified using flags (or something like that).
+ */
+MVStats list_mv_stats(Oid relid, int *nstats, bool built_only);
+bytea * fetch_mv_histogram(Oid mvoid);
+bytea * fetch_mv_mcvlist(Oid mvoid);
+
+/* deserialization of stats (serialization is private to analyze) */
+MVHistogram deserialize_mv_histogram(bytea * data);
+MCVList     deserialize_mv_mcvlist(bytea * data);
+
+/*
+ * Returns index of the attribute number within the vector (i.e. a
+ * dimension within the stats).
+ */
+int mv_get_index(AttrNumber varattno, int2vector * stakeys);
+
+/* FIXME this probably belongs somewhere else (not to operations stats) */
+extern Datum pg_mv_stats_histogram_info(PG_FUNCTION_ARGS);
+extern Datum pg_mv_stats_histogram_gnuplot(PG_FUNCTION_ARGS);
+extern Datum pg_mv_stats_mvclist_info(PG_FUNCTION_ARGS);
+
+#endif   /* MVSTATS_H */
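
The expected consumption pattern (mirroring the clauselist estimation
code earlier in the patch) is roughly the following sketch; it assumes
list_mv_stats returns a palloc'd array of MVStatsData:

    int			i;
    int			nstats;
    MVStats		stats = list_mv_stats(relid, &nstats, true);

    for (i = 0; i < nstats; i++)
    {
        if (stats[i].hist_built)
        {
            MVHistogram hist
                = deserialize_mv_histogram(fetch_mv_histogram(stats[i].mvoid));

            /* ... estimate using hist->nbuckets and hist->buckets ... */
        }
    }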
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index f97229f..a275bd5 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -66,6 +66,7 @@ enum SysCacheIdentifier
 	INDEXRELID,
 	LANGNAME,
 	LANGOID,
+	MVSTATOID,
 	NAMESPACENAME,
 	NAMESPACEOID,
 	OPERNAMENSP,
diff --git a/src/test/regress/regression.diffs b/src/test/regress/regression.diffs
new file mode 100644
index 0000000..179c09d
--- /dev/null
+++ b/src/test/regress/regression.diffs
@@ -0,0 +1,294 @@
+*** /home/tomas/work/postgres/src/test/regress/expected/updatable_views.out	2014-10-29 00:22:04.820171312 +0100
+--- /home/tomas/work/postgres/src/test/regress/results/updatable_views.out	2014-11-10 02:54:44.083052362 +0100
+***************
+*** 657,668 ****
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+!  table_name | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  rw_view1   | NO           | NO                 | NO                   | NO                   | NO
+!  rw_view2   | NO           | NO                 | NO                   | NO                   | NO
+! (2 rows)
+! 
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+--- 657,663 ----
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+! ERROR:  no relation entry for relid 1880
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+***************
+*** 710,721 ****
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+!  table_name | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  rw_view1   | NO           | NO                 | NO                   | NO                   | YES
+!  rw_view2   | NO           | NO                 | NO                   | NO                   | NO
+! (2 rows)
+! 
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+--- 705,711 ----
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+! ERROR:  no relation entry for relid 1880
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+***************
+*** 746,757 ****
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+!  table_name | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  rw_view1   | NO           | NO                 | YES                  | NO                   | YES
+!  rw_view2   | NO           | NO                 | NO                   | NO                   | NO
+! (2 rows)
+! 
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+--- 736,742 ----
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+! ERROR:  no relation entry for relid 1880
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+***************
+*** 782,793 ****
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+!  table_name | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  rw_view1   | NO           | NO                 | YES                  | YES                  | YES
+!  rw_view2   | NO           | NO                 | NO                   | NO                   | NO
+! (2 rows)
+! 
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+--- 767,773 ----
+    FROM information_schema.views
+   WHERE table_name LIKE 'rw_view%'
+   ORDER BY table_name;
+! ERROR:  no relation entry for relid 1880
+  SELECT table_name, column_name, is_updatable
+    FROM information_schema.columns
+   WHERE table_name LIKE 'rw_view%'
+***************
+*** 1385,1398 ****
+  Options: check_option=local
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view1';
+!  table_catalog | table_schema | table_name |          view_definition           | check_option | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ---------------+--------------+------------+------------------------------------+--------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  regression    | public       | rw_view1   |  SELECT base_tbl.a,               +| LOCAL        | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |     base_tbl.b                    +|              |              |                    |                      |                      | 
+!                |              |            |    FROM base_tbl                  +|              |              |                    |                      |                      | 
+!                |              |            |   WHERE (base_tbl.a < base_tbl.b); |              |              |                    |                      |                      | 
+! (1 row)
+! 
+  INSERT INTO rw_view1 VALUES(3,4); -- ok
+  INSERT INTO rw_view1 VALUES(4,3); -- should fail
+  ERROR:  new row violates WITH CHECK OPTION for "rw_view1"
+--- 1365,1371 ----
+  Options: check_option=local
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view1';
+! ERROR:  no relation entry for relid 1880
+  INSERT INTO rw_view1 VALUES(3,4); -- ok
+  INSERT INTO rw_view1 VALUES(4,3); -- should fail
+  ERROR:  new row violates WITH CHECK OPTION for "rw_view1"
+***************
+*** 1437,1449 ****
+  Options: check_option=cascaded
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view2';
+!  table_catalog | table_schema | table_name |      view_definition       | check_option | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ---------------+--------------+------------+----------------------------+--------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  regression    | public       | rw_view2   |  SELECT rw_view1.a        +| CASCADED     | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |    FROM rw_view1          +|              |              |                    |                      |                      | 
+!                |              |            |   WHERE (rw_view1.a < 10); |              |              |                    |                      |                      | 
+! (1 row)
+! 
+  INSERT INTO rw_view2 VALUES (-5); -- should fail
+  ERROR:  new row violates WITH CHECK OPTION for "rw_view1"
+  DETAIL:  Failing row contains (-5).
+--- 1410,1416 ----
+  Options: check_option=cascaded
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view2';
+! ERROR:  no relation entry for relid 1880
+  INSERT INTO rw_view2 VALUES (-5); -- should fail
+  ERROR:  new row violates WITH CHECK OPTION for "rw_view1"
+  DETAIL:  Failing row contains (-5).
+***************
+*** 1477,1489 ****
+  Options: check_option=local
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view2';
+!  table_catalog | table_schema | table_name |      view_definition       | check_option | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ---------------+--------------+------------+----------------------------+--------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  regression    | public       | rw_view2   |  SELECT rw_view1.a        +| LOCAL        | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |    FROM rw_view1          +|              |              |                    |                      |                      | 
+!                |              |            |   WHERE (rw_view1.a < 10); |              |              |                    |                      |                      | 
+! (1 row)
+! 
+  INSERT INTO rw_view2 VALUES (-10); -- ok, but not in view
+  INSERT INTO rw_view2 VALUES (20); -- should fail
+  ERROR:  new row violates WITH CHECK OPTION for "rw_view2"
+--- 1444,1450 ----
+  Options: check_option=local
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view2';
+! ERROR:  no relation entry for relid 1880
+  INSERT INTO rw_view2 VALUES (-10); -- ok, but not in view
+  INSERT INTO rw_view2 VALUES (20); -- should fail
+  ERROR:  new row violates WITH CHECK OPTION for "rw_view2"
+***************
+*** 1517,1529 ****
+    WHERE rw_view1.a < 10;
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view2';
+!  table_catalog | table_schema | table_name |      view_definition       | check_option | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ---------------+--------------+------------+----------------------------+--------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  regression    | public       | rw_view2   |  SELECT rw_view1.a        +| NONE         | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |    FROM rw_view1          +|              |              |                    |                      |                      | 
+!                |              |            |   WHERE (rw_view1.a < 10); |              |              |                    |                      |                      | 
+! (1 row)
+! 
+  INSERT INTO rw_view2 VALUES (30); -- ok, but not in view
+  SELECT * FROM base_tbl;
+    a  
+--- 1478,1484 ----
+    WHERE rw_view1.a < 10;
+  
+  SELECT * FROM information_schema.views WHERE table_name = 'rw_view2';
+! ERROR:  no relation entry for relid 1880
+  INSERT INTO rw_view2 VALUES (30); -- ok, but not in view
+  SELECT * FROM base_tbl;
+    a  
+***************
+*** 1543,1559 ****
+  CREATE VIEW rw_view2 AS SELECT * FROM rw_view1 WHERE a > 0;
+  CREATE VIEW rw_view3 AS SELECT * FROM rw_view2 WITH CHECK OPTION;
+  SELECT * FROM information_schema.views WHERE table_name LIKE E'rw\\_view_' ORDER BY table_name;
+!  table_catalog | table_schema | table_name |      view_definition      | check_option | is_updatable | is_insertable_into | is_trigger_updatable | is_trigger_deletable | is_trigger_insertable_into 
+! ---------------+--------------+------------+---------------------------+--------------+--------------+--------------------+----------------------+----------------------+----------------------------
+!  regression    | public       | rw_view1   |  SELECT base_tbl.a       +| CASCADED     | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |    FROM base_tbl;         |              |              |                    |                      |                      | 
+!  regression    | public       | rw_view2   |  SELECT rw_view1.a       +| NONE         | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |    FROM rw_view1         +|              |              |                    |                      |                      | 
+!                |              |            |   WHERE (rw_view1.a > 0); |              |              |                    |                      |                      | 
+!  regression    | public       | rw_view3   |  SELECT rw_view2.a       +| CASCADED     | YES          | YES                | NO                   | NO                   | NO
+!                |              |            |    FROM rw_view2;         |              |              |                    |                      |                      | 
+! (3 rows)
+! 
+  INSERT INTO rw_view1 VALUES (-1); -- ok
+  INSERT INTO rw_view1 VALUES (1); -- ok
+  INSERT INTO rw_view2 VALUES (-2); -- ok, but not in view
+--- 1498,1504 ----
+  CREATE VIEW rw_view2 AS SELECT * FROM rw_view1 WHERE a > 0;
+  CREATE VIEW rw_view3 AS SELECT * FROM rw_view2 WITH CHECK OPTION;
+  SELECT * FROM information_schema.views WHERE table_name LIKE E'rw\\_view_' ORDER BY table_name;
+! ERROR:  no relation entry for relid 1880
+  INSERT INTO rw_view1 VALUES (-1); -- ok
+  INSERT INTO rw_view1 VALUES (1); -- ok
+  INSERT INTO rw_view2 VALUES (-2); -- ok, but not in view
+
+======================================================================
+
+*** /home/tomas/work/postgres/src/test/regress/expected/sanity_check.out	2014-10-29 00:22:04.812171313 +0100
+--- /home/tomas/work/postgres/src/test/regress/results/sanity_check.out	2014-11-10 02:54:44.150052357 +0100
+***************
+*** 113,118 ****
+--- 113,119 ----
+  pg_language|t
+  pg_largeobject|t
+  pg_largeobject_metadata|t
++ pg_mv_statistic|t
+  pg_namespace|t
+  pg_opclass|t
+  pg_operator|t
+
+======================================================================
+
+*** /home/tomas/work/postgres/src/test/regress/expected/rowsecurity.out	2014-10-29 00:22:04.811171313 +0100
+--- /home/tomas/work/postgres/src/test/regress/results/rowsecurity.out	2014-11-10 02:54:45.775052238 +0100
+***************
+*** 901,925 ****
+  -- prepared statement with rls_regress_user0 privilege
+  PREPARE p1(int) AS SELECT * FROM t1 WHERE a <= $1;
+  EXECUTE p1(2);
+!  a |  b  
+! ---+-----
+!  2 | bbb
+!  2 | bcd
+!  2 | yyy
+! (3 rows)
+! 
+  EXPLAIN (COSTS OFF) EXECUTE p1(2);
+!                   QUERY PLAN                  
+! ----------------------------------------------
+!  Append
+!    ->  Seq Scan on t1
+!          Filter: ((a <= 2) AND ((a % 2) = 0))
+!    ->  Seq Scan on t2
+!          Filter: ((a <= 2) AND ((a % 2) = 0))
+!    ->  Seq Scan on t3
+!          Filter: ((a <= 2) AND ((a % 2) = 0))
+! (7 rows)
+! 
+  -- superuser is allowed to bypass RLS checks
+  RESET SESSION AUTHORIZATION;
+  SET row_security TO OFF;
+--- 901,909 ----
+  -- prepared statement with rls_regress_user0 privilege
+  PREPARE p1(int) AS SELECT * FROM t1 WHERE a <= $1;
+  EXECUTE p1(2);
+! ERROR:  no relation entry for relid 530
+  EXPLAIN (COSTS OFF) EXECUTE p1(2);
+! ERROR:  no relation entry for relid 530
+  -- superuser is allowed to bypass RLS checks
+  RESET SESSION AUTHORIZATION;
+  SET row_security TO OFF;
+
+======================================================================
+
+*** /home/tomas/work/postgres/src/test/regress/expected/rules.out	2014-10-29 00:22:04.812171313 +0100
+--- /home/tomas/work/postgres/src/test/regress/results/rules.out	2014-11-10 02:54:48.329052050 +0100
+***************
+*** 1353,1358 ****
+--- 1353,1368 ----
+       LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)))
+       LEFT JOIN pg_tablespace t ON ((t.oid = c.reltablespace)))
+    WHERE (c.relkind = 'm'::"char");
++ pg_mv_stats| SELECT n.nspname AS schemaname,
++     c.relname AS tablename,
++     s.stakeys AS attnums,
++     length(s.stamcv) AS mcvbytes,
++     pg_mv_stats_mvclist_info(s.stamcv) AS mcvinfo,
++     length(s.stahist) AS histbytes,
++     pg_mv_stats_histogram_info(s.stahist) AS histinfo
++    FROM ((pg_mv_statistic s
++      JOIN pg_class c ON ((c.oid = s.starelid)))
++      LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace)));
+  pg_policies| SELECT n.nspname AS schemaname,
+      c.relname AS tablename,
+      rs.rsecpolname AS policyname,
+
+======================================================================
+
diff --git a/src/test/regress/regression.out b/src/test/regress/regression.out
new file mode 100644
index 0000000..48a4a25
--- /dev/null
+++ b/src/test/regress/regression.out
@@ -0,0 +1,147 @@
+test tablespace               ... ok
+test boolean                  ... ok
+test char                     ... ok
+test name                     ... ok
+test varchar                  ... ok
+test text                     ... ok
+test int2                     ... ok
+test int4                     ... ok
+test int8                     ... ok
+test oid                      ... ok
+test float4                   ... ok
+test float8                   ... ok
+test bit                      ... ok
+test numeric                  ... ok
+test txid                     ... ok
+test uuid                     ... ok
+test enum                     ... ok
+test money                    ... ok
+test rangetypes               ... ok
+test pg_lsn                   ... ok
+test regproc                  ... ok
+test strings                  ... ok
+test numerology               ... ok
+test point                    ... ok
+test lseg                     ... ok
+test line                     ... ok
+test box                      ... ok
+test path                     ... ok
+test polygon                  ... ok
+test circle                   ... ok
+test date                     ... ok
+test time                     ... ok
+test timetz                   ... ok
+test timestamp                ... ok
+test timestamptz              ... ok
+test interval                 ... ok
+test abstime                  ... ok
+test reltime                  ... ok
+test tinterval                ... ok
+test inet                     ... ok
+test macaddr                  ... ok
+test tstypes                  ... ok
+test comments                 ... ok
+test geometry                 ... ok
+test horology                 ... ok
+test regex                    ... ok
+test oidjoins                 ... ok
+test type_sanity              ... ok
+test opr_sanity               ... ok
+test insert                   ... ok
+test create_function_1        ... ok
+test create_type              ... ok
+test create_table             ... ok
+test create_function_2        ... ok
+test copy                     ... ok
+test copyselect               ... ok
+test create_misc              ... ok
+test create_operator          ... ok
+test create_index             ... ok
+test create_view              ... ok
+test create_aggregate         ... ok
+test create_function_3        ... ok
+test create_cast              ... ok
+test constraints              ... ok
+test triggers                 ... ok
+test inherit                  ... ok
+test create_table_like        ... ok
+test typed_table              ... ok
+test vacuum                   ... ok
+test drop_if_exists           ... ok
+test updatable_views          ... FAILED
+test sanity_check             ... FAILED
+test errors                   ... ok
+test select                   ... ok
+test select_into              ... ok
+test select_distinct          ... ok
+test select_distinct_on       ... ok
+test select_implicit          ... ok
+test select_having            ... ok
+test subselect                ... ok
+test union                    ... ok
+test case                     ... ok
+test join                     ... ok
+test aggregates               ... ok
+test transactions             ... ok
+test random                   ... ok
+test portals                  ... ok
+test arrays                   ... ok
+test btree_index              ... ok
+test hash_index               ... ok
+test update                   ... ok
+test delete                   ... ok
+test namespace                ... ok
+test prepared_xacts           ... ok
+test privileges               ... ok
+test security_label           ... ok
+test collate                  ... ok
+test matview                  ... ok
+test lock                     ... ok
+test replica_identity         ... ok
+test rowsecurity              ... FAILED
+test alter_generic            ... ok
+test brin                     ... ok
+test misc                     ... ok
+test psql                     ... ok
+test async                    ... ok
+test rules                    ... FAILED
+test event_trigger            ... ok
+test select_views             ... ok
+test portals_p2               ... ok
+test foreign_key              ... ok
+test cluster                  ... ok
+test dependency               ... ok
+test guc                      ... ok
+test bitmapops                ... ok
+test combocid                 ... ok
+test tsearch                  ... ok
+test tsdicts                  ... ok
+test foreign_data             ... ok
+test window                   ... ok
+test xmlmap                   ... ok
+test functional_deps          ... ok
+test advisory_lock            ... ok
+test json                     ... ok
+test jsonb                    ... ok
+test indirect_toast           ... ok
+test equivclass               ... ok
+test plancache                ... ok
+test limit                    ... ok
+test plpgsql                  ... ok
+test copy2                    ... ok
+test temp                     ... ok
+test domain                   ... ok
+test rangefuncs               ... ok
+test prepare                  ... ok
+test without_oid              ... ok
+test conversion               ... ok
+test truncate                 ... ok
+test alter_table              ... ok
+test sequence                 ... ok
+test polymorphism             ... ok
+test rowtypes                 ... ok
+test returning                ... ok
+test largeobject              ... ok
+test with                     ... ok
+test xml                      ... ok
+test stats                    ... ok
