summaryrefslogtreecommitdiff
path: root/doc/src
diff options
context:
space:
mode:
authorTom Lane2009-08-02 22:14:53 +0000
committerTom Lane2009-08-02 22:14:53 +0000
commit90725929465474648de133d216b873bdb69fe357 (patch)
tree7285d55b3b98e6050d7bd035aacf25a20191d888 /doc/src
parent527f0ae3fa48c3c3a8ba1bde19039545e88a52b6 (diff)
Add ALTER TABLE ... ALTER COLUMN ... SET STATISTICS DISTINCT
Robert Haas
Diffstat (limited to 'doc/src')
-rw-r--r--doc/src/sgml/catalogs.sgml22
-rw-r--r--doc/src/sgml/ref/alter_table.sgml32
-rw-r--r--doc/src/sgml/ref/analyze.sgml13
3 files changed, 59 insertions, 8 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 99aee810da4..6cca9749817 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.203 2009/07/29 20:56:17 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.204 2009/08/02 22:14:51 tgl Exp $ -->
<!--
Documentation of the system catalogs, directed toward PostgreSQL developers
-->
@@ -887,6 +887,19 @@
</row>
<row>
+ <entry><structfield>attdistinct</structfield></entry>
+ <entry><type>float4</type></entry>
+ <entry></entry>
+ <entry>
+ <structfield>attdistinct</structfield>, if nonzero, is a user-specified
+ number-of-distinct-values figure to be used instead of estimating the
+ number of distinct values during <command>ANALYZE</>. Nonzero values
+ have the same meanings as for
+ <link linkend="catalog-pg-statistic"><structname>pg_statistic</></link>.<structfield>stadistinct</>.
+ </entry>
+ </row>
+
+ <row>
<entry><structfield>attlen</structfield></entry>
<entry><type>int2</type></entry>
<entry></entry>
@@ -4303,9 +4316,10 @@
<entry></entry>
<entry>The number of distinct nonnull data values in the column.
A value greater than zero is the actual number of distinct values.
- A value less than zero is the negative of a fraction of the number
- of rows in the table (for example, a column in which values appear about
- twice on the average could be represented by <structfield>stadistinct</> = -0.5).
+ A value less than zero is the negative of a multiplier for the number
+ of rows in the table; for example, a column in which values appear about
+ twice on the average could be represented by
+ <structfield>stadistinct</> = -0.5.
A zero value means the number of distinct values is unknown.
</entry>
</row>
diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 87892f84b1e..01aefb91307 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.107 2009/07/20 02:42:27 adunstan Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.108 2009/08/02 22:14:51 tgl Exp $
PostgreSQL documentation
-->
@@ -39,6 +39,7 @@ where <replaceable class="PARAMETER">action</replaceable> is one of:
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> DROP DEFAULT
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> { SET | DROP } NOT NULL
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STATISTICS <replaceable class="PARAMETER">integer</replaceable>
+ ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STATISTICS DISTINCT <replaceable class="PARAMETER">number</replaceable>
ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STORAGE { PLAIN | EXTERNAL | EXTENDED | MAIN }
ADD <replaceable class="PARAMETER">table_constraint</replaceable>
DROP CONSTRAINT [ IF EXISTS ] <replaceable class="PARAMETER">constraint_name</replaceable> [ RESTRICT | CASCADE ]
@@ -90,8 +91,8 @@ where <replaceable class="PARAMETER">action</replaceable> is one of:
dropped as well. You will need to say <literal>CASCADE</> if
anything outside the table depends on the column, for example,
foreign key references or views.
- If <literal>IF EXISTS</literal> is specified and the column
- does not exist, no error is thrown. In this case a notice
+ If <literal>IF EXISTS</literal> is specified and the column
+ does not exist, no error is thrown. In this case a notice
is issued instead.
</para>
</listitem>
@@ -157,6 +158,31 @@ where <replaceable class="PARAMETER">action</replaceable> is one of:
</varlistentry>
<varlistentry>
+ <term><literal>SET STATISTICS DISTINCT</literal></term>
+ <listitem>
+ <para>
+ This form overrides the number-of-distinct-values estimate made by
+ subsequent <xref linkend="sql-analyze" endterm="sql-analyze-title">
+ operations. When set to a positive value, <command>ANALYZE</> will
+ assume that the column contains exactly the specified number of distinct
+ nonnull values. When set to a negative value, which must be greater
+ than or equal to -1, <command>ANALYZE</> will assume that the number of
+ distinct nonnull values in the column is linear in the size of the
+ table; the exact count is to be computed by multiplying the estimated
+ table size by the absolute value of the given number. For example,
+ a value of -1 implies that all values in the column are distinct, while
+ a value of -0.5 implies that each value appears twice on the average.
+ This can be useful when the size of the table changes over time, since
+ the multiplication by the number of rows in the table is not performed
+ until query planning time. Specify a value of 0 to revert to estimating
+ the number of distinct values normally. For more information on the use
+ of statistics by the <productname>PostgreSQL</productname> query
+ planner, refer to <xref linkend="planner-stats">.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<indexterm>
<primary>TOAST</primary>
<secondary>per-column storage settings</secondary>
diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml
index 418082e6d93..c0d2673dc50 100644
--- a/doc/src/sgml/ref/analyze.sgml
+++ b/doc/src/sgml/ref/analyze.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/analyze.sgml,v 1.25 2008/12/13 19:13:44 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/analyze.sgml,v 1.26 2009/08/02 22:14:51 tgl Exp $
PostgreSQL documentation
-->
@@ -165,6 +165,17 @@ ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ ( <re
the target causes a proportional increase in the time and space needed
to do <command>ANALYZE</command>.
</para>
+
+ <para>
+ One of the values estimated by <command>ANALYZE</command> is the number of
+ distinct values that appear in each column. Because only a subset of the
+ rows are examined, this estimate can sometimes be quite inaccurate, even
+ with the largest possible statistics target. If this inaccuracy leads to
+ bad query plans, a more accurate value can be determined manually and then
+ installed with
+ <command>ALTER TABLE ... ALTER COLUMN ... SET STATISTICS DISTINCT</>
+ (see <xref linkend="sql-altertable" endterm="sql-altertable-title">).
+ </para>
</refsect1>
<refsect1>