summaryrefslogtreecommitdiffstats
path: root/src/bm/dataqualitystats.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/bm/dataqualitystats.cpp')
-rw-r--r--src/bm/dataqualitystats.cpp174
1 files changed, 174 insertions, 0 deletions
diff --git a/src/bm/dataqualitystats.cpp b/src/bm/dataqualitystats.cpp
new file mode 100644
index 0000000..ad6743a
--- /dev/null
+++ b/src/bm/dataqualitystats.cpp
@@ -0,0 +1,174 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the BM project on Qt Labs.
+**
+** This file may be used under the terms of the GNU General Public
+** License version 2.0 or 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of
+** this file. Please review the following information to ensure GNU
+** General Public Licensing requirements will be met:
+** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+**
+** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+**
+****************************************************************************/
+
+#include "dataqualitystats.h"
+#include <QList>
+#include <QMap>
+#include <QDebug>
+
+/* NOTES
+
+Params:
+
+- diffTolerance (in percent: 0 <= x <= 100)
+- stabTolerance (a positive integer: x >= 2)
+
+-------------------------------------------------
+
+Def: An equality subsequence (ESS) is a subsequence v1, v2, ..., vn of a result history
+ for which the following condition holds:
+
+ ∀ i >= 1 : 100 * (max(vi, v1) / min(vi, v1) - 1) <= diffTolerance
+
+
+Def: A maximal equality subsequence (MaxESS) is one of the subsequences formed by
+ partitioning a result history into the smallest possible number of ESS'es.
+
+
+Def: The stability fraction (SF) of a result history is the fraction (given as a
+ percentage: 0 <= SF <= 100) of its MaxESS'es that are stable.
+ More precisely,
+
+ SF = 100 * (stableMaxESS / totalMaxESS),
+
+ where stableMaxESS is the number of MaxESS'es that have a length of at least
+ stabTolerance and totalMaxESS is the total number of MaxESS'es.
+
+-------
+
+Stats 1: Percentile distribution of the SF values for the contributing result histories.
+
+P_95 = x -> x is the smallest SF value that is larger than or equal to that
+ of 95% of the result histories (i.e. 95% of the result histories have an SF value
+ that is smaller than or equal to x)
+
+Example:
+
+100 100 100 100 100 (good)
+100 80 30 20 10 (bad)
+
+The following 10 values: 95, 90, 80, 50, 40, 40, 40, 40, 5, 0
+give the following percentile distribution:
+
+P_100 = 95 (the worst 100 % of the RHs have a SF of 95 or worse, and 95 is also the max SF)
+P_90 = 90 ( 90 90 )
+P_80 = 80 ( 80 80 )
+P_70 = 50 ( 70 50 )
+P_60 = 40 ( 60 40 )
+P_50 = 40 ( 50 40 )
+P_40 = 40 ( 40 40 )
+P_30 = 40 ( 30 40 )
+P_20 = 5 ( 20 5 )
+P_10 = 0 ( 10 0 )
+
+Note: The quality of a RH is proportional to its SF value, so we want the percentile distribution
+ to start (at P_100) as high as possible (ideally at 100), and end (at P_10) as high
+ as possible.
+
+*/
+
+
+/*!
+    Computes the data quality statistics for the result histories in \a rhInfos.
+
+    The computation is done in three steps:
+
+    1. For each result history (RH), the total number of MaxESS'es and the number
+       of stable MaxESS'es are obtained from ResultHistoryInfo::computeMaxESSStats(),
+       which is passed diffTolerance and stabTolerance.
+
+    2. The frequency distribution of the total MaxESS counts is stored in
+       totalMaxESSFreq_ (total count -> number of RHs having that count).
+
+    3. The percentile distribution (levels 10, 20, ..., 100) of the stability
+       fractions (SF = 100 * stable / total) is stored in stabFracPercentiles_.
+       Each level maps to a pair: the first value covers all RHs with at least
+       one MaxESS, the second only RHs with at least two MaxESS'es (i.e. those
+       with actual changes in them).
+       RHs without any MaxESS are left out of both populations.
+
+    If one of the two populations is empty, 0 is stored for it at every level
+    (there is no value to look up in that case).
+
+    Asserts that diffTolerance >= 0 and stabTolerance >= 2.
+*/
+void DataQualityStats::compute(const QList<ResultHistoryInfo *> &rhInfos)
+{
+    // Both tolerances must be valid before they are handed to the histories.
+    Q_ASSERT(diffTolerance >= 0.0);
+    Q_ASSERT(stabTolerance >= 2);
+
+    // *** Step 1: Extract total and stable MaxESS counts for each result history ***
+
+    QList<int> totalMaxESS;
+    QList<int> stableMaxESS;
+    for (int i = 0; i < rhInfos.size(); ++i) {
+        int total = 0;
+        int stable = 0;
+        rhInfos.at(i)->computeMaxESSStats(diffTolerance, stabTolerance, &total, &stable);
+        totalMaxESS.append(total);
+        stableMaxESS.append(stable);
+    }
+
+    // *** Step 2: Compute the frequency distribution of the total MaxESS counts ***
+    totalMaxESSFreq_.clear();
+    for (int i = 0; i < totalMaxESS.size(); ++i)
+        ++(totalMaxESSFreq_[totalMaxESS.at(i)]);
+
+    // *** Step 3: Compute the percentile distribution of the stability fractions ***
+
+    // All result histories with at least one MaxESS:
+    QList<qreal> stabFractions0;
+
+    // Result histories with at least two MaxESS'es (i.e. with actual changes in
+    // them and thus the ones that are really interesting):
+    QList<qreal> stabFractions2;
+
+    for (int i = 0; i < totalMaxESS.size(); ++i) {
+        const int total = totalMaxESS.at(i);
+        if (total <= 0)
+            continue; // SF is undefined without any MaxESS
+        const qreal sf = stableMaxESS.at(i) / static_cast<qreal>(total);
+        stabFractions0.append(sf);
+        if (total >= 2)
+            stabFractions2.append(sf);
+    }
+    qSort(stabFractions0);
+    qSort(stabFractions2);
+
+    // Look up the SF value at each of the 10 percentile levels:
+    for (int p = 10; p <= 100; p += 10) {
+        // The index is computed with integer arithmetic: the floating point form
+        // (size - 1) * (p / 100.0) may end up just below an exact integer (e.g.
+        // 90 * 0.7 -> 62.99...) and then truncate to an index that is one too low.
+        // An empty list has no valid index, so 0 is reported instead of reading
+        // out of range.
+        qreal sfp0 = 0.0;
+        if (!stabFractions0.isEmpty())
+            sfp0 = 100 * stabFractions0.at((stabFractions0.size() - 1) * p / 100);
+        qreal sfp2 = 0.0;
+        if (!stabFractions2.isEmpty())
+            sfp2 = 100 * stabFractions2.at((stabFractions2.size() - 1) * p / 100);
+        stabFracPercentiles_.insert(p, qMakePair(sfp0, sfp2));
+    }
+}