diff options
Diffstat (limited to 'src/bm/dataqualitystats.cpp')
| -rw-r--r-- | src/bm/dataqualitystats.cpp | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/src/bm/dataqualitystats.cpp b/src/bm/dataqualitystats.cpp new file mode 100644 index 0000000..ad6743a --- /dev/null +++ b/src/bm/dataqualitystats.cpp @@ -0,0 +1,174 @@ +/**************************************************************************** +** +** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). +** Contact: Qt Software Information (qt-info@nokia.com) +** +** This file is part of the BM project on Qt Labs. +** +** This file may be used under the terms of the GNU General Public +** License version 2.0 or 3.0 as published by the Free Software Foundation +** and appearing in the file LICENSE.GPL included in the packaging of +** this file. Please review the following information to ensure GNU +** General Public Licensing requirements will be met: +** http://www.fsf.org/licensing/licenses/info/GPLv2.html and +** http://www.gnu.org/copyleft/gpl.html. +** +** If you are unsure which license is appropriate for your use, please +** contact the sales department at qt-sales@nokia.com. +** +** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE +** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. +** +****************************************************************************/ + +#include "dataqualitystats.h" +#include <QList> +#include <QMap> +#include <QDebug> + +/* NOTES + +Params: + +- diffTolerance (in percent: 0 <= x <= 100) +- stabTolerance (a positive integer: x >= 2) + +------------------------------------------------- + +Def: An equality subsequence (ESS) is a subsequence v1, v2, ..., vn of a result history + for which the following condition holds: + + ∀ i >= 1 : 100 * (max(vi, v1) / min(vi, v1) - 1) <= diffTolerance + + +Def: A maximal equality subsequence (MaxESS) is one of the subsequences formed by + partitioning a result history into the smallest possible number of ESS'es. + + +Def: The stability fraction (SF) of a result history is the fraction (given as a + percentage: 0 <= SF <= 100) of its MaxESS'es that are stable. + More precisely, + + SF = 100 * (stableMaxESS / totalMaxESS), + + where stableMaxESS is the the number of MaxESS'es that have a length of at least + stabTolerance and totalMaxESS is the total number of MaxESS'es. + +------- + +Stats 1: Percentile distribution of the SF values for the contributing result histories. + +P_95 = x -> x is the smallest SF value that is larger than or equal to that + of 95% of the result histories (i.e. 95% of the result histories have an SF value + that is smaller than or equal to x) + +Example: + +100 100 100 100 100 (good) +100 80 30 20 10 (bad) + +The following 10 values: 95, 90, 80, 50, 40, 40, 40, 40, 5, 0 +gives the following percentile distribution: + +P_100 = 95 (the worst 100 % of the RHs have a SF of 95 or worse, and 95 is also the max SF) +P_90 = 90 ( 90 90 ) +P_80 = 80 ( 80 80 ) +P_70 = 50 ( 70 50 ) +P_60 = 40 ( 60 40 ) +P_50 = 40 ( 50 40 ) +P_40 = 40 ( 40 40 ) +P_30 = 40 ( 30 40 ) +P_20 = 5 ( 20 5 ) +P_10 = 0 ( 10 0 ) + +Note: The quality of a RH is proportional to its SF value, so we want the percentile distribution + to start (at P_100) as high as possible (ideally at 100), and end (at P_10) as high + as possible. + +*/ + + +// ### 2 B DOCUMENTED! +void DataQualityStats::compute(const QList<ResultHistoryInfo *> &rhInfos) +{ + // Step 1: Compute the MaxESSTotalCount and MaxESSStableCount for each RH + // (compute for the exact median-smoothed values that formed the + // basis for computing the index, i.e. simply ignore the outliers) + // + // Step 2: Compute the complete distribution of MaxESSTotalCount + // (note that the number of distinct counts are likely to be only a small + // fraction of the number of RHs): + // + // TC(c1) = <# of RHs with a MaxESSTotalCount of c1> + // TC(c2) = <# of RHs with a MaxESSTotalCount of c2> + // ... + // TC(cN) = <# of RHs with a MaxESSTotalCount of cN> + // + // (TC = Total Count, and N is number of distinct counts) + // + // Step 3: Compute the percentile distribution (for the 10 levels 10%, 20%, ..., 100%) + // of the SF (stability fraction) values (where the SF for a given + // RH is MaxESSStableCount / MaxESSTotalCount): + // + // SFP(100) = <the max SF value for the worst 100% of the RHs (i.e. all RHs!)> + // SFP(90) = <the max SF value for the worst 90% of the RHs> + // SFP(80) = <the max SF value for the worst 80% of the RHs> + // ... + // SFP(10) = <the max SF value for the worst 10% of the RHs> + // + // (SFP = Stability Fraction Percentile) + + + + // *** Step 1: Extract total and stable MaxESS counts for each result history *** + + Q_ASSERT(diffTolerance >= 0.0); + Q_ASSERT(stabTolerance >= 2); + + QList<int> totalMaxESS; + QList<int> stableMaxESS; + + for (int i = 0; i < rhInfos.size(); ++i) { + int total = 0; + int stable = 0; + rhInfos.at(i)->computeMaxESSStats(diffTolerance, stabTolerance, &total, &stable); + totalMaxESS.append(total); + stableMaxESS.append(stable); + } + + + // *** Step 2: Compute the frequency distribution of the total MaxESS counts *** + totalMaxESSFreq_.clear(); + for (int i = 0; i < totalMaxESS.size(); ++i) + ++(totalMaxESSFreq_[totalMaxESS.at(i)]); + + + // *** Step 3: Compute the percentile distribution of the stability fractions *** + + // All subsequence counts (subsequence counts >= 0): + QList<qreal> stabFractions0; + + // Subsequence counts >= 2 (i.e. result histories with actual changes in them and thus + // the ones that are really interesting): + QList<qreal> stabFractions2; + + for (int i = 0; i < totalMaxESS.size(); ++i) { + if (totalMaxESS.at(i) > 0) { + const qreal sf = stableMaxESS.at(i) / static_cast<qreal>(totalMaxESS.at(i)); + stabFractions0.append(sf); + if (totalMaxESS.at(i) >= 2) + stabFractions2.append(sf); + } + } + qSort(stabFractions0); + qSort(stabFractions2); + + + // -------- + for (int p = 10; p <= 100; p += 10) { + const int i0 = (stabFractions0.size() - 1) * (p / 100.0); + const int i2 = (stabFractions2.size() - 1) * (p / 100.0); + stabFracPercentiles_.insert( + p, qMakePair(100 * stabFractions0.at(i0), 100 * stabFractions2.at(i2))); + } +} |
