summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjasplin <qt-info@nokia.com>2010-05-07 12:44:40 +0200
committerjasplin <qt-info@nokia.com>2010-05-07 12:44:40 +0200
commit62b3ac5532740b14cfa6d2dc43f29c908131c7df (patch)
treed7baebd91603f779b838d1eae342bc4baf910dd3
parent856f10686aaebe984ccc4befdd1fdb08422472dd (diff)
First working version of the "data quality stats" feature.
-rw-r--r--src/bm/bm.pro6
-rw-r--r--src/bm/bmrequest.cpp175
-rw-r--r--src/bm/dataqualitystats.cpp174
-rw-r--r--src/bm/dataqualitystats.h50
-rw-r--r--src/bm/index.cpp34
-rw-r--r--src/bm/index.h9
-rw-r--r--src/bm/resulthistoryinfo.cpp67
-rw-r--r--src/bm/resulthistoryinfo.h4
-rw-r--r--src/bmweb/indexsection.js3
9 files changed, 480 insertions, 42 deletions
diff --git a/src/bm/bm.pro b/src/bm/bm.pro
index cff490b..70ae5dd 100644
--- a/src/bm/bm.pro
+++ b/src/bm/bm.pro
@@ -1,8 +1,10 @@
TEMPLATE = lib
CONFIG += shared
TARGET = bm
-SOURCES += bm.cpp bmrequest.cpp bmmisc.cpp plotter.cpp resulthistoryinfo.cpp cache.cpp index.cpp
-HEADERS += bm.h bmrequest.h bmmisc.h plotter.h resulthistoryinfo.h cache.h index.h
+SOURCES += bm.cpp bmrequest.cpp bmmisc.cpp plotter.cpp resulthistoryinfo.cpp cache.cpp index.cpp \
+ dataqualitystats.cpp
+HEADERS += bm.h bmrequest.h bmmisc.h plotter.h resulthistoryinfo.h cache.h index.h \
+ dataqualitystats.h
QT += network
QT += xml
QT += sql
diff --git a/src/bm/bmrequest.cpp b/src/bm/bmrequest.cpp
index 50c05a4..765e808 100644
--- a/src/bm/bmrequest.cpp
+++ b/src/bm/bmrequest.cpp
@@ -23,6 +23,7 @@
#include "bmrequest.h"
#include "resulthistoryinfo.h"
+#include "dataqualitystats.h"
#include "plotter.h"
#include "cache.h"
#include <QSqlDatabase>
@@ -5100,11 +5101,14 @@ QByteArray BMRequest_IndexGetValues::toReplyBuffer()
QList<qreal> indexValues;
QList<int> contrCounts;
+ QString error_;
QList<QList<Index::RankedInfo> > topContr; // Top contributors for each index value
const int topContrLimit = 10; // ### hard-coded for now!
- QString error_;
+ DataQualityStats dqStats(dqStatsDiffTol, dqStatsStabTol); // Stats for all contributors
+
if (!index.computeValues(
- &indexValues, &baseValuePos, &contrCounts, &error_, &topContr, topContrLimit)) {
+ &indexValues, &baseValuePos, &contrCounts, &error_, &topContr, topContrLimit,
+ dataQualityStats ? &dqStats : static_cast<DataQualityStats *>(0))) {
return xmlConvert(
errorReply(name(), QString("failed to compute index (2): %1").arg(error_)));
}
@@ -5133,6 +5137,35 @@ QByteArray BMRequest_IndexGetValues::toReplyBuffer()
reply += "</value>";
}
+
+ if (dataQualityStats) {
+ // Add data quality stats to reply ...
+
+ reply += QString("<dataQualityStats diffTol=\"%1\" stabTol=\"%2\">")
+ .arg(dqStatsDiffTol).arg(dqStatsStabTol);
+
+ const QMap<int, int> totalMaxESSFreq_ = dqStats.totalMaxESSFreq();
+ {
+ QMap<int, int>::const_iterator it;
+ for (it = totalMaxESSFreq_.constBegin(); it != totalMaxESSFreq_.constEnd(); ++it)
+ reply += QString("<totMaxESSCountFreq count=\"%1\" freq=\"%2\" />")
+ .arg(it.key()).arg(it.value());
+ }
+
+ const QMap<int, QPair<qreal, qreal> > stabFracPercentiles_ =
+ dqStats.stabFracPercentiles();
+ {
+ QMap<int, QPair<qreal, qreal> >::const_iterator it;
+ for (it = stabFracPercentiles_.constBegin();
+ it != stabFracPercentiles_.constEnd(); ++it)
+ reply += QString("<stabFracPercentile p=\"%1\" val=\"%2\" val2=\"%3\" />")
+ .arg(it.key())
+ .arg(it.value().first)
+ .arg(it.value().second);
+ }
+
+ reply += "</dataQualityStats>";
+ }
}
@@ -5392,7 +5425,15 @@ void BMRequest_IndexGetValues::handleReply_HTML(const QStringList &args) const
// *** Meta information (characterizing the results) ***
- reply += "\n<br /><br /><table>\n";
+
+ reply += "\n<br /><br /><table style=\"border:0px\">\n";
+ reply += "<tr>\n";
+
+ // Left main table ...
+
+ reply += "<td style=\"border:0px\">\n";
+
+ reply += "<table>\n";
int baseTimestamp = argsElem.attributeNode("baseTimestamp").value().toInt(&ok);
Q_ASSERT(ok);
if (baseTimestamp < 0)
@@ -5427,7 +5468,7 @@ void BMRequest_IndexGetValues::handleReply_HTML(const QStringList &args) const
// 'Filters' table ...
- reply += "\n<br /><br />Filters:<table>\n";
+ reply += "\n<br /><br /><b>Filters:</b><table>\n";
appendToFilterTable(testCaseFilter, "Test case", &reply);
appendToFilterTable(metricFilter, "Metric", &reply);
appendToFilterTable(platformFilter, "Platform", &reply);
@@ -5436,7 +5477,7 @@ void BMRequest_IndexGetValues::handleReply_HTML(const QStringList &args) const
reply += "</table>";
// 'Number of contributors' table ...
- reply += "\n<br /><br />Number of contributors:<table style=\"text-align:right\">\n";
+ reply += "\n<br /><br /><b>Number of contributors:</b><table style=\"text-align:right\">\n";
const int totalRH = argsElem.attributeNode("totalRH").value().toInt(&ok);
Q_ASSERT(ok);
reply += QString(
@@ -5468,7 +5509,8 @@ void BMRequest_IndexGetValues::handleReply_HTML(const QStringList &args) const
}
}
const qreal secsInDay = 86400.0;
- reply += "\n<br /><br />Timestamp neighbor distances:<table style=\"text-align:right\">\n";
+ reply +=
+ "\n<br /><br /><b>Timestamp neighbor distances:</b><table style=\"text-align:right\">\n";
reply += QString("<tr><td>Min days:</td><td>%1</td></tr>\n")
.arg((timestamps.size() < 2) ? "n/a" : QString().setNum(minDistSecs / secsInDay, 'f', 2));
reply += QString("<tr><td>Avg days:</td><td>%1</td></tr>\n")
@@ -5478,6 +5520,127 @@ void BMRequest_IndexGetValues::handleReply_HTML(const QStringList &args) const
.arg((timestamps.size() < 2) ? "n/a" : QString().setNum(maxDistSecs / secsInDay, 'f', 2));
reply += "</table>\n";
+ reply += "</td>\n";
+
+
+ // Right main table ...
+
+ reply += "<td style=\"border:0px\">\n";
+
+ QDomNodeList dqStatsNodes = doc.elementsByTagName("dataQualityStats");
+ if (dqStatsNodes.size() > 0) {
+
+ QDomElement dqStatsElem = dqStatsNodes.at(0).toElement();
+
+ // Get tolerance values ...
+ dqStatsDiffTol = dqStatsElem.attributeNode("diffTol").value().toDouble(&ok);
+ Q_ASSERT(ok);
+ dqStatsStabTol = dqStatsElem.attributeNode("stabTol").value().toInt(&ok);
+ Q_ASSERT(ok);
+
+ reply += "<fieldset><legend style=\"font-size:12\">Data Quality Statistics</legend>";
+
+ // --- BEGIN Legend table ---
+ reply += "<table style=\"border:0px\">"
+ "<tr><td style=\"border:0px; background-color:#eeeeee\">"
+ "<table style=\"border:0px\">";
+ reply += QString(
+ "<tr><td style=\"padding:1px; border:0px; text-align:right\">"
+ "Subsequence:&nbsp;</td><td style=\"padding:1px; border:0px; text-align:left\">"
+ "In the result history of any given contributor, a maximum contiguous sequence "
+ "in which all values differ by at most <u>%1 %</u>.</td></tr>")
+ .arg(dqStatsDiffTol);
+ reply += "<tr><td style=\"padding:1px; border:0px; text-align:right; font-weight:bold\">"
+ "SSC:&nbsp;</td><td style=\"padding:1px; border:0px; text-align:left\">"
+ "Subsequence count in a single contributor.</td></tr>";
+ reply += "<tr><td style=\"padding:1px; border:0px; text-align:right; font-weight:bold\">"
+ "Freq:&nbsp;</td><td style=\"padding:1px; border:0px; text-align:left\">"
+ "Frequency (number of occurrences).</td></tr>";
+ reply += QString(
+ "<tr><td style=\"padding:1px; border:0px; text-align:right\">"
+ "Stability&nbsp;fraction:&nbsp;</td><td style=\"padding:1px; border:0px; "
+ "text-align:left\">"
+ "In any given contributor, the percentage of subsequences that have a length "
+ "of at least <u>%1</u>.</td></tr>")
+ .arg(dqStatsStabTol);
+ reply += "<tr><td style=\"padding:1px; border:0px; text-align:right; font-weight:bold\">"
+ "SF:&nbsp;</td><td style=\"padding:1px; border:0px; text-align:left\">"
+ "Stability fraction in a single contributor (regardless of its subsequence count)."
+ "</td></tr>";
+ reply += "<tr><td style=\"padding:1px; border:0px; text-align:right; font-weight:bold\">"
+ "SF2:&nbsp;</td><td style=\"padding:1px; border:0px; text-align:left\">"
+ "Stability fraction in a single contributor having subsequence count >= 2."
+ "</td></tr>";
+ reply += "<tr><td style=\"padding:1px; border:0px; text-align:right; font-weight:bold\">"
+ "P<sub><i>k</i></sub>:&nbsp;</td><td style=\"padding:1px; border:0px; "
+ "text-align:left\">"
+ "The <i>k</i>th percentile of the stability fractions (i.e. the <i>maximum</i> "
+ "stability fraction of the <i>worst</i> <i>k</i>&nbsp;% of the contributors)."
+ "</td></tr>";
+ reply += "</table></td></tr></table>";
+ // --- END Legend table ---
+
+ reply += "<table style=\"border:0px\">";
+
+ // --- BEGIN Stats tables ---
+ reply += "<tr>";
+
+ // .. subsequence count ...
+ reply += "<td style=\"border:0px\"><table><tr><th>SSC</th><th>Freq</th></tr>\n";
+ QDomNodeList ssFreqNodes = dqStatsElem.elementsByTagName("totMaxESSCountFreq");
+ for (int i = 0; i < ssFreqNodes.size(); ++i) {
+ QDomElement ssFreqElem = ssFreqNodes.at(i).toElement();
+ reply += QString("<tr style=\"text-align:right\"><td>%1</td><td>%2</td></tr>\n")
+ .arg(ssFreqElem.attributeNode("count").value())
+ .arg(ssFreqElem.attributeNode("freq").value());
+ }
+ reply += "</table></td>\n";
+
+ QDomNodeList sfPercNodes = dqStatsElem.elementsByTagName("stabFracPercentile");
+
+ // .. stable fraction percentiles (all subsequence counts) ...
+ reply += "<td style=\"border:0px\"><table>"
+ "<tr><th></th><th>SF</th></tr>\n";
+ for (int i = sfPercNodes.size() - 1; i >= 0; --i) {
+ QDomElement sfPercElem = sfPercNodes.at(i).toElement();
+ reply += QString(
+ "<tr><td><b>P<sub>%1</sub></b></td><td style=\"text-align:right\">%2%</td></tr>\n")
+ .arg(sfPercElem.attributeNode("p").value())
+ .arg(QString()
+ .setNum(sfPercElem.attributeNode("val").value().toDouble(&ok), 'f', 1));
+ Q_ASSERT(ok);
+ }
+ reply += "</table></td>\n";
+
+ // .. stable fraction percentiles (subsequence counts >= 2) ...
+ reply += "<td style=\"border:0px\"><table>"
+ "<tr><th></th><th>SF2</th></tr>\n";
+ for (int i = sfPercNodes.size() - 1; i >= 0; --i) {
+ QDomElement sfPercElem = sfPercNodes.at(i).toElement();
+ reply += QString(
+ "<tr><td><b>P<sub>%1</sub></b></td><td style=\"text-align:right\">%2%</td></tr>\n")
+ .arg(sfPercElem.attributeNode("p").value())
+ .arg(QString()
+ .setNum(sfPercElem.attributeNode("val2").value().toDouble(&ok), 'f', 1));
+ Q_ASSERT(ok);
+ }
+ reply += "</table></td>\n";
+
+ reply += "</tr>\n";
+ // --- END Stats tables ---
+
+ reply += "</table>\n";
+
+ reply += "</fieldset>";
+ }
+
+ reply += "</td>\n";
+
+
+ reply += "</tr>\n";
+ reply += "<table>\n";
+
+
// *** Values ***
reply += "\n<br /><br /><table style=\"border: 0px\">\n";
reply += "<tr style=\"border: 0px\">\n";
diff --git a/src/bm/dataqualitystats.cpp b/src/bm/dataqualitystats.cpp
new file mode 100644
index 0000000..ad6743a
--- /dev/null
+++ b/src/bm/dataqualitystats.cpp
@@ -0,0 +1,174 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the BM project on Qt Labs.
+**
+** This file may be used under the terms of the GNU General Public
+** License version 2.0 or 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of
+** this file. Please review the following information to ensure GNU
+** General Public Licensing requirements will be met:
+** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+**
+** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+**
+****************************************************************************/
+
+#include "dataqualitystats.h"
+#include <QList>
+#include <QMap>
+#include <QDebug>
+
+/* NOTES
+
+Params:
+
+- diffTolerance (in percent: 0 <= x <= 100)
+- stabTolerance (a positive integer: x >= 2)
+
+-------------------------------------------------
+
+Def: An equality subsequence (ESS) is a subsequence v1, v2, ..., vn of a result history
+ for which the following condition holds:
+
+ ∀ i >= 1 : 100 * (max(vi, v1) / min(vi, v1) - 1) <= diffTolerance
+
+
+Def: A maximal equality subsequence (MaxESS) is one of the subsequences formed by
+ partitioning a result history into the smallest possible number of ESS'es.
+
+
+Def: The stability fraction (SF) of a result history is the fraction (given as a
+ percentage: 0 <= SF <= 100) of its MaxESS'es that are stable.
+ More precisely,
+
+ SF = 100 * (stableMaxESS / totalMaxESS),
+
+ where stableMaxESS is the number of MaxESS'es that have a length of at least
+ stabTolerance and totalMaxESS is the total number of MaxESS'es.
+
+-------
+
+Stats 1: Percentile distribution of the SF values for the contributing result histories.
+
+P_95 = x -> x is the smallest SF value that is larger than or equal to that
+ of 95% of the result histories (i.e. 95% of the result histories have an SF value
+ that is smaller than or equal to x)
+
+Example:
+
+100 100 100 100 100 (good)
+100 80 30 20 10 (bad)
+
+The following 10 values: 95, 90, 80, 50, 40, 40, 40, 40, 5, 0
+gives the following percentile distribution:
+
+P_100 = 95 (the worst 100 % of the RHs have a SF of 95 or worse, and 95 is also the max SF)
+P_90 = 90 ( 90 90 )
+P_80 = 80 ( 80 80 )
+P_70 = 50 ( 70 50 )
+P_60 = 40 ( 60 40 )
+P_50 = 40 ( 50 40 )
+P_40 = 40 ( 40 40 )
+P_30 = 40 ( 30 40 )
+P_20 = 5 ( 20 5 )
+P_10 = 0 ( 10 0 )
+
+Note: The quality of a RH is proportional to its SF value, so we want the percentile distribution
+ to start (at P_100) as high as possible (ideally at 100), and end (at P_10) as high
+ as possible.
+
+*/
+
+
+// Returns the \a p th percentile, expressed in percent, of the stability fractions in
+// \a sorted, which must be in ascending order. An empty distribution has no percentiles;
+// 0 is returned in that case rather than indexing into the empty list.
+static qreal stabFracPercentile(const QList<qreal> &sorted, int p)
+{
+    if (sorted.isEmpty())
+        return 0;
+
+    // Integer arithmetic yields floor((size - 1) * p / 100) exactly, without depending on
+    // how p / 100.0 happens to be rounded.
+    const int pos = ((sorted.size() - 1) * p) / 100;
+    return 100 * sorted.at(pos);
+}
+
+// Computes the data quality statistics for the result histories in \a rhInfos and stores
+// them in this object, replacing the results of any earlier call:
+//
+//   - totalMaxESSFreq() maps each distinct MaxESS count to the number of result
+//     histories that have exactly that count.
+//   - stabFracPercentiles() maps each percentile level p in 10, 20, ..., 100 to a pair of
+//     stability fractions (in percent). The first member is taken over all result
+//     histories having at least one MaxESS, the second over those having at least two.
+//     A member whose distribution is empty is reported as 0.
+//
+// The SF (stability fraction) of a result history is its number of stable MaxESS'es
+// divided by its total number of MaxESS'es.
+void DataQualityStats::compute(const QList<ResultHistoryInfo *> &rhInfos)
+{
+    Q_ASSERT(diffTolerance >= 0.0);
+    Q_ASSERT(stabTolerance >= 2);
+
+    // *** Step 1: Extract total and stable MaxESS counts for each result history ***
+
+    QList<int> totalMaxESS;
+    QList<int> stableMaxESS;
+
+    for (int i = 0; i < rhInfos.size(); ++i) {
+        int total = 0;
+        int stable = 0;
+        rhInfos.at(i)->computeMaxESSStats(diffTolerance, stabTolerance, &total, &stable);
+        totalMaxESS.append(total);
+        stableMaxESS.append(stable);
+    }
+
+
+    // *** Step 2: Compute the frequency distribution of the total MaxESS counts ***
+    // (the number of distinct counts is likely to be only a small fraction of the
+    // number of result histories)
+
+    totalMaxESSFreq_.clear();
+    for (int i = 0; i < totalMaxESS.size(); ++i)
+        ++(totalMaxESSFreq_[totalMaxESS.at(i)]);
+
+
+    // *** Step 3: Compute the percentile distribution of the stability fractions ***
+
+    // All subsequence counts (result histories without any MaxESS have no SF and are
+    // left out):
+    QList<qreal> stabFractions0;
+
+    // Subsequence counts >= 2 (i.e. result histories with actual changes in them and thus
+    // the ones that are really interesting):
+    QList<qreal> stabFractions2;
+
+    for (int i = 0; i < totalMaxESS.size(); ++i) {
+        if (totalMaxESS.at(i) > 0) {
+            const qreal sf = stableMaxESS.at(i) / static_cast<qreal>(totalMaxESS.at(i));
+            stabFractions0.append(sf);
+            if (totalMaxESS.at(i) >= 2)
+                stabFractions2.append(sf);
+        }
+    }
+    qSort(stabFractions0);
+    qSort(stabFractions2);
+
+    // SFP(p) is the max SF value among the worst p % of the result histories. Either list
+    // may be empty here (no result histories at all, or none with two or more MaxESS'es),
+    // which stabFracPercentile() handles.
+    stabFracPercentiles_.clear();
+    for (int p = 10; p <= 100; p += 10) {
+        stabFracPercentiles_.insert(
+            p,
+            qMakePair(stabFracPercentile(stabFractions0, p),
+                      stabFracPercentile(stabFractions2, p)));
+    }
+}
diff --git a/src/bm/dataqualitystats.h b/src/bm/dataqualitystats.h
new file mode 100644
index 0000000..aa9c8f5
--- /dev/null
+++ b/src/bm/dataqualitystats.h
@@ -0,0 +1,50 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the BM project on Qt Labs.
+**
+** This file may be used under the terms of the GNU General Public
+** License version 2.0 or 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of
+** this file. Please review the following information to ensure GNU
+** General Public Licensing requirements will be met:
+** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+**
+** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+**
+****************************************************************************/
+
+#ifndef DATAQUALITYSTATS_H
+#define DATAQUALITYSTATS_H
+
+#include "resulthistoryinfo.h"
+#include <QList>
+#include <QMap>
+#include <QPair>
+
+// Holds data quality statistics for a set of result histories. The statistics are
+// produced by compute() and read back through the accessors, which return empty maps
+// until compute() has been called.
+class DataQualityStats {
+public:
+    // \a diffTolerance is the difference tolerance in percent and \a stabTolerance the
+    // minimum length of a stable subsequence; compute() asserts that the former is
+    // non-negative and the latter at least 2.
+    DataQualityStats(const qreal diffTolerance, const int stabTolerance)
+        : diffTolerance(diffTolerance), stabTolerance(stabTolerance)
+    {
+    }
+
+    // Computes the statistics for the result histories in \a rhInfos.
+    void compute(const QList<ResultHistoryInfo *> &rhInfos);
+
+    // Frequency distribution of the subsequence counts (count -> number of occurrences).
+    QMap<int, int> totalMaxESSFreq() const
+    {
+        return totalMaxESSFreq_;
+    }
+
+    // Stability fraction percentiles (percentile level -> pair of values in percent).
+    QMap<int, QPair<qreal, qreal> > stabFracPercentiles() const
+    {
+        return stabFracPercentiles_;
+    }
+
+private:
+    // Parameters given at construction time:
+    qreal diffTolerance;
+    int stabTolerance;
+
+    // Results of the most recent compute() call:
+    QMap<int, int> totalMaxESSFreq_;
+    QMap<int, QPair<qreal, qreal> > stabFracPercentiles_;
+};
+
+#endif // DATAQUALITYSTATS_H
diff --git a/src/bm/index.cpp b/src/bm/index.cpp
index f71e0d8..314b369 100644
--- a/src/bm/index.cpp
+++ b/src/bm/index.cpp
@@ -23,6 +23,7 @@
#include "index.h"
#include "resulthistoryinfo.h"
+#include "dataqualitystats.h"
#include "bmmisc.h"
#include <QBitArray>
#include <QDebug>
@@ -103,7 +104,7 @@ void Index::init(
bool IndexAlgorithm1::computeValues(
QList<qreal> *values, int *baseValuePos, QList<int> *contrCounts, QString *error,
- QList<QList<RankedInfo> > *topContr, int topContrLimit) const
+ QList<QList<RankedInfo> > *topContr, int topContrLimit, DataQualityStats *dqStats) const
{
if (!isValid()) {
*error = invalidReason();
@@ -280,35 +281,8 @@ bool IndexAlgorithm1::computeValues(
}
- const bool computeDataQualityStatistics = false; // 4 NOW
- if (computeDataQualityStatistics) {
- // Step 1: Compute the MaxESSTotalCount and MaxESSStableCount for each
- // contributing RH (compute for the exact median-smoothed
- // values that formed the basis for computing the index)
- //
- // Step 2: Compute the complete distribution of MaxESSTotalCount
- // (note that the number of distinct counts are likely to be only a small
- // fraction of the number of RHs):
- //
- // TC(c1) = <# of RHs with a MaxESSTotalCount of c1>
- // TC(c2) = <# of RHs with a MaxESSTotalCount of c2>
- // ...
- // TC(cN) = <# of RHs with a MaxESSTotalCount of cN>
- //
- // (TC = Total Count, and N is number of distinct counts)
- //
- // Step 3: Compute the percentile distribution (for the 10 levels 10%, 20%, ..., 100%)
- // of the SF (stability fraction) values (where the SF for a given
- // RH is MaxESSStableCount / MaxESSTotalCount):
- //
- // SFP(100) = <the max SF value for the worst 100% of the RHs (i.e. all RHs!)>
- // SFP(90) = <the max SF value for the worst 90% of the RHs>
- // SFP(80) = <the max SF value for the worst 80% of the RHs>
- // ...
- // SFP(10) = <the max SF value for the worst 10% of the RHs>
- //
- // (SFP = Stability Fraction Percentile)
- }
+ if (dqStats)
+ dqStats->compute(rhInfos);
return true;
}
diff --git a/src/bm/index.h b/src/bm/index.h
index f07e4f0..db0fce7 100644
--- a/src/bm/index.h
+++ b/src/bm/index.h
@@ -28,6 +28,7 @@
#include <QList>
class ResultHistoryInfo;
+class DataQualityStats;
class Index {
public:
@@ -59,9 +60,12 @@ public:
// in \a contrCounts.
// If non-null, the \a topContr list will upon return contain a list of the \a topContrLimit
// most significant contributors at each evaluation timestamp.
+ // If non-null, the \a dqStats object will upon return contain data quality statistics for
+ // the contributing result histories.
virtual bool computeValues(
QList<qreal> *values, int *baseValuePos, QList<int> *contrCounts, QString *error,
- QList<QList<RankedInfo> > *topContr = 0, int topContrLimit = -1) const = 0;
+ QList<QList<RankedInfo> > *topContr = 0, int topContrLimit = -1,
+ DataQualityStats *dqStats = 0) const = 0;
protected:
QList<ResultHistoryInfo *> rhInfos;
@@ -88,7 +92,8 @@ public:
bool computeValues(
QList<qreal> *values, int *baseValuePos, QList<int> *contrCounts, QString *error,
- QList<QList<RankedInfo> > *topContr = 0, int topContrLimit = -1) const;
+ QList<QList<RankedInfo> > *topContr = 0, int topContrLimit = -1,
+ DataQualityStats *dqStats = 0) const;
};
#endif // INDEX_H
diff --git a/src/bm/resulthistoryinfo.cpp b/src/bm/resulthistoryinfo.cpp
index ad13c2c..cb18924 100644
--- a/src/bm/resulthistoryinfo.cpp
+++ b/src/bm/resulthistoryinfo.cpp
@@ -23,6 +23,8 @@
#include "resulthistoryinfo.h"
#include "bmmisc.h"
+#include <qnumeric.h>
+#include <QDebug>
// Searches for the (zero-based) position of the last non-outlier value at or
// before \a timestamp. If found, the position is passed in \a pos and the function returns
@@ -107,3 +109,68 @@ void ResultHistoryInfo::markOutliers() const
outliersMarked = true;
}
+
+// Registers one finished subsequence holding \a seqSize values: \a total is incremented,
+// and \a stable is incremented as well if the subsequence contains at least
+// \a stabTolerance values. A non-positive \a seqSize means there is no subsequence to
+// register, in which case both counters are left untouched.
+static void appendMaxESSStats(int seqSize, int *total, int *stable, int stabTolerance)
+{
+    if (seqSize > 0) {
+        ++*total;
+        if (!(seqSize < stabTolerance))
+            ++*stable;
+    }
+}
+
+// Tells whether the values at positions \a i and \a j are equal within \a diffTolerance
+// percent, i.e. whether 100 * (|max / min| - 1) <= diffTolerance. Returns false if either
+// position is out of range, or if the ratio is not a finite number (e.g. when the smaller
+// of two different values is 0).
+//
+// NOTE(review): \a diffTolerance is an int here, whereas computeMaxESSStats() forwards a
+// qreal, so a fractional tolerance loses its fraction on the way in. Widening the
+// parameter requires a matching change to the declaration in resulthistoryinfo.h.
+bool ResultHistoryInfo::equal(int i, int j, int diffTolerance) const
+{
+    const bool validPositions =
+        (i >= 0) && (j >= 0) && (i < values.size()) && (j < values.size());
+    if (!validPositions)
+        return false;
+
+    // A value always equals itself, and identical values are equal regardless of the
+    // tolerance (this also keeps 0 / 0 out of the ratio below).
+    if (i == j)
+        return true;
+    const qreal first = values.at(i);
+    const qreal second = values.at(j);
+    if (first == second)
+        return true;
+
+    const qreal larger = qMax(first, second);
+    const qreal smaller = qMin(first, second);
+    const qreal maxRatio = qAbs(larger / smaller);
+    return qIsFinite(maxRatio) && ((100 * (maxRatio - 1)) <= diffTolerance);
+}
+
+// Partitions the non-outlier values of this result history into maximal equality
+// subsequences (MaxESS'es) and counts them. A subsequence is extended for as long as each
+// new value is equal, within \a diffTolerance percent, to the first value of that
+// subsequence; the first value that is not starts the next subsequence.
+//
+// Upon return, \a total holds the number of subsequences found and \a stable the number of
+// those that contain at least \a stabTolerance values. Outliers are skipped and do not
+// terminate a subsequence. Both counters are 0 if there are no non-outlier values.
+void ResultHistoryInfo::computeMaxESSStats(
+    qreal diffTolerance, int stabTolerance, int *total, int *stable)
+{
+    *total = *stable = 0;
+
+    int basePos = -1; // Position of the first value of the current subsequence
+    int seqSize = 0;  // Number of values in the current subsequence
+
+    for (int i = 0; i < values.size(); ++i) {
+        if (isOutlier(i))
+            continue;
+
+        if ((basePos == -1) || !equal(basePos, i, diffTolerance)) {
+            // Value i starts a new subsequence, so the previous one must be registered
+            // *before* value i is counted; otherwise its length is overstated by one.
+            // (For the very first value seqSize is still 0 and nothing is registered.)
+            appendMaxESSStats(seqSize, total, stable, stabTolerance);
+            basePos = i;
+            seqSize = 0;
+        }
+
+        ++seqSize;
+    }
+
+    // Register the subsequence that was still open when the values ran out.
+    appendMaxESSStats(seqSize, total, stable, stabTolerance);
+}
diff --git a/src/bm/resulthistoryinfo.h b/src/bm/resulthistoryinfo.h
index 6cb7366..a65757f 100644
--- a/src/bm/resulthistoryinfo.h
+++ b/src/bm/resulthistoryinfo.h
@@ -69,6 +69,8 @@ public:
QString testFunction() const { return testFunction_; }
QString dataTag() const { return dataTag_; }
+ void computeMaxESSStats(qreal diffTolerance, int stabTolerance, int *total, int *stable);
+
private:
int bmcontextId_;
QList<int> timestamps_;
@@ -92,6 +94,8 @@ private:
mutable bool outliersMarked;
mutable QBitArray outliers;
void markOutliers() const;
+
+ bool equal(int i, int j, int diffTolerance) const;
};
#endif // RESULTHISTORYINFO_H
diff --git a/src/bmweb/indexsection.js b/src/bmweb/indexsection.js
index 9623f0f..1947979 100644
--- a/src/bmweb/indexsection.js
+++ b/src/bmweb/indexsection.js
@@ -569,8 +569,7 @@ function IndexSection()
fieldset = td.appendChild(document.createElement("fieldset"));
legend = fieldset.appendChild(document.createElement("legend"));
legend.appendChild(document.createTextNode("Data Quality Statistics"));
- legend.innerHTML =
- "Data Quality Statistics<span style=\"color:red\"> (not implemented yet)</span>";
+ legend.innerHTML = "Data Quality Statistics";
legend.setAttribute("style", "font-size:12");
table = fieldset.appendChild(document.createElement("table"));
table.setAttribute("style", "border:0px; padding:0px");