54 template <
typename IteratorType>
70 template <
typename IteratorType>
86 template <
typename IteratorType1,
typename IteratorType2>
88 IteratorType1 begin_b, IteratorType1 end_b,
89 IteratorType2 begin_a, IteratorType2 end_a)
91 if ((begin_b == end_b) ^ (begin_a == end_a))
102 template <
typename IteratorType>
103 static double sum(IteratorType begin, IteratorType end)
105 return std::accumulate(begin, end, 0.0);
115 template <
typename IteratorType>
116 static double mean(IteratorType begin, IteratorType end)
119 return sum(begin, end) / std::distance(begin, end);
133 template <
typename IteratorType>
134 static double median(IteratorType begin, IteratorType end,
140 std::sort(begin, end);
143 Size size = std::distance(begin, end);
146 IteratorType it1 = begin;
147 std::advance(it1, size / 2 - 1);
148 IteratorType it2 = it1;
149 std::advance(it2, 1);
150 return (*it1 + *it2) / 2.0;
154 IteratorType it = begin;
155 std::advance(it, (size - 1) / 2);
180 template <
typename IteratorType>
181 double MAD(IteratorType begin, IteratorType end,
double median_of_numbers)
183 std::vector<double> diffs;
184 diffs.reserve(std::distance(begin, end));
185 for (IteratorType it = begin; it != end; ++it)
187 diffs.push_back(fabs(*it - median_of_numbers));
189 return median(diffs.begin(), diffs.end(),
false);
210 template <
typename IteratorType>
213 double mean_value {0};
214 for (IteratorType it = begin; it != end; ++it)
216 mean_value += fabs(*it - mean_of_numbers);
218 return mean_value / std::distance(begin, end);
234 template <
typename IteratorType>
242 std::sort(begin, end);
245 Size size = std::distance(begin, end);
248 return median(begin, begin + (size/2)-1,
true);
250 return median(begin, begin + (size/2),
true);
266 template <
typename IteratorType>
268 IteratorType begin, IteratorType end,
bool sorted =
false)
273 std::sort(begin, end);
276 Size size = std::distance(begin, end);
277 return median(begin + (size/2)+1, end,
true);
305 template <
typename IteratorType>
306 static double quantile(IteratorType begin, IteratorType end,
double q)
309 "Math::quantile expects a sorted range. Sort before calling.");
313 const Size n = std::distance(begin, end);
318 if (q < 0.0 || q > 1.0)
321 "q must be in [0,1]",
String(q));
323 if (n == 1)
return static_cast<double>(*begin);
325 const double pos = q *
static_cast<double>(n - 1);
326 const Size i =
static_cast<Size>(std::floor(pos));
327 const double frac = pos -
static_cast<double>(i);
329 const auto it_i = begin +
static_cast<typename std::iterator_traits<IteratorType>::difference_type
>(i);
330 if (frac == 0.0)
return static_cast<double>(*it_i);
332 const auto it_ip1 = it_i + 1;
333 return (1.0 - frac) *
static_cast<double>(*it_i) + frac *
static_cast<double>(*it_ip1);
350 template <
typename IteratorType>
353 std::vector<double> v;
354 v.reserve(std::distance(begin, end));
355 for (
auto it = begin; it != end; ++it)
357 if (std::isfinite(*it)) v.push_back(
static_cast<double>(*it));
359 if (v.size() < 4)
return std::numeric_limits<double>::infinity();
361 std::sort(v.begin(), v.end());
362 const double q1 =
quantile(v.begin(), v.end(), 0.25);
363 const double q3 =
quantile(v.begin(), v.end(), 0.75);
364 const double iqr = q3 - q1;
365 if (!(iqr > 0.0))
return std::numeric_limits<double>::infinity();
379 template <
typename IteratorType>
382 size_t n = 0, n_tail = 0;
383 for (
auto it = begin; it != end; ++it)
385 const double x =
static_cast<double>(*it);
386 if (!std::isfinite(x))
continue;
388 if (x > threshold) ++n_tail;
390 return (n == 0) ? 0.0 :
static_cast<double>(n_tail) /
static_cast<double>(n);
411 template <
typename IteratorType>
414 std::vector<double> v;
415 v.reserve(std::distance(begin, end));
416 for (
auto it = begin; it != end; ++it)
418 const double x =
static_cast<double>(*it);
419 if (!std::isfinite(x))
continue;
422 if (v.empty())
return 0.0;
424 if (std::isfinite(upper_fence))
428 if (x > upper_fence) x = upper_fence;
429 if (x < 0.0) x = 0.0;
432 std::sort(v.begin(), v.end());
433 return quantile(v.begin(), v.end(), q);
470 template <
typename IteratorType>
473 double r_sparse = 0.01,
474 double r_dense = 0.10)
479 std::vector<double> v;
480 v.reserve(std::distance(begin, end));
481 for (
auto it = begin; it != end; ++it)
483 if (std::isfinite(*it)) v.push_back(
static_cast<double>(*it));
490 std::sort(v.begin(), v.end());
491 const double half_raw =
quantile(v.begin(), v.end(), q);
495 const double r = std::isfinite(uf) ?
tailFractionAbove(v.begin(), v.end(), uf) : 0.0;
500 if (r_dense <= r_sparse)
502 w = (r > r_sparse) ? 1.0 : 0.0;
506 const double t = (r - r_sparse) / (r_dense - r_sparse);
507 w = std::max(0.0, std::min(1.0, t));
515 res.
blended = (1.0 - w) * half_rob + w * half_raw;
528 template <
typename IteratorType>
529 static double variance(IteratorType begin, IteratorType end,
530 double mean = std::numeric_limits<double>::max())
533 double sum_value = 0.0;
534 if (
mean == std::numeric_limits<double>::max())
538 for (IteratorType iter=begin; iter!=end; ++iter)
540 double diff = *iter -
mean;
541 sum_value += diff * diff;
543 return sum_value / (std::distance(begin, end)-1);
555 template <
typename IteratorType>
556 static double sd(IteratorType begin, IteratorType end,
557 double mean = std::numeric_limits<double>::max())
570 template <
typename IteratorType>
571 static double absdev(IteratorType begin, IteratorType end,
572 double mean = std::numeric_limits<double>::max())
575 double sum_value = 0.0;
576 if (
mean == std::numeric_limits<double>::max())
580 for (IteratorType iter=begin; iter!=end; ++iter)
582 sum_value += *iter -
mean;
584 return sum_value / std::distance(begin, end);
596 template <
typename IteratorType1,
typename IteratorType2>
597 static double covariance(IteratorType1 begin_a, IteratorType1 end_a,
598 IteratorType2 begin_b, IteratorType2 end_b)
603 double sum_value = 0.0;
606 IteratorType1 iter_a = begin_a;
607 IteratorType2 iter_b = begin_b;
608 for (; iter_a != end_a; ++iter_a, ++iter_b)
612 sum_value += (*iter_a - mean_a) * (*iter_b - mean_b);
616 Size n = std::distance(begin_a, end_a);
617 return sum_value / (n-1);
629 template <
typename IteratorType1,
typename IteratorType2>
631 IteratorType2 begin_b, IteratorType2 end_b)
636 SignedSize dist = std::distance(begin_a, end_a);
638 IteratorType1 iter_a = begin_a;
639 IteratorType2 iter_b = begin_b;
640 for (; iter_a != end_a; ++iter_a, ++iter_b)
645 double tmp(*iter_a - *iter_b);
666 template <
typename IteratorType1,
typename IteratorType2>
668 IteratorType2 begin_b, IteratorType2 end_b)
682 template <
typename IteratorType1,
typename IteratorType2>
684 IteratorType2 begin_b, IteratorType2 end_b)
689 SignedSize dist = std::distance(begin_a, end_a);
691 IteratorType1 iter_a = begin_a;
692 IteratorType2 iter_b = begin_b;
693 for (; iter_a != end_a; ++iter_a, ++iter_b)
697 if ((*iter_a < 0 && *iter_b >= 0) || (*iter_a >= 0 && *iter_b < 0))
706 return double(correct) / dist;
721 template <
typename IteratorType1,
typename IteratorType2>
723 IteratorType1 begin_a, IteratorType1 end_a,
724 IteratorType2 begin_b, IteratorType2 end_b)
733 IteratorType1 iter_a = begin_a;
734 IteratorType2 iter_b = begin_b;
735 for (; iter_a != end_a; ++iter_a, ++iter_b)
740 if (*iter_a < 0 && *iter_b >= 0)
744 else if (*iter_a < 0 && *iter_b < 0)
748 else if (*iter_a >= 0 && *iter_b >= 0)
752 else if (*iter_a >= 0 && *iter_b < 0)
760 return (tp * tn - fp * fn) / std::sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
774 template <
typename IteratorType1,
typename IteratorType2>
776 IteratorType1 begin_a, IteratorType1 end_a,
777 IteratorType2 begin_b, IteratorType2 end_b)
783 SignedSize dist = std::distance(begin_a, end_a);
784 double avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
785 double avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
787 double numerator = 0;
788 double denominator_a = 0;
789 double denominator_b = 0;
790 IteratorType1 iter_a = begin_a;
791 IteratorType2 iter_b = begin_b;
792 for (; iter_a != end_a; ++iter_a, ++iter_b)
796 double temp_a = *iter_a - avg_a;
797 double temp_b = *iter_b - avg_b;
798 numerator += (temp_a * temp_b);
799 denominator_a += (temp_a * temp_a);
800 denominator_b += (temp_b * temp_b);
804 return numerator / std::sqrt(denominator_a * denominator_b);
808 template <
typename Value>
814 Size n = (w.size() - 1);
816 std::vector<std::pair<Size, Value> > w_idx;
817 for (
Size j = 0; j < w.size(); ++j)
819 w_idx.push_back(std::make_pair(j, w[j]));
822 std::sort(w_idx.begin(), w_idx.end(),
823 [](
const auto& pair1,
const auto& pair2) { return pair1.second < pair2.second; });
828 if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second))
830 w_idx[i].second = Value(i + 1);
836 for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
840 rank = 0.5 * (i + z + 1);
842 for (
Size v = i; v <= z - 1; ++v)
844 w_idx[v].second = rank;
850 w_idx[n].second = Value(n + 1);
852 for (
Size j = 0; j < w.size(); ++j)
854 w[w_idx[j].first] = w_idx[j].second;
869 template <
typename IteratorType1,
typename IteratorType2>
871 IteratorType1 begin_a, IteratorType1 end_a,
872 IteratorType2 begin_b, IteratorType2 end_b)
878 SignedSize dist = std::distance(begin_a, end_a);
879 std::vector<double> ranks_data;
880 ranks_data.reserve(dist);
881 std::vector<double> ranks_model;
882 ranks_model.reserve(dist);
883 IteratorType1 iter_a = begin_a;
884 IteratorType2 iter_b = begin_b;
885 for (; iter_a != end_a; ++iter_a, ++iter_b)
890 ranks_model.push_back(*iter_a);
891 ranks_data.push_back(*iter_b);
900 double mu = double(ranks_data.size() + 1) / 2.;
904 double sum_model_data = 0;
905 double sqsum_data = 0;
906 double sqsum_model = 0;
908 for (
Int i = 0; i < dist; ++i)
910 sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
911 sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
912 sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
916 if (!sqsum_data || !sqsum_model)
921 return sum_model_data / (std::sqrt(sqsum_data) * std::sqrt(sqsum_model));
941 sort(data.begin(), data.end());
Invalid range exception.
Definition: Exception.h:257
Invalid value exception.
Definition: Exception.h:305
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
ptrdiff_t SignedSize
Signed size type, e.g. used for pointer differences.
Definition: Types.h:104
size_t Size
Size type, e.g. used for variables that hold the result of size().
Definition: Types.h:97
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:94
static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:683
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:134
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:116
static double covariance(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the covariance of two ranges of values.
Definition: StatisticFunctions.h:597
static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the third quantile of a range of values.
Definition: StatisticFunctions.h:267
static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
Helper function checking if two iterators are not equal.
Definition: StatisticFunctions.h:55
static double matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:722
double MeanAbsoluteDeviation(IteratorType begin, IteratorType end, double mean_of_numbers)
mean absolute deviation (MeanAbsoluteDeviation)
Definition: StatisticFunctions.h:211
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:103
static double absdev(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the absolute deviation of a range of values.
Definition: StatisticFunctions.h:571
static double rootMeanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the root mean square error (RMSE) for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:667
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:775
static double sd(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the standard deviation of a range of values.
Definition: StatisticFunctions.h:556
double MAD(IteratorType begin, IteratorType end, double median_of_numbers)
median absolute deviation (MAD)
Definition: StatisticFunctions.h:181
static double rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:870
static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b, IteratorType2 begin_a, IteratorType2 end_a)
Helper function checking if an iterator and a co-iterator both have a next element.
Definition: StatisticFunctions.h:87
static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the first quantile of a range of values.
Definition: StatisticFunctions.h:235
static void checkIteratorsEqual(IteratorType begin, IteratorType end)
Helper function checking if two iterators are equal.
Definition: StatisticFunctions.h:71
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:630
const double k
Definition: Constants.h:132
double half_raw
Definition: StatisticFunctions.h:40
double tail_fraction
Definition: StatisticFunctions.h:43
double blended
Definition: StatisticFunctions.h:39
T1::value_type quantile(const T1 &x, double q)
Returns the value of the q-th quantile (q in [0, 1]) in a sorted, non-empty vector x.
Definition: MathFunctions.h:453
double upper_fence
Definition: StatisticFunctions.h:42
double weight
Definition: StatisticFunctions.h:44
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition: StatisticFunctions.h:809
double tukeyUpperFence(IteratorType begin, IteratorType end, double k=1.5)
Tukey upper fence (UF) for outlier detection.
Definition: StatisticFunctions.h:351
double winsorizedQuantile(IteratorType begin, IteratorType end, double q, double upper_fence)
Quantile after winsorizing at an upper fence.
Definition: StatisticFunctions.h:412
double half_rob
Definition: StatisticFunctions.h:41
static double variance(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Definition: StatisticFunctions.h:529
double tailFractionAbove(IteratorType begin, IteratorType end, double threshold)
Fraction of values above a threshold.
Definition: StatisticFunctions.h:380
AdaptiveQuantileResult adaptiveQuantile(IteratorType begin, IteratorType end, double q, double k=1.5, double r_sparse=0.01, double r_dense=0.10)
Adaptive quantile that blends RAW and IQR-winsorized quantiles based on tail density beyond the Tukey upper fence.
Definition: StatisticFunctions.h:471
Result of adaptiveQuantile computation.
Definition: StatisticFunctions.h:38
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Helper class to gather (and dump) some statistics from a container, e.g. a vector<double>.
Definition: StatisticFunctions.h:927
double lowerq
Definition: StatisticFunctions.h:952
double variance
Definition: StatisticFunctions.h:952
SummaryStatistics()=default
T::value_type max
Definition: StatisticFunctions.h:953
SummaryStatistics(T &data)
Definition: StatisticFunctions.h:931
double median
Definition: StatisticFunctions.h:952
size_t count
Definition: StatisticFunctions.h:954
double mean
Definition: StatisticFunctions.h:952
double upperq
Definition: StatisticFunctions.h:952
T::value_type min
Definition: StatisticFunctions.h:953