OpenMS
StatisticFunctions.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Clemens Groepl, Johannes Junker, Mathias Walzer, Chris Bielow $
7 // --------------------------------------------------------------------------
8 #pragma once
9 
10 #include <vector>
12 #include <OpenMS/CONCEPT/Macros.h>
13 #include <OpenMS/CONCEPT/Types.h>
15 
16 #include <algorithm>
17 #include <cmath>
18 #include <iterator>
19 #include <numeric>
20 
21 namespace OpenMS
22 {
23 
24  namespace Math
25  {
/**
  @brief Result of an adaptiveQuantile() computation.

  All members carry in-class defaults, so a default-constructed object is the
  "no data" result (everything 0, no fence).
*/
struct AdaptiveQuantileResult
{
  double blended{0.0};        ///< weighted mix: (1 - weight) * half_rob + weight * half_raw
  double half_raw{0.0};       ///< quantile of the finite input values as they are
  double half_rob{0.0};       ///< quantile after winsorizing at upper_fence
  double upper_fence{std::numeric_limits<double>::infinity()}; ///< Tukey upper fence; +infinity means "no fence"
  double tail_fraction{0.0};  ///< fraction of finite values above upper_fence
  double weight{0.0};         ///< blend weight in [0, 1] given to half_raw
};
46 
54  template <typename IteratorType>
55  static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
56  {
57  if (begin == end)
58  {
59  throw Exception::InvalidRange(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
60  }
61  }
62 
70  template <typename IteratorType>
71  static void checkIteratorsEqual(IteratorType begin, IteratorType end)
72  {
73  if (begin != end)
74  {
75  throw Exception::InvalidRange(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
76  }
77  }
78 
86  template <typename IteratorType1, typename IteratorType2>
88  IteratorType1 begin_b, IteratorType1 end_b,
89  IteratorType2 begin_a, IteratorType2 end_a)
90  {
91  if ((begin_b == end_b) ^ (begin_a == end_a))
92  {
93  throw Exception::InvalidRange(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
94  }
95  }
96 
/**
  @brief Calculates the sum of a range of values.

  The accumulation is carried out in double precision, starting from 0.0.

  @param begin Start of the range
  @param end Past-the-end iterator of the range
  @return The sum; 0.0 for an empty range
*/
template <typename IteratorType>
static double sum(IteratorType begin, IteratorType end)
{
  double total = 0.0;
  for (IteratorType it = begin; it != end; ++it)
  {
    total = total + *it;
  }
  return total;
}
107 
/**
  @brief Calculates the arithmetic mean of a range of values.

  @param begin Start of the range
  @param end Past-the-end iterator of the range
  @return sum of the values divided by their count

  @exception Exception::InvalidRange is thrown if the range is empty
*/
template <typename IteratorType>
static double mean(IteratorType begin, IteratorType end)
{
  checkIteratorsNotNULL(begin, end);
  const double total = sum(begin, end);
  return total / std::distance(begin, end);
}
121 
133  template <typename IteratorType>
134  static double median(IteratorType begin, IteratorType end,
135  bool sorted = false)
136  {
137  checkIteratorsNotNULL(begin, end);
138  if (!sorted)
139  {
140  std::sort(begin, end);
141  }
142 
143  Size size = std::distance(begin, end);
144  if (size % 2 == 0) // even size => average two middle values
145  {
146  IteratorType it1 = begin;
147  std::advance(it1, size / 2 - 1);
148  IteratorType it2 = it1;
149  std::advance(it2, 1);
150  return (*it1 + *it2) / 2.0;
151  }
152  else
153  {
154  IteratorType it = begin;
155  std::advance(it, (size - 1) / 2);
156  return *it;
157  }
158  }
159 
160 
180  template <typename IteratorType>
181  double MAD(IteratorType begin, IteratorType end, double median_of_numbers)
182  {
183  std::vector<double> diffs;
184  diffs.reserve(std::distance(begin, end));
185  for (IteratorType it = begin; it != end; ++it)
186  {
187  diffs.push_back(fabs(*it - median_of_numbers));
188  }
189  return median(diffs.begin(), diffs.end(), false);
190  }
191 
/**
  @brief Mean absolute deviation.

  Averages |x - mean_of_numbers| over all elements of the range.

  @param begin Start of the range
  @param end Past-the-end iterator of the range
  @param mean_of_numbers Centre value the deviations are measured from
  @return Sum of the absolute deviations divided by the element count.
          No emptiness check is made, so an empty range yields 0.0 / 0.
*/
template <typename IteratorType>
double MeanAbsoluteDeviation(IteratorType begin, IteratorType end, double mean_of_numbers)
{
  double accumulated {0};
  IteratorType cursor = begin;
  while (cursor != end)
  {
    accumulated += fabs(*cursor - mean_of_numbers);
    ++cursor;
  }
  const auto element_count = std::distance(begin, end);
  return accumulated / element_count;
}
220 
234  template <typename IteratorType>
235  static double quantile1st(IteratorType begin, IteratorType end,
236  bool sorted = false)
237  {
238  checkIteratorsNotNULL(begin, end);
239 
240  if (!sorted)
241  {
242  std::sort(begin, end);
243  }
244 
245  Size size = std::distance(begin, end);
246  if (size % 2 == 0)
247  {
248  return median(begin, begin + (size/2)-1, true); //-1 to exclude median values
249  }
250  return median(begin, begin + (size/2), true);
251  }
252 
266  template <typename IteratorType>
267  static double quantile3rd(
268  IteratorType begin, IteratorType end, bool sorted = false)
269  {
270  checkIteratorsNotNULL(begin, end);
271  if (!sorted)
272  {
273  std::sort(begin, end);
274  }
275 
276  Size size = std::distance(begin, end);
277  return median(begin + (size/2)+1, end, true); //+1 to exclude median values
278  }
279 
305  template <typename IteratorType>
306  static double quantile(IteratorType begin, IteratorType end, double q)
307  {
308  OPENMS_PRECONDITION(std::is_sorted(begin, end),
309  "Math::quantile expects a sorted range. Sort before calling.");
310 
311  checkIteratorsNotNULL(begin, end);
312 
313  const Size n = std::distance(begin, end);
314  if (n == 0)
315  {
316  throw Exception::InvalidRange(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
317  }
318  if (q < 0.0 || q > 1.0)
319  {
320  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
321  "q must be in [0,1]", String(q));
322  }
323  if (n == 1) return static_cast<double>(*begin);
324 
325  const double pos = q * static_cast<double>(n - 1);
326  const Size i = static_cast<Size>(std::floor(pos));
327  const double frac = pos - static_cast<double>(i);
328 
329  const auto it_i = begin + static_cast<typename std::iterator_traits<IteratorType>::difference_type>(i);
330  if (frac == 0.0) return static_cast<double>(*it_i);
331 
332  const auto it_ip1 = it_i + 1;
333  return (1.0 - frac) * static_cast<double>(*it_i) + frac * static_cast<double>(*it_ip1);
334  }
335 
350  template <typename IteratorType>
351  double tukeyUpperFence(IteratorType begin, IteratorType end, double k = 1.5)
352  {
353  std::vector<double> v;
354  v.reserve(std::distance(begin, end));
355  for (auto it = begin; it != end; ++it)
356  {
357  if (std::isfinite(*it)) v.push_back(static_cast<double>(*it));
358  }
359  if (v.size() < 4) return std::numeric_limits<double>::infinity();
360 
361  std::sort(v.begin(), v.end());
362  const double q1 = quantile(v.begin(), v.end(), 0.25);
363  const double q3 = quantile(v.begin(), v.end(), 0.75);
364  const double iqr = q3 - q1;
365  if (!(iqr > 0.0)) return std::numeric_limits<double>::infinity();
366 
367  return q3 + k * iqr;
368  }
369 
/**
  @brief Fraction of finite values that lie strictly above a threshold.

  @param begin Start of the range
  @param end Past-the-end iterator of the range
  @param threshold Values greater than this count as "tail"
  @return (#finite values > threshold) / (#finite values); 0.0 if the range
          holds no finite value
*/
template <typename IteratorType>
double tailFractionAbove(IteratorType begin, IteratorType end, double threshold)
{
  size_t finite_count = 0;
  size_t above_count = 0;
  for (IteratorType it = begin; it != end; ++it)
  {
    const double value = static_cast<double>(*it);
    if (std::isfinite(value))
    {
      ++finite_count;
      if (value > threshold)
      {
        ++above_count;
      }
    }
  }
  if (finite_count == 0)
  {
    return 0.0;
  }
  return static_cast<double>(above_count) / static_cast<double>(finite_count);
}
392 
411  template <typename IteratorType>
412  double winsorizedQuantile(IteratorType begin, IteratorType end, double q, double upper_fence)
413  {
414  std::vector<double> v;
415  v.reserve(std::distance(begin, end));
416  for (auto it = begin; it != end; ++it)
417  {
418  const double x = static_cast<double>(*it);
419  if (!std::isfinite(x)) continue;
420  v.push_back(x);
421  }
422  if (v.empty()) return 0.0;
423 
424  if (std::isfinite(upper_fence))
425  {
426  for (double& x : v)
427  {
428  if (x > upper_fence) x = upper_fence;
429  if (x < 0.0) x = 0.0; // defensive; useful when passing |residual|
430  }
431  }
432  std::sort(v.begin(), v.end());
433  return quantile(v.begin(), v.end(), q);
434  }
435 
470  template <typename IteratorType>
471  AdaptiveQuantileResult adaptiveQuantile(IteratorType begin, IteratorType end, double q,
472  double k = 1.5,
473  double r_sparse = 0.01,
474  double r_dense = 0.10)
475  {
477 
478  // Copy finite values
479  std::vector<double> v;
480  v.reserve(std::distance(begin, end));
481  for (auto it = begin; it != end; ++it)
482  {
483  if (std::isfinite(*it)) v.push_back(static_cast<double>(*it));
484  }
485  if (v.empty())
486  {
487  return res;
488  }
489 
490  std::sort(v.begin(), v.end());
491  const double half_raw = quantile(v.begin(), v.end(), q);
492 
493  // Robust path (winsorization at Tukey fence)
494  const double uf = tukeyUpperFence(v.begin(), v.end(), k);
495  const double r = std::isfinite(uf) ? tailFractionAbove(v.begin(), v.end(), uf) : 0.0;
496  const double half_rob = winsorizedQuantile(v.begin(), v.end(), q, uf);
497 
498  // Blend weight w(r)
499  double w = 0.0;
500  if (r_dense <= r_sparse)
501  {
502  w = (r > r_sparse) ? 1.0 : 0.0;
503  }
504  else
505  {
506  const double t = (r - r_sparse) / (r_dense - r_sparse);
507  w = std::max(0.0, std::min(1.0, t));
508  }
509 
510  res.half_raw = half_raw;
511  res.half_rob = half_rob;
512  res.upper_fence = uf;
513  res.tail_fraction = r;
514  res.weight = w;
515  res.blended = (1.0 - w) * half_rob + w * half_raw;
516  return res;
517  }
518 
528  template <typename IteratorType>
529  static double variance(IteratorType begin, IteratorType end,
530  double mean = std::numeric_limits<double>::max())
531  {
532  checkIteratorsNotNULL(begin, end);
533  double sum_value = 0.0;
534  if (mean == std::numeric_limits<double>::max())
535  {
536  mean = Math::mean(begin, end);
537  }
538  for (IteratorType iter=begin; iter!=end; ++iter)
539  {
540  double diff = *iter - mean;
541  sum_value += diff * diff;
542  }
543  return sum_value / (std::distance(begin, end)-1);
544  }
545 
/**
  @brief Calculates the standard deviation of a range of values.

  @param begin Start of the range
  @param end Past-the-end iterator of the range
  @param mean A pre-computed mean, forwarded unchanged to variance()
  @return Square root of variance(begin, end, mean)

  @exception Exception::InvalidRange is thrown if the range is empty
*/
template <typename IteratorType>
static double sd(IteratorType begin, IteratorType end,
                 double mean = std::numeric_limits<double>::max())
{
  checkIteratorsNotNULL(begin, end);
  const double sample_variance = variance(begin, end, mean);
  return std::sqrt(sample_variance);
}
562 
570  template <typename IteratorType>
571  static double absdev(IteratorType begin, IteratorType end,
572  double mean = std::numeric_limits<double>::max())
573  {
574  checkIteratorsNotNULL(begin, end);
575  double sum_value = 0.0;
576  if (mean == std::numeric_limits<double>::max())
577  {
578  mean = Math::mean(begin, end);
579  }
580  for (IteratorType iter=begin; iter!=end; ++iter)
581  {
582  sum_value += *iter - mean;
583  }
584  return sum_value / std::distance(begin, end);
585  }
586 
596  template <typename IteratorType1, typename IteratorType2>
597  static double covariance(IteratorType1 begin_a, IteratorType1 end_a,
598  IteratorType2 begin_b, IteratorType2 end_b)
599  {
600  //no data or different lengths
601  checkIteratorsNotNULL(begin_a, end_a);
602 
603  double sum_value = 0.0;
604  double mean_a = Math::mean(begin_a, end_a);
605  double mean_b = Math::mean(begin_b, end_b);
606  IteratorType1 iter_a = begin_a;
607  IteratorType2 iter_b = begin_b;
608  for (; iter_a != end_a; ++iter_a, ++iter_b)
609  {
610  /* assure both ranges have the same number of elements */
611  checkIteratorsAreValid(begin_b, end_b, begin_a, end_a);
612  sum_value += (*iter_a - mean_a) * (*iter_b - mean_b);
613  }
614  /* assure both ranges have the same number of elements */
615  checkIteratorsEqual(iter_b, end_b);
616  Size n = std::distance(begin_a, end_a);
617  return sum_value / (n-1);
618  }
619 
629  template <typename IteratorType1, typename IteratorType2>
630  static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a,
631  IteratorType2 begin_b, IteratorType2 end_b)
632  {
633  //no data or different lengths
634  checkIteratorsNotNULL(begin_a, end_a);
635 
636  SignedSize dist = std::distance(begin_a, end_a);
637  double error = 0;
638  IteratorType1 iter_a = begin_a;
639  IteratorType2 iter_b = begin_b;
640  for (; iter_a != end_a; ++iter_a, ++iter_b)
641  {
642  /* assure both ranges have the same number of elements */
643  checkIteratorsAreValid(iter_b, end_b, iter_a, end_a);
644 
645  double tmp(*iter_a - *iter_b);
646  error += tmp * tmp;
647  }
648  /* assure both ranges have the same number of elements */
649  checkIteratorsEqual(iter_b, end_b);
650 
651  return error / dist;
652  }
653 
/**
  @brief Calculates the root mean square error (RMSE) for the values in
         [begin_a, end_a) and [begin_b, end_b).

  @param begin_a Start of the first range
  @param end_a Past-the-end iterator of the first range
  @param begin_b Start of the second range
  @param end_b Past-the-end iterator of the second range
  @return Square root of meanSquareError() for the same arguments

  @exception Exception::InvalidRange is thrown under the same conditions as
             in meanSquareError()
*/
template <typename IteratorType1, typename IteratorType2>
static double rootMeanSquareError(IteratorType1 begin_a, IteratorType1 end_a,
                                  IteratorType2 begin_b, IteratorType2 end_b)
{
  const double mse = meanSquareError(begin_a, end_a, begin_b, end_b);
  return std::sqrt(mse);
}
672 
682  template <typename IteratorType1, typename IteratorType2>
683  static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a,
684  IteratorType2 begin_b, IteratorType2 end_b)
685  {
686  //no data or different lengths
687  checkIteratorsNotNULL(begin_a, end_a);
688 
689  SignedSize dist = std::distance(begin_a, end_a);
690  SignedSize correct = dist;
691  IteratorType1 iter_a = begin_a;
692  IteratorType2 iter_b = begin_b;
693  for (; iter_a != end_a; ++iter_a, ++iter_b)
694  {
695  /* assure both ranges have the same number of elements */
696  checkIteratorsAreValid(iter_b, end_b, iter_a, end_a);
697  if ((*iter_a < 0 && *iter_b >= 0) || (*iter_a >= 0 && *iter_b < 0))
698  {
699  --correct;
700  }
701 
702  }
703  /* assure both ranges have the same number of elements */
704  checkIteratorsEqual(iter_b, end_b);
705 
706  return double(correct) / dist;
707  }
708 
/**
  @brief Calculates the Matthews correlation coefficient for the values in
         [begin_a, end_a) and [begin_b, end_b).

  Values < 0 are treated as the negative class, values >= 0 as the positive
  class. Each pair is counted as follows:
  a >= 0, b >= 0 is tp; a >= 0, b < 0 is fp; a < 0, b < 0 is tn; a < 0, b >= 0 is fn.

  @param begin_a Start of the first range
  @param end_a Past-the-end iterator of the first range
  @param begin_b Start of the second range
  @param end_b Past-the-end iterator of the second range
  @return (tp*tn - fp*fn) / sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)). The division
          is not guarded, so a zero denominator yields a non-finite result.

  @exception Exception::InvalidRange is thrown if the first range is empty or
             the two ranges differ in length
*/
template <typename IteratorType1, typename IteratorType2>
static double matthewsCorrelationCoefficient(
  IteratorType1 begin_a, IteratorType1 end_a,
  IteratorType2 begin_b, IteratorType2 end_b)
{
  //no data or different lengths
  // (both iterators must belong to the FIRST range; mixing begin_a with end_b
  //  compares unrelated iterators and does not compile for distinct iterator types)
  checkIteratorsNotNULL(begin_a, end_a);

  double tp = 0;
  double fp = 0;
  double tn = 0;
  double fn = 0;
  IteratorType1 iter_a = begin_a;
  IteratorType2 iter_b = begin_b;
  for (; iter_a != end_a; ++iter_a, ++iter_b)
  {
    /* assure both ranges have the same number of elements */
    checkIteratorsAreValid(iter_b, end_b, iter_a, end_a);

    if (*iter_a < 0 && *iter_b >= 0)
    {
      ++fn;
    }
    else if (*iter_a < 0 && *iter_b < 0)
    {
      ++tn;
    }
    else if (*iter_a >= 0 && *iter_b >= 0)
    {
      ++tp;
    }
    else if (*iter_a >= 0 && *iter_b < 0)
    {
      ++fp;
    }
  }
  /* assure both ranges have the same number of elements */
  checkIteratorsEqual(iter_b, end_b);

  return (tp * tn - fp * fn) / std::sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn));
}
762 
774  template <typename IteratorType1, typename IteratorType2>
776  IteratorType1 begin_a, IteratorType1 end_a,
777  IteratorType2 begin_b, IteratorType2 end_b)
778  {
779  //no data or different lengths
780  checkIteratorsNotNULL(begin_a, end_a);
781 
782  //calculate average
783  SignedSize dist = std::distance(begin_a, end_a);
784  double avg_a = std::accumulate(begin_a, end_a, 0.0) / dist;
785  double avg_b = std::accumulate(begin_b, end_b, 0.0) / dist;
786 
787  double numerator = 0;
788  double denominator_a = 0;
789  double denominator_b = 0;
790  IteratorType1 iter_a = begin_a;
791  IteratorType2 iter_b = begin_b;
792  for (; iter_a != end_a; ++iter_a, ++iter_b)
793  {
794  /* assure both ranges have the same number of elements */
795  checkIteratorsAreValid(iter_b, end_b, iter_a, end_a);
796  double temp_a = *iter_a - avg_a;
797  double temp_b = *iter_b - avg_b;
798  numerator += (temp_a * temp_b);
799  denominator_a += (temp_a * temp_a);
800  denominator_b += (temp_b * temp_b);
801  }
802  /* assure both ranges have the same number of elements */
803  checkIteratorsEqual(iter_b, end_b);
804  return numerator / std::sqrt(denominator_a * denominator_b);
805  }
806 
808  template <typename Value>
809  static void computeRank(std::vector<Value> & w)
810  {
811  Size i = 0; // main index
812  Size z = 0; // "secondary" index
813  Value rank = 0;
814  Size n = (w.size() - 1);
815  //store original indices for later
816  std::vector<std::pair<Size, Value> > w_idx;
817  for (Size j = 0; j < w.size(); ++j)
818  {
819  w_idx.push_back(std::make_pair(j, w[j]));
820  }
821  //sort
822  std::sort(w_idx.begin(), w_idx.end(),
823  [](const auto& pair1, const auto& pair2) { return pair1.second < pair2.second; });
824  //replace pairs <orig_index, value> in w_idx by pairs <orig_index, rank>
825  while (i < n)
826  {
827  // test for equality with tolerance:
828  if (fabs(w_idx[i + 1].second - w_idx[i].second) > 0.0000001 * fabs(w_idx[i + 1].second)) // no tie
829  {
830  w_idx[i].second = Value(i + 1);
831  ++i;
832  }
833  else // tie, replace by mean rank
834  {
835  // count number of ties
836  for (z = i + 1; (z <= n) && fabs(w_idx[z].second - w_idx[i].second) <= 0.0000001 * fabs(w_idx[z].second); ++z)
837  {
838  }
839  // compute mean rank of tie
840  rank = 0.5 * (i + z + 1);
841  // replace intensities by rank
842  for (Size v = i; v <= z - 1; ++v)
843  {
844  w_idx[v].second = rank;
845  }
846  i = z;
847  }
848  }
849  if (i == n)
850  w_idx[n].second = Value(n + 1);
851  //restore original order and replace elements of w with their ranks
852  for (Size j = 0; j < w.size(); ++j)
853  {
854  w[w_idx[j].first] = w_idx[j].second;
855  }
856  }
857 
869  template <typename IteratorType1, typename IteratorType2>
871  IteratorType1 begin_a, IteratorType1 end_a,
872  IteratorType2 begin_b, IteratorType2 end_b)
873  {
874  //no data or different lengths
875  checkIteratorsNotNULL(begin_a, end_a);
876 
877  // store and sort intensities of model and data
878  SignedSize dist = std::distance(begin_a, end_a);
879  std::vector<double> ranks_data;
880  ranks_data.reserve(dist);
881  std::vector<double> ranks_model;
882  ranks_model.reserve(dist);
883  IteratorType1 iter_a = begin_a;
884  IteratorType2 iter_b = begin_b;
885  for (; iter_a != end_a; ++iter_a, ++iter_b)
886  {
887  /* assure both ranges have the same number of elements */
888  checkIteratorsAreValid(iter_b, end_b, iter_a, end_a);
889 
890  ranks_model.push_back(*iter_a);
891  ranks_data.push_back(*iter_b);
892  }
893  /* assure both ranges have the same number of elements */
894  checkIteratorsEqual(iter_b, end_b);
895 
896  // replace entries by their ranks
897  computeRank(ranks_data);
898  computeRank(ranks_model);
899 
900  double mu = double(ranks_data.size() + 1) / 2.; // mean of ranks
901  // Was the following, but I think the above is more correct ... (Clemens)
902  // double mu = (ranks_data.size() + 1) / 2;
903 
904  double sum_model_data = 0;
905  double sqsum_data = 0;
906  double sqsum_model = 0;
907 
908  for (Int i = 0; i < dist; ++i)
909  {
910  sum_model_data += (ranks_data[i] - mu) * (ranks_model[i] - mu);
911  sqsum_data += (ranks_data[i] - mu) * (ranks_data[i] - mu);
912  sqsum_model += (ranks_model[i] - mu) * (ranks_model[i] - mu);
913  }
914 
915  // check for division by zero
916  if (!sqsum_data || !sqsum_model)
917  {
918  return 0;
919  }
920 
921  return sum_model_data / (std::sqrt(sqsum_data) * std::sqrt(sqsum_model));
922  }
923 
925  template<typename T>
927  {
928  SummaryStatistics() = default;
929 
930  // Ctor with data
932  {
933  count = data.size();
934  // Sanity check: avoid core dump if no data points present.
935  if (data.empty())
936  {
937  mean = variance = min = lowerq = median = upperq = max = 0.0;
938  }
939  else
940  {
941  sort(data.begin(), data.end());
942  mean = Math::mean(data.begin(), data.end());
943  variance = Math::variance(data.begin(), data.end(), mean);
944  min = data.front();
945  lowerq = Math::quantile1st(data.begin(), data.end(), true);
946  median = Math::median(data.begin(), data.end(), true);
947  upperq = Math::quantile3rd(data.begin(), data.end(), true);
948  max = data.back();
949  }
950  }
951 
952  double mean = 0, variance = 0 , lowerq = 0, median = 0, upperq = 0;
953  typename T::value_type min = 0, max = 0;
954  size_t count = 0;
955  };
956 
957  } // namespace Math
958 } // namespace OpenMS
959 
Invalid range exception.
Definition: Exception.h:257
Invalid value exception.
Definition: Exception.h:305
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:104
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:94
static double classificationRate(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the classification rate for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:683
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
Definition: StatisticFunctions.h:134
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:116
static double covariance(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the covariance of two ranges of values.
Definition: StatisticFunctions.h:597
static double quantile3rd(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the third quantile of a range of values.
Definition: StatisticFunctions.h:267
static void checkIteratorsNotNULL(IteratorType begin, IteratorType end)
Helper function checking if two iterators are not equal.
Definition: StatisticFunctions.h:55
static double matthewsCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Matthews correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:722
double MeanAbsoluteDeviation(IteratorType begin, IteratorType end, double mean_of_numbers)
mean absolute deviation (MeanAbsoluteDeviation)
Definition: StatisticFunctions.h:211
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:103
static double absdev(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the absolute deviation of a range of values.
Definition: StatisticFunctions.h:571
static double rootMeanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the root mean square error (RMSE) for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:667
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:775
static double sd(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Calculates the standard deviation of a range of values.
Definition: StatisticFunctions.h:556
double MAD(IteratorType begin, IteratorType end, double median_of_numbers)
median absolute deviation (MAD)
Definition: StatisticFunctions.h:181
static double rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:870
static void checkIteratorsAreValid(IteratorType1 begin_b, IteratorType1 end_b, IteratorType2 begin_a, IteratorType2 end_a)
Helper function checking if an iterator and a co-iterator both have a next element.
Definition: StatisticFunctions.h:87
static double quantile1st(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the first quantile of a range of values.
Definition: StatisticFunctions.h:235
static void checkIteratorsEqual(IteratorType begin, IteratorType end)
Helper function checking if two iterators are equal.
Definition: StatisticFunctions.h:71
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:630
const double k
Definition: Constants.h:132
double half_raw
Definition: StatisticFunctions.h:40
double tail_fraction
Definition: StatisticFunctions.h:43
double blended
Definition: StatisticFunctions.h:39
T1::value_type quantile(const T1 &x, double q)
Returns the value of the q-th quantile (q in [0, 1]) of a sorted, non-empty vector x.
Definition: MathFunctions.h:453
double upper_fence
Definition: StatisticFunctions.h:42
double weight
Definition: StatisticFunctions.h:44
static void computeRank(std::vector< Value > &w)
Replaces the elements in vector w by their ranks.
Definition: StatisticFunctions.h:809
double tukeyUpperFence(IteratorType begin, IteratorType end, double k=1.5)
Tukey upper fence (UF) for outlier detection.
Definition: StatisticFunctions.h:351
double winsorizedQuantile(IteratorType begin, IteratorType end, double q, double upper_fence)
Quantile after winsorizing at an upper fence.
Definition: StatisticFunctions.h:412
double half_rob
Definition: StatisticFunctions.h:41
static double variance(IteratorType begin, IteratorType end, double mean=std::numeric_limits< double >::max())
Definition: StatisticFunctions.h:529
double tailFractionAbove(IteratorType begin, IteratorType end, double threshold)
Fraction of values above a threshold.
Definition: StatisticFunctions.h:380
AdaptiveQuantileResult adaptiveQuantile(IteratorType begin, IteratorType end, double q, double k=1.5, double r_sparse=0.01, double r_dense=0.10)
Adaptive quantile that blends RAW and IQR-winsorized quantiles based on tail density beyond the Tukey...
Definition: StatisticFunctions.h:471
Result of adaptiveQuantile computation.
Definition: StatisticFunctions.h:38
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Helper class to gather (and dump) some statistics from a container, e.g. a vector<double>.
Definition: StatisticFunctions.h:927
double lowerq
Definition: StatisticFunctions.h:952
double variance
Definition: StatisticFunctions.h:952
T::value_type max
Definition: StatisticFunctions.h:953
SummaryStatistics(T &data)
Definition: StatisticFunctions.h:931
double median
Definition: StatisticFunctions.h:952
size_t count
Definition: StatisticFunctions.h:954
double mean
Definition: StatisticFunctions.h:952
double upperq
Definition: StatisticFunctions.h:952
T::value_type min
Definition: StatisticFunctions.h:953