statistics.h
Go to the documentation of this file.
1 
13 #pragma once
14 
15 #include <cstddef>
16 #include <limits>
17 #include <numeric>
18 #include <algorithm>
19 #include "interop/util/assert.h"
20 #include "interop/util/math.h"
21 
22 namespace illumina { namespace interop { namespace util
23 {
24 
25  namespace op
26  {
30  {
31  };
32 
35  template<class T, typename R, typename P1=parameter_none_type>
37  {
43  const_member_function_w(P1 param1, R (T::*func )(P1) const) : m_param1(param1), m_function(func)
44  { }
45 
52  template<class F>
53  F operator()(const F val, const T &obj) const
54  {
55  return val + operator()(obj);
56  }
63  float operator()(const float val, const T &obj) const
64  {
65  const float ret = static_cast<float>(operator()(obj));
66  if(std::isnan(ret)) return val;
67  return val + ret;
68  }
75  double operator()(const double val, const T &obj) const
76  {
77  const double ret = static_cast<double>(operator()(obj));
78  if(std::isnan(ret)) return val;
79  return val + ret;
80  }
81 
87  R operator()(const T &obj) const
88  {
89  return (obj.*m_function)(m_param1);
90  }
91 
92  private:
93  P1 m_param1;
94 
95  R (T::*m_function )(P1) const;
96  };
97 
100  template<class T, typename R>
102  {
107  const_member_function_w(R (T::*func )() const) : m_function(func)
108  { }
109 
116  template<class F>
117  F operator()(const F val, const T &obj) const
118  {
119  const F ret = static_cast<F>(operator()(obj));
120  return val + ret;
121  }
128  float operator()(const float val, const T &obj) const
129  {
130  const float ret = static_cast<float>(operator()(obj));
131  if(std::isnan(ret)) return val;
132  return val + ret;
133  }
140  double operator()(const double val, const T &obj) const
141  {
142  const double ret = static_cast<double>(operator()(obj));
143  if(std::isnan(ret)) return val;
144  return val + ret;
145  }
146 
152  R operator()(const T &obj) const
153  {
154  return (obj.*m_function)();
155  }
156 
157  private:
158  R (T::*m_function )() const;
159  };
160 
163  template<class T, typename R, typename P1=parameter_none_type>
165  {
171  { }
172 
179  bool operator()(const T &lhs, const T &rhs) const
180  {
181  return m_func(lhs) < m_func(rhs);
182  }
183 
184  private:
186  };
188  struct dummy_arg { dummy_arg(){}};
189 
196  template<class T, typename R, typename P2, typename P1>
197  const_member_function_w<T, R, P1> const_member_function(const P2& param1, R (T::*func )(P1) const)
198  {
199  return const_member_function_w<T, R, P1>(param1, func);
200  }
201 
207  template<class T, typename R>
209  {
210  return const_member_function_w<T, R>(func);
211  }
212 
213 
220  template<class T, typename R>
222  {
223  return const_member_function_w<T, R>(func);
224  }
225 
232  template<class T, typename R, typename P2, typename P1>
233  const_member_function_less_w<T, R, P1> const_member_function_less(const P2& param1, R (T::*func )(P1) const)
234  {
236  }
237 
243  template<class T, typename R>
245  {
247  }
248 
252  {
259  template<typename F, typename T>
260  F operator()(const F val1, const T &val2)
261  {
262  return static_cast<F>(val1 + val2);
263  }
264 
270  template<class T>
271  const T &operator()(const T &val)
272  {
273  return val;
274  }
275  };
276 
279  template<typename UnaryOp>
280  struct nan_check
281  {
286  nan_check(const UnaryOp &op) : m_op(op)
287  { }
288 
294  template<class T>
295  size_t operator()(const T &obj) const
296  {
297  return !std::isnan(m_op(obj));
298  }
299 
300  private:
301  UnaryOp m_op;
302  };
303  }
304 
312  template<typename I, typename O, typename F>
313  void outliers_lower(I beg, I end, const F bound, O out)
314  {
315  for (; beg != end; ++beg)
316  {
317  if (*beg < bound)
318  {
319  *out = *beg;
320  ++out;
321  }
322  else break;
323  }
324  }
325 
333  template<typename I, typename O, typename F>
334  void outliers_upper(I beg, I end, const F bound, O out)
335  {
336  if (end == beg) return;
337  for (--end; beg != end; --end)
338  {
339  if (*end > bound)
340  {
341  *out = *end;
342  ++out;
343  }
344  else return;
345  }
346  if (beg == end) return;
347  *out = *end;
348  ++out;
349 
350  }
351 
364  template<typename I>
365  I percentile(I beg, I end, const size_t percentile)
366  {
367  INTEROP_ASSERT(percentile > 0 && percentile <= 100);
368  const size_t n = static_cast<size_t>(std::distance(beg, end));
369  const size_t nth_index = static_cast<size_t>(std::ceil(percentile * n / 100.0) - 1);
370  I nth_element_iterator = beg + nth_index;
371  std::nth_element(beg, nth_element_iterator, end);
372  return nth_element_iterator;
373  }
374 
388  template<typename I, typename Compare>
389  I percentile(I beg, I end, const size_t percentile, Compare comp)
390  {
391  INTEROP_ASSERT(percentile > 0 && percentile <= 100);
392  const size_t n = static_cast<size_t>(std::distance(beg, end));
393  const size_t nth_index = static_cast<size_t>(std::ceil(percentile * n / 100.0) - 1);
394  I nth_element_iterator = beg + nth_index;
395  std::nth_element(beg, nth_element_iterator, end, comp);
396  return nth_element_iterator;
397  }
398 
408  template<typename F>
409  F interpolate_linear(const F y1, const F y2, const F x1, const F x2, const F xt)
410  {
411  return y1 + (y2 - y1) / (x2 - x1) * (xt - x1);
412  }
413 
421  template<typename F, typename I>
422  F percentile_sorted(I beg, I end, const size_t percentile)
423  {
424  INTEROP_ASSERT(beg != end);
425  INTEROP_ASSERT(percentile > 0 && percentile <= 100);
426  const size_t n = static_cast<size_t>(std::distance(beg, end));
427  if (n == 0) return std::numeric_limits<F>::quiet_NaN();
428  size_t nth_index = percentile * n / 100;
429  if ((n * percentile / 100.0f - nth_index) < 0.5f)
430  {
431  if (nth_index == 0) return *beg;
432  nth_index--;
433  }
434  if (nth_index >= (n - 1)) return *(end - 1);
435  const F y1 = *(beg + nth_index);
436  const F y2 = *(beg + nth_index + 1);
437  const F x1 = 100.0f * (nth_index + 0.5f) / n;
438  const F x2 = 100.0f * (nth_index + 0.5f + 1) / n;
439  return interpolate_linear(y1, y2, x1, x2, static_cast<float>(percentile));
440  }
449  template<typename F, typename I, typename Op>
450  F percentile_sorted(I beg, I end, const size_t percentile, Op op)
451  {
452  INTEROP_ASSERT(beg != end);
453  INTEROP_ASSERT(percentile > 0 && percentile <= 100);
454  const size_t n = static_cast<size_t>(std::distance(beg, end));
455  if (n == 0) return std::numeric_limits<F>::quiet_NaN();
456  size_t nth_index = percentile * n / 100;
457  if ((n * percentile / 100.0f - nth_index) < 0.5f)
458  {
459  if (nth_index == 0) return op(*beg);
460  nth_index--;
461  }
462  if (nth_index >= (n - 1)) return op(*(end - 1));
463  const F y1 = op(*(beg + nth_index));
464  const F y2 = op(*(beg + nth_index + 1));
465  const F x1 = 100.0f * (nth_index + 0.5f) / n;
466  const F x2 = 100.0f * (nth_index + 0.5f + 1) / n;
467  return interpolate_linear(y1, y2, x1, x2, static_cast<float>(percentile));
468  }
469 
470 // TODO: median using nth_element
478  template<typename I, typename UnaryOp>
479  I remove_nan(I beg, I end, UnaryOp op)
480  {
481  return std::stable_partition(beg, end, op::nan_check<UnaryOp>(op));
482  }
483 
492  template<typename I>
493  I median(I beg, I end)
494  {
495  return percentile(beg, end, 50);
496  }
497 
507  template<typename I, typename Compare>
508  I median(I beg, I end, Compare comp)
509  {
510  return percentile(beg, end, 50, comp);
511  }
520  template<typename F, typename I>
521  F median_interpolated(I beg, I end)
522  {
523  std::stable_sort(beg, end);
524  return percentile_sorted<F>(beg, end, 50);
525  }
535  template<typename F, typename I, typename Compare>
536  F median_interpolated(I beg, I end, Compare comp)
537  {
538  std::stable_sort(beg, end, comp);
539  return percentile_sorted<F>(beg, end, 50);
540  }
551  template<typename F, typename I, typename Compare, typename Op>
552  F median_interpolated(I beg, I end, Compare comp, Op op)
553  {
554  std::stable_sort(beg, end, comp);
555  return percentile_sorted<F>(beg, end, 50, op);
556  }
557 
558  //TODO: remove_nan
559  // TODO: nan_median using nth_element
560 
574  template<typename R, typename I, typename BinaryOp>
575  R nan_mean(I beg, I end, BinaryOp op)
576  {
577  ptrdiff_t n = 0;
578  R sum = 0;
579  for (; beg != end; ++beg)
580  {
581  R val = op(*beg);
582  if (std::isnan(val)) continue;
583  sum += val;
584  ++n;
585  }
586  if (n == 0)return std::numeric_limits<R>::quiet_NaN();
587  return sum / n;
588  }
589 
604  template<typename R, typename I, typename BinaryOp>
605  R nan_variance_with_mean(I beg, I end, const R mean_val, BinaryOp op)
606  {
607  ptrdiff_t n = 0;
608  R sum2 = 0;
609  R sum3 = 0;
610  for (; beg != end; ++beg)
611  {
612  const R val = op(*beg) - mean_val;
613  if (std::isnan(val)) continue;
614  sum2 += val * val;
615  sum3 += val;
616  ++n;
617  }
618  if (n <= 1) return std::numeric_limits<R>::quiet_NaN();
619  return (sum2 - sum3 * sum3 / n) / (n - 1);
620  }
621 
635  template<typename R, typename I, typename BinaryOp>
636  R nan_variance(I beg, I end, BinaryOp op)
637  {
638  const R mean_val = nan_mean<R>(beg, end, op);
639  return nan_variance_with_mean<R>(beg, end, mean_val, op);
640  }
641 
653  template<typename R, typename I, typename BinaryOp>
654  R mean(I beg, I end, BinaryOp op)
655  {
656  ptrdiff_t n = std::distance(beg, end);
657  if (n == 0) return 0;
658  return std::accumulate(beg, end, R(0), op) / R(n);
659  }
660 
673  template<typename R, typename I, typename BinaryOp>
674  R variance_with_mean(I beg, I end, const R mean_val, BinaryOp op)
675  {
676  ptrdiff_t n = std::distance(beg, end);
677  R sum2 = 0;
678  R sum3 = 0;
679  for (; beg != end; ++beg)
680  {
681  const R val = op(*beg) - mean_val;
682  sum2 += val * val;
683  sum3 += val;
684  }
685  if (n <= 1) return 0;
686  return (sum2 - sum3 * sum3 / n) / (n - 1);
687  }
688 
700  template<typename R, typename I, typename BinaryOp>
701  R variance(I beg, I end, BinaryOp op)
702  {
703  const R mean_val = mean<R>(beg, end, op);
704  return variance_with_mean<R>(beg, end, mean_val, op);
705  }
706 
707 
718  template<typename R, typename I>
719  R mean(I beg, I end)
720  {
721  return mean<R>(beg, end, op::operator_none());
722  }
723 
734  template<typename R, typename I>
735  R variance(I beg, I end)
736  {
737  return variance<R>(beg, end, op::operator_none());
738  }
739 
751  template<typename R, typename I>
752  R variance_with_mean(I beg, I end, const R mean)
753  {
754  return variance_with_mean<R>(beg, end, mean, op::operator_none());
755  }
756 
757 }}}
758 
759 
Definition: enum_description.h:15
size_t operator()(const T &obj) const
Definition: statistics.h:295
double operator()(const double val, const T &obj) const
Definition: statistics.h:140
Definition: statistics.h:280
void outliers_upper(I beg, I end, const F bound, O out)
Definition: statistics.h:334
F operator()(const F val, const T &obj) const
Definition: statistics.h:117
nan_check(const UnaryOp &op)
Definition: statistics.h:286
void outliers_lower(I beg, I end, const F bound, O out)
Definition: statistics.h:313
double operator()(const double val, const T &obj) const
Definition: statistics.h:75
bool operator()(const T &lhs, const T &rhs) const
Definition: statistics.h:179
float operator()(const float val, const T &obj) const
Definition: statistics.h:63
F percentile_sorted(I beg, I end, const size_t percentile)
Definition: statistics.h:422
const_member_function_less_w< T, R, P1 > const_member_function_less(const P2 &param1, R(T::*func)(P1) const)
Definition: statistics.h:233
F operator()(const F val, const T &obj) const
Definition: statistics.h:53
float operator()(const float val, const T &obj) const
Definition: statistics.h:128
#define INTEROP_ASSERT(TST)
Definition: assert.h:21
const_member_function_w< T, R, P1 > const_member_function(const P2 &param1, R(T::*func)(P1) const)
Definition: statistics.h:197
I percentile(I beg, I end, const size_t percentile)
Definition: statistics.h:365
R variance(I beg, I end, BinaryOp op)
Definition: statistics.h:701
F median_interpolated(I beg, I end)
Definition: statistics.h:521
const_member_function_w(R(T::*func)() const)
Definition: statistics.h:107
I median(I beg, I end)
Definition: statistics.h:493
R operator()(const T &obj) const
Definition: statistics.h:87
Definition: enums.h:301
F interpolate_linear(const F y1, const F y2, const F x1, const F x2, const F xt)
Definition: statistics.h:409
const T & operator()(const T &val)
Definition: statistics.h:271
dummy_arg()
Definition: statistics.h:188
R variance_with_mean(I beg, I end, const R mean_val, BinaryOp op)
Definition: statistics.h:674
const_member_function_less_w(const const_member_function_w< T, R, P1 > &func)
Definition: statistics.h:170
F operator()(const F val1, const T &val2)
Definition: statistics.h:260
R nan_variance_with_mean(I beg, I end, const R mean_val, BinaryOp op)
Definition: statistics.h:605
R mean(I beg, I end, BinaryOp op)
Definition: statistics.h:654
I remove_nan(I beg, I end, UnaryOp op)
Definition: statistics.h:479
const_member_function_w(P1 param1, R(T::*func)(P1) const)
Definition: statistics.h:43
Definition: statistics.h:188
R nan_variance(I beg, I end, BinaryOp op)
Definition: statistics.h:636
Definition: statistics.h:251
R nan_mean(I beg, I end, BinaryOp op)
Definition: statistics.h:575