error_summary.h
Go to the documentation of this file.
1 
8 #pragma once
9 
10 #include <vector>
11 #include "interop/util/map.h"
12 #include "interop/util/exception.h"
13 #include "interop/util/length_of.h"
21 
22 
23 namespace illumina { namespace interop { namespace logic { namespace summary
24 {
29  {
30  public:
32  error_cache_element() : m_error_sum(0.0f), m_error_count(0), m_max_cycle(0) {}
37  void update_error(const float error_rate)
38  {
39  if(std::isnan(error_rate)) return;
40  m_error_sum += error_rate;
41  m_error_count++;
42  }
47  void update_cycle(const size_t cycle_within_read)
48  {
49  m_max_cycle = std::max(m_max_cycle, cycle_within_read);
50  }
55  float average()const
56  {
57  if(m_error_count == 0) return std::numeric_limits<float>::quiet_NaN();
58  return divide(m_error_sum, static_cast<float>(m_error_count));
59  }
64  size_t max_cycle()const
65  {
66  return m_max_cycle;
67  }
72  bool is_empty()const
73  {
74  return m_error_count == 0;
75  }
76 
77  private:
78  float m_error_sum;
79  size_t m_error_count;
80  size_t m_max_cycle;
81  };
82 
/** Collect the average error rate of every tile, grouped by read and lane (and by surface).
 *
 * First pass: each record of the input range is mapped to its read through
 * `cycle_to_read[cycle - 1]` and folded into an error_cache_element keyed by (lane, tile).
 * The cycle within the read is always recorded; the error rate is accumulated only when that
 * cycle does not exceed `max_cycle` and is not flagged as the last cycle of the read.
 *
 * Second pass: every (lane, tile) element is skipped if it is empty or, when `max_cycle` is not
 * the largest size_t value, if its recorded max cycle is below `max_cycle`. Otherwise its average
 * is appended to `read_lane_cache(read, lane)` and, when the surface cache reports at least two
 * surfaces, to `read_lane_surface_cache(read, lane, surface - 1)`.
 *
 * Indices are validated with INTEROP_BOUNDS_CHECK; what happens on failure is defined in
 * exception.h (presumably an exception is thrown - TODO confirm).
 *
 * NOTE(review): the line that opens this declaration (return type, function name and the first
 * iterator parameter, used below as `beg`) is not shown in this listing.
 *
 * @tparam I iterator whose elements provide cycle(), lane(), tile() and error_rate()
 * @param beg iterator to the first error record (advanced by this function)
 * @param end iterator one past the last error record
 * @param max_cycle largest cycle within a read whose error rate is accumulated
 * @param cycle_to_read lookup from (cycle - 1) to the read information for that cycle
 * @param naming_method tile naming method, forwarded to logic::metric::surface
 * @param read_lane_cache destination for the per-tile averages, addressed by (read, lane)
 * @param read_lane_surface_cache destination for the per-tile averages, addressed by (read, lane, surface)
 */
95  template<typename I>
97  I end,
98  const size_t max_cycle,
99  const std::vector<read_cycle> &cycle_to_read,
100  const constants::tile_naming_method naming_method,
101  summary_by_lane_read<float> &read_lane_cache,
102  summary_by_lane_read<float> &read_lane_surface_cache)
104  {
// Key is (lane, tile); one ordered map of accumulators per read.
105  typedef std::pair<size_t, size_t> key_t;
106  typedef INTEROP_ORDERED_MAP(key_t, error_cache_element) error_tile_t;
107  typedef std::vector<error_tile_t> error_by_read_tile_t;
// Sized from read_lane_cache.size() - presumably the number of reads; TODO confirm.
108  error_by_read_tile_t tmp(read_lane_cache.size());
109  for (; beg != end; ++beg)
110  {
// Cycles are 1-based here: cycle() - 1 is used as the index into cycle_to_read.
111  INTEROP_ASSERT(beg->cycle() > 0);
112  INTEROP_BOUNDS_CHECK(beg->cycle() - 1, cycle_to_read.size(), "Cycle exceeds total cycles from Reads in the RunInfo.xml");
113  const read_cycle &read = cycle_to_read[beg->cycle() - 1];
114  const key_t key = std::make_pair(beg->lane(), beg->tile());
// read.number - 1 is the index of the read in tmp; a read number of 0 wraps around and
// is then caught by the bounds check below.
115  const size_t read_number = read.number - 1;
116  INTEROP_BOUNDS_CHECK(read_number, tmp.size(), "Read number exceeds total reads in the RunInfo.xml");
// The cycle is recorded even when the error rate is excluded below, so the second pass
// can tell how far the tile got within the read.
117  tmp[read_number][key].update_cycle(read.cycle_within_read);
// Cycles past the cutoff and the last cycle of a read never contribute to the average.
118  if (read.cycle_within_read > max_cycle || read.is_last_cycle_in_read) continue;
119  tmp[read_number][key].update_error(beg->error_rate());
120  }
121  for (size_t read = 0; read < tmp.size(); ++read)
122  {
123  for (typename error_tile_t::const_iterator ebeg = tmp[read].begin(), eend = tmp[read].end();
124  ebeg != eend; ++ebeg)
125  {
126  INTEROP_ASSERT(read < read_lane_cache.read_count());
// The lane number from the key is converted to a zero-based index.
127  const size_t lane = ebeg->first.first - 1;
128  INTEROP_BOUNDS_CHECK(lane, read_lane_cache.lane_count(), "Lane exceeds number of lanes in RunInfo.xml");
// With a finite cutoff, tiles that never reached the cutoff cycle are left out;
// the largest size_t value disables this filter.
129  if(max_cycle < std::numeric_limits<size_t>::max() && ebeg->second.max_cycle() < max_cycle) continue;
// Nothing was accumulated for this tile (no records, or only NaN/excluded cycles).
130  if(ebeg->second.is_empty()) continue;
131  const float err_avg = ebeg->second.average();
132  read_lane_cache(read, lane).push_back(err_avg);
// Per-surface values are only collected when there are at least two surfaces.
133  if(read_lane_surface_cache.surface_count() < 2) continue;
// The surface is derived from the tile id; it is asserted to be in
// [1, surface_count] and converted to a zero-based index.
134  const ::uint32_t surface = logic::metric::surface(static_cast< ::uint32_t >(ebeg->first.second), naming_method);
135  INTEROP_ASSERT(surface <= read_lane_surface_cache.surface_count());
136  INTEROP_ASSERT(surface > 0);
137  read_lane_surface_cache(read, lane, surface-1).push_back(err_avg);
138  }
139  }
140  }
141 
/** Turn cached per-tile values into summary statistics for every read, lane and surface of a run.
 *
 * For each read and lane of `run`, the values in `read_lane_cache(read, lane)` are passed to
 * summarize() and the resulting statistic is handed to the member function `func` of
 * `run[read][lane]`. When the surface cache reports two or more surfaces, the same is done for
 * each `run[read][lane][surface]` using `read_lane_surface_cache(read, lane, surface)`.
 *
 * NOTE(review): this listing does not show the line that opens the declaration (with the
 * `read_lane_cache` parameter), the `run` and `func` parameter lines, or the declaration of the
 * local `stat`; the names below are taken from their uses in the body.
 *
 * @param read_lane_cache cached values addressed by (read, lane)
 * @param read_lane_surface_cache cached values addressed by (read, lane, surface)
 * @param run run summary whose entries receive the statistics
 * @param func pointer to the member function that is called with each computed statistic
 * @param skip_median flag forwarded unchanged to summarize()
 */
151  summary_by_lane_read<float> &read_lane_surface_cache,
154  const bool skip_median=false)
155  {
156  for (size_t read = 0; read < run.size(); ++read)
157  {
158  INTEROP_ASSERT(read < read_lane_cache.read_count());
159  INTEROP_ASSERT(read < run.size());
160  for (size_t lane = 0; lane < run[read].size(); ++lane)
161  {
162  INTEROP_ASSERT(lane < run[read].size());
163  INTEROP_ASSERT(lane < read_lane_cache.lane_count());
// `stat` is cleared before every summarize() call, so each statistic starts from a clean state.
165  stat.clear();
166  summarize(read_lane_cache(read, lane).begin(),
167  read_lane_cache(read, lane).end(),
168  stat,
169  skip_median);
// Store the lane-level statistic through the caller-selected member function.
170  (run[read][lane].*func)(stat);
// Per-surface statistics are only produced when there are at least two surfaces.
171  if(read_lane_surface_cache.surface_count() < 2) continue;
172  for(size_t surface=0;surface<read_lane_surface_cache.surface_count();++surface)
173  {
174  stat.clear();
175  summarize(read_lane_surface_cache(read, lane, surface).begin(),
176  read_lane_surface_cache(read, lane, surface).end(),
177  stat,
178  skip_median);
179  (run[read][lane][surface].*func)(stat);
180  }
181  }
182  }
183  }
184 
/** Fill the error-rate entries of a run summary from a range of error records.
 *
 * Returns immediately when the input range is empty or `run` contains no reads. Otherwise:
 *  1. For each cycle cutoff 35, 50, 75 and 100 the caches are filled for that cutoff, summarized
 *     into the matching error_rate_35/50/75/100 entry via error_summary_from_cache, and cleared.
 *  2. The caches are filled once more with the largest size_t value as cutoff, and the overall
 *     `error_rate` statistic is written for every read/lane (and surface, when there are at
 *     least two surfaces).
 *  3. The cached per-tile averages are summed and counted to produce the per-read, non-index
 *     and total error rates of the run.
 *
 * NOTE(review): this listing does not show the line that opens the declaration (with the first
 * iterator parameter, used below as `beg`), the `run` parameter line, or the opening line of the
 * two cache-filling calls.
 *
 * @tparam I iterator over the error records
 * @param beg iterator to the first error record
 * @param end iterator one past the last error record
 * @param cycle_to_read lookup from cycle to read information, forwarded to the cache-filling call
 * @param naming_method tile naming method, forwarded to the cache-filling call
 * @param run run summary that receives the results
 * @param skip_median flag forwarded unchanged to summarize() / error_summary_from_cache
 * @throws model::index_out_of_bounds_exception as named in the INTEROP_THROW_SPEC below
 */
206  template<typename I>
208  I end,
209  const read_cycle_vector_t &cycle_to_read,
210  const constants::tile_naming_method naming_method,
212  const bool skip_median=false) INTEROP_THROW_SPEC((model::index_out_of_bounds_exception))
213  {
214  typedef summary_by_lane_read<float> summary_by_lane_read_t;
// Pointer to the stat_summary member function that stores one computed statistic.
215  typedef void (model::summary::stat_summary::*error_functor_t )(const model::summary::metric_stat&);
216  typedef std::pair<size_t, error_functor_t> cycle_functor_pair_t;
217 
// Nothing to summarize without records or without reads in the run.
218  if (beg == end) return;
219  if (run.size() == 0) return;
220  const size_t surface_count = run.surface_count();
// The number of records is passed to the cache constructors (presumably a size hint - TODO confirm).
221  summary_by_lane_read_t read_lane_cache(run, std::distance(beg, end));
222  summary_by_lane_read_t read_lane_surface_cache(run, std::distance(beg, end), surface_count);
223 
// Each cycle cutoff is paired with the stat_summary member that stores its statistic.
224  cycle_functor_pair_t cycle_functor_pairs[] = {
225  cycle_functor_pair_t(35u, &model::summary::stat_summary::error_rate_35),
226  cycle_functor_pair_t(50u, &model::summary::stat_summary::error_rate_50),
227  cycle_functor_pair_t(75u, &model::summary::stat_summary::error_rate_75),
228  cycle_functor_pair_t(100u, &model::summary::stat_summary::error_rate_100),
229  };
230  for (size_t i = 0; i < util::length_of(cycle_functor_pairs); ++i)
231  {
// NOTE(review): the line opening this call is not shown in this listing; the arguments match
// the parameter list of cache_error_by_lane_read after its first iterator - confirm.
233  end,
234  cycle_functor_pairs[i].first,
235  cycle_to_read,
236  naming_method,
237  read_lane_cache,
238  read_lane_surface_cache);
239  error_summary_from_cache(read_lane_cache,
240  read_lane_surface_cache,
241  run,
242  cycle_functor_pairs[i].second,
243  skip_median);
// Empty both caches so the next cutoff starts from scratch.
244  read_lane_cache.clear();
245  read_lane_surface_cache.clear();
246  }
247 
248 
// Final fill with the largest size_t value as the cycle cutoff.
// NOTE(review): the line opening this call is not shown in this listing either.
250  end,
251  std::numeric_limits<size_t>::max(),
252  cycle_to_read,
253  naming_method,
254  read_lane_cache,
255  read_lane_surface_cache);
256 
// Running sum and count of the cached per-tile averages: over all reads, and over
// non-index reads only.
257  float error_rate = 0;
258  size_t total = 0;
259  float error_rate_nonindex = 0;
260  size_t total_nonindex = 0;
261  for (size_t read = 0; read < run.size(); ++read)
262  {
263  INTEROP_ASSERT(read < run.size());
264  float error_rate_by_read = 0;
265  size_t total_by_read = 0;
266  for (size_t lane = 0; lane < run[read].size(); ++lane)
267  {
268  INTEROP_ASSERT(lane < run[read].size());
269  model::summary::metric_stat error_stat;
270  error_stat.clear();
271  summarize(read_lane_cache(read, lane).begin(),
272  read_lane_cache(read, lane).end(),
273  error_stat,
274  skip_median);
275  run[read][lane].error_rate(error_stat);
// Sum and count of the cached values of this lane feed the per-read average.
276  error_rate_by_read += std::accumulate(read_lane_cache(read, lane).begin(),
277  read_lane_cache(read, lane).end(),
278  float(0));
279  total_by_read += read_lane_cache(read, lane).size();
// Per-surface statistics are only produced when there are at least two surfaces.
280  if(surface_count < 2) continue;
281  for(size_t surface=0;surface<surface_count;++surface)
282  {
283  error_stat.clear();
284  summarize(read_lane_surface_cache(read, lane, surface).begin(),
285  read_lane_surface_cache(read, lane, surface).end(),
286  error_stat,
287  skip_median);
288  run[read][lane][surface].error_rate(error_stat);
289  }
290  }
// The per-read error rate is only written when at least one value was cached for the read.
291  if (total_by_read > 0)
292  run[read].summary().error_rate(divide(error_rate_by_read, static_cast<float>(total_by_read)));
293  error_rate += error_rate_by_read;
294  total += total_by_read;
295 
296  // Accumulate the error rate separately for non-index reads
297  if (!run[read].read().is_index())
298  {
299  error_rate_nonindex += error_rate_by_read;
300  total_nonindex += total_by_read;
301  }
302  }
// Unlike the per-read value, these are written even when the count is 0; the result in that
// case is whatever divide() returns for a zero denominator (divide() is not shown here).
303  run.nonindex_summary().error_rate(divide(error_rate_nonindex, static_cast<float>(total_nonindex)));
304  run.total_summary().error_rate(divide(error_rate, static_cast<float>(total)));
305  }
306 
307 }}}}
308 
const metric_summary & total_summary() const
Definition: run_summary.h:314
Definition: enum_description.h:15
size_t surface_count() const
Definition: run_summary.h:287
const metric_stat_t & error_rate_35() const
Definition: stat_summary.h:196
#define INTEROP_THROW_SPEC(SPEC)
Definition: exception_specification.h:15
inline::uint32_t surface(const ::uint32_t tile_id, const constants::tile_naming_method method)
Definition: tile_metric.h:93
size_t size() const
Definition: run_summary.h:224
float average() const
Definition: error_summary.h:55
void cache_error_by_lane_read(I beg, I end, const size_t max_cycle, const std::vector< read_cycle > &cycle_to_read, const constants::tile_naming_method naming_method, summary_by_lane_read< float > &read_lane_cache, summary_by_lane_read< float > &read_lane_surface_cache)
Definition: error_summary.h:96
size_t read_count() const
Definition: summary_statistics.h:106
error_cache_element()
Definition: error_summary.h:32
void update_cycle(const size_t cycle_within_read)
Definition: error_summary.h:47
size_t max_cycle() const
Definition: error_summary.h:64
#define INTEROP_ASSERT(TST)
Definition: assert.h:21
void error_summary_from_cache(summary_by_lane_read< float > &read_lane_cache, summary_by_lane_read< float > &read_lane_surface_cache, model::summary::run_summary &run, void(model::summary::stat_summary::*func)(const model::summary::metric_stat &), const bool skip_median=false)
Definition: error_summary.h:150
const metric_stat_t & error_rate_50() const
Definition: stat_summary.h:207
size_t lane_count() const
Definition: summary_statistics.h:115
def summary(run_metrics, level='Total', columns=None, dtype='f4', ignore_missing_columns=True, extra)
Definition: core.py:217
void summarize(I beg, I end, S &stat, const bool skip_median)
Definition: summary_statistics.h:149
Definition: map_cycle_to_read.h:19
void clear()
Definition: metric_stat.h:37
def read(run, valid_to_load=None, requires=None, search_paths=None, extra)
Definition: core.py:754
const metric_stat_t & error_rate_75() const
Definition: stat_summary.h:218
std::vector< read_cycle > read_cycle_vector_t
Definition: map_cycle_to_read.h:39
#define INTEROP_ORDERED_MAP(key_t, value_t)
Definition: map.h:23
float error_rate() const
Definition: metric_summary.h:52
#define INTEROP_BOUNDS_CHECK(VALUE, RANGE, MESSAGE)
Definition: exception.h:24
bool is_empty() const
Definition: error_summary.h:72
void summarize_error_metrics(I beg, I end, const read_cycle_vector_t &cycle_to_read, const constants::tile_naming_method naming_method, model::summary::run_summary &run, const bool skip_median=false) INTEROP_THROW_SPEC((model::index_out_of_bounds_exception))
Definition: error_summary.h:207
size_t length_of(const T &val)
Definition: length_of.h:55
size_t surface_count() const
Definition: summary_statistics.h:123
tile_naming_method
Definition: enums.h:294
const metric_stat_t & error_rate_100() const
Definition: stat_summary.h:229
float divide(const float num, const float div)
Definition: summary_statistics.h:231
const metric_summary & nonindex_summary() const
Definition: run_summary.h:342
void update_error(const float error_rate)
Definition: error_summary.h:37