quality_summary.h
Go to the documentation of this file.
1 
8 #pragma once
9 #include <vector>
10 #include <map>
11 #include "interop/util/exception.h"
17 
18 namespace illumina { namespace interop { namespace logic { namespace summary
19 {
22  struct qval_total
23  {
29  qval_total(const ::uint64_t qval=0, const ::uint64_t tot=0) : above_qval(qval), total(tot){}
34  float percent_above_qval()const{return 100*average();}
39  float average()const{return divide(static_cast<float>(above_qval),static_cast<float>(total));}
41  ::uint64_t above_qval;
43  ::uint64_t total;
44  };
48  class qval_cache
49  {
50  typedef std::vector< qval_total > qval_total_vector_t;
51  typedef std::vector < qval_total_vector_t > qval_total_vector2d_t;
52  private:
53  typedef std::vector< size_t > size_vector_t;
54  typedef std::vector< size_vector_t > size_vector2d_t;
55  private:
56  typedef std::vector< std::set<size_t> > tile_lookup_t;
57 
58  public:
64  qval_cache(const model::summary::run_summary &run, const size_t surface_count=1) :
65  m_read_lane_cache(run.size(), qval_total_vector_t(run.lane_count()*surface_count)),
66  m_metrics_in_read(run.size(), size_vector_t(run.lane_count()*surface_count)),
67  m_tile_lookup(run.lane_count()*surface_count),
68  m_surface_count(surface_count)
69  {}
78  const size_t read_number,
79  const size_t lane,
80  const size_t surface=0)
81  {
82  INTEROP_ASSERT(read_number < m_read_lane_cache.size());
83  const size_t lane_surface_index = index_of(lane, surface);
84  INTEROP_ASSERT(lane_surface_index < m_read_lane_cache[read_number].size());
85  m_read_lane_cache[read_number][lane_surface_index].above_qval+=metric.q30();
86  m_read_lane_cache[read_number][lane_surface_index].total += metric.total();
87 
88  INTEROP_ASSERT(lane_surface_index < m_tile_lookup.size());
89  m_tile_lookup[lane_surface_index].insert(metric.tile());
90 
91 
92  INTEROP_ASSERT(read_number < m_metrics_in_read.size());
93  INTEROP_ASSERT(lane_surface_index < m_metrics_in_read[read_number].size());
94  m_metrics_in_read[read_number][lane_surface_index] += 1;
95  }
103  const qval_total& total_for(const size_t read_number, const size_t lane, const size_t surface=0)const
104  {
105  INTEROP_ASSERT(read_number < m_read_lane_cache.size());
106  const size_t index = index_of(lane, surface);
107  INTEROP_ASSERT(index < m_read_lane_cache[read_number].size());
108  return m_read_lane_cache[read_number][index];
109  }
117  size_t metric_count(const size_t read_number, const size_t lane, const size_t surface=0)const
118  {
119  INTEROP_ASSERT(read_number < m_metrics_in_read.size());
120  const size_t index = index_of(lane, surface);
121  INTEROP_ASSERT(index < m_metrics_in_read[read_number].size());
122  return m_metrics_in_read[read_number][index];
123  }
130  size_t tile_count(const size_t lane, const size_t surface=0)const
131  {
132  const size_t index = index_of(lane, surface);
133  INTEROP_ASSERT(lane < m_tile_lookup.size());
134  return m_tile_lookup[index].size();
135  }
136 
137  private:
138  size_t index_of(const size_t lane, const size_t surface)const
139  {
140  return lane*m_surface_count+surface;
141  }
142 
143  private:
144  qval_total_vector2d_t m_read_lane_cache;
145  size_vector2d_t m_metrics_in_read;
146  tile_lookup_t m_tile_lookup;
147  size_t m_surface_count;
148  };
149 
170  template<typename I>
172  I end,
173  const read_cycle_vector_t& cycle_to_read,
174  const constants::tile_naming_method naming_method,
177  {
178  typedef model::summary::lane_summary lane_summary;
179  if( beg == end ) return;
180  if( run.size()==0 )return;
181  const size_t surface_count = run.surface_count();
182  qval_cache read_lane_cache(run);
183  qval_cache read_lane_surface_cache(run, surface_count);
184 
185  for(;beg != end;++beg)
186  {
187  INTEROP_ASSERT(beg->cycle() > 0);
188  INTEROP_BOUNDS_CHECK(beg->cycle() - 1, cycle_to_read.size(), "Cycle exceeds total cycles from Reads in the RunInfo.xml");
189  const size_t read_number = cycle_to_read[beg->cycle()-1].number-1;
190  if(cycle_to_read[beg->cycle()-1].is_last_cycle_in_read) continue;
191  const size_t lane = beg->lane()-1;
192  INTEROP_BOUNDS_CHECK(lane, run.lane_count(), "Lane exceeds number of lanes in RunInfo.xml");
193  read_lane_cache.add(*beg, read_number, lane);
194 
195  if(surface_count < 2) continue;
196  const size_t surface = beg->surface(naming_method);
197  INTEROP_ASSERT(surface > 0);
198  read_lane_surface_cache.add(*beg, read_number, lane, surface-1);
199  }
200 
201  ::uint64_t total_useable_calls = 0;
202  ::uint64_t useable_calls_gt_q30 = 0;
203  float overall_projected_yield = 0;
204  float yield_g = 0;
205 
206  ::uint64_t total_useable_calls_nonindex = 0;
207  ::uint64_t useable_calls_gt_q30_nonindex = 0;
208  float projected_yield_nonindex = 0;
209  float yield_g_nonindex = 0;
210  for(size_t read=0;read<run.size();++read)
211  {
212  INTEROP_ASSERT(read < run.size());
213  const size_t useable_cycles = run[read].read().useable_cycles();
214  ::uint64_t total_useable_calls_by_read=0;
215  ::uint64_t useable_calls_gt_q30_by_read = 0;
216  float read_projected_yield = 0;
217  for (size_t lane = 0; lane < run[read].size(); ++lane)
218  {
219  INTEROP_ASSERT(lane < run[read].size());
220  const size_t total_metrics_in_lane_read = read_lane_cache.metric_count(read, lane);
221  model::summary::stat_summary& stat_summary=run[read][lane];
222  if( total_metrics_in_lane_read > 0)
223  {
224  const float prediction_factor = divide(float(useable_cycles*read_lane_cache.tile_count(lane)),
225  float(total_metrics_in_lane_read));
226  const qval_total& curr_tot = read_lane_cache.total_for(read, lane);
227  total_useable_calls_by_read += curr_tot.total;
228  useable_calls_gt_q30_by_read += curr_tot.above_qval;
229  stat_summary.percent_gt_q30(curr_tot.percent_above_qval());
230  stat_summary.yield_g(curr_tot.total / 1e9f);
231  const float projected_yield = curr_tot.total * prediction_factor;
232  read_projected_yield += projected_yield;
233  stat_summary.projected_yield_g(
234  ::uint64_t(projected_yield + 0.5f) / 1e9f);
235  }
236  else
237  {
238  float total = 0;
239  float metric_count = 0;
240  for(size_t read_index=0, read_count=read+1;read_index<read_count;++read_index)
241  {
242  if(read_lane_cache.total_for(read_index, lane).total > 0)
243  {
244  total += run[read_index][lane].projected_yield_g();
245  metric_count += static_cast<float>(run[read_index].read().useable_cycles());
246  }
247  }
248  if(metric_count > 0) total /= metric_count;
249  const float projected_yield = total*useable_cycles * 1e9f;
250  stat_summary.projected_yield_g(::uint64_t(projected_yield+0.5f)/ 1e9f);
251  read_projected_yield += projected_yield;
252  }
253  if(surface_count < 2) continue;
254  for(size_t surface=0;surface<surface_count;++surface)
255  {
256  const size_t total_in_surface_lane_read = read_lane_surface_cache.metric_count(read, lane, surface);
257  model::summary::stat_summary& stat_summary_surface=run[read][lane][surface];
258  if( total_in_surface_lane_read > 0)
259  {
260  const float prediction_factor = divide(float(useable_cycles*read_lane_surface_cache.tile_count(lane, surface)),
261  float(total_metrics_in_lane_read));
262 
263  const qval_total& curr_tot = read_lane_surface_cache.total_for(read, lane, surface);
264  stat_summary_surface.percent_gt_q30(curr_tot.percent_above_qval());
265  stat_summary_surface.yield_g(curr_tot.total / 1e9f);
266  const float projected_yield = curr_tot.total * prediction_factor;
267  stat_summary_surface.projected_yield_g(
268  ::uint64_t(projected_yield + 0.5f) / 1e9f);
269  }
270  else
271  {
272  float total = 0;
273  float metric_count = 0;
274  for(size_t r=0, n=read+1;r<n;++r)
275  {
276  if(read_lane_surface_cache.total_for(r, lane, surface).total > 0)
277  {
278  total += run[r][lane][surface].projected_yield_g();
279  metric_count += static_cast<float>(run[r].read().useable_cycles());
280  }
281  }
282  if(metric_count > 0) total /= metric_count;
283  const float projected_yield = total*useable_cycles * 1e9f;
284  stat_summary_surface.projected_yield_g(::uint64_t(projected_yield+0.5f)/ 1e9f);
285  }
286  }
287  }
288  run[read].summary().projected_yield_g(::uint64_t(read_projected_yield+0.5f)/ 1e9f);
289  run[read].summary().yield_g(std::accumulate(run[read].begin(),
290  run[read].end(),
291  float(0),
292  util::op::const_member_function(&lane_summary::yield_g)));
293  run[read].summary().percent_gt_q30(100 * divide(float(useable_calls_gt_q30_by_read), float(total_useable_calls_by_read)));
294  total_useable_calls += total_useable_calls_by_read;
295  useable_calls_gt_q30 += useable_calls_gt_q30_by_read;
296  overall_projected_yield += read_projected_yield;
297  if(!std::isnan(run[read].summary().yield_g()))
298  yield_g += run[read].summary().yield_g();
299  // Certain metrics can be biased by the index read, e.g. C1 intensity, total yield
300  // So, we include totals that skip the index reads
301  if(!run[read].read().is_index())
302  {
303  total_useable_calls_nonindex += total_useable_calls_by_read;
304  useable_calls_gt_q30_nonindex += useable_calls_gt_q30_by_read;
305  projected_yield_nonindex += read_projected_yield;
306  if(!std::isnan(run[read].summary().yield_g()))
307  yield_g_nonindex += run[read].summary().yield_g();
308  }
309  }
310  run.nonindex_summary().projected_yield_g(::uint64_t(projected_yield_nonindex+0.5f)/1e9f);
311  run.nonindex_summary().yield_g(yield_g_nonindex);
312  run.nonindex_summary().percent_gt_q30(100 * divide(float(useable_calls_gt_q30_nonindex), float(total_useable_calls_nonindex)));
313 
314  run.total_summary().projected_yield_g(::uint64_t(overall_projected_yield+0.5f)/1e9f);
315  run.total_summary().yield_g(yield_g);
316  run.total_summary().percent_gt_q30(100 * divide(float(useable_calls_gt_q30), float(total_useable_calls)));
317  }
318 }}}}
319 
qval_total(const ::uint64_t qval=0, const ::uint64_t tot=0)
Definition: quality_summary.h:29
Definition: enum_description.h:15
uint_t tile() const
Definition: base_metric.h:284
#define INTEROP_THROW_SPEC(SPEC)
Definition: exception_specification.h:15
inline::uint32_t surface(const ::uint32_t tile_id, const constants::tile_naming_method method)
Definition: tile_metric.h:93
uint_t total() const
Definition: q_collapsed_metric.h:154
size_t tile_count(const size_t lane, const size_t surface=0) const
Definition: quality_summary.h:130
Definition: quality_summary.h:22
float projected_yield_g() const
Definition: stat_summary.h:83
void add(const model::metrics::q_collapsed_metric &metric, const size_t read_number, const size_t lane, const size_t surface=0)
Definition: quality_summary.h:77
#define INTEROP_ASSERT(TST)
Definition: assert.h:21
const_member_function_w< T, R, P1 > const_member_function(const P2 &param1, R(T::*func)(P1) const)
Definition: statistics.h:197
def summary(run_metrics, level='Total', columns=None, dtype='f4', ignore_missing_columns=True, extra)
Definition: core.py:217
::uint64_t total
Definition: quality_summary.h:43
size_t metric_count(const size_t read_number, const size_t lane, const size_t surface=0) const
Definition: quality_summary.h:117
float average() const
Definition: quality_summary.h:39
const qval_total & total_for(const size_t read_number, const size_t lane, const size_t surface=0) const
Definition: quality_summary.h:103
def read(run, valid_to_load=None, requires=None, search_paths=None, extra)
Definition: core.py:752
::uint64_t above_qval
Definition: quality_summary.h:41
std::vector< read_cycle > read_cycle_vector_t
Definition: map_cycle_to_read.h:39
float percent_gt_q30() const
Definition: stat_summary.h:61
#define INTEROP_BOUNDS_CHECK(VALUE, RANGE, MESSAGE)
Definition: exception.h:24
float yield_g() const
Definition: stat_summary.h:72
float percent_above_qval() const
Definition: quality_summary.h:34
Definition: quality_summary.h:48
void summarize_collapsed_quality_metrics(I beg, I end, const read_cycle_vector_t &cycle_to_read, const constants::tile_naming_method naming_method, model::summary::run_summary &run)
Definition: quality_summary.h:171
tile_naming_method
Definition: enums.h:294
qval_cache(const model::summary::run_summary &run, const size_t surface_count=1)
Definition: quality_summary.h:64
uint_t q30() const
Definition: q_collapsed_metric.h:146
float divide(const float num, const float div)
Definition: summary_statistics.h:231