tile_summary.h
Go to the documentation of this file.
1 
8 #pragma once
9 
10 #include <vector>
11 #include "interop/util/exception.h"
16 
17 namespace illumina { namespace interop { namespace logic { namespace summary
18 {
// Summarizes the cached extended tile metrics with nan_summarize and stores
// the resulting statistic as percent_occupied on `stat_summary`.
//
// extended_tile_data: cached extended tile metrics to summarize
// stat_summary: destination; only its percent_occupied statistic is written here
// skip_median: forwarded unchanged to nan_summarize
//
// NOTE(review): this listing omits source lines 30 and 34-35. `stat` is used
// below without a visible declaration, and nan_summarize takes an `op` and a
// `comp` argument between `stat` and `skip_median` - presumably those are the
// missing lines; confirm against the original header.
25  template<class ExtendedTileVector>
26  void update_extended_tile_summary_from_cache(std::vector<ExtendedTileVector>& extended_tile_data,
27  model::summary::stat_summary& stat_summary,
28  const bool skip_median)
29  {
31  nan_summarize(extended_tile_data.begin(),
32  extended_tile_data.end(),
33  stat,
36  skip_median);
37  stat_summary.percent_occupied(stat);
38  }
39 
40 
// Fills the tile-level fields of `stat_summary` from the cached tile metrics.
//
// Five nan_summarize passes over `tile_data` populate, in order: density,
// density_pf, cluster_count, cluster_count_pf and percent_pf. Two
// nan_accumulate passes, each seeded with uint64_t(0), then populate reads and
// reads_pf.
//
// tile_data: cached tile metrics to summarize
// stat_summary: destination summary that receives the seven values above
// skip_median: forwarded unchanged to every nan_summarize call
//
// NOTE(review): this listing omits several source lines (52, 56-57, 64-65,
// 72-73, 80-81, 88-89, 95-96, 100-101). The declaration of `stat`, the
// accessor/comparator arguments of each nan_summarize call and the tail of
// both nan_accumulate calls are therefore not visible here; which tile_metric
// field feeds each pass must be confirmed against the original header.
47  template<class TileVector>
48  void update_tile_summary_from_cache(std::vector<TileVector>& tile_data,
49  model::summary::stat_summary& stat_summary,
50  const bool skip_median)
51  {
53  nan_summarize(tile_data.begin(),
54  tile_data.end(),
55  stat,
58  skip_median);
59  stat_summary.density(stat);
    // `stat` is reused as the scratch accumulator, so it is cleared before each further pass.
60  stat.clear();
61  nan_summarize(tile_data.begin(),
62  tile_data.end(),
63  stat,
66  skip_median);
67  stat_summary.density_pf(stat);
68  stat.clear();
69  nan_summarize(tile_data.begin(),
70  tile_data.end(),
71  stat,
74  skip_median);
75  stat_summary.cluster_count(stat);
76  stat.clear();
77  nan_summarize(tile_data.begin(),
78  tile_data.end(),
79  stat,
82  skip_median);
83  stat_summary.cluster_count_pf(stat);
84  stat.clear();
85  nan_summarize(tile_data.begin(),
86  tile_data.end(),
87  stat,
90  skip_median);
91  stat_summary.percent_pf(stat);
    // Read totals are accumulated as unsigned 64-bit integers rather than summarized as statistics.
92  stat_summary.reads(nan_accumulate(tile_data.begin(),
93  tile_data.end(),
94  uint64_t(0),
97  stat_summary.reads_pf(nan_accumulate(tile_data.begin(),
98  tile_data.end(),
99  uint64_t(0),
102  }
// update_read_summary: fills the read-level fields of `stat_summary`
// (percent_aligned, prephasing, phasing) from `read_data_cache` and returns
// the count produced by the percent_aligned pass.
//
// NOTE(review): the declaration line (source line 110) is missing from this
// listing; per the page's cross-reference index it reads
//   size_t update_read_summary(summary_by_lane_read<model::metrics::read_metric>::vector_t &read_data_cache, ...)
// Source lines 114, 118-121, 128-129 and 136-137 are missing as well, so the
// declaration of `stat` and the accessor/comparator arguments of each
// nan_summarize call are not visible here.
111  model::summary::stat_summary& stat_summary,
112  const bool skip_median)
113  {
    // Only the first pass keeps nan_summarize's return value; presumably it is
    // the number of non-NaN entries (inferred from the variable name - confirm).
115  const size_t non_nan = nan_summarize(read_data_cache.begin(),
116  read_data_cache.end(),
117  stat,
122  skip_median);
123  stat_summary.percent_aligned(stat);
    // `stat` is reused as the scratch accumulator, so it is cleared before each further pass.
124  stat.clear();
125  nan_summarize(read_data_cache.begin(),
126  read_data_cache.end(),
127  stat,
130  skip_median);
131  stat_summary.prephasing(stat);
132  stat.clear();
133  nan_summarize(read_data_cache.begin(),
134  read_data_cache.end(),
135  stat,
138  skip_median);
139  stat_summary.phasing(stat);
    // The returned count comes from the percent_aligned pass only; callers use it as a weight.
140  return non_nan;
141  }
// summarize_tile_metrics: summarizes the tile metrics in [beg, end) into the
// run summary `run` at lane, surface, read, non-index and total level.
//
// beg/end: range of tile metrics to summarize
// naming_method: passed to each metric's surface() to determine its surface
// run: destination run summary, indexed as run[read][lane][surface]
// skip_median: forwarded to the per-lane/per-surface summary helpers
//
// Declared with INTEROP_THROW_SPEC((model::index_out_of_bounds_exception)).
//
// NOTE(review): source lines 165 (function name and `beg`) and 168 (the `run`
// parameter) are missing from this listing; per the page's cross-reference
// index the signature is
//   void summarize_tile_metrics(I beg, I end, const constants::tile_naming_method naming_method,
//                               model::summary::run_summary &run, const bool skip_median=false)
164  template<typename I>
166  I end,
167  const constants::tile_naming_method naming_method,
169  const bool skip_median=false)
170  INTEROP_THROW_SPEC((model::index_out_of_bounds_exception))
171  {
172  typedef typename model::metrics::tile_metric::read_metric_vector read_metric_vector_t;
173  typedef typename read_metric_vector_t::const_iterator const_read_metric_iterator;
174  typedef std::vector<model::metrics::tile_metric> tile_vector_t;
175  typedef std::vector<tile_vector_t> tile_by_lane_vector_t;
176 
    // Nothing to summarize for an empty metric range or an empty run summary
    // (`run` is indexed by read in the loops below).
177  if (beg == end) return;
178  if (run.size() == 0)return;
179  const size_t surface_count = run.surface_count();
180  const ptrdiff_t n = std::distance(beg, end);
181 
    // Two tile caches: one bucket per lane, and one bucket per (lane, surface)
    // laid out as lane*surface_count + zero-based surface.
182  tile_by_lane_vector_t tile_data_by_lane(run.lane_count());
183  reserve(tile_data_by_lane.begin(), tile_data_by_lane.end(), n);
184  tile_by_lane_vector_t tile_data_by_lane_surface(run.lane_count()*surface_count);
185  reserve(tile_data_by_lane_surface.begin(), tile_data_by_lane_surface.end(), n);
186 
    // Matching caches for the per-read metrics carried by each tile metric.
187  summary_by_lane_read<model::metrics::read_metric> read_data_by_lane_read(run, n);
188  summary_by_lane_read<model::metrics::read_metric> read_data_by_surface_lane_read(run, n, surface_count);
189 
    // Pass 1: distribute every tile metric (and its read metrics) into the caches.
190  for (; beg != end; ++beg)
191  {
192  const size_t surface = beg->surface(naming_method);
    // Surfaces are treated as 1-based: `surface-1` is used as the index below.
193  INTEROP_ASSERT(surface > 0);
    // Lane numbers are converted to a zero-based index and checked against the lane cache size.
194  const size_t lane = beg->lane() - 1;
195  INTEROP_BOUNDS_CHECK(lane, tile_data_by_lane.size(), "Lane exceeds number of lanes in RunInfo.xml");
    // `beg->lane() - 1` is the same value as `lane` computed above.
196  tile_data_by_lane[beg->lane() - 1].push_back(*beg);// TODO: make more efficient by copying only tile data
197  for (const_read_metric_iterator rb = beg->read_metrics().begin(), re = beg->read_metrics().end();
198  rb != re; ++rb)
199  {
200  const size_t read = rb->read() - 1;
201  INTEROP_BOUNDS_CHECK(read, read_data_by_lane_read.read_count(), "Read exceeds number of reads in RunInfo.xml");
202  read_data_by_lane_read(read, lane).push_back(*rb);
    // Per-surface caches are only filled when the run has at least two surfaces.
203  if(surface_count < 2) continue;
204  read_data_by_surface_lane_read(read, lane, surface-1).push_back(*rb);
205  }
206  if(surface_count < 2) continue;
    // NOTE(review): `index` is not bounds-checked in the visible code; only
    // `surface > 0` is asserted. TODO confirm that surface <= surface_count
    // is guaranteed by the caller or by surface().
207  const size_t index = lane*surface_count+(surface-1);
208  tile_data_by_lane_surface[index].push_back(*beg);// TODO: make more efficient by copying only tile data
209  }
210 
211 
212  //reads and reads pf
213  // percent pf
214  INTEROP_ASSERT(run.size() > 0);
215 
    // Pass 2: tile-level statistics are computed once into read 0 and then
    // copied to every other read of the same lane (and lane/surface).
217  for (size_t lane = 0; lane < run[0].size(); ++lane)
218  {
219  INTEROP_ASSERT(lane < tile_data_by_lane.size());
220  INTEROP_ASSERT(lane < run[0].size());
221  update_tile_summary_from_cache(tile_data_by_lane[lane], run[0][lane], skip_median);
222 
223  for (size_t read = 1; read < run.size(); ++read)
224  {
225  INTEROP_ASSERT(read < run.size());
226  run[read][lane].density(run[0][lane].density());
227  run[read][lane].density_pf(run[0][lane].density_pf());
228  run[read][lane].cluster_count(run[0][lane].cluster_count());
229  run[read][lane].cluster_count_pf(run[0][lane].cluster_count_pf());
230  run[read][lane].percent_pf(run[0][lane].percent_pf());
231  run[read][lane].reads(run[0][lane].reads());
232  run[read][lane].reads_pf(run[0][lane].reads_pf());
233  }
234  if(surface_count < 2) continue;
235  for(size_t surface=0;surface<surface_count;++surface)
236  {
237 
238  update_tile_summary_from_cache(tile_data_by_lane_surface[lane * surface_count + surface],
239  run[0][lane][surface], skip_median);
240  for (size_t read = 1; read < run.size(); ++read)
241  {
242  INTEROP_ASSERT(read < run.size());
243  run[read][lane][surface].density(run[0][lane][surface].density());
244  run[read][lane][surface].density_pf(run[0][lane][surface].density_pf());
245  run[read][lane][surface].cluster_count(run[0][lane][surface].cluster_count());
246  run[read][lane][surface].cluster_count_pf(run[0][lane][surface].cluster_count_pf());
247  run[read][lane][surface].percent_pf(run[0][lane][surface].percent_pf());
248  run[read][lane][surface].reads(run[0][lane][surface].reads());
249  run[read][lane][surface].reads_pf(run[0][lane][surface].reads_pf());
250  }
251  }
252  }
    // Pass 3 accumulators: percent_aligned sums are weighted by the count
    // returned from update_read_summary; the `total*` values are those weights.
253  float percent_aligned = 0;
254  size_t total = 0;
255  float percent_aligned_nonindex = 0;
256  size_t total_nonindex = 0;
257  double cluster_count_raw = 0;
258  double cluster_count_pf = 0;
259  uint64_t total_reads_raw = 0;
260  uint64_t total_reads_pf = 0;
261  for (size_t read = 0; read < run.size(); ++read)
262  {
263  INTEROP_ASSERT(read < run.size());
264  float percent_aligned_by_read = 0;
265  size_t total_by_read = 0;
266  for (size_t lane = 0; lane < run[read].size(); ++lane)
267  {
268  INTEROP_ASSERT(lane < run[0].size());
    // Cluster totals are accumulated only while processing read 0, so each
    // lane is counted once; later reads reuse the same running totals.
269  if(read == 0)
270  {
271  cluster_count_pf += run[read][lane].reads_pf();
272  cluster_count_raw += run[read][lane].reads();
273  }
274  const size_t non_nan = update_read_summary(read_data_by_lane_read(read, lane),
275  run[read][lane],
276  skip_median);
    // NOTE(review): a lane with a zero count contributes nothing to the
    // weighted mean, and this `continue` also skips the per-surface
    // update_read_summary calls below for that lane - confirm this is intended.
277  if(non_nan == 0) continue;
278  INTEROP_ASSERT(!std::isnan(run[read][lane].percent_aligned().mean()));
279  percent_aligned_by_read += run[read][lane].percent_aligned().mean() * non_nan;
280  total_by_read += non_nan;
281  if(surface_count < 2) continue;
282  for(size_t surface=0;surface<surface_count;++surface)
283  {
284  update_read_summary(read_data_by_surface_lane_read(read, lane, surface),
285  run[read][lane][surface],
286  skip_median);
287  }
288  }
    // Every read summary receives the same reads / cluster counts (the totals gathered at read 0).
289  run[read].summary().reads(static_cast<uint64_t>(cluster_count_raw));
290  run[read].summary().reads_pf(static_cast<uint64_t>(cluster_count_pf));
291  run[read].summary().cluster_count(cluster_count_raw);
292  run[read].summary().cluster_count_pf(cluster_count_pf);
293  total_reads_raw = static_cast<uint64_t>(cluster_count_raw);
294  total_reads_pf = static_cast<uint64_t>(cluster_count_pf);
295  run[read].summary().percent_aligned(divide(percent_aligned_by_read, float(total_by_read)));
296  percent_aligned += percent_aligned_by_read;
297  total += total_by_read;
    // Index reads are left out of the non-index percent_aligned average.
298  if (!run[read].read().is_index())
299  {
300  percent_aligned_nonindex += percent_aligned_by_read;
301  total_nonindex += total_by_read;
302  }
303  }
    // Run-level summaries: percent_aligned differs between non-index and
    // total; reads and cluster counts are written identically to both.
304  run.nonindex_summary().percent_aligned(divide(percent_aligned_nonindex, static_cast<float>(total_nonindex)));
305  run.total_summary().percent_aligned(divide(percent_aligned, static_cast<float>(total)));
306  run.nonindex_summary().reads(total_reads_raw);
307  run.total_summary().reads(total_reads_raw);
308  run.nonindex_summary().reads_pf(total_reads_pf);
309  run.total_summary().reads_pf(total_reads_pf);
310  run.nonindex_summary().cluster_count(cluster_count_raw);
311  run.total_summary().cluster_count(cluster_count_raw);
312  run.nonindex_summary().cluster_count_pf(cluster_count_pf);
313  run.total_summary().cluster_count_pf(cluster_count_pf);
314  }
315 
// summarize_extended_tile_metrics: summarizes the extended tile metrics in
// [beg, end) into the percent_occupied fields of the run summary `run` at
// lane, surface, read, non-index and total level.
//
// beg/end: range of extended tile metrics to summarize
// naming_method: passed to each metric's surface() to determine its surface
// run: destination run summary, indexed as run[read][lane][surface]
//
// NOTE(review): source lines 326 (function name and `beg`) and 329-330 (the
// `run` parameter and whatever follows it) are missing from this listing; per
// the page's cross-reference index the signature is
//   void summarize_extended_tile_metrics(I beg, I end, const constants::tile_naming_method naming_method,
//                                        model::summary::run_summary &run)
325  template<typename I>
327  I end,
328  const constants::tile_naming_method naming_method,
331  {
332  typedef std::vector<model::metrics::extended_tile_metric> tile_vector_t;
333  typedef std::vector<tile_vector_t> tile_by_lane_vector_t;
    // Nothing to summarize for an empty metric range or an empty run summary.
334  if (beg == end) return;
335  if (run.size() == 0)return;
336  const size_t surface_count = run.surface_count();
337  const ptrdiff_t n = std::distance(beg, end);
    // Two caches: one bucket per lane, and one bucket per (lane, surface)
    // laid out as lane*surface_count + zero-based surface.
338  tile_by_lane_vector_t tile_data_by_lane(run.lane_count());
339  reserve(tile_data_by_lane.begin(), tile_data_by_lane.end(), n);
340  tile_by_lane_vector_t tile_data_by_lane_surface(run.lane_count()*surface_count);
341  reserve(tile_data_by_lane_surface.begin(), tile_data_by_lane_surface.end(), n);
342 
    // Pass 1: distribute every extended tile metric into the caches.
343  for (; beg != end; ++beg)
344  {
345  const size_t surface = beg->surface(naming_method);
    // Surfaces are treated as 1-based: `surface-1` is used as the index below.
346  INTEROP_ASSERT(surface > 0);
347  const size_t lane = beg->lane() - 1;
348  INTEROP_BOUNDS_CHECK(lane, tile_data_by_lane.size(), "Lane exceeds number of lanes in RunInfo.xml");
    // `beg->lane() - 1` is the same value as `lane` computed above.
349  tile_data_by_lane[beg->lane() - 1].push_back(*beg);// TODO: make more efficient by copying only tile data
    // The per-surface cache is only filled when the run has at least two surfaces.
350  if(surface_count < 2) continue;
    // NOTE(review): `index` is not bounds-checked in the visible code; only
    // `surface > 0` is asserted. TODO confirm surface <= surface_count holds.
351  const size_t index = lane*surface_count+(surface-1);
352  tile_data_by_lane_surface[index].push_back(*beg);// TODO: make more efficient by copying only tile data
353  }
354 
355 
356  model::summary::metric_stat count_stat;
357  double total_cluster_occupied = 0;
358  double total_cluster_count = 0;
    // The median is always computed here; this function has no skip_median parameter.
359  const bool skip_median=false;
    // Pass 2: percent_occupied is computed once into read 0 and then copied to
    // every other read of the same lane (and lane/surface).
360  for (size_t lane = 0; lane < run[0].size(); ++lane)
361  {
362  INTEROP_ASSERT(lane < tile_data_by_lane.size());
363  INTEROP_ASSERT(lane < run[0].size());
364 
365  tile_vector_t& tile_lane_data = tile_data_by_lane[lane];
    // NOTE(review): source lines 367 and 371-372 are missing from this
    // listing, so the remove_nan predicate and the accessor averaged by
    // util::mean are not visible. The index lists cluster_count_occupied() on
    // extended_tile_metric - presumably that is the value used; confirm.
366  tile_vector_t::iterator tile_lane_end = util::remove_nan(tile_lane_data.begin(), tile_lane_data.end(),
368  const float occupied_mean =
369  util::mean<float>(tile_lane_data.begin(),
370  tile_lane_end,
    // Reads the cluster_count statistic already stored in `run`; presumably
    // summarize_tile_metrics must have populated it beforehand - verify in callers.
373  count_stat = run[0][lane].cluster_count();
374  update_extended_tile_summary_from_cache(tile_data_by_lane[lane], run[0][lane], skip_median);
    // A lane contributes to the run-level totals only when both means are valid numbers.
375  if(!std::isnan(count_stat.mean()) && !std::isnan(occupied_mean))
376  {
377  total_cluster_count += count_stat.mean();
378  total_cluster_occupied += occupied_mean;
379  }
380 
381  for (size_t read = 1; read < run.size(); ++read)
382  {
383  INTEROP_ASSERT(read < run.size());
384  run[read][lane].percent_occupied(run[0][lane].percent_occupied());
385  }
386  if(surface_count < 2) continue;
387  for(size_t surface=0;surface<surface_count;++surface)
388  {
389  update_extended_tile_summary_from_cache(tile_data_by_lane_surface[lane * surface_count + surface],
390  run[0][lane][surface], skip_median);
391  for (size_t read = 1; read < run.size(); ++read)
392  {
393  INTEROP_ASSERT(read < run.size());
394  run[read][lane][surface].percent_occupied(run[0][lane][surface].percent_occupied());
395  }
396  }
397  }
    // Run-level percent occupied = 100 * (sum of lane occupied means) / (sum of
    // lane cluster-count means); the same value is written to every read
    // summary and to both the non-index and total summaries.
398  for (size_t read = 0; read < run.size(); ++read)
399  {
400  run[read].summary().percent_occupied(static_cast<float>(divide(total_cluster_occupied, total_cluster_count))*100);
401  }
402  run.nonindex_summary().percent_occupied(static_cast<float>(divide(total_cluster_occupied, total_cluster_count))*100);
403  run.total_summary().percent_occupied(static_cast<float>(divide(total_cluster_occupied, total_cluster_count))*100);
404  }
405 
406 }}}}
407 
void update_extended_tile_summary_from_cache(std::vector< ExtendedTileVector > &extended_tile_data, model::summary::stat_summary &stat_summary, const bool skip_median)
Definition: tile_summary.h:26
const metric_summary & total_summary() const
Definition: run_summary.h:314
const metric_stat_t & cluster_count_pf() const
Definition: stat_summary.h:138
Definition: enum_description.h:15
float cluster_count() const
Definition: tile_metric.h:334
size_t surface_count() const
Definition: run_summary.h:287
const metric_stat_t & prephasing() const
Definition: stat_summary.h:165
float percent_prephasing() const
Definition: tile_metric.h:126
float percent_aligned() const
Definition: metric_summary.h:60
#define INTEROP_THROW_SPEC(SPEC)
Definition: exception_specification.h:15
std::vector< read_metric > read_metric_vector
Definition: tile_metric.h:177
S nan_accumulate(I beg, I end, const S init, Op op)
Definition: summary_statistics.h:209
inline::uint32_t surface(const ::uint32_t tile_id, const constants::tile_naming_method method)
Definition: tile_metric.h:93
size_t size() const
Definition: run_summary.h:224
float cluster_density() const
Definition: tile_metric.h:306
void summarize_extended_tile_metrics(I beg, I end, const constants::tile_naming_method naming_method, model::summary::run_summary &run)
Definition: tile_summary.h:326
const metric_stat_t & percent_pf() const
Definition: stat_summary.h:147
float percent_phasing() const
Definition: tile_metric.h:118
float cluster_count_pf() const
Definition: tile_metric.h:355
const metric_stat_t & density() const
Definition: stat_summary.h:111
const_member_function_less_w< T, R, P1 > const_member_function_less(const P2 &param1, R(T::*func)(P1) const)
Definition: statistics.h:233
#define INTEROP_ASSERT(TST)
Definition: assert.h:21
size_t nan_summarize(I beg, I end, S &stat, BinaryOp op, Compare comp, const bool skip_median)
Definition: summary_statistics.h:187
const_member_function_w< T, R, P1 > const_member_function(const P2 &param1, R(T::*func)(P1) const)
Definition: statistics.h:197
float percent_occupied() const
Definition: extended_tile_metric.h:129
def summary(run_metrics, level='Total', columns=None, dtype='f4', ignore_missing_columns=True, extra)
Definition: core.py:217
void reserve(I beg, I end, const ptrdiff_t n)
Definition: summary_statistics.h:24
const metric_stat_t & percent_occupied() const
Definition: stat_summary.h:278
float percent_pf() const
Definition: tile_metric.h:376
void clear()
Definition: metric_stat.h:37
double cluster_count() const
Definition: metric_summary.h:125
def read(run, valid_to_load=None, requires=None, search_paths=None, extra)
Definition: core.py:752
float cluster_density_pf() const
Definition: tile_metric.h:320
void summarize_tile_metrics(I beg, I end, const constants::tile_naming_method naming_method, model::summary::run_summary &run, const bool skip_median=false)
Definition: tile_summary.h:165
R mean(I beg, I end, BinaryOp op)
Definition: statistics.h:654
float cluster_count_occupied() const
Definition: extended_tile_metric.h:107
I remove_nan(I beg, I end, UnaryOp op)
Definition: statistics.h:479
uint64_t reads_pf() const
Definition: metric_summary.h:150
std::vector< T > vector_t
Definition: summary_statistics.h:36
uint64_t reads() const
Definition: metric_summary.h:142
#define INTEROP_BOUNDS_CHECK(VALUE, RANGE, MESSAGE)
Definition: exception.h:24
const metric_stat_t & density_pf() const
Definition: stat_summary.h:120
const metric_stat_t & percent_aligned() const
Definition: stat_summary.h:174
uint64_t reads() const
Definition: stat_summary.h:92
size_t update_read_summary(summary_by_lane_read< model::metrics::read_metric >::vector_t &read_data_cache, model::summary::stat_summary &stat_summary, const bool skip_median)
Definition: tile_summary.h:110
const metric_stat_t & phasing() const
Definition: stat_summary.h:156
const metric_stat_t & cluster_count() const
Definition: stat_summary.h:129
uint64_t reads_pf() const
Definition: stat_summary.h:101
float percent_aligned() const
Definition: tile_metric.h:110
tile_naming_method
Definition: enums.h:294
size_t lane_count() const
Definition: run_summary.h:270
void update_tile_summary_from_cache(std::vector< TileVector > &tile_data, model::summary::stat_summary &stat_summary, const bool skip_median)
Definition: tile_summary.h:48
double cluster_count_pf() const
Definition: metric_summary.h:134
float divide(const float num, const float div)
Definition: summary_statistics.h:231
const metric_summary & nonindex_summary() const
Definition: run_summary.h:342