core Namespace Reference

Functions

def index_summary (run_metrics, level='Lane', columns=None, dtype='f4', extra)
 
def index_summary_columns
 
def summary (run_metrics, level='Total', columns=None, dtype='f4', ignore_missing_columns=True, extra)
 
def load_summary_metrics ()
 
def summary_columns
 
def indexing (run_metrics, per_sample=True, dtype='f4', stype='O', extra)
 
def imaging (run_metrics, dtype='f4', extra)
 
def imaging_columns (run_metrics, extra)
 
def read (run, valid_to_load=None, requires=None, search_paths=None, extra)
 
def read_metric
 
def create_valid_to_load (interop_prefixes)
 
def enable_metrics (valid_to_load, interop_prefixes)
 
def load_to_string_list (valid_to_load)
 
def group_from_filename (filename)
 
def load_imaging_metrics ()
 

Variables

tuple _summary_levels = ('Total', 'NonIndex', 'Read', 'Lane', 'Surface')
 
tuple _index_summary_levels = ('Lane', 'Barcode')
 

Detailed Description

@package interop         {#interop_core}
Core routines to simplify using the InterOp Library

InterOp is built around a single data structure called a `run_metrics` object. This contains the full set of InterOps
along with the RunInfo.xml and some of the RunParameters.xml.

A run metrics object can be read in as follows:
>>> from interop import read
>>> run_metrics = read("some/path/run_folder_name") # doctest: +SKIP

Core routines take the run_metrics object and convert it into a table represented by a structured NumPy array. This can,
in turn, be converted to a pandas DataFrame or any other data structure.

The core routines include the following:

>>> from interop import index_summary
>>> index_summary(run_metrics_with_indexing)
array([(1, 0.46, 1015.56, 520.67, 1536.22, 1800., 2000.)],
      dtype=[('Lane', '<u2'), ('Mapped Reads Cv', '<f4'), ('Max Mapped Reads', '<f4'), ('Min Mapped Reads', '<f4'), ('Total Fraction Mapped Reads', '<f4'), ('Total Pf Reads', '<f4'), ('Total Reads', '<f4')])

>>> from interop import summary
>>> summary(run_metrics_example)
array([(0.37, 6.67, 0., 0., 0.)],
      dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])

>>> from interop import indexing
>>> indexing(run_metrics_with_indexing)
array([(1., 1101., 'ATCACGAC-AAGGTTCA', '1', 4570., 900., 507.78),
       (1., 1101., 'ATCACGAC-GGGGGGGG', '2', 2343., 900., 260.33),
       (1., 1102., 'ATCACGAC-AAGGTTCA', '1', 4570.,   0.,   0.  ),
       (1., 1102., 'ATCACGAC-GGGGGGGG', '2', 2343.,   0.,   0.  )],
      dtype=[('Lane', '<f4'), ('Tile', '<f4'), ('Barcode', 'O'), ('SampleID', 'O'), ('Cluster Count', '<f4'), ('Cluster Count PF', '<f4'), ('% Demux', '<f4')])

>>> from interop import imaging
>>> imaging(run_metrics_example)
rec.array([(1., 1101., 1., 1., 1., 0.1, 10., 10., 25. , 33.3, 33.3, 33.3, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 2., 1., 2., 0.2,  5., 15., 12.5, 42.9, 28.6, 28.6, 0.,  5., 15., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 3., 1., 3., 0.3, 10., 10., 25. , 33.3, 50. , 16.7, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 4., 2., 1., 0.4, 10.,  5., 25. , 16.7, 50. , 33.3, 0., 10.,  5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 5., 3., 1., 0.5, 15.,  5., 37.5, 20. , 40. , 40. , 0., 15.,  5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.)],
          dtype=[('Lane', '<f4'), ('Tile', '<f4'), ('Cycle', '<f4'), ('Read', '<f4'), ('Cycle Within Read', '<f4'), ('Error Rate', '<f4'), ('P90/green', '<f4'), ('P90/blue', '<f4'), ('% No Calls', '<f4'), ('% Base/A', '<f4'), ('% Base/C', '<f4'), ('% Base/G', '<f4'), ('% Base/T', '<f4'), ('Fwhm/green', '<f4'), ('Fwhm/blue', '<f4'), ('Corrected/A', '<f4'), ('Corrected/C', '<f4'), ('Corrected/G', '<f4'), ('Corrected/T', '<f4'), ('Called/A', '<f4'), ('Called/C', '<f4'), ('Called/G', '<f4'), ('Called/T', '<f4'), ('Signal To Noise', '<f4'), ('Surface', '<f4'), ('Swath', '<f4'), ('Tile Number', '<f4')])

Any of the core routines above can take a `run_metrics` object or a string containing a file path to a valid run folder.

>>> ar = index_summary("some/path/run_folder_name") # doctest: +SKIP

The structured NumPy array can be converted to a Pandas DataFrame just so:

>>> import pandas as pd # doctest: +SKIP
>>> df = pd.DataFrame(ar) # doctest: +SKIP

For more information see the documentation around each function below.

Function Documentation

def core.create_valid_to_load (   interop_prefixes)
Create list of metrics valid to load by the InterOp library

The list of valid metric names can be obtained using `list_interop_files`

>>> from interop import create_valid_to_load
>>> int(create_valid_to_load(['Extraction'])[0])
0
>>> create_valid_to_load(0)
Traceback (most recent call last):
    ...
TypeError: Parameter valid_to_load must be a collection of values

:param interop_prefixes: list of strings containing InterOp metric names
:return: py_interop_run.uchar_vector
def core.enable_metrics (   valid_to_load,
  interop_prefixes 
)
Enable metrics in valid_to_load

>>> from interop import enable_metrics, load_to_string_list
>>> import interop.py_interop_run as interop_run
>>> valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 0)
>>> load_to_string_list(enable_metrics(valid_to_load, 'Extraction'))
['Extraction']
>>> load_to_string_list(enable_metrics(valid_to_load, ['Error', 'Q']))
['Error', 'Extraction', 'Q']

Nothing changes when passing in an empty list
>>> load_to_string_list(enable_metrics(valid_to_load, []))
['Error', 'Extraction', 'Q']

Here are some example exceptions raised when an improper parameter is given

>>> enable_metrics(valid_to_load, None)
Traceback (most recent call last):
  ...
TypeError: 'NoneType' object is not iterable
>>> enable_metrics(None, [])
Traceback (most recent call last):
...
TypeError: Parameter valid_to_load must be of type interop.py_interop_run.uchar_vector
>>> enable_metrics("None", [])
Traceback (most recent call last):
...
TypeError: Parameter valid_to_load must be of type interop.py_interop_run.uchar_vector


:param valid_to_load: interop_run.uchar_vector (boolean array)
:param interop_prefixes: list of metrics to enable
:return: interop_run.uchar_vector (It is updated in-place so the return can be ignored)
def core.group_from_filename (   filename)
Get the metric group id from an InterOp filename path

>>> from interop import group_from_filename
>>> import interop.py_interop_run as interop_run
>>> group_from_filename("some/path/run/InterOp/ExtractionMetricsOut.bin")
2
>>> interop_run.Extraction
2

This group id can be used to load a metric from a binary buffer as in `interop.core.read_metric`

:param filename: path to interop metric
:return: interop_run.metric_group
def core.imaging (   run_metrics,
  dtype = 'f4',
  extra 
)
Convert InterOp run_metrics (or read run_metrics from disk) to a numpy structured array containing the imaging
table

We can read an imaging table directly from a run folder. Note, this does not load all metrics, only those required
by the imaging table. See `load_imaging_metrics` for that list.

Also note that loading only tile level metrics (e.g. metrics without cycles) will result in an empty table. This is
a limitation of the imaging table.

>>> from interop import imaging
>>> from interop import load_imaging_metrics
>>> import interop.py_interop_run_metrics as interop_metrics
>>> import numpy as np
>>> ar = imaging("some/path/run_folder_name") # doctest: +SKIP

The above function is equivalent to
>>> ar = imaging("some/path/run_folder_name", valid_to_load=load_imaging_metrics()) # doctest: +SKIP

We can select a subset of metrics to include based on metric groups
>>> ar = imaging("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP

See `read` below for more examples.

The following example will rely on an existing run_metrics object (possibly created by the `read` function below).

>>> ar = imaging(run_metrics_example)
>>> ar
rec.array([(1., 1101., 1., 1., 1., 0.1, 10., 10., 25. , 33.3, 33.3, 33.3, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 2., 1., 2., 0.2,  5., 15., 12.5, 42.9, 28.6, 28.6, 0.,  5., 15., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 3., 1., 3., 0.3, 10., 10., 25. , 33.3, 50. , 16.7, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 4., 2., 1., 0.4, 10.,  5., 25. , 16.7, 50. , 33.3, 0., 10.,  5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 5., 3., 1., 0.5, 15.,  5., 37.5, 20. , 40. , 40. , 0., 15.,  5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.)],
          dtype=[('Lane', '<f4'), ('Tile', '<f4'), ('Cycle', '<f4'), ('Read', '<f4'), ('Cycle Within Read', '<f4'), ('Error Rate', '<f4'), ('P90/green', '<f4'), ('P90/blue', '<f4'), ('% No Calls', '<f4'), ('% Base/A', '<f4'), ('% Base/C', '<f4'), ('% Base/G', '<f4'), ('% Base/T', '<f4'), ('Fwhm/green', '<f4'), ('Fwhm/blue', '<f4'), ('Corrected/A', '<f4'), ('Corrected/C', '<f4'), ('Corrected/G', '<f4'), ('Corrected/T', '<f4'), ('Called/A', '<f4'), ('Called/C', '<f4'), ('Called/G', '<f4'), ('Called/T', '<f4'), ('Signal To Noise', '<f4'), ('Surface', '<f4'), ('Swath', '<f4'), ('Tile Number', '<f4')])

>>> ar.dtype
dtype((numpy.record, [('Lane', '<f4'), ('Tile', '<f4'), ('Cycle', '<f4'), ('Read', '<f4'), ('Cycle Within Read', '<f4'), ('Error Rate', '<f4'), ('P90/green', '<f4'), ('P90/blue', '<f4'), ('% No Calls', '<f4'), ('% Base/A', '<f4'), ('% Base/C', '<f4'), ('% Base/G', '<f4'), ('% Base/T', '<f4'), ('Fwhm/green', '<f4'), ('Fwhm/blue', '<f4'), ('Corrected/A', '<f4'), ('Corrected/C', '<f4'), ('Corrected/G', '<f4'), ('Corrected/T', '<f4'), ('Called/A', '<f4'), ('Called/C', '<f4'), ('Called/G', '<f4'), ('Called/T', '<f4'), ('Signal To Noise', '<f4'), ('Surface', '<f4'), ('Swath', '<f4'), ('Tile Number', '<f4')]))

We can convert the numpy array to a Pandas DataFrame as follows:

>>> import pandas as pd  # doctest: +SKIP
>>> df = pd.DataFrame(ar)  # doctest: +SKIP
>>> df  # doctest: +SKIP
   Lane  ...  Tile Number
0   1.0  ...          1.0
1   1.0  ...          1.0
2   1.0  ...          1.0
3   1.0  ...          1.0
4   1.0  ...          1.0
<BLANKLINE>
[5 rows x 27 columns]

You can also change the dtype of the resulting data array table.
>>> imaging(run_metrics_example, dtype=np.float32)
rec.array([(1., 1101., 1., 1., 1., 0.1, 10., 10., 25. , 33.3, 33.3, 33.3, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 2., 1., 2., 0.2,  5., 15., 12.5, 42.9, 28.6, 28.6, 0.,  5., 15., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 3., 1., 3., 0.3, 10., 10., 25. , 33.3, 50. , 16.7, 0., 10., 10., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 4., 2., 1., 0.4, 10.,  5., 25. , 16.7, 50. , 33.3, 0., 10.,  5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.),
           (1., 1101., 5., 3., 1., 0.5, 15.,  5., 37.5, 20. , 40. , 40. , 0., 15.,  5., nan, nan, nan, nan, nan, nan, nan, nan, nan, 1., 1., 1.)],
          dtype=[('Lane', '<f4'), ('Tile', '<f4'), ('Cycle', '<f4'), ('Read', '<f4'), ('Cycle Within Read', '<f4'), ('Error Rate', '<f4'), ('P90/green', '<f4'), ('P90/blue', '<f4'), ('% No Calls', '<f4'), ('% Base/A', '<f4'), ('% Base/C', '<f4'), ('% Base/G', '<f4'), ('% Base/T', '<f4'), ('Fwhm/green', '<f4'), ('Fwhm/blue', '<f4'), ('Corrected/A', '<f4'), ('Corrected/C', '<f4'), ('Corrected/G', '<f4'), ('Corrected/T', '<f4'), ('Called/A', '<f4'), ('Called/C', '<f4'), ('Called/G', '<f4'), ('Called/T', '<f4'), ('Signal To Noise', '<f4'), ('Surface', '<f4'), ('Swath', '<f4'), ('Tile Number', '<f4')])

Here is the output if an empty run_metrics was provided
>>> imaging(interop_metrics.run_metrics())
array([], dtype=float64)

Here is an example exception if an improper input is given
>>> imaging(None)
Traceback (most recent call last):
...
ValueError: Expected interop.py_interop_run_metrics.run_metrics or str for `run_metrics`

:param run_metrics: py_interop_run_metrics.run_metrics or str file path to a run folder
:param dtype: data type for the array (Default: 'f4')
:param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder
:return: structured array with column names and dtype - np.array
def core.imaging_columns (   run_metrics,
  extra 
)
Get a list of imaging table columns

>>> from interop import imaging_columns
>>> from interop import load_imaging_metrics
>>> import interop.py_interop_run_metrics as interop_metrics
>>> import numpy as np
>>> ar = imaging_columns("some/path/run_folder_name") # doctest: +SKIP

The above function is equivalent to
>>> ar = imaging_columns("some/path/run_folder_name", valid_to_load=load_imaging_metrics()) # doctest: +SKIP

We can select a subset of metrics to include based on metric groups
>>> ar = imaging_columns("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP

See `read` below for more examples.

The following example will rely on an existing run_metrics object (possibly created by the `read` function below).

>>> imaging_columns(run_metrics_example)
['Lane', 'Tile', 'Cycle', 'Read', 'Cycle Within Read', 'Error Rate', 'P90/green', 'P90/blue', '% No Calls', '% Base/A', '% Base/C', '% Base/G', '% Base/T', 'Fwhm/green', 'Fwhm/blue', 'Corrected/A', 'Corrected/C', 'Corrected/G', 'Corrected/T', 'Called/A', 'Called/C', 'Called/G', 'Called/T', 'Signal To Noise', 'Surface', 'Swath', 'Tile Number']

:param run_metrics: py_interop_run_metrics.run_metrics or str file path to a run folder
:param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder
:return: list of string headers
def core.index_summary (   run_metrics,
  level = 'Lane',
  columns = None,
  dtype = 'f4',
  extra 
)
Index summary table

>>> from interop import index_summary
>>> ar = index_summary("some/path/run_folder_name") # doctest: +SKIP

>>> index_summary(run_metrics_with_indexing)
array([(1, 0.46, 1015.56, 520.67, 1536.22, 1800., 2000.)],
      dtype=[('Lane', '<u2'), ('Mapped Reads Cv', '<f4'), ('Max Mapped Reads', '<f4'), ('Min Mapped Reads', '<f4'), ('Total Fraction Mapped Reads', '<f4'), ('Total Pf Reads', '<f4'), ('Total Reads', '<f4')])

>>> index_summary(run_metrics_with_indexing, level='Barcode')
array([(1, 18280., 1015.56, 1., 'ATCACGAC', 'AAGGTTCA', 'TSCAIndexes', '1'),
       (1,  9372.,  520.67, 2., 'ATCACGAC', 'GGGGGGGG', 'TSCAIndexes', '2')],
      dtype=[('Lane', '<u2'), ('Cluster Count', '<f4'), ('Fraction Mapped', '<f4'), ('Id', '<f4'), ('Index1', 'O'), ('Index2', 'O'), ('Project Name', 'O'), ('Sample Id', 'O')])

>>> index_summary(run_metrics_with_indexing, columns=['Total Fraction Mapped Reads'])
array([(1, 1536.22)],
      dtype=[('Lane', '<u2'), ('Total Fraction Mapped Reads', '<f4')])

>>> index_summary(run_metrics_with_indexing, columns=['Incorrect'])
Traceback (most recent call last):
...
ValueError: Column `Incorrect` not found in: ['Mapped Reads Cv', 'Max Mapped Reads', 'Min Mapped Reads', 'Total Fraction Mapped Reads', 'Total Pf Reads', 'Total Reads'] - column not consistent with level or misspelled

>>> index_summary(run_metrics_with_indexing, level='Incorrect')
Traceback (most recent call last):
...
ValueError: level=Incorrect not in ('Lane', 'Barcode')

:param run_metrics: py_interop_run_metrics.run_metrics or string run folder path
:param level: level of the data to summarize, valid values include: 'Lane', 'Barcode' (Default: Lane)
:param columns: list of columns (valid values depend on the level) see `index_summary_columns`
:param dtype: data type for the array (Default: 'f4')
:param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder
:return: structured array with column names and dtype - np.array
def core.index_summary_columns (   level = 'Lane',
  ret_dict = False 
)
List the columns of the `index_summary` table

>>> from interop import index_summary_columns
>>> index_summary_columns()
('Mapped Reads Cv', 'Max Mapped Reads', 'Min Mapped Reads', 'Total Fraction Mapped Reads', 'Total Pf Reads', 'Total Reads')


>>> index_summary_columns('Barcode')
('Cluster Count', 'Fraction Mapped', 'Id', 'Index1', 'Index2', 'Project Name', 'Sample Id')

:param level: level of the data to summarize, valid values include: 'Lane', 'Barcode' (Default: Lane)
:param ret_dict: if true, return a dict mapping from column name to method name (Default: False)
:return: tuple of columns (or dictionary mapping column name to method depending on `ret_dict` parameter)
def core.indexing (   run_metrics,
  per_sample = True,
  dtype = 'f4',
  stype = 'O',
  extra 
)
Convert InterOp run_metrics (or read run_metrics from disk) to a numpy structured array containing an
indexing table

We can read an indexing table directly from a run folder. Note, this does not load all metrics, only those required
by the indexing table, e.g. IndexMetricsOut.bin

>>> from interop import indexing
>>> ar = indexing("some/path/run_folder_name") # doctest: +SKIP

Note that `valid_to_load` in `read` is ignored.


We can also convert a `run_metrics` object to an indexing table as follows
>>> ar = indexing(run_metrics_with_indexing)
>>> ar
array([(1., 1101., 'ATCACGAC-AAGGTTCA', '1', 4570., 900., 507.78),
       (1., 1101., 'ATCACGAC-GGGGGGGG', '2', 2343., 900., 260.33),
       (1., 1102., 'ATCACGAC-AAGGTTCA', '1', 4570.,   0.,   0.  ),
       (1., 1102., 'ATCACGAC-GGGGGGGG', '2', 2343.,   0.,   0.  )],
      dtype=[('Lane', '<f4'), ('Tile', '<f4'), ('Barcode', 'O'), ('SampleID', 'O'), ('Cluster Count', '<f4'), ('Cluster Count PF', '<f4'), ('% Demux', '<f4')])

The `indexing` function also provides an overall sample view by setting `per_sample=False`.

>>> ar = indexing(run_metrics_with_indexing, per_sample=False)
>>> ar
array([(1., 1101., 1000., 900., 768.11), (1., 1102.,    0.,   0.,   0.  )],
      dtype=[('Lane', '<f4'), ('Tile', '<f4'), ('Cluster Count', '<f4'), ('Cluster Count PF', '<f4'), ('% Demux', '<f4')])

:param run_metrics: py_interop_run_metrics.run_metrics or string run folder path
:param per_sample: return demux per sample (Default: True)
:param dtype: data type for the array (Default: 'f4')
:param stype: string type for the array (Default: 'O')
:param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder
:return: structured array with column names and dtype - np.array
def core.load_imaging_metrics ( )
List of valid imaging metrics to load

>>> from interop import load_to_string_list
>>> from interop import load_imaging_metrics
>>> load_to_string_list(load_imaging_metrics())
['CorrectedInt', 'Error', 'Extraction', 'Image', 'Q', 'Tile', 'QByLane', 'QCollapsed', 'EmpiricalPhasing', 'DynamicPhasing', 'ExtendedTile']

:return: valid_to_load
def core.load_summary_metrics ( )
List of valid summary metrics to load

>>> from interop import load_to_string_list
>>> from interop import load_summary_metrics
>>> load_to_string_list(load_summary_metrics())
['CorrectedInt', 'Error', 'Extraction', 'Q', 'Tile', 'QByLane', 'QCollapsed', 'EmpiricalPhasing', 'ExtendedTile']

:return: valid_to_load
def core.load_to_string_list (   valid_to_load)
Create a string list of names for each enabled metric in `valid_to_load`

>>> from interop import create_valid_to_load, load_to_string_list
>>> import interop.py_interop_run as interop_run
>>> valid_to_load = create_valid_to_load('Extraction')
>>> load_to_string_list(valid_to_load)
['Extraction']
>>> valid_to_load = interop_run.uchar_vector(interop_run.MetricCount, 1)
>>> load_to_string_list(valid_to_load)
['CorrectedInt', 'Error', 'Extraction', 'Image', 'Index', 'Q', 'Tile', 'QByLane', 'QCollapsed', 'EmpiricalPhasing', 'DynamicPhasing', 'ExtendedTile', 'SummaryRun']

:param valid_to_load: boolean buffer
:return: list of strings containing the name of each metric enabled in `valid_to_load`
def core.read (   run,
  valid_to_load = None,
  requires = None,
  search_paths = None,
  extra 
)
Read InterOp metrics into a run_metrics object

- The list of valid `valid_to_load` names can be obtained using `list_interop_files`
- If run is `interop.py_interop_run_metrics.run_metrics` then run is returned.
- If an InterOp file listed in `requires` is missing, then an empty run_metrics object is returned

Read in all metrics from a run folder
>>> from interop import read
>>> metrics = read("some/path/run_folder_name") # doctest: +SKIP

Read in only ErrorMetricsOut.bin in a run folder
>>> metrics = read("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP

Read in ErrorMetricsOut.bin and ExtractionMetricsOut.bin, but if ErrorMetricsOut.bin is missing return an empty run_metrics object
>>> metrics = read("some/path/run_folder_name", valid_to_load=['Error', 'Extraction'], requires=['Error']) # doctest: +SKIP

Read in IndexMetricsOut.bin and search for it outside the run folder in `fastq/reports`
>>> metrics = read("some/path/run_folder_name", valid_to_load=['Index'], search_paths=['fastq/reports']) # doctest: +SKIP

Read in a run folder that is not found
>>> metrics = read("some/non/existing/run_folder_name")
Traceback (most recent call last):
...
interop.py_interop_run.xml_file_not_found_exception: cannot open file some/non/existing/run_folder_name/RunInfo.xml

Read from a None object
>>> metrics = read(None)
Traceback (most recent call last):
...
ValueError: invalid null reference in method 'run_metrics_read', argument 2 of type 'std::string const &'

:param run: string path including name of run folder (or run_metrics object)
:param valid_to_load: list of strings containing InterOp metric names (Default: None, load everything)
:param requires: list of required metric (Default: None, check nothing)
:param search_paths: list of paths to search when looking for `IndexMetricsOut.bin` (Default: None, do not search)
:return: interop.py_interop_run_metrics.run_metrics
def core.read_metric (   filename,
  run_metrics = None,
  finalize = False 
)
Read a specific metric from a file into a run_metrics object

This function allows incremental reading of metric files from disk. The last call should set
`finalize=True`.

Read in `ErrorMetricsOut.bin` into a run_metrics object and finalize since this is the only metric we plan to read

>>> from interop import read_metric
>>> metrics = read_metric("some/path/run_folder_name/InterOp/ErrorMetricsOut.bin", finalize=True) # doctest: +SKIP

:param filename: path to InterOp file
:param run_metrics: existing run_metrics object (Default None, one will be created)
:param finalize: if true, then call finalize_after_load (last call to `read_metric` should set finalize=True)
:return: interop.py_interop_run_metrics.run_metrics
def core.summary (   run_metrics,
  level = 'Total',
  columns = None,
  dtype = 'f4',
  ignore_missing_columns = True,
  extra 
)
Generate a summary table with the given level, columns and dtype from a run_metrics object or run_folder path

Note that not all columns will be included if InterOp files are missing or purposely excluded using `valid_to_load`.

The following examples show the different levels that one can summarize the data including:

 - Total (Default)
 - NonIndex
 - Read
 - Lane
 - Surface

>>> from interop import summary
>>> ar = summary("some/path/run_folder_name") # doctest: +SKIP
>>> ar = summary("some/path/run_folder_name", valid_to_load=['Error']) # doctest: +SKIP


>>> summary(run_metrics_example)
array([(0.37, 6.67, 0., 0., 0.)],
      dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])

>>> summary(run_metrics_example, 'Total')
array([(0.37, 6.67, 0., 0., 0.)],
      dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])

>>> summary(run_metrics_example, 'NonIndex')
array([(0.2, 10., 0., 0., 0.)],
      dtype=[('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])

>>> summary(run_metrics_example, 'Read')
array([(1, 78, 0.2, 10., 0., 0., 0.), (2, 89, 0.4,  5., 0., 0., 0.),
       (3, 89, 0.5,  5., 0., 0., 0.)],
      dtype=[('ReadNumber', '<u2'), ('IsIndex', 'u1'), ('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4')])

>>> summary(run_metrics_example, 'Lane')
array([(1, 78, 1, 0.2, 10., 0., 0., 0., 1.),
       (2, 89, 1, 0.4,  5., 0., 0., 0., 1.),
       (3, 89, 1, 0.5,  5., 0., 0., 0., 1.)],
      dtype=[('ReadNumber', '<u2'), ('IsIndex', 'u1'), ('Lane', '<u2'), ('Error Rate', '<f4'), ('First Cycle Intensity', '<f4'), ('Projected Yield G', '<f4'), ('Reads', '<f4'), ('Reads Pf', '<f4'), ('Tile Count', '<f4')])

For a single surface, as in this example, nothing is reported.
>>> summary(run_metrics_example, 'Surface')
array([], dtype=float64)

We can select specific columns using the `columns` parameter
>>> summary(run_metrics_example, 'Total', columns=['First Cycle Intensity', 'Error Rate'])
array([(6.67, 0.37)],
      dtype=[('First Cycle Intensity', '<f4'), ('Error Rate', '<f4')])

If a column's values are NaN, or missing, then the column will automatically be excluded
>>> summary(run_metrics_example, 'Total', columns=['% Aligned', 'Error Rate'])
array([(0.37,)], dtype=[('Error Rate', '<f4')])

To include missing columns, set `ignore_missing_columns=False`
>>> summary(run_metrics_example, 'Total', ignore_missing_columns=False, columns=['% Aligned', 'Error Rate'])
array([(nan, 0.37)], dtype=[('% Aligned', '<f4'), ('Error Rate', '<f4')])

>>> summary(run_metrics_example, 'Total', columns=['Incorrect'])
Traceback (most recent call last):
 ...
ValueError: Column `Incorrect` not found in: ['Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Yield G'] - column not consistent with level or misspelled


:param run_metrics: py_interop_run_metrics.run_metrics or string run folder path
:param level: level of the data to summarize, valid values include: 'Total', 'NonIndex', 'Read', 'Lane', 'Surface' (Default: Total)
:param columns: list of columns (valid values depend on the level) see `summary_columns`
:param dtype: data type for the array (Default: 'f4')
:param ignore_missing_columns: ignore missing columns, e.g. those with NaN values (Default: True)
:param extra: all extra parameters are passed to `read` if the first parameter is a str file path to a run folder
:return: structured array with column names and dtype - np.array
def core.summary_columns (   level = 'Total',
  ret_dict = False 
)
Get a list of column names supported at each level of the summary table

>>> from interop import summary_columns

The default columns are for the Total level; the NonIndex and Read levels share the same columns
>>> summary_columns()
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
>>> summary_columns(level='Total')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
>>> summary_columns(level='NonIndex')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')
>>> summary_columns(level='Read')
('Cluster Count', 'Cluster Count Pf', 'Error Rate', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupancy Proxy', '% Occupied', 'Projected Yield G', 'Reads', 'Reads Pf', 'Yield G')

The lane/surface level give another set of columns for the summary table
>>> summary_columns(level='Lane')
('Cluster Count', 'Cluster Count Pf', 'Density', 'Density Pf', 'Error Rate', 'Error Rate 100', 'Error Rate 35', 'Error Rate 50', 'Error Rate 75', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupied', '% Pf', 'Phasing', 'Phasing Offset', 'Phasing Slope', 'Prephasing', 'Prephasing Offset', 'Prephasing Slope', 'Projected Yield G', 'Reads', 'Reads Pf', 'Tile Count', 'Yield G')
>>> summary_columns(level='Surface')
('Cluster Count', 'Cluster Count Pf', 'Density', 'Density Pf', 'Error Rate', 'Error Rate 100', 'Error Rate 35', 'Error Rate 50', 'Error Rate 75', 'First Cycle Intensity', '% Aligned', '% >= Q30', '% Occupied', '% Pf', 'Phasing', 'Phasing Offset', 'Phasing Slope', 'Prephasing', 'Prephasing Offset', 'Prephasing Slope', 'Projected Yield G', 'Reads', 'Reads Pf', 'Tile Count', 'Yield G')

:param level: level of the data to summarize, valid values include: 'Total', 'NonIndex', 'Read', 'Lane', 'Surface' (Default: Total)
:param ret_dict: if true, return a dict mapping from column name to method name (Default: False)
:return: tuple of columns - each column is a tuple, or a tuple of lambda functions that take the run_info as an argument

Variable Documentation

tuple _index_summary_levels = ('Lane', 'Barcode')
tuple _summary_levels = ('Total', 'NonIndex', 'Read', 'Lane', 'Surface')