OpenMS
PeptideAndProteinQuant.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hendrik Weisser $
6 // $Authors: Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
18 
19 
20 namespace OpenMS
21 {
29  class OPENMS_DLLAPI PeptideAndProteinQuant :
30  public DefaultParamHandler
31  {
32 public:
33 
35  typedef std::map<UInt64, double> SampleAbundances; ///< Mapping: sample ID -> abundance.
36 
38  struct PeptideData // Quantitative and associated data for a peptide.
39  {
41  std::map<Int, std::map<String, std::map<Int, std::map<Int, double>>>> abundances; ///< mapping: fraction -> filename -> charge -> channel/label -> abundance
42 
44  std::map<Int, std::map<String, std::map<Int, UInt64>>> psm_counts; ///< mapping: fraction -> filename -> charge -> abundance (stored as UInt64)
45 
48 // NOTE: the listing omits `SampleAbundances total_abundances` (mapping: sample -> total abundance), defined at header line 47.
51 // NOTE: the listing omits `SampleAbundances total_psm_counts` (spectral counting-based abundances), defined at header line 50.
53  std::set<String> accessions; ///< protein accessions for this peptide
54 
56  Size psm_count = 0; // starts at zero; presumably the number of PSMs for this peptide - TODO confirm
57 
59  PeptideData() = default;
60  };
61 
63  typedef std::map<AASequence, PeptideData> PeptideQuant; ///< Mapping: peptide sequence (modified) -> peptide data.
64 
66  struct ProteinData // Quantitative and associated data for a protein.
67  {
69  std::map<String, SampleAbundances> peptide_abundances; ///< mapping: peptide (unmodified) -> sample -> abundance
70 
71  std::map<String, SampleAbundances> peptide_psm_counts; // NOTE(review): undocumented; presumably peptide -> sample -> PSM count - confirm
72 
74  std::map<String, std::map<Int, double>> channel_level_abundances; ///< mapping: filename -> channel/label -> abundance
75 
77  std::map<String, UInt64> file_level_psm_counts; ///< mapping: filename -> PSM counts
78 
81 // NOTE: the listing omits `SampleAbundances total_abundances` (mapping: sample -> total abundance), defined at header line 80.
84 // NOTE: the listing omits `SampleAbundances total_psm_counts` (spectral counting-based abundances), defined at header line 83.
87 // NOTE: the listing omits `SampleAbundances total_distinct_peptides` (number of distinct peptide sequences), defined at header line 86.
89  Size psm_count = 0; // starts at zero; presumably the number of PSMs for this protein - TODO confirm
90 
92  ProteinData() = default;
93  };
94 
96  typedef std::map<String, ProteinData> ProteinQuant; ///< Mapping: protein accession -> protein data.
97 
99  struct Statistics // Statistics for processing summary.
100  {
103 // NOTE: the listing omits `Size n_samples` (number of samples, or assays in mzTab terms), defined at header line 102.
106 // NOTE: the listing omits `Size n_fractions` (number of fractions), defined at header line 105.
109 // NOTE: the listing omits `Size n_ms_files` (number of MS files), defined at header line 108.
111  Size quant_proteins, too_few_peptides; ///< protein statistics
112 
114  Size quant_peptides, total_peptides; ///< peptide statistics
115 
117  Size quant_features, total_features, blank_features, ambig_features; // presumably feature statistics (no description in this listing) - confirm
118 
// The constructor head (`Statistics()`, header line 120) is omitted by the listing; the lines below are its
// member-initializer list, which zeroes the counters it names.
// NOTE(review): n_fractions and n_ms_files do not appear in this list - confirm they are initialized elsewhere.
121  n_samples(0), quant_proteins(0), too_few_peptides(0),
122  quant_peptides(0), total_peptides(0), quant_features(0),
123  total_features(0), blank_features(0), ambig_features(0) {}
124  };
125 
128 
131 
137  void readQuantData(FeatureMap& features, const ExperimentalDesign& ed); // Read quantitative data from a feature map.
138 
144  void readQuantData(ConsensusMap& consensus, const ExperimentalDesign& ed); // Read quantitative data from a consensus map.
145 
151  void readQuantData(std::vector<ProteinIdentification>& proteins, // Read quantitative data from identification results (for quantification via spectral counting).
152  PeptideIdentificationList& peptides,
153  const ExperimentalDesign& ed);
154 
166 
167 
176 
177 
// NOTE(review): no description is available in this listing. From the signature: a const member that takes a
// ProteinIdentification and returns a String -> String map - presumably protein accession -> leader accession; confirm.
178  std::map<OpenMS::String, OpenMS::String> mapAccessionToLeader(const OpenMS::ProteinIdentification& proteins) const;
179 
182 
185 
188 
191  const ProteinQuant& protein_quants,
192  ProteinIdentification& proteins,
193  bool remove_unquantified = true);
194 
195 private:
196 
199 
202 
205 
208 
209 
217 
227  void quantifyFeature_(const FeatureHandle& feature, // Gather quantitative information from a feature.
228  size_t fraction,
229  const String& filename,
230  const PeptideHit& hit,
231  Int channel_or_label);
232 
// Determine fraction, filename, charge state, and channel of a peptide; the result is passed back through `best`.
// NOTE(review): the selection criterion is truncated in this listing ("with the highest number of abund...") and the
// meaning of the bool return value is not shown here - confirm both against the implementation.
240  bool getBest_(
241  const std::map<Int, std::map<String, std::map<Int, std::map<Int, double>>>> & peptide_abundances,
242  std::tuple<size_t, String, size_t, Int> & best);
243 
249  template <typename T>
250  void orderBest_(const std::map<T, SampleAbundances> & abundances,
251  std::vector<T>& result)
252  {
253  typedef std::pair<Size, double> PairType;
254  std::multimap<PairType, T, std::greater<PairType> > order;
255  for (typename std::map<T, SampleAbundances>::const_iterator ab_it =
256  abundances.begin(); ab_it != abundances.end(); ++ab_it)
257  {
258  double total = 0.0;
259  for (SampleAbundances::const_iterator samp_it = ab_it->second.begin();
260  samp_it != ab_it->second.end(); ++samp_it)
261  {
262  total += samp_it->second;
263  }
264  if (total <= 0.0) continue; // not quantified
265  PairType key = std::make_pair(ab_it->second.size(), total);
266  order.insert(std::make_pair(key, ab_it->first));
267  }
268  result.clear();
269  for (typename std::multimap<PairType, T, std::greater<PairType> >::
270  iterator ord_it = order.begin(); ord_it != order.end(); ++ord_it)
271  {
272  result.push_back(ord_it->second);
273  }
274  }
275 
276 
277 
282 
291 
300  std::vector<String> selectPeptidesForQuantification_(const String& protein_accession, // Select peptides for protein quantification based on filtering criteria.
301  Size top_n,
302  bool fix_peptides);
303 
311  double aggregateAbundances_(const std::vector<double>& abundances, // Aggregate abundances using the specified mathematical method.
312  const String& method) const;
313 
323  void calculateProteinAbundances_(const String& protein_accession, // Calculate protein abundances for a single protein using selected peptides.
324  const std::vector<String>& selected_peptides,
325  const String& aggregate_method,
326  Size top_n,
327  bool include_all);
328 
340  const std::vector<String>& selected_peptides,
341  const String& aggregate_method,
342  Size top_n,
343  bool include_all,
344  const std::map<String, String>& accession_to_leader);
345 
352 
365  String getAccession_(const std::set<String>& pep_accessions, // Get the "canonical" protein accession from the list of protein accessions of a peptide.
366  const std::map<String, String>& accession_to_leader) const;
367 
374 
384  Int channel_or_label,
385  const ExperimentalDesign& ed) const;
386 
388  void updateMembers_() override; // Clear all data when parameters are set.
389 
390  }; // class
391 
392 } // namespace
A container for consensus elements.
Definition: ConsensusMap.h:68
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:109
Representation of a Peak2D, RichPeak2D or Feature.
Definition: FeatureHandle.h:34
A container for features.
Definition: FeatureMap.h:82
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition: PeptideAndProteinQuant.h:31
void calculateFileAndChannelLevelProteinAbundances_(const String &protein_accession, const std::vector< String > &selected_peptides, const String &aggregate_method, Size top_n, bool include_all, const std::map< String, String > &accession_to_leader)
Calculate detailed protein abundances at channel level using selected peptides.
void readQuantData(ConsensusMap &consensus, const ExperimentalDesign &ed)
Read quantitative data from a consensus map.
void annotateQuantificationsToProteins(const ProteinQuant &protein_quants, ProteinIdentification &proteins, bool remove_unquantified=true)
Annotate protein quant results as meta data to protein ids.
std::map< AASequence, PeptideData > PeptideQuant
Mapping: peptide sequence (modified) -> peptide data.
Definition: PeptideAndProteinQuant.h:63
void readQuantData(FeatureMap &features, const ExperimentalDesign &ed)
Read quantitative data from a feature map.
std::map< String, ProteinData > ProteinQuant
Mapping: protein accession -> protein data.
Definition: PeptideAndProteinQuant.h:96
size_t getSampleIDFromFilenameAndChannel_(const String &filename, Int channel_or_label, const ExperimentalDesign &ed) const
Map (filename, channel) to sample using ExperimentalDesign.
const ProteinQuant & getProteinResults()
Get protein abundance data.
ExperimentalDesign experimental_design_
Experimental design for filename/channel to sample mapping.
Definition: PeptideAndProteinQuant.h:207
void performIbaqNormalization_(const ProteinIdentification &proteins)
Perform iBAQ normalization on protein abundances.
void countPeptides_(PeptideIdentificationList &peptides)
Count the number of identifications (best hits only) of each peptide sequence.
void quantifyPeptides(const PeptideIdentificationList &peptides=PeptideIdentificationList())
Compute peptide abundances.
PeptideQuant pep_quant_
Peptide quantification data.
Definition: PeptideAndProteinQuant.h:201
void transferPeptideDataToProteins_(const ProteinIdentification &proteins)
Transfer peptide-level quantitative data to protein-level data structures.
const PeptideQuant & getPeptideResults()
Get peptide abundance data.
String getAccession_(const std::set< String > &pep_accessions, const std::map< String, String > &accession_to_leader) const
Get the "canonical" protein accession from the list of protein accessions of a peptide.
~PeptideAndProteinQuant() override
Destructor.
Definition: PeptideAndProteinQuant.h:130
std::vector< String > selectPeptidesForQuantification_(const String &protein_accession, Size top_n, bool fix_peptides)
Select peptides for protein quantification based on filtering criteria.
void calculateProteinAbundances_(const String &protein_accession, const std::vector< String > &selected_peptides, const String &aggregate_method, Size top_n, bool include_all)
Calculate protein abundances for a single protein using selected peptides.
void readQuantData(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, const ExperimentalDesign &ed)
Read quantitative data from identification results (for quantification via spectral counting).
void updateMembers_() override
Clear all data when parameters are set.
std::map< OpenMS::String, OpenMS::String > mapAccessionToLeader(const OpenMS::ProteinIdentification &proteins) const
double aggregateAbundances_(const std::vector< double > &abundances, const String &method) const
Aggregate abundances using the specified mathematical method.
void quantifyFeature_(const FeatureHandle &feature, size_t fraction, const String &filename, const PeptideHit &hit, Int channel_or_label)
Gather quantitative information from a feature.
void normalizePeptides_()
Normalize peptide abundances across samples by (multiplicative) scaling to equal medians.
void quantifyProteins(const ProteinIdentification &proteins=ProteinIdentification())
Compute protein abundances.
Statistics stats_
Processing statistics for output in the end.
Definition: PeptideAndProteinQuant.h:198
const Statistics & getStatistics()
Get summary statistics.
bool getBest_(const std::map< Int, std::map< String, std::map< Int, std::map< Int, double >>>> &peptide_abundances, std::tuple< size_t, String, size_t, Int > &best)
Determine fraction, filename, charge state, and channel of a peptide with the highest number of abund...
PeptideHit getAnnotation_(PeptideIdentificationList &peptides)
Get the "canonical" annotation (a single peptide hit) of a feature/consensus feature from the associa...
void orderBest_(const std::map< T, SampleAbundances > &abundances, std::vector< T > &result)
Order keys (charges/peptides for peptide/protein quantification) according to how many samples they a...
Definition: PeptideAndProteinQuant.h:250
std::map< UInt64, double > SampleAbundances
Mapping: sample ID -> abundance.
Definition: PeptideAndProteinQuant.h:35
ProteinQuant prot_quant_
Protein quantification data.
Definition: PeptideAndProteinQuant.h:204
PeptideAndProteinQuant()
Constructor.
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type, e.g. used for variables that hold the result of size().
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Quantitative and associated data for a peptide.
Definition: PeptideAndProteinQuant.h:39
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition: PeptideAndProteinQuant.h:50
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition: PeptideAndProteinQuant.h:47
std::set< String > accessions
protein accessions for this peptide
Definition: PeptideAndProteinQuant.h:53
std::map< Int, std::map< String, std::map< Int, UInt64 > > > psm_counts
mapping: fraction -> filename -> charge -> abundance
Definition: PeptideAndProteinQuant.h:44
std::map< Int, std::map< String, std::map< Int, std::map< Int, double > > > > abundances
mapping: fraction -> filename -> charge -> channel/label -> abundance
Definition: PeptideAndProteinQuant.h:41
Quantitative and associated data for a protein.
Definition: PeptideAndProteinQuant.h:67
std::map< String, std::map< Int, double > > channel_level_abundances
mapping: filename -> channel/label -> abundance
Definition: PeptideAndProteinQuant.h:74
SampleAbundances total_psm_counts
spectral counting-based abundances
Definition: PeptideAndProteinQuant.h:83
SampleAbundances total_abundances
mapping: sample -> total abundance
Definition: PeptideAndProteinQuant.h:80
std::map< String, UInt64 > file_level_psm_counts
mapping: filename -> PSM counts
Definition: PeptideAndProteinQuant.h:77
std::map< String, SampleAbundances > peptide_psm_counts
Definition: PeptideAndProteinQuant.h:71
std::map< String, SampleAbundances > peptide_abundances
mapping: peptide (unmodified) -> sample -> abundance
Definition: PeptideAndProteinQuant.h:69
SampleAbundances total_distinct_peptides
number of distinct peptide sequences
Definition: PeptideAndProteinQuant.h:86
Statistics for processing summary.
Definition: PeptideAndProteinQuant.h:100
Size quant_proteins
protein statistics
Definition: PeptideAndProteinQuant.h:111
Size quant_peptides
peptide statistics
Definition: PeptideAndProteinQuant.h:114
Size n_samples
number of samples (or assays in mzTab terms)
Definition: PeptideAndProteinQuant.h:102
Size n_fractions
number of fractions
Definition: PeptideAndProteinQuant.h:105
Statistics()
constructor
Definition: PeptideAndProteinQuant.h:120
Size n_ms_files
number of MS files
Definition: PeptideAndProteinQuant.h:108
Size ambig_features
Definition: PeptideAndProteinQuant.h:117