All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
FalseDiscoveryRate.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Andreas Bertsch, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
17 
18 #include <unordered_map>
19 
20 #include <vector>
21 #include <unordered_set>
22 
23 namespace OpenMS
24 {
25 
// Forward declaration: the class below only takes ScoreToTgtDecLabelPairs by
// (const) reference, so this header does not need the full definition.
26  struct ScoreToTgtDecLabelPairs;
27 
/// Calculates false discovery rates (FDR) from identifications.
///
/// Derives from DefaultParamHandler (a base class for all classes handling
/// default parameters).
52  class OPENMS_DLLAPI FalseDiscoveryRate :
53  public DefaultParamHandler
54  {
55 public:
58 
66 
    /// Calculates the FDR of one run from a concatenated sequence DB search.
73  void apply(PeptideIdentificationList& id, bool annotate_peptide_fdr = false) const;
74 
    /// Calculates the FDR of two runs, a forward run and a decoy run, on protein level.
81  void apply(std::vector<ProteinIdentification>& fwd_ids, std::vector<ProteinIdentification>& rev_ids) const;
82 
    /// Calculates the FDR of one run from a concatenated sequence DB search
    /// (overload taking protein identification runs).
88  void apply(std::vector<ProteinIdentification>& ids) const;
89 
    /// Calculates the FDR based on PEPs or PPs (if present) and modifies the IDs in place.
95  void applyEstimated(std::vector<ProteinIdentification>& ids) const;
96 
    /// Calculates a linear combination of the area of the difference in estimated
    /// vs. empirical (TD) FDR and a second term.
    /// NOTE(review): the description available for this method is truncated after
    /// "and"; the second term and the exact roles of pepCutoff, fpCutoff and
    /// diffWeight need to be confirmed against the implementation.
106  double applyEvaluateProteinIDs(const std::vector<ProteinIdentification>& ids, double pepCutoff = 1.0, UInt fpCutoff = 50, double diffWeight = 0.2) const;
    /// Overload of the evaluation above for a single ProteinIdentification.
116  double applyEvaluateProteinIDs(const ProteinIdentification& ids, double pepCutoff = 1.0, UInt fpCutoff = 50, double diffWeight = 0.2) const;
117 
    /// Overload of the evaluation above that takes score / target-decoy label pairs directly.
127  double applyEvaluateProteinIDs(ScoreToTgtDecLabelPairs& score_to_tgt_dec_fraction_pairs, double pepCutoff = 1.0, UInt fpCutoff = 50, double diffWeight = 0.2) const;
128 
    /// Simpler reimplementation of the apply function above for PSMs.
    /// NOTE(review): charge and identifier info presumably come from @p run_info
    /// (the available description is truncated) -- confirm.
130  void applyBasic(const std::vector<ProteinIdentification> & run_info, PeptideIdentificationList & ids);
131 
    /// Simpler reimplementation of the apply function above for PSMs or peptides.
133  void applyBasic(PeptideIdentificationList & ids, bool higher_score_better, int charge = 0, String identifier = "", bool only_best_per_pep = false);
    /// NOTE(review): no description is available here; judging by the name this is the
    /// peptide-level counterpart for ConsensusMaps -- confirm.
139  void applyBasicPeptideLevel(ConsensusMap & ids, bool use_unassigned_peptides = true);
    /// Simpler reimplementation of the apply function above for peptides in ConsensusMaps.
141  void applyBasic(ConsensusMap & cmap, bool use_unassigned_peptides = true);
    /// Simpler reimplementation of the apply function above for proteins.
143  void applyBasic(ProteinIdentification & id, bool groups_too = true);
144 
    /// Applies a picked protein FDR. Behaves like a normal target-decoy FDR with a
    /// restriction on which scores are used.
    /// NOTE(review): the available description is truncated ("where only the score
    /// of the best..."); confirm the exact selection rule.
154  void applyPickedProteinFDR(ProteinIdentification& id, String decoy_string = "", bool prefix = true, bool groups_too = true);
155 
    /// rocN overloads for peptide identifications and ConsensusMaps.
    /// NOTE(review): no descriptions are available here; presumably a ROC-N style
    /// measure bounded by @p fp_cutoff false positives, optionally restricted to
    /// one @p identifier -- confirm against the implementation.
158  double rocN(const PeptideIdentificationList& ids, Size fp_cutoff) const;
159 
162  double rocN(const PeptideIdentificationList& ids, Size fp_cutoff, const String& identifier) const;
163 
166  double rocN(const ConsensusMap& ids, Size fp_cutoff, bool include_unassigned_peptides = false) const;
167 
170  double rocN(const ConsensusMap& ids, Size fp_cutoff, const String& identifier, bool include_unassigned_peptides = false) const;
171 
172  // TODO: the next two methods could potentially be merged for speed (they iterate over the same structure).
173  // They are kept separate because they use different cutoff types and separate methods are more generic.
    /// Calculates the area of the difference between estimated and empirical FDR on the fly.
175  double diffEstimatedEmpirical(const ScoreToTgtDecLabelPairs& scores_labels, double pepCutoff = 1.0) const;
176 
    /// rocN variant operating directly on score / target-decoy label pairs
    /// (no description available here; see the note on the rocN overloads above).
179  double rocN(const ScoreToTgtDecLabelPairs& scores_labels, Size fpCutoff = 50) const;
180 
190 
    /// Nested helper that finds decoy strings in ProteinIdentification runs.
    /// NOTE(review): the line that names this nested type is not part of this listing.
195  {
196  public:
    /// Result of the decoy string search.
    /// NOTE(review): a member on line 203 is missing from this listing.
200  struct Result
201  {
    /// Did more than 30% of proteins have the same prefix or suffix?
202  bool success;
    /// On success, was it a prefix or suffix?
204  bool is_prefix;
205  };
206 
214  };
215 private:
216 
219 
222 
    /// Calculates the FDR, given two vectors of scores.
224  void calculateFDRs_(std::map<double, double>& score_to_fdr, std::vector<double>& target_scores, std::vector<double>& decoy_scores, bool q_value, bool higher_score_better) const;
225 
    /// Parameter tail of a declaration whose first line is not part of this listing.
    /// NOTE(review): the parameters match handleObservationMatch_, the helper
    /// function for applyToObservationMatches() -- confirm.
230  std::vector<double>& target_scores,
231  std::vector<double>& decoy_scores,
232  std::map<IdentificationData::IdentifiedMolecule, bool>& molecule_to_decoy,
233  std::map<IdentificationData::ObservationMatchRef, double>& match_to_score) const;
234 
    /// NOTE(review): no description is available here; judging by the name and
    /// parameters this fills @p scores_to_FDR with estimated q-values -- confirm.
237  void calculateEstimatedQVal_(std::map<double, double> &scores_to_FDR,
238  ScoreToTgtDecLabelPairs &scores_labels,
239  bool higher_score_better) const;
240 
    /// NOTE(review): no description is available here; presumably the core FDR /
    /// q-value computation shared by the applyBasic overloads -- confirm.
246  void calculateFDRBasic_(std::map<double,double>& scores_to_FDR, ScoreToTgtDecLabelPairs& scores_labels, bool qvalue, bool higher_score_better) const;
247 
    /// NOTE(review): no description is available here; the name suggests a trapezoidal
    /// area measured relative to the x = y line -- confirm.
250  double trapezoidal_area_xEqy(double exp1, double exp2, double act1, double act2) const;
251 
    /// Calculates the trapezoidal area for a trapezoid with a flat horizontal base, e.g. for an AUC.
253  double trapezoidal_area(double x1, double x2, double y1, double y2) const;
254  };
255 
256 } // namespace OpenMS
A container for consensus elements.
Definition: ConsensusMap.h:68
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Finds decoy strings in ProteinIdentification runs.
Definition: FalseDiscoveryRate.h:195
bool is_prefix
on success, was it a prefix or suffix
Definition: FalseDiscoveryRate.h:204
bool success
did more than 30% of proteins have the same prefix or suffix
Definition: FalseDiscoveryRate.h:202
static Result findDecoyString(const ProteinIdentification &proteins)
String name
on success, what was the decoy string?
Definition: FalseDiscoveryRate.h:203
Finds the most common decoy string in the accessions of proteins. Checks for suffix and prefix and so...
Definition: FalseDiscoveryRate.h:201
Calculates false discovery rates (FDR) from identifications.
Definition: FalseDiscoveryRate.h:54
void applyBasic(ConsensusMap &cmap, bool use_unassigned_peptides=true)
simpler reimplementation of the apply function above for peptides in ConsensusMaps.
double applyEvaluateProteinIDs(const std::vector< ProteinIdentification > &ids, double pepCutoff=1.0, UInt fpCutoff=50, double diffWeight=0.2) const
Calculate a linear combination of the area of the difference in estimated vs. empirical (TD) FDR and ...
FalseDiscoveryRate & operator=(const FalseDiscoveryRate &)
Not implemented.
void applyBasic(PeptideIdentificationList &ids, bool higher_score_better, int charge=0, String identifier="", bool only_best_per_pep=false)
simpler reimplementation of the apply function above for PSMs or peptides.
void apply(PeptideIdentificationList &id, bool annotate_peptide_fdr=false) const
Calculates the FDR of one run from a concatenated sequence DB search.
double rocN(const ConsensusMap &ids, Size fp_cutoff, const String &identifier, bool include_unassigned_peptides=false) const
void applyBasicPeptideLevel(ConsensusMap &ids, bool use_unassigned_peptides=true)
void applyPickedProteinFDR(ProteinIdentification &id, String decoy_string="", bool prefix=true, bool groups_too=true)
Applies a picked protein FDR. Behaves like a normal target-decoy FDR where only the score of the best...
void apply(std::vector< ProteinIdentification > &fwd_ids, std::vector< ProteinIdentification > &rev_ids) const
Calculates the FDR of two runs, a forward run and a decoy run, on protein level.
FalseDiscoveryRate()
Default constructor.
double diffEstimatedEmpirical(const ScoreToTgtDecLabelPairs &scores_labels, double pepCutoff=1.0) const
calculates the area of the difference between estimated and empirical FDR on the fly....
void applyEstimated(std::vector< ProteinIdentification > &ids) const
Calculates the FDR based on PEPs or PPs (if present) and modifies the IDs in place.
IdentificationData::ScoreTypeRef applyToObservationMatches(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref) const
Calculate FDR on the level of observation matches (e.g. peptide-spectrum matches) for "general" ident...
void calculateFDRBasic_(std::map< double, double > &scores_to_FDR, ScoreToTgtDecLabelPairs &scores_labels, bool qvalue, bool higher_score_better) const
FalseDiscoveryRate(const FalseDiscoveryRate &)
Not implemented.
void applyBasicPeptideLevel(PeptideIdentificationList &ids)
double applyEvaluateProteinIDs(ScoreToTgtDecLabelPairs &score_to_tgt_dec_fraction_pairs, double pepCutoff=1.0, UInt fpCutoff=50, double diffWeight=0.2) const
Calculate a linear combination of the area of the difference in estimated vs. empirical (TD) FDR and ...
void apply(std::vector< ProteinIdentification > &ids) const
Calculates the FDR of one run from a concatenated sequence DB search.
double rocN(const ConsensusMap &ids, Size fp_cutoff, bool include_unassigned_peptides=false) const
double trapezoidal_area_xEqy(double exp1, double exp2, double act1, double act2) const
void handleObservationMatch_(IdentificationData::ObservationMatchRef match_ref, IdentificationData::ScoreTypeRef score_ref, std::vector< double > &target_scores, std::vector< double > &decoy_scores, std::map< IdentificationData::IdentifiedMolecule, bool > &molecule_to_decoy, std::map< IdentificationData::ObservationMatchRef, double > &match_to_score) const
Helper function for applyToObservationMatches()
void applyBasic(const std::vector< ProteinIdentification > &run_info, PeptideIdentificationList &ids)
simpler reimplementation of the apply function above for PSMs. With charge and identifier info from r...
void calculateFDRs_(std::map< double, double > &score_to_fdr, std::vector< double > &target_scores, std::vector< double > &decoy_scores, bool q_value, bool higher_score_better) const
calculates the FDR, given two vectors of scores
double rocN(const PeptideIdentificationList &ids, Size fp_cutoff, const String &identifier) const
double trapezoidal_area(double x1, double x2, double y1, double y2) const
Calculates the trapezoidal area for a trapezoid with a flat horizontal base, e.g. for an AUC.
void applyBasic(ProteinIdentification &id, bool groups_too=true)
simpler reimplementation of the apply function above for proteins.
double applyEvaluateProteinIDs(const ProteinIdentification &ids, double pepCutoff=1.0, UInt fpCutoff=50, double diffWeight=0.2) const
Calculate a linear combination of the area of the difference in estimated vs. empirical (TD) FDR and ...
double rocN(const ScoreToTgtDecLabelPairs &scores_labels, Size fpCutoff=50) const
void calculateEstimatedQVal_(std::map< double, double > &scores_to_FDR, ScoreToTgtDecLabelPairs &scores_labels, bool higher_score_better) const
void apply(PeptideIdentificationList &fwd_ids, PeptideIdentificationList &rev_ids) const
Calculates the FDR of two runs, a forward run and a decoy run on peptide level.
double rocN(const PeptideIdentificationList &ids, Size fp_cutoff) const
Definition: IdentificationData.h:87
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type, e.g. used for a variable that can hold the result of size().
Definition: Types.h:97
static String prefix(const String &this_s, size_t length)
Definition: StringUtilsSimple.h:122
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20
Definition: IDScoreGetterSetter.h:32