All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
FFIDAlgoExternalIDHandler.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 #include <OpenMS/KERNEL/Feature.h>
21 #include <OpenMS/CONCEPT/Types.h>
22 
23 #include <map>
24 #include <set>
25 #include <vector>
26 
27 namespace OpenMS
28 {
29 namespace Internal
30 {
37  class OPENMS_DLLAPI FFIDAlgoExternalIDHandler
38  {
39  public:
41  typedef std::multimap<double, PeptideIdentification*> ExternalRTMap;
42 
44  typedef std::map<Int, ExternalRTMap> ExternalChargeMap;
45 
47  typedef std::map<AASequence, ExternalChargeMap> ExternalPeptideMap;
48 
51 
53  void reset();
54 
57 
60 
63  const PeptideIdentificationList& peptides_internal,
64  const PeptideIdentificationList& peptides_external,
65  double rt_quantile);
66 
68  double transformRT(double rt) const;
69 
71  bool hasRTTransformation() const;
72 
75 
77  void classifyFeaturesWithSVM(FeatureMap& features, const Param& param);
78 
80  void filterClassifiedFeatures(FeatureMap& features, double quality_cutoff);
81 
83  void calculateFDR(FeatureMap& features);
84 
86  const std::map<double, std::pair<Size, Size> >& getSVMProbsInternal() const;
87 
88  private:
91  std::map<AASequence,
92  std::map<Int, std::pair<std::multimap<double, PeptideIdentification*>,
93  std::multimap<double, PeptideIdentification*>>>>& peptide_map);
94 
96  bool fillExternalRTMap_(const AASequence& sequence, Int charge,
97  std::multimap<double, PeptideIdentification*>& rt_map);
98 
101 
103  void initSVMParameters_(const Param& param);
104 
106  void finalizeAssayFeatures_(Feature& best_feature, double best_quality, double quality_cutoff);
107 
109  void getRandomSample_(std::map<Size, double>& training_labels);
110 
112  void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const;
113 
115  void getUnbiasedSample_(const std::multimap<double, std::pair<Size, bool> >& valid_obs,
116  std::map<Size, double>& training_labels);
117 
119  void addDummyPeptideID_(Feature& feature, const PeptideIdentification* ext_id);
120 
122  void handleExternalFeature_(Feature& feature, double prob_positive, double quality_cutoff);
123 
125  void adjustFDRForExternalFeatures_(std::vector<double>& fdr_probs,
126  std::vector<double>& fdr_qvalues,
127  Size n_internal_features);
128 
131 
134 
137 
140 
142  std::multiset<double> svm_probs_external_;
143 
145  std::map<double, std::pair<Size, Size> > svm_probs_internal_;
146 
149 
152 
155 
158 
160  std::vector<String> svm_predictor_names_;
161 
164 
167 
170  };
171 
172 } // namespace Internal
173 } // namespace OpenMS
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
A container for features.
Definition: FeatureMap.h:82
An LC-MS feature.
Definition: Feature.h:46
Class for handling external peptide identifications in feature finding.
Definition: FFIDAlgoExternalIDHandler.h:38
String svm_xval_out_
SVM cross-validation output file.
Definition: FFIDAlgoExternalIDHandler.h:163
std::vector< String > svm_predictor_names_
SVM predictor names.
Definition: FFIDAlgoExternalIDHandler.h:160
std::map< Int, ExternalRTMap > ExternalChargeMap
Charge to External RTMap mapping.
Definition: FFIDAlgoExternalIDHandler.h:44
bool hasRTTransformation() const
Check if we have RT transformation data.
void addDummyPeptideID_(Feature &feature, const PeptideIdentification *ext_id)
Add dummy peptide identification from external data.
void classifyFeaturesWithSVM(FeatureMap &features, const Param &param)
Classify features using SVM.
void annotateFeatureWithExternalIDs_(Feature &feature)
Check and set feature class based on external data.
void getUnbiasedSample_(const std::multimap< double, std::pair< Size, bool > > &valid_obs, std::map< Size, double > &training_labels)
Get unbiased sample for SVM training.
ExternalPeptideMap external_peptide_map_
External peptide storage.
Definition: FFIDAlgoExternalIDHandler.h:130
Size svm_n_parts_
SVM number of parts for cross-validation.
Definition: FFIDAlgoExternalIDHandler.h:148
Size svm_n_samples_
SVM number of samples for training.
Definition: FFIDAlgoExternalIDHandler.h:151
double alignInternalAndExternalIDs(const PeptideIdentificationList &peptides_internal, const PeptideIdentificationList &peptides_external, double rt_quantile)
Align internal and external IDs to estimate RT shifts and return RT uncertainty.
void adjustFDRForExternalFeatures_(std::vector< double > &fdr_probs, std::vector< double > &fdr_qvalues, Size n_internal_features)
Adjust FDR calculation for external features.
void calculateFDR(FeatureMap &features)
Calculate FDR for classified features.
Size n_internal_features_
Number of internal features.
Definition: FFIDAlgoExternalIDHandler.h:169
void filterClassifiedFeatures(FeatureMap &features, double quality_cutoff)
Filter classified features.
bool fillExternalRTMap_(const AASequence &sequence, Int charge, std::multimap< double, PeptideIdentification * > &rt_map)
Fill an external RTMap from our data for a specific peptide and charge.
const std::map< double, std::pair< Size, Size > > & getSVMProbsInternal() const
Get SVM probabilities for internal features.
std::multimap< double, PeptideIdentification * > ExternalRTMap
RTMap for external data structure storage.
Definition: FFIDAlgoExternalIDHandler.h:41
Size n_external_peptides_
Number of external peptides.
Definition: FFIDAlgoExternalIDHandler.h:136
void addExternalPeptideToMap_(PeptideIdentification &peptide, std::map< AASequence, std::map< Int, std::pair< std::multimap< double, PeptideIdentification * >, std::multimap< double, PeptideIdentification * >>>> &peptide_map)
Add external peptide to charge map (merged version for compatibility).
double svm_min_prob_
SVM minimum probability threshold.
Definition: FFIDAlgoExternalIDHandler.h:154
Int debug_level_
Debug level.
Definition: FFIDAlgoExternalIDHandler.h:166
void checkNumObservations_(Size n_pos, Size n_neg, const String &note="") const
Check observation counts for SVM.
Size n_external_features_
Number of external features.
Definition: FFIDAlgoExternalIDHandler.h:139
double transformRT(double rt) const
Transform RT from internal to external scale.
FFIDAlgoExternalIDHandler()
Default constructor.
void handleExternalFeature_(Feature &feature, double prob_positive, double quality_cutoff)
Handle external feature probability.
std::multiset< double > svm_probs_external_
SVM probabilities for external features.
Definition: FFIDAlgoExternalIDHandler.h:142
std::map< AASequence, ExternalChargeMap > ExternalPeptideMap
Sequence to External Charge Map mapping.
Definition: FFIDAlgoExternalIDHandler.h:47
void processExternalPeptides(PeptideIdentificationList &peptides_ext)
Process external peptide IDs.
const TransformationDescription & getRTTransformation() const
Get the RT transformation.
void reset()
Reset the handler's state.
void initSVMParameters_(const Param &param)
Initialize SVM parameters.
void addExternalPeptide(PeptideIdentification &peptide)
Add an external peptide to the handler's map.
void finalizeAssayFeatures_(Feature &best_feature, double best_quality, double quality_cutoff)
Finalize assay features.
TransformationDescription rt_transformation_
RT transformation description.
Definition: FFIDAlgoExternalIDHandler.h:133
void getRandomSample_(std::map< Size, double > &training_labels)
Get random sample for SVM training.
double svm_quality_cutoff
SVM quality cutoff.
Definition: FFIDAlgoExternalIDHandler.h:157
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probabilities for internal features.
Definition: FFIDAlgoExternalIDHandler.h:145
Management and storage of parameters / INI files.
Definition: Param.h:44
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition: PeptideIdentification.h:63
A more convenient string class.
Definition: String.h:34
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:37
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19