OpenMS
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
FFIDAlgoExternalIDHandler.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 #include <OpenMS/KERNEL/Feature.h>
20 #include <OpenMS/CONCEPT/Types.h>
21 
22 #include <map>
23 #include <set>
24 #include <vector>
25 
26 namespace OpenMS
27 {
28 namespace Internal
29 {
/// Class for handling external peptide identifications in feature finding.
///
/// NOTE(review): this is a line-numbered Doxygen source listing; comment lines
/// were stripped and some declaration lines are missing from the extract. The
/// notes below mark each missing line, using the symbol index at the end of
/// this page as the source for what belongs there.
36  class OPENMS_DLLAPI FFIDAlgoExternalIDHandler
37  {
38  public:
    /// RTMap for external data structure storage.
    // NOTE(review): the map holds raw PeptideIdentification pointers; who owns
    // the pointees and how long they live is not visible here — TODO confirm.
40  typedef std::multimap<double, PeptideIdentification*> ExternalRTMap;
41 
    /// Charge to External RTMap mapping.
43  typedef std::map<Int, ExternalRTMap> ExternalChargeMap;
44 
    /// Sequence to External Charge Map mapping.
46  typedef std::map<AASequence, ExternalChargeMap> ExternalPeptideMap;
47 
    // NOTE(review): listing lines 48-49 are missing. The symbol index documents
    // a default constructor 'FFIDAlgoExternalIDHandler()'; presumably it is
    // declared here — confirm against the original header.
50 
    /// Reset the handler's state.
52  void reset();
53 
    // NOTE(review): listing lines 54-55 are missing. The symbol index documents
    // 'void addExternalPeptide(PeptideIdentification& peptide)' ("Add an
    // external peptide to the handler's map"); presumably declared here — confirm.
56 
    /// Process external peptide IDs.
58  void processExternalPeptides(std::vector<PeptideIdentification>& peptides_ext);
59 
    /// Align internal and external IDs to estimate RT shifts and return RT uncertainty.
    // NOTE(review): listing line 61, the opening of this declaration, is missing.
    // Per the symbol index the full signature is
    // 'double alignInternalAndExternalIDs(peptides_internal, peptides_external, rt_quantile)';
    // the three lines below are its parameter list.
62  const std::vector<PeptideIdentification>& peptides_internal,
63  const std::vector<PeptideIdentification>& peptides_external,
64  double rt_quantile);
65 
    /// Transform RT from internal to external scale.
67  double transformRT(double rt) const;
68 
    /// Check if we have RT transformation data.
70  bool hasRTTransformation() const;
71 
    // NOTE(review): listing lines 72-73 are missing. The symbol index documents
    // 'const TransformationDescription& getRTTransformation() const' ("Get the
    // RT transformation"); presumably declared here — confirm.
74 
    /// Classify features using SVM.
76  void classifyFeaturesWithSVM(FeatureMap& features, const Param& param);
77 
    /// Filter classified features.
79  void filterClassifiedFeatures(FeatureMap& features, double quality_cutoff);
80 
    /// Calculate FDR for classified features.
82  void calculateFDR(FeatureMap& features);
83 
    /// Get SVM probabilities for internal features.
85  const std::map<double, std::pair<Size, Size> >& getSVMProbsInternal() const;
86 
87  private:
    /// Add external peptide to charge map (merged version for compatibility).
    // NOTE(review): listing line 89, the opening of this declaration, is missing.
    // Per the symbol index it reads
    // 'void addExternalPeptideToMap_(PeptideIdentification& peptide, ...)';
    // the three lines below are the trailing 'peptide_map' parameter.
90  std::map<AASequence,
91  std::map<Int, std::pair<std::multimap<double, PeptideIdentification*>,
92  std::multimap<double, PeptideIdentification*>>>>& peptide_map);
93 
    /// Fill an external RTMap from our data for a specific peptide and charge.
95  bool fillExternalRTMap_(const AASequence& sequence, Int charge,
96  std::multimap<double, PeptideIdentification*>& rt_map);
97 
    // NOTE(review): listing lines 98-99 are missing. The symbol index documents
    // 'void annotateFeatureWithExternalIDs_(Feature& feature)' ("Check and set
    // feature class based on external data"); presumably declared here — confirm.
100 
    /// Initialize SVM parameters.
102  void initSVMParameters_(const Param& param);
103 
    /// Finalize assay features.
105  void finalizeAssayFeatures_(Feature& best_feature, double best_quality, double quality_cutoff);
106 
    /// Get random sample for SVM training.
108  void getRandomSample_(std::map<Size, double>& training_labels);
109 
    /// Check observation counts for SVM.
111  void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const;
112 
    /// Get unbiased sample for SVM training.
114  void getUnbiasedSample_(const std::multimap<double, std::pair<Size, bool> >& valid_obs,
115  std::map<Size, double>& training_labels);
116 
    /// Add dummy peptide identification from external data.
118  void addDummyPeptideID_(Feature& feature, const PeptideIdentification* ext_id);
119 
    /// Handle external feature probability.
121  void handleExternalFeature_(Feature& feature, double prob_positive, double quality_cutoff);
122 
    /// Adjust FDR calculation for external features.
124  void adjustFDRForExternalFeatures_(std::vector<double>& fdr_probs,
125  std::vector<double>& fdr_qvalues,
126  Size n_internal_features);
127 
    // NOTE(review): listing line 129 is missing; the symbol index places
    // 'ExternalPeptideMap external_peptide_map_' (external peptide storage) there.
130 
    // NOTE(review): listing line 132 is missing; the symbol index places
    // 'TransformationDescription rt_transformation_' (RT transformation description) there.
133 
    // NOTE(review): listing line 135 is missing; the symbol index places
    // 'Size n_external_peptides_' (number of external peptides) there.
136 
    // NOTE(review): listing line 138 is missing; the symbol index places
    // 'Size n_external_features_' (number of external features) there.
139 
    /// SVM probabilities for external features.
141  std::multiset<double> svm_probs_external_;
142 
    /// SVM probabilities for internal features.
144  std::map<double, std::pair<Size, Size> > svm_probs_internal_;
145 
    // NOTE(review): listing line 147 is missing; the symbol index places
    // 'Size svm_n_parts_' (SVM number of parts for cross-validation) there.
148 
    // NOTE(review): listing line 150 is missing; the symbol index places
    // 'Size svm_n_samples_' (SVM number of samples for training) there.
151 
    // NOTE(review): listing line 153 is missing; the symbol index places
    // 'double svm_min_prob_' (SVM minimum probability threshold) there.
154 
    // NOTE(review): listing line 156 is missing; the symbol index places
    // 'double svm_quality_cutoff' (SVM quality cutoff) there. The index spells
    // this name without the trailing underscore the other members use.
157 
    /// SVM predictor names.
159  std::vector<String> svm_predictor_names_;
160 
    // NOTE(review): listing line 162 is missing; the symbol index places
    // 'String svm_xval_out_' (SVM cross-validation output file) there.
163 
    // NOTE(review): listing line 165 is missing; the symbol index places
    // 'Int debug_level_' (debug level) there.
166 
    // NOTE(review): listing line 168 is missing; the symbol index places
    // 'Size n_internal_features_' (number of internal features) there.
169  };
170 
171 } // namespace Internal
172 } // namespace OpenMS
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
A container for features.
Definition: FeatureMap.h:80
An LC-MS feature.
Definition: Feature.h:46
Class for handling external peptide identifications in feature finding.
Definition: FFIDAlgoExternalIDHandler.h:37
double alignInternalAndExternalIDs(const std::vector< PeptideIdentification > &peptides_internal, const std::vector< PeptideIdentification > &peptides_external, double rt_quantile)
Align internal and external IDs to estimate RT shifts and return RT uncertainty.
String svm_xval_out_
SVM cross-validation output file.
Definition: FFIDAlgoExternalIDHandler.h:162
std::vector< String > svm_predictor_names_
SVM predictor names.
Definition: FFIDAlgoExternalIDHandler.h:159
std::map< Int, ExternalRTMap > ExternalChargeMap
Charge to External RTMap mapping.
Definition: FFIDAlgoExternalIDHandler.h:43
bool hasRTTransformation() const
Check if we have RT transformation data.
void addDummyPeptideID_(Feature &feature, const PeptideIdentification *ext_id)
Add dummy peptide identification from external data.
void classifyFeaturesWithSVM(FeatureMap &features, const Param &param)
Classify features using SVM.
void annotateFeatureWithExternalIDs_(Feature &feature)
Check and set feature class based on external data.
void getUnbiasedSample_(const std::multimap< double, std::pair< Size, bool > > &valid_obs, std::map< Size, double > &training_labels)
Get unbiased sample for SVM training.
ExternalPeptideMap external_peptide_map_
External peptide storage.
Definition: FFIDAlgoExternalIDHandler.h:129
Size svm_n_parts_
SVM number of parts for cross-validation.
Definition: FFIDAlgoExternalIDHandler.h:147
Size svm_n_samples_
SVM number of samples for training.
Definition: FFIDAlgoExternalIDHandler.h:150
void adjustFDRForExternalFeatures_(std::vector< double > &fdr_probs, std::vector< double > &fdr_qvalues, Size n_internal_features)
Adjust FDR calculation for external features.
void processExternalPeptides(std::vector< PeptideIdentification > &peptides_ext)
Process external peptide IDs.
void calculateFDR(FeatureMap &features)
Calculate FDR for classified features.
Size n_internal_features_
Number of internal features.
Definition: FFIDAlgoExternalIDHandler.h:168
void filterClassifiedFeatures(FeatureMap &features, double quality_cutoff)
Filter classified features.
bool fillExternalRTMap_(const AASequence &sequence, Int charge, std::multimap< double, PeptideIdentification * > &rt_map)
Fill an external RTMap from our data for a specific peptide and charge.
const std::map< double, std::pair< Size, Size > > & getSVMProbsInternal() const
Get SVM probabilities for internal features.
std::multimap< double, PeptideIdentification * > ExternalRTMap
RTMap for external data structure storage.
Definition: FFIDAlgoExternalIDHandler.h:40
Size n_external_peptides_
Number of external peptides.
Definition: FFIDAlgoExternalIDHandler.h:135
void addExternalPeptideToMap_(PeptideIdentification &peptide, std::map< AASequence, std::map< Int, std::pair< std::multimap< double, PeptideIdentification * >, std::multimap< double, PeptideIdentification * >>>> &peptide_map)
Add external peptide to charge map (merged version for compatibility)
double svm_min_prob_
SVM minimum probability threshold.
Definition: FFIDAlgoExternalIDHandler.h:153
Int debug_level_
Debug level.
Definition: FFIDAlgoExternalIDHandler.h:165
void checkNumObservations_(Size n_pos, Size n_neg, const String &note="") const
Check observation counts for SVM.
Size n_external_features_
Number of external features.
Definition: FFIDAlgoExternalIDHandler.h:138
double transformRT(double rt) const
Transform RT from internal to external scale.
FFIDAlgoExternalIDHandler()
Default constructor.
void handleExternalFeature_(Feature &feature, double prob_positive, double quality_cutoff)
Handle external feature probability.
std::multiset< double > svm_probs_external_
SVM probabilities for external features.
Definition: FFIDAlgoExternalIDHandler.h:141
std::map< AASequence, ExternalChargeMap > ExternalPeptideMap
Sequence to External Charge Map mapping.
Definition: FFIDAlgoExternalIDHandler.h:46
const TransformationDescription & getRTTransformation() const
Get the RT transformation.
void reset()
Reset the handler's state.
void initSVMParameters_(const Param &param)
Initialize SVM parameters.
void addExternalPeptide(PeptideIdentification &peptide)
Add an external peptide to the handler's map.
void finalizeAssayFeatures_(Feature &best_feature, double best_quality, double quality_cutoff)
Finalize assay features.
TransformationDescription rt_transformation_
RT transformation description.
Definition: FFIDAlgoExternalIDHandler.h:132
void getRandomSample_(std::map< Size, double > &training_labels)
Get random sample for SVM training.
double svm_quality_cutoff
SVM quality cutoff.
Definition: FFIDAlgoExternalIDHandler.h:156
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probabilities for internal features.
Definition: FFIDAlgoExternalIDHandler.h:144
Management and storage of parameters / INI files.
Definition: Param.h:44
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition: PeptideIdentification.h:63
A more convenient string class.
Definition: String.h:34
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:37
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19