OpenMS
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
FeatureFinderIdentificationAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
19 
20 #include <vector>
21 #include <fstream>
22 #include <map>
23 
24 namespace OpenMS {
26  public DefaultParamHandler
27 {
28 public:
31 
44  void run(
45  std::vector<PeptideIdentification> peptides,
46  const std::vector<ProteinIdentification>& proteins,
47  std::vector<PeptideIdentification> peptides_ext,
48  std::vector<ProteinIdentification> proteins_ext,
49  FeatureMap& features,
50  const FeatureMap& seeds = FeatureMap(),
51  const String& spectra_file = ""
52  );
53 
54  void runOnCandidates(FeatureMap& features);
55 
57  const PeakMap& getMSData() const;
58 
60  void setMSData(const PeakMap& ms_data); // for pyOpenMS
61  void setMSData(PeakMap&& ms_data); // moves peak data and saves the copy. Note that getMSData() will give back a processed/modified version.
62 
64  const PeakMap& getChromatograms() const;
65 
68 
71 
72 protected:
73 
76 
78  typedef std::multimap<double, PeptideIdentification*> RTMap;
80  typedef std::map<Int, std::pair<RTMap, RTMap> > ChargeMap;
82  typedef std::map<AASequence, ChargeMap> PeptideMap;
84  typedef std::map<String, std::pair<RTMap, RTMap> > PeptideRefRTMap;
85 
87 
90 
92  double rt_window_;
93  double mz_window_;
95 
97 
98  double isotope_pmin_;
100 
101  double rt_quantile_;
102 
103  double peak_width_;
106 
108 
109  // SVM related parameters
116 
117  // output file (before filtering)
119 
121 
122  void updateMembers_() override;
123 
125  struct RTRegion
126  {
127  double start, end;
129  };
130 
133  {
134  bool operator()(const Feature& feature)
135  {
136  return feature.getOverallQuality() == 0.0;
137  }
138  } feature_filter_quality_;
139 
142  {
143  bool operator()(const Feature& feature)
144  {
145  return feature.getPeptideIdentifications().empty();
146  }
147  } feature_filter_peptides_;
148 
151  {
153  const PeptideIdentification& p2)
154  {
155  const String& seq1 = p1.getHits()[0].getSequence().toString();
156  const String& seq2 = p2.getHits()[0].getSequence().toString();
157  if (seq1 == seq2)
158  {
159  Int charge1 = p1.getHits()[0].getCharge();
160  Int charge2 = p2.getHits()[0].getCharge();
161  if (charge1 == charge2)
162  {
163  return p1.getRT() < p2.getRT();
164  }
165  return charge1 < charge2;
166  }
167  return seq1 < seq2;
168  }
169  } peptide_compare_;
170 
173  {
174  bool operator()(const Feature& f1, const Feature& f2)
175  {
176  const String& ref1 = f1.getMetaValue("PeptideRef");
177  const String& ref2 = f2.getMetaValue("PeptideRef");
178  if (ref1 == ref2)
179  {
180  return f1.getRT() < f2.getRT();
181  }
182  return ref1 < ref2;
183  }
184  } feature_compare_;
185 
189 
191  double add_mass_offset_peptides_{0.0};
194  std::vector<PeptideIdentification> unassignedIDs_;
195 
196  const double seed_rt_window_ = 60.0;
197 
199  std::map<double, std::pair<Size, Size> > svm_probs_internal_;
201  std::multiset<double> svm_probs_external_;
205  std::map<String, double> isotope_probs_;
208 
210 
212  void generateTransitions_(const String& peptide_id, double mz, Int charge,
213  const IsotopeDistribution& iso_dist);
214 
215  void addPeptideRT_(TargetedExperiment::Peptide& peptide, double rt) const;
216 
218  void getRTRegions_(ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions, bool clear_IDs = true) const;
219 
221  FeatureMap& features,
222  std::map<Size, std::vector<PeptideIdentification*> >& feat_ids,
223  RTMap& rt_internal);
224 
226  void annotateFeatures_(FeatureMap& features, PeptideRefRTMap& ref_rt_map);
227 
228  void ensureConvexHulls_(Feature& feature) const;
229 
230  void postProcess_(FeatureMap& features, bool with_external_ids);
231 
233  void statistics_(const FeatureMap& features) const;
234 
238  void createAssayLibrary_(const PeptideMap::iterator& begin, const PeptideMap::iterator& end, PeptideRefRTMap& ref_rt_map, bool clear_IDs = true);
239 
244  PeptideMap& peptide_map,
245  bool external = false);
246 
247  void filterFeatures_(FeatureMap& features, bool classified);
248 
249  // seeds for untargeted extraction
250  Size addSeeds_(std::vector<PeptideIdentification>& peptides, const FeatureMap& seeds);
251 
252  // quant. decoys
253  Size addOffsetPeptides_(std::vector<PeptideIdentification>& peptides, double offset);
254 
/**
  @brief Splits the iterator range [range_from, range_to) into consecutive batches.

  The range is divided into floor(total / batch_size) batches of @p batch_size
  elements each, where total is the distance between the two iterators. The
  last batch always ends at @p range_to, i.e. it additionally absorbs the
  remainder of the division. If the range holds fewer than @p batch_size
  elements (including the empty range), a single batch covering the whole
  range is returned.

  @param range_from Iterator to the first element of the range.
  @param range_to   Iterator one past the last element of the range.
  @param batch_size Number of elements per batch. Values <= 0 cannot be used
                    as a divisor / element count and are treated as
                    "no batching": one batch spanning the whole range.

  @return Vector of [first, last) iterator pairs, in order; never empty.
*/
template <typename It>
std::vector<std::pair<It, It>>
chunk_(It range_from, It range_to, const std::ptrdiff_t batch_size)
{
  using diff_t = std::ptrdiff_t;
  using Chunks = std::vector<std::pair<It, It>>;

  Chunks chunks;

  // Guard against division by zero (batch_size == 0) and a negative batch
  // count (batch_size < 0): fall back to a single batch.
  if (batch_size <= 0)
  {
    chunks.emplace_back(range_from, range_to);
    return chunks;
  }

  // Number of batches; integer division drops the remainder, which is
  // added to the last batch below.
  const diff_t total = std::distance(range_from, range_to);
  const diff_t num = total / batch_size;

  // Fewer elements than one full batch: everything goes into one batch.
  if (num <= 0)
  {
    chunks.emplace_back(range_from, range_to);
    return chunks;
  }

  chunks.reserve(static_cast<typename Chunks::size_type>(num));

  // All batches but the last contain exactly 'batch_size' elements.
  It batch_start = range_from;
  for (diff_t i = 1; i < num; ++i)
  {
    It batch_end = batch_start;
    std::advance(batch_end, batch_size);
    chunks.emplace_back(batch_start, batch_end);
    batch_start = batch_end;
  }

  // The last batch's end must always be 'range_to'.
  chunks.emplace_back(batch_start, range_to);

  return chunks;
}
297 }; // class FeatureFinderIdentificationAlgorithm
298 } // namespace OpenMS
299 
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Definition: FeatureFinderIdentificationAlgorithm.h:27
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition: FeatureFinderIdentificationAlgorithm.h:75
void postProcess_(FeatureMap &features, bool with_external_ids)
double rt_window_
RT window width.
Definition: FeatureFinderIdentificationAlgorithm.h:92
void getRTRegions_(ChargeMap &peptide_data, std::vector< RTRegion > &rt_regions, bool clear_IDs=true) const
get regions in which peptide elutes (ideally only one) by clustering RT elution times
void runOnCandidates(FeatureMap &features)
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition: FeatureFinderIdentificationAlgorithm.h:82
const TargetedExperiment & getLibrary() const
void run(std::vector< PeptideIdentification > peptides, const std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > peptides_ext, std::vector< ProteinIdentification > proteins_ext, FeatureMap &features, const FeatureMap &seeds=FeatureMap(), const String &spectra_file="")
void createAssayLibrary_(const PeptideMap::iterator &begin, const PeptideMap::iterator &end, PeptideRefRTMap &ref_rt_map, bool clear_IDs=true)
String svm_xval_out_
Definition: FeatureFinderIdentificationAlgorithm.h:112
String elution_model_
Definition: FeatureFinderIdentificationAlgorithm.h:107
PeptideMap peptide_map_
Definition: FeatureFinderIdentificationAlgorithm.h:86
double end
Definition: FeatureFinderIdentificationAlgorithm.h:127
Internal::FFIDAlgoExternalIDHandler external_id_handler_
Handler for external peptide IDs.
Definition: FeatureFinderIdentificationAlgorithm.h:207
MRMFeatureFinderScoring feat_finder_
OpenSWATH feature finder.
Definition: FeatureFinderIdentificationAlgorithm.h:206
FeatureFinderIdentificationAlgorithm()
default constructor
void filterFeatures_(FeatureMap &features, bool classified)
Size n_external_peps_
number of external peptides
Definition: FeatureFinderIdentificationAlgorithm.h:89
double signal_to_noise_
Definition: FeatureFinderIdentificationAlgorithm.h:105
void generateTransitions_(const String &peptide_id, double mz, Int charge, const IsotopeDistribution &iso_dist)
generate transitions (isotopic traces) for a peptide ion and add them to the library:
const ProgressLogger & getProgressLogger() const
bool quantify_decoys_
Definition: FeatureFinderIdentificationAlgorithm.h:190
TargetedExperiment library_
accumulated assays for peptides
Definition: FeatureFinderIdentificationAlgorithm.h:188
void annotateFeaturesFinalizeAssay_(FeatureMap &features, std::map< Size, std::vector< PeptideIdentification * > > &feat_ids, RTMap &rt_internal)
double min_peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:104
StringList svm_predictor_names_
Definition: FeatureFinderIdentificationAlgorithm.h:111
std::vector< PeptideIdentification > unassignedIDs_
Definition: FeatureFinderIdentificationAlgorithm.h:194
void setMSData(const PeakMap &ms_data)
set the MS data used for feature detection
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:80
Size svm_n_parts_
number of partitions for SVM cross-validation
Definition: FeatureFinderIdentificationAlgorithm.h:114
Size svm_n_samples_
number of samples for SVM training
Definition: FeatureFinderIdentificationAlgorithm.h:115
Size n_internal_peps_
number of internal peptides
Definition: FeatureFinderIdentificationAlgorithm.h:88
Size addSeeds_(std::vector< PeptideIdentification > &peptides, const FeatureMap &seeds)
Size n_internal_features_
internal feature counter (for FDR calculation)
Definition: FeatureFinderIdentificationAlgorithm.h:202
ProgressLogger prog_log_
Definition: FeatureFinderIdentificationAlgorithm.h:209
PeakMap ms_data_
input LC-MS data
Definition: FeatureFinderIdentificationAlgorithm.h:186
void statistics_(const FeatureMap &features) const
some statistics on detected features
Size addOffsetPeptides_(std::vector< PeptideIdentification > &peptides, double offset)
Size batch_size_
number of peptides to use at the same time during chromatogram extraction
Definition: FeatureFinderIdentificationAlgorithm.h:91
double mz_window_
m/z window width
Definition: FeatureFinderIdentificationAlgorithm.h:93
double svm_min_prob_
Definition: FeatureFinderIdentificationAlgorithm.h:110
std::map< String, double > isotope_probs_
TransformationDescription trafo_; // RT transformation (to range 0-1)
Definition: FeatureFinderIdentificationAlgorithm.h:205
double peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:103
Size n_external_features_
Definition: FeatureFinderIdentificationAlgorithm.h:203
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition: FeatureFinderIdentificationAlgorithm.h:74
Size n_isotopes_
number of isotopes for peptide assay
Definition: FeatureFinderIdentificationAlgorithm.h:99
double mapping_tolerance_
RT tolerance for mapping IDs to features.
Definition: FeatureFinderIdentificationAlgorithm.h:96
std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:84
Size debug_level_
Definition: FeatureFinderIdentificationAlgorithm.h:120
double rt_quantile_
Definition: FeatureFinderIdentificationAlgorithm.h:101
std::vector< std::pair< It, It > > chunk_(It range_from, It range_to, const std::ptrdiff_t batch_size)
Definition: FeatureFinderIdentificationAlgorithm.h:259
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:201
ChargeMap ids
internal/external peptide IDs (per charge) in this region
Definition: FeatureFinderIdentificationAlgorithm.h:128
bool mz_window_ppm_
m/z window width is given in PPM (not Da)?
Definition: FeatureFinderIdentificationAlgorithm.h:94
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
bool use_psm_cutoff_
Definition: FeatureFinderIdentificationAlgorithm.h:192
void addPeptideRT_(TargetedExperiment::Peptide &peptide, double rt) const
PeakMap chrom_data_
accumulated chromatograms (XICs)
Definition: FeatureFinderIdentificationAlgorithm.h:187
void ensureConvexHulls_(Feature &feature) const
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition: FeatureFinderIdentificationAlgorithm.h:78
void addPeptideToMap_(PeptideIdentification &peptide, PeptideMap &peptide_map, bool external=false)
double psm_score_cutoff_
Definition: FeatureFinderIdentificationAlgorithm.h:193
void annotateFeatures_(FeatureMap &features, PeptideRefRTMap &ref_rt_map)
annotate identified features with m/z, isotope probabilities, etc.
String candidates_out_
Definition: FeatureFinderIdentificationAlgorithm.h:118
double isotope_pmin_
min. isotope probability for peptide assay
Definition: FeatureFinderIdentificationAlgorithm.h:98
double svm_quality_cutoff
Definition: FeatureFinderIdentificationAlgorithm.h:113
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:199
region in RT in which a peptide elutes:
Definition: FeatureFinderIdentificationAlgorithm.h:126
A container for features.
Definition: FeatureMap.h:80
An LC-MS feature.
Definition: Feature.h:46
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
Class for handling external peptide identifications in feature finding.
Definition: FFIDAlgoExternalIDHandler.h:37
Definition: IsotopeDistribution.h:39
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition: MRMFeatureFinderScoring.h:65
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:49
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:184
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition: PeptideIdentification.h:63
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
A more convenient string class.
Definition: String.h:34
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:333
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:39
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:54
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:85
comparison functor for features
Definition: FeatureFinderIdentificationAlgorithm.h:173
bool operator()(const Feature &f1, const Feature &f2)
Definition: FeatureFinderIdentificationAlgorithm.h:174
predicate for filtering features by assigned peptides:
Definition: FeatureFinderIdentificationAlgorithm.h:142
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:143
predicate for filtering features by overall quality:
Definition: FeatureFinderIdentificationAlgorithm.h:133
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:134
comparison functor for (unassigned) peptide IDs
Definition: FeatureFinderIdentificationAlgorithm.h:151
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition: FeatureFinderIdentificationAlgorithm.h:152