OpenMS
FragmentIndex.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: $
6 // $Authors: $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 #include <OpenMS/KERNEL/Peak1D.h>
16 
17 
18 #include <vector>
19 #include <functional>
20 
21 namespace OpenMS
22 {
29  class OPENMS_DLLAPI FragmentIndex : public DefaultParamHandler
30  {
31  public:
32 
33 
45  struct Peptide { // Compact descriptor of a peptide instance held by the FragmentIndex.
46 
47  // An explicit constructor is needed so that instances can be constructed in place (emplace back).
48  Peptide(UInt32 protein_idx, UInt32 modification_idx, std::pair<uint16_t , uint16_t> sequence, float precursor_mz):
49  protein_idx(protein_idx), // 0-based index into the FASTA entries provided to build(); identifies the source protein
50  modification_idx_(modification_idx), // index into the variant list produced by ModifiedPeptideGenerator for this subsequence (0 = unmodified)
51  sequence_(sequence),
52  precursor_mz_(precursor_mz)
53  {}
54  // (the declarations of protein_idx and modification_idx_, listing lines 55-56, are not shown in this listing)
57  std::pair<uint16_t , uint16_t> sequence_; // {start, length} within the source protein sequence (start is 0-based; length in residues)
58  float precursor_mz_; // mono-isotopic m/z at charge 1 (M+H)+ of this peptide; used for sorting/filtering
59  };
60 
65  { // body of a struct whose header line is not shown here (presumably SpectrumMatch): match between a query peak and an entry in the DB
66  uint32_t num_matched_{}; // value-initialized to 0; presumably the number of matched peaks -- TODO confirm
67  uint16_t precursor_charge_{}; // value-initialized to 0
68  int16_t isotope_error_{}; // signed, value-initialized to 0
69  size_t peptide_idx_{}; // value-initialized to 0; presumably an index into the peptide container -- TODO confirm
70  };
71 
72 
77  { // body of struct SpectrumMatchesTopN (header line not shown in this listing): container for SpectrumMatch
78  std::vector<SpectrumMatch> hits_; // the preliminary candidates
79 
80 
81  SpectrumMatchesTopN() = default; // hits_ starts out empty
82 
90  { // body of operator+=(const SpectrumMatchesTopN& other); its signature (listing line 89) is not shown here
91  // NOTE(review): other should be a different object -- vector::insert requires that the source iterators do not point into the destination vector
92  this->hits_.insert(this->hits_.end(), other.hits_.begin(), other.hits_.end()); // append a copy of every hit of other to the end of hits_
93  return *this; // return the merged container
94  }
95 
96  void clear() // removes all stored hits
97  {
98  hits_.clear();
99 
100  }
101  };
112 
119  ~FragmentIndex() override = default; // default destructor
120 
130  bool isBuild() const; // indicates whether the fragment index has been built
131 
144  const std::vector<Peptide>& getPeptides() const; // returns a reference to the internal peptide container
145 
146 #ifdef DEBUG_FRAGMENT_INDEX
170  void addSpecialPeptide(AASequence& peptide, Size source_idx); // only declared when DEBUG_FRAGMENT_INDEX is defined
171 #endif
172 
179  void build(const std::vector<FASTAFile::FASTAEntry> & fasta_entries); // builds the fragment index data structure from the given FASTA entries
180 
182  void clear(); // deletes the fragment index and sets is_build_ to false
183 
184 
191  std::pair<size_t, size_t> getPeptidesInPrecursorRange(float precursor_mass,
192  const std::pair<float, float>& window); // presumably returns an index range into the peptide container -- TODO confirm bounds semantics
193 
193 
197  struct Hit // element type of the vector returned by query()
198  {
199  Hit(UInt32 peptide_idx, float fragment_mz) : // stores both arguments unchanged in the members of the same name
200  peptide_idx(peptide_idx),
201  fragment_mz(fragment_mz)
202  {}
203  UInt32 peptide_idx; // index in database
204  float fragment_mz; // fragment m/z value supplied at construction
205  };
206 
213  std::vector<Hit> query(const Peak1D& peak,
214  const std::pair<size_t,size_t>& peptide_idx_range,
215  uint16_t peak_charge); // queries one peak
216 
224  void querySpectrum(const MSSpectrum& spectrum,
225  SpectrumMatchesTopN& sms); // queries one complete experimental spectrum against the database; loops over all precursor charges
226 
227 protected:
228 
229 
232  struct Fragment // one entry in the fragment index
233  {
234  Fragment(UInt32 peptide_idx, float fragment_mz): // stores both arguments unchanged
235  peptide_idx_(peptide_idx),
236  fragment_mz_(fragment_mz)
237  {}
238  UInt32 peptide_idx_; // 32 bit in sage
240  }; // (the declaration of float fragment_mz_, listing line 239, is not shown in this listing)
241 
242  bool is_build_{false}; // defaults to false; clear() sets it back to false
243 
244  void updateMembers_() override; // used to update extra member variables at the end of the setParameters() method
245 
252  void generatePeptides(const std::vector<FASTAFile::FASTAEntry>& fasta_entries); // generates all peptides from the given FASTA entries
253 
254  std::vector<Peptide> fi_peptides_; // vector of all (digested) peptides
255  std::vector<Fragment> fi_fragments_; // vector of all theoretical fragments (b- and y- ions)
256 
259  size_t bucketsize_; // number of fragments per outer node
260  std::vector<float> bucket_min_mz_; // smallest fragment mz of each bucket
262  bool precursor_mz_tolerance_unit_ppm_{true}; // defaults to true; selects the ppm threshold in isOpenSearchMode_()
264  bool fragment_mz_tolerance_unit_ppm_{true}; // defaults to true; presumably the unit flag for fragment_mz_tolerance_ -- TODO confirm
265 private:
266 
267 
277  void queryPeaks(SpectrumMatchesTopN& candidates,
278  const MSSpectrum& spectrum,
279  const std::pair<size_t, size_t>& candidates_range,
280  const int16_t isotope_error,
281  const uint16_t precursor_charge); // queries peaks for a given experimental spectrum with a set range of potential peptides
291  float precursor_mass, // continuation of searchDifferentPrecursorRanges(const MSSpectrum& spectrum, ...); its first line is not shown in this listing
292  SpectrumMatchesTopN& sms,
293  uint16_t charge); // in closed search, loops over all isotope errors
294 
298  void trimHits(SpectrumMatchesTopN& init_hits) const; // places the k-largest elements in the front of the input array
299 
300  //since we work with TheoreticalSpectrumGenerator, we must transfer some of those member variables
307 
308  // SpectrumGenerator independent member variables
309  std::string digestion_enzyme_;
310 
316 
320 
321  // Search Related member variables
322 
330 
332  bool isOpenSearchMode_() const // helper to determine, based on the precursor tolerance, whether open search should be used
333  {
334  return precursor_mz_tolerance_unit_ppm_
335  ? (precursor_mz_tolerance_ > 1000.0) // tolerance given in ppm: open search when it exceeds 1000
336  : (precursor_mz_tolerance_ > 1.0); // tolerance not in ppm (presumably Da -- TODO confirm): open search when it exceeds 1
337  }
338 
341 
342 
343  };
344 
345 }
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Generates from a set of Fasta files a 2D-datastructure which stores all theoretical masses of all b a...
Definition: FragmentIndex.h:30
size_t bucketsize_
number of fragments per outer node
Definition: FragmentIndex.h:259
uint16_t min_matched_peaks_
PSMs with fewer hits are discarded.
Definition: FragmentIndex.h:323
bool add_x_ions_
Definition: FragmentIndex.h:305
const std::vector< Peptide > & getPeptides() const
Returns a reference to the internal peptide container.
bool add_a_ions_
Definition: FragmentIndex.h:303
void querySpectrum(const MSSpectrum &spectrum, SpectrumMatchesTopN &sms)
Queries one complete experimental spectrum against the database. Loops over all precursor charges St...
bool add_b_ions_
Definition: FragmentIndex.h:301
void queryPeaks(SpectrumMatchesTopN &candidates, const MSSpectrum &spectrum, const std::pair< size_t, size_t > &candidates_range, const int16_t isotope_error, const uint16_t precursor_charge)
queries peaks for a given experimental spectrum with a set range of potential peptides,...
StringList modifications_fixed_
Modifications that are on all peptides.
Definition: FragmentIndex.h:317
float fragment_mz_tolerance_
Definition: FragmentIndex.h:263
bool add_y_ions_
Definition: FragmentIndex.h:302
std::vector< Peptide > fi_peptides_
vector of all (digested) peptides
Definition: FragmentIndex.h:254
size_t missed_cleavages_
number of missed cleavages
Definition: FragmentIndex.h:311
float fragment_min_mz_
smallest fragment mz
Definition: FragmentIndex.h:257
uint16_t min_precursor_charge_
minimal possible precursor charge (usually always 1)
Definition: FragmentIndex.h:326
uint32_t max_processed_hits_
The number of PSMs that will be used; the rest are filtered out.
Definition: FragmentIndex.h:329
float peptide_max_mass_
Definition: FragmentIndex.h:313
uint16_t max_fragment_charge_
The maximal possible charge of the fragments.
Definition: FragmentIndex.h:328
std::pair< size_t, size_t > getPeptidesInPrecursorRange(float precursor_mass, const std::pair< float, float > &window)
std::vector< Fragment > fi_fragments_
vector of all theoretical fragments (b- and y- ions)
Definition: FragmentIndex.h:255
int16_t max_isotope_error_
Maximal possible isotope error (both only used for closed search)
Definition: FragmentIndex.h:325
float open_precursor_window_lower_
Defines the lower bound of the precursor-mass range.
Definition: FragmentIndex.h:339
float fragment_max_mz_
largest fragment mz
Definition: FragmentIndex.h:258
std::string digestion_enzyme_
Definition: FragmentIndex.h:309
bool isBuild() const
Indicates whether the fragment index has been built.
size_t peptide_max_length_
Definition: FragmentIndex.h:315
~FragmentIndex() override=default
Default destructor.
bool isOpenSearchMode_() const
Helper function to determine if open search should be used based on tolerance.
Definition: FragmentIndex.h:332
std::vector< Hit > query(const Peak1D &peak, const std::pair< size_t, size_t > &peptide_idx_range, uint16_t peak_charge)
Queries one peak.
void generatePeptides(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Generates all peptides from given fasta entries. If Bottom-up is set to false skips digestion....
std::vector< float > bucket_min_mz_
vector of the smallest fragment mz of each bucket
Definition: FragmentIndex.h:260
float peptide_min_mass_
Definition: FragmentIndex.h:312
void searchDifferentPrecursorRanges(const MSSpectrum &spectrum, float precursor_mass, SpectrumMatchesTopN &sms, uint16_t charge)
If closed search loops over all isotope errors. For each iteration loop over all peaks with queryPeak...
int16_t min_isotope_error_
Minimal possible isotope error.
Definition: FragmentIndex.h:324
float open_precursor_window_upper_
Defines the upper bound of the precursor-mass range.
Definition: FragmentIndex.h:340
uint16_t max_precursor_charge_
maximal possible precursor charge
Definition: FragmentIndex.h:327
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void clear()
Delete fragment index. Sets is_build=false.
StringList modifications_variable_
Variable modifications -> all possible combinations are created.
Definition: FragmentIndex.h:318
size_t max_variable_mods_per_peptide_
Definition: FragmentIndex.h:319
void build(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Given a set of Fasta files, builds the Fragment Index datastructure (FID). First all fragments are so...
void trimHits(SpectrumMatchesTopN &init_hits) const
places the k-largest elements in the front of the input array. Inside of the k-largest elements and o...
size_t peptide_min_length_
Definition: FragmentIndex.h:314
bool add_c_ions_
Definition: FragmentIndex.h:304
float precursor_mz_tolerance_
Definition: FragmentIndex.h:261
FragmentIndex()
Default constructor.
bool add_z_ions_
Definition: FragmentIndex.h:306
Match between a query peak and an entry in the DB.
Definition: FragmentIndex.h:65
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:28
uint32_t UInt32
Unsigned integer type (32bit)
Definition: Types.h:33
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
One entry in the fragment index.
Definition: FragmentIndex.h:233
Fragment(UInt32 peptide_idx, float fragment_mz)
Definition: FragmentIndex.h:234
UInt32 peptide_idx_
Definition: FragmentIndex.h:238
float fragment_mz_
Definition: FragmentIndex.h:239
Definition: FragmentIndex.h:198
UInt32 peptide_idx
Definition: FragmentIndex.h:203
Hit(UInt32 peptide_idx, float fragment_mz)
Definition: FragmentIndex.h:199
float fragment_mz
Definition: FragmentIndex.h:204
Compact descriptor of a peptide instance held by the FragmentIndex.
Definition: FragmentIndex.h:45
Peptide(UInt32 protein_idx, UInt32 modification_idx, std::pair< uint16_t, uint16_t > sequence, float precursor_mz)
Definition: FragmentIndex.h:48
UInt32 modification_idx_
Index into variant list produced by ModifiedPeptideGenerator for this subsequence (0 = unmodified)
Definition: FragmentIndex.h:56
std::pair< uint16_t, uint16_t > sequence_
{start, length} within the source protein sequence (start is 0-based; length in residues)
Definition: FragmentIndex.h:57
UInt32 protein_idx
0-based index into FASTA entries provided to build(); identifies the source protein
Definition: FragmentIndex.h:55
float precursor_mz_
Mono-isotopic m/z at charge 1 (M+H)+ of this peptide; used for sorting/filtering.
Definition: FragmentIndex.h:58
container for SpectrumMatch. Also keeps count of total number of candidates and total number of match...
Definition: FragmentIndex.h:77
SpectrumMatchesTopN & operator+=(const SpectrumMatchesTopN &other)
Appends a SpectrumMatchesTopN to another one. Add the number of all matched peaks up....
Definition: FragmentIndex.h:89
void clear()
Definition: FragmentIndex.h:96
std::vector< SpectrumMatch > hits_
The preliminary candidates.
Definition: FragmentIndex.h:78