OpenMS
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
DecoyGenerator.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/CONCEPT/Types.h>
13 
14 #include <unordered_map>
15 
16 namespace OpenMS
17 {
18  class AASequence;
19  class DigestionEnzymeProtein;
20 
24  class OPENMS_DLLAPI DecoyGenerator
25  {
26  public:
27  // initializes random generator
29 
30  // destructor
31  ~DecoyGenerator() = default;
32 
33  // random seed for shuffling
34  void setSeed(UInt64 seed);
35 
36  /*
37  @brief reverses the protein sequence.
38  note: modifications are discarded
39  */
40  AASequence reverseProtein(const AASequence& protein) const;
41 
42  /*
43  @brief reverses the protein's peptide sequences between enzymatic cutting positions.
44  note: modifications are discarded
45  */
46  AASequence reversePeptides(const AASequence& protein, const String& protease) const;
47 
48  /*
49  @brief shuffles the protein's peptide sequences between enzymatic cutting positions.
50  each peptide is shuffled @p max_attempts times to minimize sequence identity.
51 
52  Note:
53  - Generated decoys are retrieved from a cache so that the same peptide (in different proteins)
54  does not lead to different decoys.
55  - modifications are discarded
56  */
57  AASequence shufflePeptides(
58  const AASequence& aas,
59  const String& protease,
60  const int max_attempts = 100
61  );
62 
63  private:
64  // sequence identity by matching AAs
65  static double SequenceIdentity_(const String& decoy, const String& target);
66 
67  // portable shuffle
69 
70  // ensures that shuffling same peptide (in different proteins) leads to same decoy
71  std::unordered_map<std::string, std::string> td_cache_;
72  };
73 }
74 
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
Methods to generate isobaric decoy sequences for DDA target-decoy searches.
Definition: DecoyGenerator.h:25
void setSeed(UInt64 seed)
static double SequenceIdentity_(const String &decoy, const String &target)
std::unordered_map< std::string, std::string > td_cache_
Definition: DecoyGenerator.h:71
AASequence shufflePeptides(const AASequence &aas, const String &protease, const int max_attempts=100)
Math::RandomShuffler shuffler_
Definition: DecoyGenerator.h:68
AASequence reverseProtein(const AASequence &protein) const
AASequence reversePeptides(const AASequence &protein, const String &protease) const
Definition: MathFunctions.h:471
A more convenient string class.
Definition: String.h:34
uint64_t UInt64
Unsigned integer type (64bit)
Definition: Types.h:47
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19