OpenMS
NuXLModificationsGenerator.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 #include <vector>
16 #include <map>
17 #include <set>
18 #include <iostream>
19 
20 namespace OpenMS
21 {
22  class AASequence;
23 
24  /*
25  formula2mass holds the map from empirical formula to mass
26 
27  mod_combinations holds the map from empirical formula to (potentially ambiguous) nucleotide formulae
28  e.g.,:
29  C10H14N5O7P -> {A}
30  C10H14N5O8P -> {G}
31  C18H22N4O16P2 -> { CU-H3N1, UU-H2O1 }
32  */
// Result container pairing empirical formulas with their masses and declaring
// the alias types used to map empirical formulas to nucleotide formulas.
// NOTE(review): listing lines 35 and 54 are absent from this rendering. The
// cross-reference index at the end of the page places `mod_combinations`
// (type MapSumFormulaToNucleotideFormulas) at line 54 and a further definition
// at line 36 -- presumably `struct MyStringLengthCompare`, the comparator named
// in the NucleotideFormulas alias below. Confirm against the real header.
33  struct OPENMS_DLLAPI NuXLModificationMassesResult
34  {
// Strict ordering functor for strings: shorter strings order first; strings of
// equal length are ordered by the ordinary std::string operator<.
36  {
37  bool operator () (const std::string & p_lhs, const std::string & p_rhs) const
38  {
39  const size_t lhsLength = p_lhs.length() ;
40  const size_t rhsLength = p_rhs.length() ;
41  if(lhsLength == rhsLength)
42  {
43  return (p_lhs < p_rhs) ; // equal lengths: fall back to
44  // the ordinary std::string
45  // less-than comparison
46  }
47  return (lhsLength < rhsLength) ; // different lengths: the shorter string orders first
48  }
49  };
50  std::map<String, double> formula2mass; // empirical formula -> mass
51 
52  using NucleotideFormulas = std::set<String, MyStringLengthCompare>; // set of nucleotide formulas, ordered by MyStringLengthCompare
53  using MapSumFormulaToNucleotideFormulas = std::map<String, NucleotideFormulas>; // empirical formula -> nucleotide formula(s)
55  };
56 
// Declares static helpers for generating precursor adduct combinations; only
// declarations are visible in this listing.
57  class OPENMS_DLLAPI NuXLModificationsGenerator
58  {
59  public:
60  /* @brief generate all combinations of precursor adducts
61  @param target_nucleotides the list of nucleotides: e.g., "U", "C", "G", "A" or "U", "T", "G", "A"
    @param nt_groups not documented in the original comment -- TODO(review): describe
62  @param can_xl the set of cross-linkable nucleotides
63  @param mappings not documented in the original comment; presumably source->target nucleotide mappings (cf. map_source2target in generateTargetSequences) -- TODO(review): confirm
64  @param modifications additional losses associated with the precursor adduct: e.g., "-H2O"
65  @param sequence_restriction only precursor adducts that are substrings of this NA sequence are generated (defaults to "")
66  @param cysteine_adduct special DTT adduct (defaults to false)
67  @param max_length maximum oligo length (defaults to 4)
68  */
// NOTE(review): listing line 69, which opens this declaration, is absent from
// this rendering. The cross-reference index at the end of the page gives it as
// `static NuXLModificationMassesResult initModificationMassesNA(const StringList &target_nucleotides, ...`.
70  const StringList& nt_groups,
71  const std::set<char>& can_xl,
72  const StringList& mappings,
73  const StringList& modifications,
74  String sequence_restriction = "",
75  bool cysteine_adduct = false,
76  Int max_length = 4);
77  private:
// Per the cross-reference index: returns true if query is not in the sequence.
// (Listing line 78, likely the original doc comment, is absent from this rendering.)
79  static bool notInSeq(const String& res_seq, const String& query);
80 
// NOTE(review): undocumented in this listing. Presumably fills target_sequences
// with variants of res_seq derived via map_source2target, starting at
// param_pos -- confirm against the implementation.
81  static void generateTargetSequences(const String& res_seq, Size param_pos, const std::map<char, std::vector<char> >& map_source2target, StringList& target_sequences);
82  };
83 }
84 
Definition: NuXLModificationsGenerator.h:58
static NuXLModificationMassesResult initModificationMassesNA(const StringList &target_nucleotides, const StringList &nt_groups, const std::set< char > &can_xl, const StringList &mappings, const StringList &modifications, String sequence_restriction="", bool cysteine_adduct=false, Int max_length=4)
static void generateTargetSequences(const String &res_seq, Size param_pos, const std::map< char, std::vector< char > > &map_source2target, StringList &target_sequences)
static bool notInSeq(const String &res_seq, const String &query)
return true if query is not in sequence
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
MapSumFormulaToNucleotideFormulas mod_combinations
empirical formula -> nucleotide formula(s) (formulas if modifications lead to ambiguities)
Definition: NuXLModificationsGenerator.h:54
std::set< String, MyStringLengthCompare > NucleotideFormulas
Definition: NuXLModificationsGenerator.h:52
std::map< String, NucleotideFormulas > MapSumFormulaToNucleotideFormulas
Definition: NuXLModificationsGenerator.h:53
std::map< String, double > formula2mass
empirical formula -> mass
Definition: NuXLModificationsGenerator.h:50
Definition: NuXLModificationsGenerator.h:34
Definition: NuXLModificationsGenerator.h:36