OpenMS
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
IDMergerAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Julianus Pfeuffer $
6 // $Authors: Julianus Pfeuffer $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
17 
18 #include <unordered_set>
19 
20 namespace OpenMS
21 {
22 
23  //TODO add params for checking consistency (i.e. how strict to check)
24  //TODO add another subclass that does score-aware merging? (i.e. only keep best per peptide[sequence])
25 
36  class OPENMS_DLLAPI IDMergerAlgorithm:
37  public DefaultParamHandler,
38  public ProgressLogger
39  {
40  public:
41  explicit IDMergerAlgorithm (const String& runIdentifier = "merged");
42 
45  void insertRuns(std::vector<ProteinIdentification>&& prots,
47  void insertRuns(const std::vector<ProteinIdentification>& prots,
48  const PeptideIdentificationList& peps);
49 
50  //TODO add methods to insert only the proteins or only the peptides. This especially makes sense if you do
51  // re-indexing anyway, because then you do not need the proteins. However, we then need origin information:
52  // either supplied externally as a String or StringList (like the one from ProteinID.getPrimaryMSRunPath),
53  // or by having the file annotated at the PeptideID (with getBasename maybe?).
54  // The current solution is to clear the ProteinIdentification if you do not need the proteins and to add all
55  // the necessary information about the origin(s) to this ProteinIdentification.
56 
60 
61  private:
62 
65 
68 
75  const std::vector<ProteinIdentification>& protRuns,
76  const String& experiment_type) const;
77 
85  const std::vector<ProteinIdentification>& protRuns,
86  const ProteinIdentification& ref,
87  const String& experiment_type) const;
88 
92  std::vector<ProteinIdentification>&& old_protRuns
93  );
94 
100  const std::map<String, Size>& runID_to_runIdx,
101  const std::vector<StringList>& originFiles,
102  bool annotate_origin
103  );
104 
105 
107  PeptideIdentificationList&& pepIDs,
108  std::vector<ProteinIdentification>&& old_protRuns
109  );
110 
113 
116 
117  static size_t accessionHash_(const ProteinHit& p){ // Hashes a ProteinHit by its accession only; signature matches hash_type.
118  return std::hash<String>()(p.getAccession()); // NOTE(review): needs a std::hash<String> specialization, not visible in this listing.
119  } // Presumably used as the hasher of collected_protein_hits_ -- confirm in the implementation file.
120  static bool accessionEqual_(const ProteinHit& p1, const ProteinHit& p2){ // Equality on accession only; signature matches equal_type.
121  return p1.getAccession() == p2.getAccession(); // No other ProteinHit member is compared here.
122  } // Presumably used as the key-equality of collected_protein_hits_ -- confirm in the implementation file.
123  using hash_type = std::size_t (*)(const ProteinHit&); // Function-pointer type for a hasher over ProteinHit (same signature as accessionHash_).
124  using equal_type = bool (*)(const ProteinHit&, const ProteinHit&); // Function-pointer type for an equality predicate over ProteinHit (same signature as accessionEqual_).
125  std::unordered_set<ProteinHit, hash_type, equal_type> collected_protein_hits_; // Hash set of ProteinHits; the pointers must be supplied at construction (done outside this listing).
126 
128  bool filled_ = false;
129 
131  std::map<String, Size> file_origin_to_idx_; ///< to keep track of the mzML origins of spectra
132 
135  };
136 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Creates a new Protein ID run into which other runs can be inserted. Creates union of protein hits but...
Definition: IDMergerAlgorithm.h:39
ProteinIdentification prot_result_
the resulting new Protein IDs
Definition: IDMergerAlgorithm.h:112
bool(*)(const ProteinHit &, const ProteinHit &) equal_type
Definition: IDMergerAlgorithm.h:124
void insertRuns(std::vector< ProteinIdentification > &&prots, PeptideIdentificationList &&peps)
void insertProteinIDs_(std::vector< ProteinIdentification > &&old_protRuns)
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const ProteinIdentification &ref, const String &experiment_type) const
std::size_t(*)(const ProteinHit &) hash_type
Definition: IDMergerAlgorithm.h:123
static size_t accessionHash_(const ProteinHit &p)
Definition: IDMergerAlgorithm.h:117
static bool accessionEqual_(const ProteinHit &p1, const ProteinHit &p2)
Definition: IDMergerAlgorithm.h:120
void insertRuns(const std::vector< ProteinIdentification > &prots, const PeptideIdentificationList &peps)
std::map< String, Size > file_origin_to_idx_
to keep track of the mzML origins of spectra
Definition: IDMergerAlgorithm.h:131
void returnResultsAndClear(ProteinIdentification &prots, PeptideIdentificationList &peps)
Return the merged results and reset/clear all internal data.
static void copySearchParams_(const ProteinIdentification &from, ProteinIdentification &to)
Copies over search parameters.
std::unordered_set< ProteinHit, hash_type, equal_type > collected_protein_hits_
Definition: IDMergerAlgorithm.h:125
void movePepIDsAndRefProteinsToResultFaster_(PeptideIdentificationList &&pepIDs, std::vector< ProteinIdentification > &&old_protRuns)
String getNewIdentifier_() const
Returns the new identifier. The initial identifier plus a timestamp.
String id_
the new identifier string
Definition: IDMergerAlgorithm.h:134
void updateAndMovePepIDs_(PeptideIdentificationList &&pepIDs, const std::map< String, Size > &runID_to_runIdx, const std::vector< StringList > &originFiles, bool annotate_origin)
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const String &experiment_type) const
PeptideIdentificationList pep_result_
the resulting new Peptide IDs
Definition: IDMergerAlgorithm.h:115
IDMergerAlgorithm(const String &runIdentifier="merged")
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19