All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
IDRipper.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg$
6 // $Authors: Immanuel Luhn, Leon Kuchenbecker$
7 // --------------------------------------------------------------------------
8 #pragma once
9 
14 #include <unordered_map>
15 
16 
17 namespace OpenMS
18 {
/// Ripping protein/peptide identification according to their file origin.
/// Derives from DefaultParamHandler (base class for classes handling default parameters).
/// NOTE(review): this listing was extracted from generated documentation; some declaration
/// lines (e.g. the public constructor and parts of RipFileIdentifier) are not visible here.
28  class OPENMS_DLLAPI IDRipper :
29  public DefaultParamHandler
30  {
31 public:
    /// Possible input file encodings for the origin as used by different versions of IDMerger.
    /// SIZE_OF_ORIGIN_ANNOTATION_FORMAT is the trailing enumerator (value 4) and is used
    /// as the length of names_of_OriginAnnotationFormat below.
33  enum OriginAnnotationFormat { FILE_ORIGIN = 0, MAP_INDEX = 1, ID_MERGE_INDEX = 2, UNKNOWN_OAF = 3, SIZE_OF_ORIGIN_ANNOTATION_FORMAT = 4 };
34 
    /// String representations for the OriginAnnotationFormat enum.
36  static const std::array<std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT> names_of_OriginAnnotationFormat;
37 
    /// Represents a set of IdentificationRuns.
39  struct OPENMS_DLLAPI IdentificationRuns
40  {
    /// Maps a unique index to every IdentificationRun string representation (getIdentifier()).
42  std::map<String, UInt> index_map;
    /// Maps the list of spectra data elements to every IdentificationRun index.
44  std::vector<StringList> spectra_data;
45 
    /// Generates a new IdentificationRuns object from a vector of ProteinIdentification objects.
47  IdentificationRuns(const std::vector<ProteinIdentification>& prot_ids);
48  };
49 
    /// Identifies an IDRipper output file.
51  struct OPENMS_DLLAPI RipFileIdentifier
52  {
    /// Identification run index (value-initialized by the in-class initializer).
54  UInt ident_run_idx{};
    /// File origin index (value-initialized by the in-class initializer).
56  UInt file_origin_idx{};
61 
64  const PeptideIdentification& pep_id,
65  const std::map<String, UInt>& file_origin_map,
66  const IDRipper::OriginAnnotationFormat origin_annotation_fmt,
67  bool split_ident_runs);
68 
71 
74 
    /// Get origin full name.
76  const String & getOriginFullname() const;
77 
    /// Get output base name.
79  const String & getOutputBasename() const;
80  };
81 
84  {
    /// 'Less' operation for RipFileIdentifiers, used as the ordering of RipFileMap.
    /// NOTE(review): the generated docs state it ignores out_basename and origin_fullname;
    /// the implementation is not visible here -- confirm which fields are compared.
85  bool operator()(const RipFileIdentifier& left, const RipFileIdentifier& right) const;
86  };
87 
    /// Represents the content of an IDRipper output file.
89  struct OPENMS_DLLAPI RipFileContent
90  {
    /// Protein identifications.
92  std::vector<ProteinIdentification> prot_idents;
    /// Constructs a new RipFileContent object; both arguments are copied into the
    /// members of the same name via the member initializer list.
96  RipFileContent(const std::vector<ProteinIdentification>& prot_idents, const PeptideIdentificationList& pep_idents)
97  : prot_idents(prot_idents), pep_idents(pep_idents) {}
    /// Get protein identifications (note: declared non-const).
99  const std::vector<ProteinIdentification> & getProteinIdentifications();
102  };
103 
    /// Represents the result of an IDRipper process: a map assigning file content
    /// (RipFileContent) to output file identifiers (RipFileIdentifier), ordered by
    /// RipFileIdentifierIdxComparator.
105  typedef std::map<RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator> RipFileMap;
106 
109 
    /// Destructor.
111  ~IDRipper() override;
112 
    /// Ripping protein/peptide identification according to their file origin.
    /// The result is delivered through the RipFileMap reference @p ripped.
    /// NOTE(review): the detailed parameter documentation was stripped from this listing;
    /// the exact effect of @p numeric_filenames and @p split_ident_runs should be
    /// confirmed against the implementation.
125  void rip(
126  RipFileMap& ripped,
127  std::vector<ProteinIdentification>& proteins,
128  PeptideIdentificationList& peptides,
129  bool numeric_filenames,
130  bool split_ident_runs);
131 
    /// Ripping protein/peptide identification according to their file origin.
    /// Same operation as the RipFileMap overload, but with the identifiers and contents
    /// passed as two separate vectors (@p rfis, @p rfcs) instead of a map.
145  // Autowrap compatible wrapper for rip(RipFileMap,...)
146  void rip(
147  std::vector<RipFileIdentifier>& rfis,
148  std::vector<RipFileContent>& rfcs,
149  std::vector<ProteinIdentification>& proteins,
150  PeptideIdentificationList& peptides,
151  bool numeric_filenames,
152  bool split_ident_runs);
153 
154 private:
    /// Copy constructor (declared private; see note below).
155  // Not implemented
157  IDRipper(const IDRipper & rhs);
158 
    /// Assignment (declared private; see note below).
159  // Not implemented
161  IDRipper & operator=(const IDRipper & rhs);
162 
    /// Helper function, detects file origin annotation standard from collections of protein and peptide hits.
164  OriginAnnotationFormat detectOriginAnnotationFormat_(std::map<String, UInt> & file_origin_map, const PeptideIdentificationList & peptide_idents);
    /// Helper function, extracts all protein hits that match the protein accession; hits are written to @p result.
166  void getProteinHits_(std::vector<ProteinHit> & result, const std::unordered_map<String, const ProteinHit*> & acc2protein_hits, const std::set<String> & protein_accessions);
    /// Helper function, returns the string representation of the peptide hit accession (as a set of Strings).
168  std::set<String> getProteinAccessions_(const std::vector<PeptideHit> & peptide_hits);
    /// Helper function, register a potential output file basename to detect duplicate output basenames.
172  bool registerBasename_(std::map<String, std::pair<UInt, UInt> >& basename_to_numeric, const IDRipper::RipFileIdentifier& rfi);
    /// Helper function, sets the value of @p mode to @p new_value.
    /// NOTE(review): the generated docs say it returns true "if the old value was identical ..."
    /// (description truncated) -- confirm the exact return condition against the implementation.
174  bool setOriginAnnotationMode_(short& mode, short const new_value);
175  };
176 
177 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Ripping protein/peptide identification according to their file origin.
Definition: IDRipper.h:30
bool registerBasename_(std::map< String, std::pair< UInt, UInt > > &basename_to_numeric, const IDRipper::RipFileIdentifier &rfi)
helper function, register a potential output file basename to detect duplicate output basenames
IDRipper()
Default constructor.
OriginAnnotationFormat detectOriginAnnotationFormat_(std::map< String, UInt > &file_origin_map, const PeptideIdentificationList &peptide_idents)
helper function, detects file origin annotation standard from collections of protein and peptide hits
~IDRipper() override
Destructor.
static const std::array< std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT > names_of_OriginAnnotationFormat
String representations for the OriginAnnotationFormat enum.
Definition: IDRipper.h:36
void getProteinHits_(std::vector< ProteinHit > &result, const std::unordered_map< String, const ProteinHit * > &acc2protein_hits, const std::set< String > &protein_accessions)
helper function, extracts all protein hits that match the protein accession
std::map< RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator > RipFileMap
Represents the result of an IDRipper process, a map assigning file content to output file identifiers...
Definition: IDRipper.h:105
int getProteinIdentification_(const PeptideIdentification &pep_ident, const IdentificationRuns &id_runs)
helper function, returns the index of the protein identification for the given peptide identification...
OriginAnnotationFormat
Possible input file encodings for the origin as used by different versions of IDMerger.
Definition: IDRipper.h:33
IDRipper(const IDRipper &rhs)
Copy constructor.
bool setOriginAnnotationMode_(short &mode, short const new_value)
helper function, sets the value of mode to new_value and returns true if the old value was identical ...
void rip(std::vector< RipFileIdentifier > &rfis, std::vector< RipFileContent > &rfcs, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
void rip(RipFileMap &ripped, std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
IDRipper & operator=(const IDRipper &rhs)
Assignment.
std::set< String > getProteinAccessions_(const std::vector< PeptideHit > &peptide_hits)
helper function, returns the string representation of the peptide hit accession
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition: PeptideIdentification.h:63
A more convenient string class.
Definition: String.h:34
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
const std::string ID_MERGE_INDEX
Definition: Constants.h:307
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Represents a set of IdentificationRuns.
Definition: IDRipper.h:40
IdentificationRuns(const std::vector< ProteinIdentification > &prot_ids)
Generates a new IdentificationRuns object from a vector of ProteinIdentification objects.
std::vector< StringList > spectra_data
Maps the list of spectra data elements to every IdentificationRun index.
Definition: IDRipper.h:44
std::map< String, UInt > index_map
Maps a unique index to every IdentificationRun string representation (getIdentifier()).
Definition: IDRipper.h:42
Represents the content of an IDRipper output file.
Definition: IDRipper.h:90
const std::vector< ProteinIdentification > & getProteinIdentifications()
Get protein identifications.
const PeptideIdentificationList & getPeptideIdentifications()
Get peptide identifications.
PeptideIdentificationList pep_idents
Peptide identifications.
Definition: IDRipper.h:94
RipFileContent(const std::vector< ProteinIdentification > &prot_idents, const PeptideIdentificationList &pep_idents)
Constructs a new RipFileContent object.
Definition: IDRipper.h:96
std::vector< ProteinIdentification > prot_idents
Protein identifications.
Definition: IDRipper.h:92
Provides a 'less' operation for RipFileIdentifiers that ignores the out_basename and origin_fullname ...
Definition: IDRipper.h:84
bool operator()(const RipFileIdentifier &left, const RipFileIdentifier &right) const
Identifies an IDRipper output file.
Definition: IDRipper.h:52
RipFileIdentifier(const IDRipper::IdentificationRuns &id_runs, const PeptideIdentification &pep_id, const std::map< String, UInt > &file_origin_map, const IDRipper::OriginAnnotationFormat origin_annotation_fmt, bool split_ident_runs)
Constructs a new RipFileIdentifier object.
String out_basename
The output basename derived from the file_origin / spectra_data element.
Definition: IDRipper.h:58
const String & getOutputBasename() const
Get output base name.
const String & getOriginFullname() const
Get origin full name.
UInt getFileOriginIdx() const
Get file origin index.
String origin_fullname
The full length origin read from the file_origin / spectra_data element.
Definition: IDRipper.h:60
UInt getIdentRunIdx() const
Get identification run index.