OpenMS
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
XTandemXMLFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Andreas Bertsch $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
13 #include <OpenMS/FORMAT/XMLFile.h>
16 #include <stack>
17 
18 namespace OpenMS
19 {
20  class String;
21  class ProteinIdentification;
22 
// Used to load X! Tandem XML files.
//
// Inherits protected from Internal::XMLHandler and public from Internal::XMLFile.
// The class overrides the startElement() / endElement() / characters() parser
// callbacks and keeps its parsing state in the private members below.
31  class OPENMS_DLLAPI XTandemXMLFile :
32  protected Internal::XMLHandler,
33  public Internal::XMLFile
34  {
35 public:
36 
39 
41  ~XTandemXMLFile() override;
    // Loads data from an X! Tandem XML file.
    //   filename               - the X! Tandem XML file to load
    //   protein_identification,
    //   id_data, mod_def_set   - taken by non-const reference;
    //                            NOTE(review): presumably filled with the parsed results
    //                            (and mod_def_set possibly read as input) - confirm in the implementation
55  void load(const String& filename, ProteinIdentification& protein_identification, PeptideIdentificationList& id_data, ModificationDefinitionsSet& mod_def_set);
56 
57 
58 protected:
59 
60  // Documented in the base class (called with the qualified name and attributes of an opening element)
61  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override;
62 
63  // Documented in the base class (called with the qualified name of a closing element)
64  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override;
65 
66  // Documented in the base class (called with character data; the length argument is unused)
67  void characters(const XMLCh* const chars, const XMLSize_t /*length*/) override;
68 
70 
72 
73 private:
74 
76 
77  // is_protein_note_ (bool): true during a "note" element containing a protein accession
79 
80  // is_spectrum_note_ (bool): true during a "note" element containing a spectrum ID
82 
83  // skip_protein_acc_update_ (bool): true after non-new protein entries, so that with
84  // the next "protein note" the accession will not be updated again
86 
87  // peptide hits per spectrum (NOTE(review): the UInt key is presumably the X! Tandem ID of the spectrum - confirm)
88  std::map<UInt, std::vector<PeptideHit> > peptide_hits_;
89 
90  // protein hits
91  std::vector<ProteinHit> protein_hits_;
92 
93  // protein unique IDs (assigned by X! Tandem), to keep track of which proteins were already seen
94  std::set<UInt> protein_uids_;
95 
96  // current_protein_ (String): accession of the current protein
98 
99  // current_charge_ (Int): charge of the current peptide
101 
102  // current_id_ (UInt): X! Tandem ID of the current peptide
104 
105  // tag_ (String): tag (NOTE(review): presumably the name of the XML element currently being parsed - confirm)
107 
108  // current_start_ (UInt): start position of the current peptide in the protein sequence
110 
111  // current_stop_ (UInt): stop position of the current peptide in the protein sequence
113 
114  // previous_seq_ (String): previous peptide sequence
116 
117  // mapping from X! Tandem ID to spectrum ID
118  std::map<UInt, String> spectrum_ids_;
119 
120  // mod_def_set_ (ModificationDefinitionsSet): modification definitions
122 
123  // default_nterm_mods_ (ModificationDefinitionsSet): modifications used by X! Tandem by default
125 
126  // the possible values of the "type" attribute of "group" elements
127  enum class GroupType
128  {
129  MODEL,
130  PARAMETERS,
131  SUPPORT
132  };
133 
134  // stack of the types of the currently open "group" elements;
135  // groups can be nested (e.g. a support group inside a model group), and the
136  // parsing of child elements sometimes depends on the type of the enclosing group
137  std::stack<GroupType> group_type_stack_;
138 
139  };
140 
141 } // namespace OpenMS
142 
char16_t XMLCh
Definition: ClassTest.h:28
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:23
Base class for XML handlers.
Definition: XMLHandler.h:328
Definition: ModificationDefinitionsSet.h:34
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
Used to load XTandemXML files.
Definition: XTandemXMLFile.h:34
String current_protein_
Definition: XTandemXMLFile.h:97
ProteinIdentification * protein_identification_
Definition: XTandemXMLFile.h:75
bool is_spectrum_note_
Definition: XTandemXMLFile.h:81
std::map< UInt, std::vector< PeptideHit > > peptide_hits_
Definition: XTandemXMLFile.h:88
GroupType
Definition: XTandemXMLFile.h:128
ModificationDefinitionsSet default_nterm_mods_
Definition: XTandemXMLFile.h:124
String tag_
Definition: XTandemXMLFile.h:106
XTandemXMLFile & operator=(const XTandemXMLFile &rhs)
Int current_charge_
Definition: XTandemXMLFile.h:100
std::vector< ProteinHit > protein_hits_
Definition: XTandemXMLFile.h:91
ModificationDefinitionsSet mod_def_set_
Definition: XTandemXMLFile.h:121
std::stack< GroupType > group_type_stack_
Definition: XTandemXMLFile.h:137
bool skip_protein_acc_update_
Definition: XTandemXMLFile.h:85
std::set< UInt > protein_uids_
Definition: XTandemXMLFile.h:94
UInt current_id_
Definition: XTandemXMLFile.h:103
void startElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override
~XTandemXMLFile() override
Destructor.
void characters(const XMLCh *const chars, const XMLSize_t) override
std::map< UInt, String > spectrum_ids_
Definition: XTandemXMLFile.h:118
void endElement(const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override
UInt current_stop_
Definition: XTandemXMLFile.h:112
UInt current_start_
Definition: XTandemXMLFile.h:109
bool is_protein_note_
Definition: XTandemXMLFile.h:78
String previous_seq_
Definition: XTandemXMLFile.h:115
XTandemXMLFile(const XTandemXMLFile &rhs)
XTandemXMLFile()
Default constructor.
int Int
Signed integer type.
Definition: Types.h:72
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
void load(const String &filename, ProteinIdentification &protein_identification, PeptideIdentificationList &id_data, ModificationDefinitionsSet &mod_def_set)
Loads data from an X! Tandem XML file.
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19