OpenMS
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
MzTabFile.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/FORMAT/MzTab.h>
12 
18 
19 #include <vector>
20 
21 namespace OpenMS
22 {
23  class String;
24  class SVOutStream;
30  class OPENMS_DLLAPI MzTabFile
31  {
32  public:
37 
38  typedef std::map<std::pair<String, String>, std::vector<PeptideHit> > MapAccPepType;
39 
40  // store MzTab file
41  void store(const String& filename, const MzTab& mz_tab) const;
42 
43  // stream IDs to file
44  void store(
45  const String& filename,
46  const std::vector<ProteinIdentification>& protein_identifications,
47  const PeptideIdentificationList& peptide_identifications,
48  bool first_run_inference_only,
49  bool export_empty_pep_ids = false,
50  bool export_all_psms = false,
51  const String& title = "ID export from OpenMS");
52 
53  // stream ConsensusMap to file
54  void store(
55  const String& filename,
56  const ConsensusMap& cmap,
57  const bool first_run_inference_only,
58  const bool export_unidentified_features,
59  const bool export_unassigned_ids,
60  const bool export_subfeatures,
61  const bool export_empty_pep_ids = false,
62  const bool export_all_psms = false) const;
63 
64  // Set store behaviour of optional "reliability" and "uri" columns (default=no)
67  void storePSMReliabilityColumn(bool store);
69  void storeProteinUriColumn(bool store);
70  void storePeptideUriColumn(bool store);
71  void storePSMUriColumn(bool store);
72  void storeSmallMoleculeUriColumn(bool store);
73  void storeProteinGoTerms(bool store);
74 
75  // load MzTab file
76  void load(const String& filename, MzTab& mz_tab);
77 
78  protected:
95 
97 
101  const Size n_best_search_engine_scores,
102  const std::vector<String>& optional_columns,
103  const MzTabMetaData& meta,
104  size_t& n_columns) const;
105 
106  String generateMzTabSectionRow_(const MzTabProteinSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
107 
108  String generateMzTabPeptideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<String>& optional_columns, size_t& n_columns) const;
109 
110  String generateMzTabSectionRow_(const MzTabPeptideSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
111 
112  String generateMzTabPSMHeader_(Size n_search_engine_scores, const std::vector<String>& optional_columns, size_t& n_columns) const;
113 
114  String generateMzTabSectionRow_(const MzTabPSMSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
115 
116  String generateMzTabSmallMoleculeHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector<String>& optional_columns, size_t& n_columns) const;
117 
118  String generateMzTabSectionRow_(const MzTabSmallMoleculeSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
119 
120  String generateMzTabNucleicAcidHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_scores, const std::vector<String>& optional_columns, size_t& n_columns) const;
121 
122  String generateMzTabSectionRow_(const MzTabNucleicAcidSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
123 
124  String generateMzTabOligonucleotideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, const std::vector<String>& optional_columns, size_t& n_columns) const;
125 
126  String generateMzTabSectionRow_(const MzTabOligonucleotideSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
127 
128  String generateMzTabOSMHeader_(Size n_search_engine_scores, const std::vector<String>& optional_columns, size_t& n_columns) const;
129 
130  String generateMzTabSectionRow_(const MzTabOSMSectionRow& row, const std::vector<String>& optional_columns, const MzTabMetaData& meta, size_t& n_columns) const;
131 
/// Generate an mzTab section comprising multiple rows of the same type and perform sanity check.
///
/// Each element of @p rows is rendered with the matching generateMzTabSectionRow_ overload and
/// the resulting string is appended to @p output.
///
/// @param rows              Section rows to render (all of the same SectionRow type).
/// @param optional_columns  Forwarded unchanged to generateMzTabSectionRow_.
/// @param meta              Forwarded unchanged to generateMzTabSectionRow_.
/// @param output            List that receives one appended entry per row.
/// @param n_header_columns  Column count every rendered row is compared against.
/// @throws Exception::Postcondition if the column count reported for a row differs from
///         @p n_header_columns. The offending row has already been appended to @p output
///         when the exception is thrown.
133  template <typename SectionRow> void generateMzTabSection_(const std::vector<SectionRow>& rows, const std::vector<String>& optional_columns, const MzTabMetaData& meta, StringList& output, size_t n_header_columns) const
134  {
// Reserve room for all rows plus one extra entry up front.
// NOTE(review): the "+ 1" presumably leaves space for a line added by the caller - confirm.
135  output.reserve(output.size() + rows.size() + 1);
136  for (const auto& row : rows)
137  {
// Passed by reference as the n_columns argument of generateMzTabSectionRow_.
138  size_t n_section_columns = 0;
139  output.push_back(generateMzTabSectionRow_(row, optional_columns, meta, n_section_columns));
// Sanity check: the row must have exactly as many columns as the section header.
140  if (n_header_columns != n_section_columns) throw Exception::Postcondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Header and content differs in columns. Please report this bug to the OpenMS developers.");
141  }
142  }
143 
144  // auxiliary functions
145 
/// Helper function for the "generateMzTabSectionRow_" functions.
/// NOTE(review): presumably appends the values of @p column_entries selected by @p column_names
/// to @p output - confirm against the implementation.
147  static void addOptionalColumnsToSectionRow_(const std::vector<String>& column_names, const std::vector<MzTabOptionalColumnEntry>& column_entries, StringList& output);
148 
149  // extract two integers from string (e.g. search_engine_score[1]_ms_run[2] -> 1,2)
150  static std::pair<int, int> extractIndexPairsFromBrackets_(const String& s);
151 
153 
155 
/// Extract protein and peptide identifications for each run.
/// The two output maps (@p map_run_to_pepids, @p map_run_to_proids) are assumed to be empty on entry.
/// NOTE(review): the String map key is presumably the run identifier - confirm against the implementation.
157  static void partitionIntoRuns_(const PeptideIdentificationList& pep_ids,
158  const std::vector<ProteinIdentification>& pro_ids,
159  std::map<String, PeptideIdentificationList >& map_run_to_pepids,
160  std::map<String, std::vector<ProteinIdentification> >& map_run_to_proids
161  );
162 
163 
/// Create links from proteins to peptides.
/// Reads the per-run peptide identifications in @p map_run_to_pepids and writes the result to
/// @p map_run_accession_to_pephits (a MapAccPepType, i.e. a map from a pair of Strings to a vector of PeptideHit).
165  static void createProteinToPeptideLinks_(const std::map<String, PeptideIdentificationList >& map_run_to_pepids, MapAccPepType& map_run_accession_to_pephits);
166 
/// Extracts, if possible, a unique protein accession for a peptide hit in mzTab format.
168  static String extractProteinAccession_(const PeptideHit& peptide_hit);
169 
/// Extracts modifications and positions of a peptide hit in mzTab format.
171  static String extractPeptideModifications_(const PeptideHit& peptide_hit);
172 
/// Map search engine identifier to CV, param etc.
174  static String mapSearchEngineToCvParam_(const String& openms_search_engine_name);
175 
176  static String mapSearchEngineScoreToCvParam_(const String& openms_search_engine_name, double score, String score_type);
177 
178  static String extractNumPeptides_(const String& common_identifier, const String& protein_accession,
179  const MapAccPepType& map_run_accession_to_peptides);
180 
181  // mzTab definition of distinct
182  static String extractNumPeptidesDistinct_(String common_identifier, String protein_accession,
183  const MapAccPepType& map_run_accession_to_peptides);
184 
185  // same as distinct but additional constraint of uniqueness (=maps to exactly one Protein)
186  static String extractNumPeptidesUnambiguous_(String common_identifier, String protein_accession,
187  const MapAccPepType& map_run_accession_to_peptides);
188 
189  static std::map<String, Size> extractNumberOfSubSamples_(const std::map<String, std::vector<ProteinIdentification> >& map_run_to_proids);
190 
191  static void writePeptideHeader_(SVOutStream& output, std::map<String, Size> n_sub_samples);
192 
193  static void writeProteinHeader_(SVOutStream& output, std::map<String, Size> n_sub_samples);
194 
195  static void writeProteinData_(SVOutStream& output,
196  const ProteinIdentification& prot_id,
197  Size run_count,
198  String input_filename,
199  bool has_coverage,
200  const MapAccPepType& map_run_accession_to_peptides,
201  const std::map<String, Size>& map_run_to_num_sub
202  );
203 
204  private:
205  friend class MzTabMFile;
206  };
207 
208 } // namespace OpenMS
209 
A container for consensus elements.
Definition: ConsensusMap.h:68
Postcondition failed exception.
Definition: Exception.h:141
typename VecMember::iterator iterator
Definition: ExposedVector.h:68
File adapter for MzTab files.
Definition: MzTabFile.h:31
String generateMzTabProteinHeader_(const MzTabProteinSectionRow &reference_row, const Size n_best_search_engine_scores, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
void storePSMUriColumn(bool store)
static std::pair< int, int > extractIndexPairsFromBrackets_(const String &s)
String generateMzTabOSMHeader_(Size n_search_engine_scores, const std::vector< String > &optional_columns, size_t &n_columns) const
static String mapSearchEngineScoreToCvParam_(const String &openms_search_engine_name, double score, String score_type)
bool store_osm_uri_
Definition: MzTabFile.h:93
bool store_psm_uri_
Definition: MzTabFile.h:85
static void writePeptideHeader_(SVOutStream &output, std::map< String, Size > n_sub_samples)
bool store_smallmolecule_reliability_
Definition: MzTabFile.h:82
String generateMzTabSectionRow_(const MzTabPSMSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
bool store_protein_goterms_
Definition: MzTabFile.h:87
void storeSmallMoleculeReliabilityColumn(bool store)
bool store_protein_uri_
Definition: MzTabFile.h:83
static void keepFirstPSM_(PeptideIdentificationList::iterator begin, PeptideIdentificationList::iterator end)
bool store_nucleic_acid_goterms_
Definition: MzTabFile.h:94
void storePSMReliabilityColumn(bool store)
bool store_protein_reliability_
Definition: MzTabFile.h:79
static String mapSearchEngineToCvParam_(const String &openms_search_engine_name)
Map search engine identifier to CV, param etc.
bool store_psm_reliability_
Definition: MzTabFile.h:81
String generateMzTabSectionRow_(const MzTabSmallMoleculeSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
String generateMzTabSectionRow_(const MzTabProteinSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static void writeProteinData_(SVOutStream &output, const ProteinIdentification &prot_id, Size run_count, String input_filename, bool has_coverage, const MapAccPepType &map_run_accession_to_peptides, const std::map< String, Size > &map_run_to_num_sub)
bool store_oligonucleotide_uri_
Definition: MzTabFile.h:92
static void partitionIntoRuns_(const PeptideIdentificationList &pep_ids, const std::vector< ProteinIdentification > &pro_ids, std::map< String, PeptideIdentificationList > &map_run_to_pepids, std::map< String, std::vector< ProteinIdentification > > &map_run_to_proids)
Extract protein and peptide identifications for each run. The output maps are assumed to be empty.
String generateMzTabSectionRow_(const MzTabNucleicAcidSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static void writeProteinHeader_(SVOutStream &output, std::map< String, Size > n_sub_samples)
static void addOptionalColumnsToSectionRow_(const std::vector< String > &column_names, const std::vector< MzTabOptionalColumnEntry > &column_entries, StringList &output)
Helper function for "generateMzTabSectionRow_" functions.
static void createProteinToPeptideLinks_(const std::map< String, PeptideIdentificationList > &map_run_to_pepids, MapAccPepType &map_run_accession_to_pephits)
Create links from proteins to peptides.
~MzTabFile()
Destructor.
String generateMzTabNucleicAcidHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_scores, const std::vector< String > &optional_columns, size_t &n_columns) const
static String extractProteinAccession_(const PeptideHit &peptide_hit)
Extracts, if possible, a unique protein accession for a peptide hit in mzTab format....
static String extractPeptideModifications_(const PeptideHit &peptide_hit)
Extracts modifications and positions of a peptide hit in mzTab format.
static String extractNumPeptidesDistinct_(String common_identifier, String protein_accession, const MapAccPepType &map_run_accession_to_peptides)
void store(const String &filename, const std::vector< ProteinIdentification > &protein_identifications, const PeptideIdentificationList &peptide_identifications, bool first_run_inference_only, bool export_empty_pep_ids=false, bool export_all_psms=false, const String &title="ID export from OpenMS")
bool store_smallmolecule_uri_
Definition: MzTabFile.h:86
bool store_oligonucleotide_reliability_
Definition: MzTabFile.h:89
String generateMzTabSmallMoleculeHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector< String > &optional_columns, size_t &n_columns) const
String generateMzTabOligonucleotideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, const std::vector< String > &optional_columns, size_t &n_columns) const
String generateMzTabPeptideHeader_(Size search_ms_runs, Size n_best_search_engine_scores, Size n_search_engine_score, Size assays, Size study_variables, const std::vector< String > &optional_columns, size_t &n_columns) const
bool store_osm_reliability_
Definition: MzTabFile.h:90
void store(const String &filename, const MzTab &mz_tab) const
std::map< std::pair< String, String >, std::vector< PeptideHit > > MapAccPepType
Definition: MzTabFile.h:38
void store(const String &filename, const ConsensusMap &cmap, const bool first_run_inference_only, const bool export_unidentified_features, const bool export_unassigned_ids, const bool export_subfeatures, const bool export_empty_pep_ids=false, const bool export_all_psms=false) const
void storeProteinReliabilityColumn(bool store)
void storeProteinGoTerms(bool store)
void generateMzTabMetaDataSection_(const MzTabMetaData &map, StringList &sl) const
String generateMzTabSectionRow_(const MzTabPeptideSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static String extractNumPeptidesUnambiguous_(String common_identifier, String protein_accession, const MapAccPepType &map_run_accession_to_peptides)
bool store_nucleic_acid_reliability_
Definition: MzTabFile.h:88
void storeSmallMoleculeUriColumn(bool store)
void load(const String &filename, MzTab &mz_tab)
MzTabFile()
Default constructor.
bool store_peptide_uri_
Definition: MzTabFile.h:84
String generateMzTabSectionRow_(const MzTabOSMSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static String extractNumPeptides_(const String &common_identifier, const String &protein_accession, const MapAccPepType &map_run_accession_to_peptides)
static void sortPSM_(PeptideIdentificationList::iterator begin, PeptideIdentificationList::iterator end)
String generateMzTabPSMHeader_(Size n_search_engine_scores, const std::vector< String > &optional_columns, size_t &n_columns) const
String generateMzTabSectionRow_(const MzTabOligonucleotideSectionRow &row, const std::vector< String > &optional_columns, const MzTabMetaData &meta, size_t &n_columns) const
static std::map< String, Size > extractNumberOfSubSamples_(const std::map< String, std::vector< ProteinIdentification > > &map_run_to_proids)
void storePeptideUriColumn(bool store)
bool store_peptide_reliability_
Definition: MzTabFile.h:80
void storeProteinUriColumn(bool store)
void generateMzTabSection_(const std::vector< SectionRow > &rows, const std::vector< String > &optional_columns, const MzTabMetaData &meta, StringList &output, size_t n_header_columns) const
Generate an mzTab section comprising multiple rows of the same type and perform sanity check.
Definition: MzTabFile.h:133
void storePeptideReliabilityColumn(bool store)
bool store_nucleic_acid_uri_
Definition: MzTabFile.h:91
File adapter for MzTab-M files.
Definition: MzTabMFile.h:25
All metadata of an mzTab file. Please refer to the specification for documentation.
Definition: MzTab.h:118
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:455
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
Stream class for writing to comma/tab/...-separated values files.
Definition: SVOutStream.h:32
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:44
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
NUC - Nucleic acid section (table-based)
Definition: MzTab.h:339
OSM - OSM (oligonucleotide-spectrum match) section (table-based)
Definition: MzTab.h:409
OLI - Oligonucleotide section (table-based)
Definition: MzTab.h:374
PEP - Peptide section (Table based)
Definition: MzTab.h:220
SML - Small molecule section (table-based)
Definition: MzTab.h:309
PSM - PSM section (Table based)
Definition: MzTab.h:258
PRT - Protein section (Table based)
Definition: MzTab.h:181