All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
ExperimentalDesign.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
14 
15 #include <vector>
16 #include <map>
17 #include <set>
18 
19 namespace OpenMS
20 {
21  class ConsensusMap;
22  class FeatureMap;
23 
108  class OPENMS_DLLAPI ExperimentalDesign
109  {
110 
111  public:
118  class OPENMS_DLLAPI MSFileSectionEntry // one row of the MS file section (MSFileSection is a std::vector of these entries)
119  {
120  public:
121  MSFileSectionEntry() = default;
122  unsigned fraction_group = 1; // fraction_group number of this entry (default: 1)
123  unsigned fraction = 1; // fraction number of this entry (default: 1)
124  std::string path = "UNKNOWN_FILE"; // file path; placeholder default until set. NOTE(review): std::string here, String elsewhere in this class
125  unsigned label = 1; // label; the (path, label) tuple has to be unique in the experimental design
126  unsigned sample = 0; // sample number of this entry (default: 0)
127  String sample_name = "0"; // sample name (default: "0"); presumably the name/ID used by SampleSection -- TODO confirm
128  };
129 
130  class OPENMS_DLLAPI SampleSection
131  {
132  public:
133 
134  SampleSection() = default;
135 
137  const std::vector< std::vector < String > >& content,
138  const std::map< String, Size >& sample_to_rowindex,
139  const std::map< String, Size >& columnname_to_columnindex
140  );
141 
142  // Get set of all samples that are present in the sample section
143  std::set< String > getSamples() const;
144 
145  // Add a sample as the last row
146  void addSample(const String& sample, const std::vector<String>& content = {});
147 
148  // TODO: decide whether the result should include the Sample ID column
149  // Get set of all factors (column names) that were defined for the sample section
150  std::set< String > getFactors() const;
151 
152  // Checks whether the sample section has a row for the given sample name/ID
153  bool hasSample(const String& sample) const;
154 
155  // Checks whether Sample Section has a specific factor (i.e. column name)
156  bool hasFactor(const String &factor) const;
157 
158  // Returns value of factor for given sample and factor name
159  String getFactorValue(const String& sample_name, const String &factor) const;
160 
161  // Returns value of factor for given sample index and factor name
162  String getFactorValue(unsigned sample_idx, const String &factor) const;
163 
164  // Returns column index of factor
165  Size getFactorColIdx(const String &factor) const;
166 
167  // Returns the name/ID of the sample. Not necessarily the row index
168  String getSampleName(unsigned sample_row) const;
169 
170  // Returns the row index in the sample section for a sample name/ID
171  unsigned getSampleRow(const String& sample) const;
172 
175 
176  private:
177 
178  // The entries of the Sample Section, filled while parsing
179  // the Experimental Design File
180  std::vector< std::vector < String > > content_;
181 
182  // Maps the Sample Entry name to the row where the sample
183  // appears in the Sample section, its sample index
184  std::map< String, Size > sample_to_rowindex_;
185 
186  // Maps the column name of the SampleSection to the
187  // Index of the column
188  std::map< String, Size > columnname_to_columnindex_;
189  };
190 
191  using MSFileSection = std::vector<MSFileSectionEntry>;
192 
193  // Experimental Design c'tors
194  ExperimentalDesign() = default;
195 
196  ExperimentalDesign(const MSFileSection& msfile_section, const SampleSection& sample_section);
197 
199 
200  void setMSFileSection(const MSFileSection& msfile_section);
201 
202  // Returns the Sample Section of the experimental design file
204 
205  void setSampleSection(const SampleSection& sample_section);
206 
209  std::map<std::vector<String>, std::set<String>> getUniqueSampleRowToSampleMapping() const;
210 
213  std::map<String, unsigned> getSampleToPrefractionationMapping() const;
214 
216  // TODO: this probably needs a basename parameter to be fully compatible with the other mappings. Currently the full path is implied.
217  std::map<unsigned int, std::vector<String> > getFractionToMSFilesMapping() const;
218 
221  // TODO: this probably needs a basename parameter to be fully compatible with the other mappings. Currently the full path is implied.
222  std::vector<std::vector<std::pair<String, unsigned>>> getConditionToPathLabelVector() const;
223 
225  std::map<std::vector<String>, std::set<unsigned>> getConditionToSampleMapping() const;
226 
227  /*
228  * The (Path, Label) tuples in the experimental design have to be unique, so we can map them
229  * uniquely to the sample number, fraction number, and fraction_group number
230  */
231 
234  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToPrefractionationMapping(bool use_basename_only) const;
235 
238  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToConditionMapping(bool use_basename_only) const;
239 
242  std::map<String, unsigned> getSampleToConditionMapping() const;
243 
245  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToSampleMapping(bool use_basename_only) const;
246 
248  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToFractionMapping(bool use_basename_only) const;
249 
251  std::map< std::pair< String, unsigned >, unsigned> getPathLabelToFractionGroupMapping(bool use_basename_only) const;
252 
253  // @return the number of samples measured (= highest sample index)
254  unsigned getNumberOfSamples() const;
255 
256  // @return the number of fractions (= highest fraction index)
257  unsigned getNumberOfFractions() const;
258 
259  // @return the number of labels per file
260  unsigned getNumberOfLabels() const;
261 
262  // @return the number of MS files (= fractions * fraction groups)
263  unsigned getNumberOfMSFiles() const;
264 
265  // @return the number of fraction_groups
266  // Fraction groups allow grouping of fraction ids and source files
267  unsigned getNumberOfFractionGroups() const;
268 
269  // @return sample index (depends on fraction_group and label)
270  unsigned getSample(unsigned fraction_group, unsigned label = 1);
271 
273  // This is the case if we have at least one fraction group with >= 2 fractions
274  bool isFractionated() const;
275 
279  Size filterByBasenames(const std::set<String>& bns);
280 
283 
286 
289 
291  static ExperimentalDesign fromIdentifications(const std::vector<ProteinIdentification>& proteins);
292  // TODO: create another overload here that takes two enums, outerVec and innerVec, with entries Replicate, Fraction, Sample
293 
294  private:
295  // MS filename column, optionally trims to basename
296  std::vector< String > getFileNames_(bool basename) const;
297 
298  // returns label column
299  std::vector<unsigned> getLabels_() const;
300 
301  // returns fraction column
302  std::vector<unsigned> getFractions_() const;
303 
305  std::map< std::pair< String, unsigned >, unsigned> pathLabelMapper_(
306  bool,
307  unsigned (*f)(const ExperimentalDesign::MSFileSectionEntry&)) const;
308 
309  // sort to obtain the default order
310  void sort_();
311 
312  template<typename T>
313  static void errorIfAlreadyExists(std::set<T> &container, T &item, const String &message);
314 
315  // basic consistency checks
316  void isValid_();
317 
320  };
321 }
322 
A container for consensus elements.
Definition: ConsensusMap.h:68
Definition: ExperimentalDesign.h:119
Definition: ExperimentalDesign.h:131
SampleSection(const std::vector< std::vector< String > > &content, const std::map< String, Size > &sample_to_rowindex, const std::map< String, Size > &columnname_to_columnindex)
bool hasSample(const String &sample) const
std::vector< std::vector< String > > content_
Definition: ExperimentalDesign.h:180
std::map< String, Size > columnname_to_columnindex_
Definition: ExperimentalDesign.h:188
void addSample(const String &sample, const std::vector< String > &content={})
String getSampleName(unsigned sample_row) const
std::map< String, Size > sample_to_rowindex_
Definition: ExperimentalDesign.h:184
Size getContentSize() const
returns the number of entries in content_ member
std::set< String > getFactors() const
Size getFactorColIdx(const String &factor) const
std::set< String > getSamples() const
bool hasFactor(const String &factor) const
String getFactorValue(const String &sample_name, const String &factor) const
unsigned getSampleRow(const String &sample) const
String getFactorValue(unsigned sample_idx, const String &factor) const
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:109
unsigned getNumberOfLabels() const
static void errorIfAlreadyExists(std::set< T > &container, T &item, const String &message)
unsigned getNumberOfFractions() const
static ExperimentalDesign fromConsensusMap(const ConsensusMap &c)
Extract experimental design from consensus map.
unsigned getSample(unsigned fraction_group, unsigned label=1)
std::map< std::vector< String >, std::set< String > > getUniqueSampleRowToSampleMapping() const
unsigned getNumberOfSamples() const
void setSampleSection(const SampleSection &sample_section)
std::vector< unsigned > getLabels_() const
bool sameNrOfMSFilesPerFraction() const
Size filterByBasenames(const std::set< String > &bns)
unsigned getNumberOfFractionGroups() const
std::map< unsigned int, std::vector< String > > getFractionToMSFilesMapping() const
return fraction index to file paths (ordered by fraction_group)
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToFractionMapping(bool use_basename_only) const
return <file_path, label> to fraction mapping
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToPrefractionationMapping(bool use_basename_only) const
std::map< std::vector< String >, std::set< unsigned > > getConditionToSampleMapping() const
return a condition (unique combination of sample section values except replicate) to Sample index map...
void setMSFileSection(const MSFileSection &msfile_section)
std::vector< MSFileSectionEntry > MSFileSection
Definition: ExperimentalDesign.h:191
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToConditionMapping(bool use_basename_only) const
MSFileSection msfile_section_
Definition: ExperimentalDesign.h:318
static ExperimentalDesign fromIdentifications(const std::vector< ProteinIdentification > &proteins)
Extract experimental design from identifications.
std::map< std::pair< String, unsigned >, unsigned > pathLabelMapper_(bool, unsigned(*f)(const ExperimentalDesign::MSFileSectionEntry &)) const
Generic Mapper (Path, Label) -> f(row)
ExperimentalDesign(const MSFileSection &msfile_section, const SampleSection &sample_section)
std::map< String, unsigned > getSampleToPrefractionationMapping() const
std::vector< unsigned > getFractions_() const
const MSFileSection & getMSFileSection() const
std::vector< String > getFileNames_(bool basename) const
const ExperimentalDesign::SampleSection & getSampleSection() const
static ExperimentalDesign fromFeatureMap(const FeatureMap &f)
Extract experimental design from feature map.
std::vector< std::vector< std::pair< String, unsigned > > > getConditionToPathLabelVector() const
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToSampleMapping(bool use_basename_only) const
return <file_path, label> to sample index mapping
SampleSection sample_section_
Definition: ExperimentalDesign.h:319
std::map< String, unsigned > getSampleToConditionMapping() const
unsigned getNumberOfMSFiles() const
std::map< std::pair< String, unsigned >, unsigned > getPathLabelToFractionGroupMapping(bool use_basename_only) const
return <file_path, label> to fraction_group mapping
A container for features.
Definition: FeatureMap.h:82
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
const double c
Definition: Constants.h:188
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19