OpenMS
GaussFilterAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hannes Roest $
6 // $Authors: Eva Lange $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/CONCEPT/Types.h>
15 
16 #include <cmath>
17 #include <vector>
18 
19 #include <iostream>
20 
21 namespace OpenMS
22 {
45 // #define DEBUG_FILTERING
46 
47  class OPENMS_DLLAPI GaussFilterAlgorithm
48  {
49 public:
52 
55 
60  {
61  // create new arrays for mz / intensity data and set their size
64  mz_array->data.resize(spectrum->getMZArray()->data.size());
65  intensity_array->data.resize(spectrum->getMZArray()->data.size());
66 
67  // apply the filter
68  bool ret_val = filter(
69  spectrum->getMZArray()->data.begin(),
70  spectrum->getMZArray()->data.end(),
71  spectrum->getIntensityArray()->data.begin(),
72  mz_array->data.begin(), intensity_array->data.begin()
73  );
74  // set the data of the spectrum to the new mz / int arrays
75  spectrum->setMZArray(mz_array);
76  spectrum->setIntensityArray(intensity_array);
77  return ret_val;
78  }
79 
84  {
85  // create new arrays for rt / intensity data and set their size
88  rt_array->data.resize(chromatogram->getTimeArray()->data.size());
89  intensity_array->data.resize(chromatogram->getTimeArray()->data.size());
90 
91  // apply the filter
92  bool ret_val = filter(
93  chromatogram->getTimeArray()->data.begin(),
94  chromatogram->getTimeArray()->data.end(),
95  chromatogram->getIntensityArray()->data.begin(),
96  rt_array->data.begin(), intensity_array->data.begin()
97  );
98  // set the data of the chromatogram to the new rt / int arrays
99  chromatogram->setTimeArray(rt_array);
100  chromatogram->setIntensityArray(intensity_array);
101  return ret_val;
102  }
103 
109  template <typename ConstIterT, typename IterT>
110  bool filter(
111  ConstIterT mz_in_start,
112  ConstIterT mz_in_end,
113  ConstIterT int_in_start,
114  IterT mz_out,
115  IterT int_out)
116  {
117  bool found_signal = false;
118 
119  ConstIterT mz_it = mz_in_start;
120  ConstIterT int_it = int_in_start;
121  for (; mz_it != mz_in_end; mz_it++, int_it++)
122  {
123  // if ppm tolerance is used, calculate a reasonable width value for this m/z
124  if (use_ppm_tolerance_)
125  {
126  initialize(Math::ppmToMass(ppm_tolerance_, *mz_it), spacing_, ppm_tolerance_, use_ppm_tolerance_);
127  }
128 
129  double new_int = integrate_(mz_it, int_it, mz_in_start, mz_in_end);
130 
131  // store new intensity and m/z into output iterator
132  *mz_out = *mz_it;
133  *int_out = new_int;
134  ++mz_out;
135  ++int_out;
136 
137  if (fabs(new_int) > 0) found_signal = true;
138  }
139  return found_signal;
140  }
141 
142  void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance);
143 
144 protected:
145 
147  std::vector<double> coeffs_;
149  double sigma_;
151  double spacing_;
152 
153  // tolerance in ppm
156 
158  template <typename InputPeakIterator>
159  double integrate_(InputPeakIterator x /* mz */, InputPeakIterator y /* int */, InputPeakIterator first, InputPeakIterator last)
160  {
161  double v = 0.;
162  // norm the gaussian kernel area to one
163  double norm = 0.;
164  Size middle = coeffs_.size();
165 
166  double start_pos = (( (*x) - (middle * spacing_)) > (*first)) ? ((*x) - (middle * spacing_)) : (*first);
167  double end_pos = (( (*x) + (middle * spacing_)) < (*(last - 1))) ? ((*x) + (middle * spacing_)) : (*(last - 1));
168 
169  InputPeakIterator help_x = x;
170  InputPeakIterator help_y = y;
171 #ifdef DEBUG_FILTERING
172 
173  std::cout << "integrate from middle to start_pos " << *help_x << " until " << start_pos << std::endl;
174 #endif
175 
176  //integrate from middle to start_pos
177  while ((help_x != first) && (*(help_x - 1) > start_pos))
178  {
179  // search for the corresponding datapoint of help in the gaussian (take the left most adjacent point)
180  double distance_in_gaussian = fabs(*x - *help_x);
181  Size left_position = (Size)floor(distance_in_gaussian / spacing_);
182 
183  // search for the true left adjacent data point (because of rounding errors)
184  for (int j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
185  {
186  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
187  {
188  left_position -= j;
189  break;
190  }
191 
192  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
193  {
194  left_position += j;
195  break;
196  }
197  }
198 
199  // interpolate between the left and right data points in the gaussian to get the true value at position distance_in_gaussian
200  Size right_position = left_position + 1;
201  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
202  // check if the right data point in the gaussian exists
203  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
204  : coeffs_[left_position];
205 #ifdef DEBUG_FILTERING
206 
207  std::cout << "distance_in_gaussian " << distance_in_gaussian << std::endl;
208  std::cout << " right_position " << right_position << std::endl;
209  std::cout << " left_position " << left_position << std::endl;
210  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
211  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
212  std::cout << "interpolated value left " << coeffs_right << std::endl;
213 #endif
214 
215 
216  // search for the corresponding datapoint for (help-1) in the gaussian (take the left most adjacent point)
217  distance_in_gaussian = fabs((*x) - (*(help_x - 1)));
218  left_position = (Size)floor(distance_in_gaussian / spacing_);
219 
220  // search for the true left adjacent data point (because of rounding errors)
221  for (UInt j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
222  {
223  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
224  {
225  left_position -= j;
226  break;
227  }
228 
229  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
230  {
231  left_position += j;
232  break;
233  }
234  }
235 
236  // start the interpolation for the true value in the gaussian
237  right_position = left_position + 1;
238  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
239  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
240  : coeffs_[left_position];
241 #ifdef DEBUG_FILTERING
242 
243  std::cout << " help_x-1 " << *(help_x - 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
244  std::cout << " right_position " << right_position << std::endl;
245  std::cout << " left_position " << left_position << std::endl;
246  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
247  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
248  std::cout << "interpolated value right " << coeffs_left << std::endl;
249 
250  std::cout << " intensity " << fabs(*(help_x - 1) - (*help_x)) / 2. << " * " << *(help_y - 1) << " * " << coeffs_left << " + " << *help_y << "* " << coeffs_right
251  << std::endl;
252 #endif
253 
254 
255  norm += fabs((*(help_x - 1)) - (*help_x)) / 2. * (coeffs_left + coeffs_right);
256 
257  v += fabs((*(help_x - 1)) - (*help_x)) / 2. * (*(help_y - 1) * coeffs_left + (*help_y) * coeffs_right);
258  --help_x;
259  --help_y;
260  }
261 
262 
263  //integrate from middle to end_pos
264  help_x = x;
265  help_y = y;
266 #ifdef DEBUG_FILTERING
267 
268  std::cout << "integrate from middle to endpos " << *help_x << " until " << end_pos << std::endl;
269 #endif
270 
271  while ((help_x != (last - 1)) && (*(help_x + 1) < end_pos))
272  {
273  // search for the corresponding datapoint for help in the gaussian (take the left most adjacent point)
274  double distance_in_gaussian = fabs((*x) - (*help_x));
275  int left_position = (UInt)floor(distance_in_gaussian / spacing_);
276 
277  // search for the true left adjacent data point (because of rounding errors)
278  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
279  {
280  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
281  {
282  left_position -= j;
283  break;
284  }
285 
286  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
287  {
288  left_position += j;
289  break;
290  }
291  }
292  // start the interpolation for the true value in the gaussian
293  Size right_position = left_position + 1;
294  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
295  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
296  : coeffs_[left_position];
297 
298 #ifdef DEBUG_FILTERING
299 
300  std::cout << " help " << *help_x << " distance_in_gaussian " << distance_in_gaussian << std::endl;
301  std::cout << " left_position " << left_position << std::endl;
302  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
303  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
304  std::cout << "interpolated value left " << coeffs_left << std::endl;
305 #endif
306 
307  // search for the corresponding datapoint for (help+1) in the gaussian (take the left most adjacent point)
308  distance_in_gaussian = fabs((*x) - (*(help_x + 1)));
309  left_position = (UInt)floor(distance_in_gaussian / spacing_);
310 
311  // search for the true left adjacent data point (because of rounding errors)
312  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
313  {
314  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
315  {
316  left_position -= j;
317  break;
318  }
319 
320  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
321  {
322  left_position += j;
323  break;
324  }
325  }
326 
327  // start the interpolation for the true value in the gaussian
328  right_position = left_position + 1;
329  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
330  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
331  : coeffs_[left_position];
332 #ifdef DEBUG_FILTERING
333 
334  std::cout << " (help + 1) " << *(help_x + 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
335  std::cout << " left_position " << left_position << std::endl;
336  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
337  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
338  std::cout << "interpolated value right " << coeffs_right << std::endl;
339 
340  std::cout << " intensity " << fabs(*help_x - *(help_x + 1)) / 2.
341  << " * " << *help_y << " * " << coeffs_left << " + " << *(help_y + 1)
342  << "* " << coeffs_right
343  << std::endl;
344 #endif
345  norm += fabs((*help_x) - (*(help_x + 1)) ) / 2. * (coeffs_left + coeffs_right);
346 
347  v += fabs((*help_x) - (*(help_x + 1)) ) / 2. * ((*help_y) * coeffs_left + (*(help_y + 1)) * coeffs_right);
348  ++help_x;
349  ++help_y;
350  }
351 
352  if (v > 0)
353  {
354  return v / norm;
355  }
356  else
357  {
358  return 0;
359  }
360  }
361 
362  };
363 
364 } // namespace OpenMS
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilterAlgorithm.h:48
std::vector< double > coeffs_
Coefficients.
Definition: GaussFilterAlgorithm.h:147
GaussFilterAlgorithm()
Constructor.
void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance)
double sigma_
The standard derivation .
Definition: GaussFilterAlgorithm.h:149
bool filter(ConstIterT mz_in_start, ConstIterT mz_in_end, ConstIterT int_in_start, IterT mz_out, IterT int_out)
Smoothes two data arrays.
Definition: GaussFilterAlgorithm.h:110
double integrate_(InputPeakIterator x, InputPeakIterator y, InputPeakIterator first, InputPeakIterator last)
Computes the convolution of the raw data at position x and the gaussian kernel.
Definition: GaussFilterAlgorithm.h:159
double spacing_
The spacing of the pre-tabulated kernel coefficients.
Definition: GaussFilterAlgorithm.h:151
double ppm_tolerance_
Definition: GaussFilterAlgorithm.h:155
virtual ~GaussFilterAlgorithm()
Destructor.
bool use_ppm_tolerance_
Definition: GaussFilterAlgorithm.h:154
bool filter(OpenMS::Interfaces::SpectrumPtr spectrum)
Smoothes an Spectrum containing profile data.
Definition: GaussFilterAlgorithm.h:59
bool filter(OpenMS::Interfaces::ChromatogramPtr chromatogram)
Smoothes an Chromatogram containing profile data.
Definition: GaussFilterAlgorithm.h:83
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::shared_ptr< Chromatogram > ChromatogramPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:130
std::shared_ptr< Spectrum > SpectrumPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:210
std::shared_ptr< BinaryDataArray > BinaryDataArrayPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:54
The datastructures used by the OpenSwath interfaces.
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:47
T ppmToMass(T ppm, T mz_ref)
Compute the mass diff in [Th], given a ppm value and a reference point.
Definition: MathFunctions.h:404
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
double norm(T beg, T end)
compute the Euclidean norm of the vector
Definition: StatsHelpers.h:31