All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Modules Pages
IDFilter.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Mathias Walzer $
6 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
24 #include <OpenMS/config.h>
25 #include <algorithm>
26 #include <climits>
27 #include <functional>
28 #include <map>
29 #include <set>
30 #include <unordered_set>
31 #include <vector>
32 
33 namespace OpenMS
34 {
35  template<typename T>
37  std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
38 
39  template<typename T>
41  std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
42 
63  class OPENMS_DLLAPI IDFilter
64  {
65  public:
67  IDFilter() = default;
68 
70  virtual ~IDFilter() = default;
71 
73  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
74  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
75  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
76 
83 
85  template<class HitType>
86  struct HasGoodScore {
87  typedef HitType argument_type; // for use as a predicate
88 
89  double score;
91 
92  HasGoodScore(double score_, bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
93  {
94  }
95 
96  bool operator()(const HitType& hit) const
97  {
98  if (higher_score_better)
99  {
100  return hit.getScore() >= score;
101  }
102  return hit.getScore() <= score;
103  }
104  };
105 
111  template<class HitType>
112  struct HasMetaValue {
113  typedef HitType argument_type; // for use as a predicate
114 
117 
118  HasMetaValue(const String& key_, const DataValue& value_) : key(key_), value(value_)
119  {
120  }
121 
122  bool operator()(const HitType& hit) const
123  {
124  DataValue found = hit.getMetaValue(key);
125  if (found.isEmpty())
126  return false; // meta value "key" not set
127  if (value.isEmpty())
128  return true; // "key" is set, value doesn't matter
129  return found == value;
130  }
131  };
132 
134  template<class HitType>
136  typedef HitType argument_type; // for use as a predicate
137 
139  double value;
140 
141  HasMaxMetaValue(const String& key_, const double& value_) : key(key_), value(value_)
142  {
143  }
144 
145  bool operator()(const HitType& hit) const
146  {
147  DataValue found = hit.getMetaValue(key);
148  if (found.isEmpty())
149  return false; // meta value "key" not set
150  return double(found) <= value;
151  }
152  };
153 
161  template<class HitType>
163  {
164  typedef HitType argument_type; // for use as a predicate
165 
167  double value;
168 
175  HasMinMetaValue(const String& key_, const double& value_) :
176  key(key_),
177  value(value_)
178  {
179  }
180 
187  bool operator()(const HitType& hit) const
188  {
189  DataValue found = hit.getMetaValue(key);
190  if (found.isEmpty())
191  {
192  return false; // meta value "key" not set
193  }
194  return static_cast<double>(found) >= value;
195  }
196  };
197 
199 
214  template<class HitType>
216  {
217  typedef HitType argument_type; // for use as a predicate
218 
219  struct HasMetaValue<HitType> target_decoy, is_decoy;
220 
227  target_decoy("target_decoy", "decoy"),
228  is_decoy("isDecoy", "true")
229  {
230  }
231 
240  bool operator()(const HitType& hit) const
241  {
242  // @TODO: this could be done slightly more efficiently by returning
243  // false if the "target_decoy" meta value is "target" or "target+decoy",
244  // without checking for an "isDecoy" meta value in that case
245  return target_decoy(hit) || is_decoy(hit);
246  }
247  };
248 
254  template<class HitType>
256  typedef HitType argument_type; // for use as a predicate
257 
258  const std::unordered_set<String>& accessions;
259 
260  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_) :
261  accessions(accessions_)
262  {
263  }
264 
265  bool operator()(const PeptideHit& hit) const
266  {
267  for (const auto& it : hit.extractProteinAccessionsSet())
268  {
269  if (accessions.count(it) > 0)
270  return true;
271  }
272  return false;
273  }
274 
275  bool operator()(const ProteinHit& hit) const
276  {
277  return (accessions.count(hit.getAccession()) > 0);
278  }
279 
280  bool operator()(const PeptideEvidence& evidence) const
281  {
282  return (accessions.count(evidence.getProteinAccession()) > 0);
283  }
284  };
285 
291  template<class HitType>
293  typedef HitType argument_type; // for use as a predicate
294 
295  const std::set<String>& accessions;
296 
297  HasMatchingAccession(const std::set<String>& accessions_) : accessions(accessions_)
298  {
299  }
300 
301  bool operator()(const PeptideHit& hit) const
302  {
303  for (const auto& it : hit.extractProteinAccessionsSet())
304  {
305  if (accessions.count(it) > 0)
306  return true;
307  }
308  return false;
309  }
310 
311  bool operator()(const ProteinHit& hit) const
312  {
313  return (accessions.count(hit.getAccession()) > 0);
314  }
315 
316  bool operator()(const PeptideEvidence& evidence) const
317  {
318  return (accessions.count(evidence.getProteinAccession()) > 0);
319  }
320  };
321 
327  template<class HitType, class Entry>
329  typedef HitType argument_type; // for use as a predicate
330  typedef std::map<String, Entry*> ItemMap; // Store pointers to avoid copying data
332 
333  GetMatchingItems(std::vector<Entry>& records)
334  {
335  for (typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
336  {
337  items[getKey(*rec_it)] = &(*rec_it);
338  }
339  }
340 
342  {
343  }
344 
345  const String& getKey(const FASTAFile::FASTAEntry& entry) const
346  {
347  return entry.identifier;
348  }
349 
350  bool exists(const HitType& hit) const
351  {
352  return items.count(getHitKey(hit)) > 0;
353  }
354 
355  const String& getHitKey(const PeptideEvidence& p) const
356  {
357  return p.getProteinAccession();
358  }
359 
360  const Entry& getValue(const PeptideEvidence& evidence) const
361  {
362  if (!exists(evidence))
363  {
364  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '" + getHitKey(evidence) + "'. peptide evidence accession not in data");
365  }
366  return *(items.find(getHitKey(evidence))->second);
367  }
368  };
369 
371 
372 
379 
381  struct HasMinPeptideLength;
382 
384  struct HasMinCharge;
385 
387  struct HasLowMZError;
388 
394  struct HasMatchingModification;
395 
401  struct HasMatchingSequence;
402 
404  struct HasNoEvidence;
405 
406 
413  {
414  private:
418 
419  public:
421  PeptideDigestionFilter(EnzymaticDigestion& digestion, Int min, Int max) : digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
422  {
423  }
424 
425  static inline Int disabledValue()
426  {
427  return -1;
428  }
429 
432  bool operator()(PeptideHit& p) const
433  {
434  const auto& fun = [&](const Int missed_cleavages) {
435  bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ : false;
436  bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ : false;
437  return max_filter || min_filter;
438  };
439  return digestion_.filterByMissedCleavages(p.getSequence().toUnmodifiedString(), fun);
440  }
441 
442  void filterPeptideSequences(std::vector<PeptideHit>& hits)
443  {
444  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
445  }
446  };
447 
448 
456 
457  // Build an accession index to avoid the linear search cost
462 
463  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries, ProteaseDigestion& digestion, bool ignore_missed_cleavages, bool methionine_cleavage) :
464  accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
465  {
466  }
467 
468  bool operator()(const PeptideEvidence& evidence) const
469  {
470  if (!evidence.hasValidLimits())
471  {
472  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
473  return true;
474  }
475 
476  if (accession_resolver_.exists(evidence))
477  {
478  return digestion_.isValidProduct(AASequence::fromString(accession_resolver_.getValue(evidence).sequence), evidence.getStart(), evidence.getEnd() - evidence.getStart(),
479  ignore_missed_cleavages_, methionine_cleavage_);
480  }
481  else
482  {
483  if (evidence.getProteinAccession().empty())
484  {
485  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
486  }
487  else
488  {
489  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession() << "' not found in fasta file!" << std::endl;
490  }
491  return true;
492  }
493  }
494 
496  {
497  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this, peptides);
498  }
499  };
500 
502 
503 
506 
/// Predicate: is the list of hits of an identification empty?
template<class IdentificationType>
struct HasNoHits
{
  using argument_type = IdentificationType; // for use as a predicate

  /// Returns true if @p id has no hits.
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
517 
519 
520 
523 
525  struct HasRTInRange;
526 
528  struct HasMZInRange;
529 
531 
532 
539 
/// Removes all elements of @p items for which @p pred returns true.
template<class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto new_end = std::remove_if(items.begin(), items.end(), pred);
  items.erase(new_end, items.end());
}
546 
/// Keeps only the elements of @p items for which @p pred returns true.
template<class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  auto is_rejected = std::not_fn(pred);
  const auto new_end = std::remove_if(items.begin(), items.end(), std::move(is_rejected));
  items.erase(new_end, items.end());
}
553 
/// Moves all elements of @p items for which @p pred returns true to the end
/// of @p target. std::partition is used, so the relative order of elements
/// is not guaranteed to be preserved.
template<class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // non-matching elements are gathered at the front, matching ones at the back
  const auto first_match = std::partition(items.begin(), items.end(), std::not_fn(pred));
  const auto items_end = items.end();
  std::move(first_match, items_end, std::back_inserter(target));
  items.erase(first_match, items_end);
}
562 
/// Applies removeMatchingItems() to the hits of every element of @p items.
template<class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto it = items.begin(); it != items.end(); ++it)
  {
    removeMatchingItems(it->getHits(), pred);
  }
}
572 
/// Applies keepMatchingItems() to the hits of every element of @p items.
template<class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  for (auto it = items.begin(); it != items.end(); ++it)
  {
    keepMatchingItems(it->getHits(), pred);
  }
}
582 
/// Keeps only the peptide hits matching @p pred, both in the peptide
/// identifications of every element of @p prot_and_pep_ids and in its
/// unassigned peptide identifications.
///
/// The predicate is taken by const reference (like the other helpers in this
/// class), so temporaries can be passed as well as lvalues.
template<class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
592 
/// Removes the peptide hits matching @p pred, both from the peptide
/// identifications of every element of @p prot_and_pep_ids and from its
/// unassigned peptide identifications.
///
/// The predicate is taken by const reference (like the other helpers in this
/// class), so temporaries can be passed as well as lvalues.
template<class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
602 
603  template<IsFeatureOrConsensusMap MapType, class Predicate>
604  static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
605  {
606  for (auto& feat : prot_and_pep_ids)
607  {
608  removeMatchingItems(feat.getPeptideIdentifications(), pred);
609  }
610  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
611  }
612 
613  // Specialization for PeptideIdentificationList
614  template<class Predicate>
616  {
617  removeMatchingItems(pep_ids, pred);
618  }
619 
621 
622 
625 
627  template<class IdentificationType>
628  static Size countHits(const std::vector<IdentificationType>& ids)
629  {
630  Size counter = 0;
631  for (typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
632  {
633  counter += id_it->getHits().size();
634  }
635  return counter;
636  }
637 
640  {
641  Size counter = 0;
642  for (const auto& id : ids)
643  {
644  counter += id.getHits().size();
645  }
646  return counter;
647  }
648 
650  static void filterHitsByRank(PeptideIdentificationList& ids, Size min_rank, Size max_rank)
651  {
652  std::vector<PeptideIdentification>& vec = ids.getData();
653  filterHitsByRank(vec, min_rank, max_rank);
654  }
655 
657  static void removeHitsMatchingProteins(PeptideIdentificationList& ids, const std::set<String>& accessions)
658  {
659  std::vector<PeptideIdentification>& vec = ids.getData();
660  removeHitsMatchingProteins(vec, accessions);
661  }
662 
664  static void keepHitsMatchingProteins(PeptideIdentificationList& ids, const std::set<String>& accessions)
665  {
666  std::vector<PeptideIdentification>& vec = ids.getData();
667  keepHitsMatchingProteins(vec, accessions);
668  }
669 
671  static bool getBestHit(PeptideIdentificationList& ids, bool assume_sorted, PeptideHit& best_hit)
672  {
673  std::vector<PeptideIdentification>& vec = ids.getData();
674  return getBestHit(vec, assume_sorted, best_hit);
675  }
676 
690  template<class IdentificationType>
691  static bool getBestHit(const std::vector<IdentificationType>& identifications, bool assume_sorted, typename IdentificationType::HitType& best_hit)
692  {
693  if (identifications.empty())
694  return false;
695 
696  typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
697  typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
698 
699  for (typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
700  {
701  if (id_it->getHits().empty())
702  continue;
703 
704  if (best_id_it == identifications.end()) // no previous "best" hit
705  {
706  best_id_it = id_it;
707  best_hit_it = id_it->getHits().begin();
708  }
709  else if (best_id_it->getScoreType() != id_it->getScoreType())
710  {
711  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
712  }
713 
714  bool higher_better = best_id_it->isHigherScoreBetter();
715  for (typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
716  {
717  if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
718  {
719  best_hit_it = hit_it;
720  }
721  if (assume_sorted)
722  break; // only consider the first hit
723  }
724  }
725 
726  if (best_id_it == identifications.end())
727  {
728  return false; // no hits in any IDs
729  }
730 
731  best_hit = *best_hit_it;
732  return true;
733  }
734 
742  static void extractPeptideSequences(const PeptideIdentificationList& peptides, std::set<String>& sequences, bool ignore_mods = false);
743 
749  static std::map<String, std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
750 
756  template<class EvidenceFilter>
757  static void FilterPeptideEvidences(EvidenceFilter& filter, PeptideIdentificationList& peptides)
758  {
759  for (PeptideIdentificationList::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
760  {
761  for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
762  {
763  std::vector<PeptideEvidence> evidences;
764  remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
765  hit_it->setPeptideEvidences(evidences);
766  }
767  }
768  }
769 
771 
772 
777  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
778 
780  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, const PeptideIdentificationList& peptides);
783 
791  static void updateProteinReferences(PeptideIdentificationList& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
792 
800  static void updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
801 
809  static void updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
810 
819  static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups, const std::vector<ProteinHit>& hits);
820 
827  static void removeUngroupedProteins(const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
829 
830 
833 
835  template<IsPeptideOrProteinIdentification IdentificationType>
836  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
837  {
838  struct HasNoHits<IdentificationType> empty_filter;
839  removeMatchingItems(ids, empty_filter);
840  }
841 
847  template<class IdentificationType>
848  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score)
849  {
850  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
851  {
852  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
853  keepMatchingItems(id_it->getHits(), score_filter);
854  }
855  }
856 
870  template<class IdentificationType>
871  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
872  {
873  IDScoreSwitcherAlgorithm switcher;
874  bool at_least_one_found = false;
875  for (IdentificationType& id : ids)
876  {
877  if (switcher.isScoreType(id.getScoreType(), score_type))
878  {
879  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
880  keepMatchingItems(id.getHits(), score_filter);
881  }
882  else
883  {
884  // If one assumes they are all the same in the vector, this could be done in the beginning.
885  String metaval = switcher.findScoreType(id, score_type);
886  if (!metaval.empty())
887  {
888  if (switcher.isScoreTypeHigherBetter(score_type))
889  {
890  struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
891  keepMatchingItems(id.getHits(), score_filter);
892  }
893  else
894  {
895  struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
896  keepMatchingItems(id.getHits(), score_filter);
897  }
898  at_least_one_found = true;
899  }
900  }
901  }
902  if (!at_least_one_found) OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
903  }
904 
911  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
912 
918  template<class IdentificationType>
919  static void filterHitsByScore(IdentificationType& id, double threshold_score)
920  {
921  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
922  keepMatchingItems(id.getHits(), score_filter);
923  }
924 
930  template<class IdentificationType>
931  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
932  {
933  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
934  {
935  id_it->sort();
936  if (n < id_it->getHits().size())
937  id_it->getHits().resize(n);
938  }
939  }
940 
949  static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
950  {
951  std::vector<PeptideIdentification>& vec = pep_ids.getData();
952  keepNBestHits(vec, n);
953  }
954 
969  template<class IdentificationType>
970  static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
971  {
972  for (auto& id : ids)
973  {
974  auto& hits = id.getHits();
975  if (hits.empty()) continue;
976 
977  id.sort(); // Ensure hits are properly sorted
978 
979  // ignore max_rank?
980  if (max_rank < min_rank) max_rank = hits.size();
981 
982  Size rank = 1;
983  double last_score = hits.front().getScore();
984 
985  // Remove hits not within [min_rank, max_rank], while computing rank on the fly
986  hits.erase(
987  std::remove_if(hits.begin(), hits.end(),
988  [&](const auto& hit) {
989  if (hit.getScore() != last_score)
990  {
991  ++rank;
992  last_score = hit.getScore();
993  }
994  return rank < min_rank || rank > max_rank;
995  }),
996  hits.end()
997  );
998  }
999  }
1000 
1008  template<class IdentificationType>
1009  static void removeDecoyHits(std::vector<IdentificationType>& ids)
1010  {
1011  struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1012  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1013  {
1014  removeMatchingItems(id_it->getHits(), decoy_filter);
1015  }
1016  }
1017 
1025  template<class IdentificationType>
1026  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1027  {
1028  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1029  for (auto& id_it : ids)
1030  {
1031  removeMatchingItems(id_it.getHits(), acc_filter);
1032  }
1033  }
1034 
1042  template<IsPeptideOrProteinIdentification IdentificationType>
1043  static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1044  {
1045  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1046  keepMatchingItems(id.getHits(), acc_filter);
1047  }
1048 
1056  template<class IdentificationType>
1057  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1058  {
1059  for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1060  }
1061 
1063 
1064 
1067 
1074  static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1075 
1084  static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1085 
1094  static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1095 
1097  static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1098 
1100  static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1101 
1113  static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1114 
1115 
1122  template<class Filter>
1123  static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1124 
1136  static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1137 
1139  static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1140 
1141  static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1142 
1144  static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1145 
1153  static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1154 
1162  static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1163 
1165  static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1166 
1173  static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1174 
1176 
1177 
1180 
1182  static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1183  double peptide_threshold_score,
1184  double protein_threshold_score)
1185  {
1186  // filter protein hits:
1187  filterHitsByScore(annotated_data.getProteinIdentifications(),
1188  protein_threshold_score);
1189  // don't remove empty protein IDs - they contain search meta data and may
1190  // be referenced by peptide IDs (via run ID)
1191 
1192  // filter peptide hits:
1193  for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1194  {
1195  filterHitsByScore(peptide_id, peptide_threshold_score);
1196  }
1197  updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1198  }
1199 
1201  static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1202  {
1203  // don't filter the protein hits by "N best" here - filter the peptides
1204  // and update the protein hits!
1205  PeptideIdentificationList all_peptides; // IDs from all spectra
1206  // filter peptide hits:
1207  for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1208  {
1209  // Create a temporary vector with a single PeptideIdentification
1210  PeptideIdentificationList temp_vec = {peptide_id};
1211  keepNBestHits(temp_vec, n);
1212  // Copy back the filtered hits
1213  if (!temp_vec.empty())
1214  {
1215  peptide_id = temp_vec[0];
1216  }
1217  else
1218  {
1219  peptide_id.getHits().clear();
1220  }
1221 
1222  // Since we're working with individual PeptideIdentifications, we don't need to remove empty ones
1223  // but we still need to update protein references
1224  temp_vec = {peptide_id};
1225  updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1226  all_peptides.push_back(peptide_id);
1227  }
1228  // update protein hits:
1229  removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1230  }
1231 
1234  static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1235 
1237  template<class MapType>
1238  static void keepNBestPeptideHits(MapType& map, Size n)
1239  {
1240  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1241  // which sorts Hits first.
1242  for (auto& feat : map)
1243  {
1244  keepNBestHits(feat.getPeptideIdentifications(), n);
1245  }
1246  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1247  }
1248 
1249  template<class MapType>
1250  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1251  {
1252  const auto pred = HasNoHits<PeptideIdentification>();
1253  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1254  }
1255 
1257  static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1258  {
1259  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1260  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1261  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1262  }
1263 
1264  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1265  {
1266  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1267  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1268  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1269  }
1270 
1271  // TODO allow skipping unassigned?
1272  template<class MapType>
1273  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1274  {
1275  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1276 
1277  RunToSequenceToChargeToPepHitP best_peps_per_run;
1278  for (const auto& idrun : prot_ids)
1279  {
1280  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1281  }
1282 
1283  for (auto& feat : prot_and_pep_ids)
1284  {
1285  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1286  }
1287 
1288  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1289  }
1290 
1291  template<class MapType>
1292  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1293  {
1294  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1295  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1296  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1297  }
1298 
1301  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1302  Size nr_best_spectrum)
1303  {
1304  RunToSequenceToChargeToPepHitP best_peps_per_run;
1305  for (const auto& id : prot_ids)
1306  {
1307  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1308  }
1309  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1310  }
1311 
1315  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1316  Size nr_best_spectrum)
1317  {
1318  for (auto& pep : pep_ids)
1319  {
1320  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1321  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1322  }
1323  }
1324 
1328  static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1329  {
1330  SequenceToChargeToPepHitP best_pep;
1331  for (auto& pep : pep_ids)
1332  {
1333  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1334  }
1335  }
1336 
1341  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1342  {
1343  bool higher_score_better = pep.isHigherScoreBetter();
1344  // make sure that first = best hit
1345  pep.sort();
1346 
1347  auto pepIt = pep.getHits().begin();
1348  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1349  for (; pepIt != pepItEnd; ++pepIt)
1350  {
1351  PeptideHit& hit = *pepIt;
1352 
1353  String lookup_seq;
1354  if (ignore_mods)
1355  {
1356  lookup_seq = hit.getSequence().toUnmodifiedString();
1357  }
1358  else
1359  {
1360  lookup_seq = hit.getSequence().toString();
1361  }
1362 
1363  int lookup_charge = 0;
1364  if (!ignore_charges)
1365  {
1366  lookup_charge = hit.getCharge();
1367  }
1368 
1369  // try to insert
1370  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1371  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1372 
1373  PeptideHit*& p = it_inserted_chg.first->second; // now this gets either the old one if already present, or this
1374  if (!it_inserted_chg.second) // was already present -> possibly update
1375  {
1376  if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1377  {
1378  p->setMetaValue("best_per_peptide", 0);
1379  hit.setMetaValue("best_per_peptide", 1);
1380  p = &hit;
1381  }
1382  else // note that this was def. not the best
1383  {
1384  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1385  hit.setMetaValue("best_per_peptide", 0);
1386  }
1387  }
1388  else // newly inserted -> first for that sequence (and optionally charge)
1389  {
1390  hit.setMetaValue("best_per_peptide", 1);
1391  }
1392  }
1393  }
1394 
1397  AnnotatedMSRun& experiment,
1398  const std::vector<FASTAFile::FASTAEntry>& proteins)
1399  {
1400  std::set<String> accessions;
1401  for (auto it = proteins.begin(); it != proteins.end(); ++it)
1402  {
1403  accessions.insert(it->identifier);
1404  }
1405 
1406  // filter protein hits:
1407  keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions);
1408 
1409  // filter peptide hits:
1410  // std::pair<OpenMS::MSSpectrum&, OpenMS::PeptideIdentification&>
1411  for (auto [spectrum, peptide_id] : experiment)
1412  {
1413  if (spectrum.getMSLevel() == 2)
1414  {
1415  keepHitsMatchingProteins(peptide_id, accessions);
1416  }
1417  }
1418  removeEmptyIdentifications(experiment.getPeptideIdentifications());
1419  }
1420 
1422 
1423 
1426 
1437 
1450 
1456  static void removeDecoys(IdentificationData& id_data);
1458 
1459  // Specific overloads for PeptideIdentificationList to ensure correct template resolution
1461  {
1462  removeDecoyHits(ids.getData());
1463  }
1464 
1465  static void filterHitsByScore(PeptideIdentificationList& ids, double threshold_score)
1466  {
1467  filterHitsByScore(ids.getData(), threshold_score);
1468  }
1469 
1470  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, PeptideIdentificationList& ids)
1471  {
1472  removeUnreferencedProteins(proteins, ids.getData());
1473  }
1474  };
1475 
1476 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Class for storing MS run data with peptide and protein identifications.
Definition: AnnotatedMSRun.h:36
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition: AnnotatedMSRun.h:69
A container for consensus elements.
Definition: ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
typename VecMember::iterator iterator
Definition: ExposedVector.h:68
iterator begin() noexcept
Definition: ExposedVector.h:104
const VecMember & getData() const
read-only access to the underlying data
Definition: ExposedVector.h:328
iterator end() noexcept
Definition: ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:413
Int max_cleavages_
Definition: IDFilter.h:417
EnzymaticDigestion & digestion_
Definition: IDFilter.h:415
PeptideHit argument_type
Definition: IDFilter.h:420
Int min_cleavages_
Definition: IDFilter.h:416
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:432
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:442
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:421
static Int disabledValue()
Definition: IDFilter.h:425
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:64
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:657
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:848
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:556
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:75
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:584
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:542
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:74
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition: IDFilter.h:1460
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:836
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:575
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:664
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:549
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition: IDFilter.h:1396
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:594
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:650
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:639
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:671
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:565
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition: IDFilter.h:1465
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:604
static void updateProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:757
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:628
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition: IDFilter.h:1470
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:691
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition: IDFilter.h:615
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:73
This class is used to switch identification scores within identification or consensus feature maps.
Definition: IDScoreSwitcherAlgorithm.h:41
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
Searches for a specified score type within an identification object and its meta values.
Definition: IDScoreSwitcherAlgorithm.h:540
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:138
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score name corresponds to a specific score type.
Definition: IDScoreSwitcherAlgorithm.h:74
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:49
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
concept IsFeatureOrConsensusMap
Definition: IDFilter.h:40
concept IsPeptideOrProteinIdentification
Definition: IDFilter.h:36
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:454
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:463
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:458
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition: IDFilter.h:495
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:468
bool ignore_missed_cleavages_
Definition: IDFilter.h:460
PeptideEvidence argument_type
Definition: IDFilter.h:455
ProteaseDigestion & digestion_
Definition: IDFilter.h:459
bool methionine_cleavage_
Definition: IDFilter.h:461
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:328
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:330
GetMatchingItems()
Definition: IDFilter.h:341
ItemMap items
Definition: IDFilter.h:331
HitType argument_type
Definition: IDFilter.h:329
bool exists(const HitType &hit) const
Definition: IDFilter.h:350
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:360
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:333
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:345
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:355
Is this a decoy hit?
Definition: IDFilter.h:216
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:240
HitType argument_type
Definition: IDFilter.h:217
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:226
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:86
bool operator()(const HitType &hit) const
Definition: IDFilter.h:96
double score
Definition: IDFilter.h:89
HitType argument_type
Definition: IDFilter.h:87
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:92
bool higher_score_better
Definition: IDFilter.h:90
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:255
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:260
HitType argument_type
Definition: IDFilter.h:256
const std::unordered_set< String > & accessions
Definition: IDFilter.h:258
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:265
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:280
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:275
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:292
HitType argument_type
Definition: IDFilter.h:293
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:301
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:316
const std::set< String > & accessions
Definition: IDFilter.h:295
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:297
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:311
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:135
bool operator()(const HitType &hit) const
Definition: IDFilter.h:145
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:141
HitType argument_type
Definition: IDFilter.h:136
String key
Definition: IDFilter.h:138
double value
Definition: IDFilter.h:139
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:112
bool operator()(const HitType &hit) const
Definition: IDFilter.h:122
DataValue value
Definition: IDFilter.h:116
HitType argument_type
Definition: IDFilter.h:113
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:118
String key
Definition: IDFilter.h:115
Predicate to check if a HitType object has a minimum meta value.
Definition: IDFilter.h:163
bool operator()(const HitType &hit) const
Operator() function to check if a HitType object has a minimum meta value.
Definition: IDFilter.h:187
HitType argument_type
Definition: IDFilter.h:164
String key
Definition: IDFilter.h:166
HasMinMetaValue(const String &key_, const double &value_)
Constructor for HasMinMetaValue.
Definition: IDFilter.h:175
double value
Definition: IDFilter.h:167
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:509
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:512
IdentificationType argument_type
Definition: IDFilter.h:510
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20