OpenMS
IDFilter.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Mathias Walzer $
6 // $Authors: Nico Pfeifer, Mathias Walzer, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
24 #include <OpenMS/config.h>
25 #include <algorithm>
26 #include <climits>
27 #include <functional>
28 #include <map>
29 #include <set>
30 #include <unordered_set>
31 #include <vector>
32 
33 namespace OpenMS
34 {
35  template<typename T>
37  std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
38 
39  template<typename T>
41  std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
42 
63  class OPENMS_DLLAPI IDFilter
64  {
65  public:
67  IDFilter() = default;
68 
70  virtual ~IDFilter() = default;
71 
73  typedef std::map<Int, PeptideHit*> ChargeToPepHitP;
74  typedef std::unordered_map<std::string, ChargeToPepHitP> SequenceToChargeToPepHitP;
75  typedef std::map<std::string, SequenceToChargeToPepHitP> RunToSequenceToChargeToPepHitP;
76 
83 
/// Predicate: is the score of a hit at least as good as a given threshold?
template<class HitType>
struct HasGoodScore {
  typedef HitType argument_type; // for use as a predicate

  double score;             ///< threshold that a hit's score is compared against
  bool higher_score_better; ///< score orientation (true: larger scores are better)

  /// Stores the threshold @p score_ and the score orientation @p higher_score_better_.
  HasGoodScore(double score_, bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
  {
  }

  /// @return true if the score of @p hit is at least as good as the threshold
  ///         (a score equal to the threshold passes in both orientations)
  bool operator()(const HitType& hit) const
  {
    if (higher_score_better)
    {
      return hit.getScore() >= score;
    }
    return hit.getScore() <= score;
  }
};
105 
111  template<class HitType>
112  struct HasMetaValue {
113  typedef HitType argument_type; // for use as a predicate
114 
117 
118  HasMetaValue(const String& key_, const DataValue& value_) : key(key_), value(value_)
119  {
120  }
121 
122  bool operator()(const HitType& hit) const
123  {
124  DataValue found = hit.getMetaValue(key);
125  if (found.isEmpty())
126  return false; // meta value "key" not set
127  if (value.isEmpty())
128  return true; // "key" is set, value doesn't matter
129  return found == value;
130  }
131  };
132 
134  template<class HitType>
136  typedef HitType argument_type; // for use as a predicate
137 
139  double value;
140 
141  HasMaxMetaValue(const String& key_, const double& value_) : key(key_), value(value_)
142  {
143  }
144 
145  bool operator()(const HitType& hit) const
146  {
147  DataValue found = hit.getMetaValue(key);
148  if (found.isEmpty())
149  return false; // meta value "key" not set
150  return double(found) <= value;
151  }
152  };
153 
161  template<class HitType>
163  {
164  typedef HitType argument_type; // for use as a predicate
165 
167  double value;
168 
175  HasMinMetaValue(const String& key_, const double& value_) :
176  key(key_),
177  value(value_)
178  {
179  }
180 
187  bool operator()(const HitType& hit) const
188  {
189  DataValue found = hit.getMetaValue(key);
190  if (found.isEmpty())
191  {
192  return false; // meta value "key" not set
193  }
194  return static_cast<double>(found) >= value;
195  }
196  };
197 
199 
214  template<class HitType>
216  {
217  typedef HitType argument_type; // for use as a predicate
218 
219  struct HasMetaValue<HitType> target_decoy, is_decoy;
220 
227  target_decoy("target_decoy", "decoy"),
228  is_decoy("isDecoy", "true")
229  {
230  }
231 
240  bool operator()(const HitType& hit) const
241  {
242  // @TODO: this could be done slightly more efficiently by returning
243  // false if the "target_decoy" meta value is "target" or "target+decoy",
244  // without checking for an "isDecoy" meta value in that case
245  return target_decoy(hit) || is_decoy(hit);
246  }
247  };
248 
254  template<class HitType>
256  typedef HitType argument_type; // for use as a predicate
257 
258  const std::unordered_set<String>& accessions;
259 
260  HasMatchingAccessionUnordered(const std::unordered_set<String>& accessions_) :
261  accessions(accessions_)
262  {
263  }
264 
265  bool operator()(const PeptideHit& hit) const
266  {
267  for (const auto& it : hit.extractProteinAccessionsSet())
268  {
269  if (accessions.count(it) > 0)
270  return true;
271  }
272  return false;
273  }
274 
275  bool operator()(const ProteinHit& hit) const
276  {
277  return (accessions.count(hit.getAccession()) > 0);
278  }
279 
280  bool operator()(const PeptideEvidence& evidence) const
281  {
282  return (accessions.count(evidence.getProteinAccession()) > 0);
283  }
284  };
285 
291  template<class HitType>
293  typedef HitType argument_type; // for use as a predicate
294 
295  const std::set<String>& accessions;
296 
297  HasMatchingAccession(const std::set<String>& accessions_) : accessions(accessions_)
298  {
299  }
300 
301  bool operator()(const PeptideHit& hit) const
302  {
303  for (const auto& it : hit.extractProteinAccessionsSet())
304  {
305  if (accessions.count(it) > 0)
306  return true;
307  }
308  return false;
309  }
310 
311  bool operator()(const ProteinHit& hit) const
312  {
313  return (accessions.count(hit.getAccession()) > 0);
314  }
315 
316  bool operator()(const PeptideEvidence& evidence) const
317  {
318  return (accessions.count(evidence.getProteinAccession()) > 0);
319  }
320  };
321 
327  template<class HitType, class Entry>
329  typedef HitType argument_type; // for use as a predicate
330  typedef std::map<String, Entry*> ItemMap; // Store pointers to avoid copying data
332 
333  GetMatchingItems(std::vector<Entry>& records)
334  {
335  for (typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
336  {
337  items[getKey(*rec_it)] = &(*rec_it);
338  }
339  }
340 
342  {
343  }
344 
345  const String& getKey(const FASTAFile::FASTAEntry& entry) const
346  {
347  return entry.identifier;
348  }
349 
350  bool exists(const HitType& hit) const
351  {
352  return items.count(getHitKey(hit)) > 0;
353  }
354 
355  const String& getHitKey(const PeptideEvidence& p) const
356  {
357  return p.getProteinAccession();
358  }
359 
360  const Entry& getValue(const PeptideEvidence& evidence) const
361  {
362  if (!exists(evidence))
363  {
364  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Accession: '" + getHitKey(evidence) + "'. peptide evidence accession not in data");
365  }
366  return *(items.find(getHitKey(evidence))->second);
367  }
368  };
369 
371 
372 
379 
381  struct HasMinPeptideLength;
382 
384  struct HasMinCharge;
385 
387  struct HasLowMZError;
388 
394  struct HasMatchingModification;
395 
401  struct HasMatchingSequence;
402 
404  struct HasNoEvidence;
405 
406 
413  {
414  private:
418 
419  public:
421  PeptideDigestionFilter(EnzymaticDigestion& digestion, Int min, Int max) : digestion_(digestion), min_cleavages_(min), max_cleavages_(max)
422  {
423  }
424 
425  static inline Int disabledValue()
426  {
427  return -1;
428  }
429 
432  bool operator()(PeptideHit& p) const
433  {
434  const auto& fun = [&](const Int missed_cleavages) {
435  bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ : false;
436  bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ : false;
437  return max_filter || min_filter;
438  };
439  return digestion_.filterByMissedCleavages(p.getSequence().toUnmodifiedString(), fun);
440  }
441 
442  void filterPeptideSequences(std::vector<PeptideHit>& hits)
443  {
444  hits.erase(std::remove_if(hits.begin(), hits.end(), (*this)), hits.end());
445  }
446  };
447 
448 
456 
457  // Build an accession index to avoid the linear search cost
462 
463  DigestionFilter(std::vector<FASTAFile::FASTAEntry>& entries, ProteaseDigestion& digestion, bool ignore_missed_cleavages, bool methionine_cleavage) :
464  accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
465  {
466  }
467 
468  bool operator()(const PeptideEvidence& evidence) const
469  {
470  if (!evidence.hasValidLimits())
471  {
472  OPENMS_LOG_WARN << "Invalid limits! Peptide '" << evidence.getProteinAccession() << "' not filtered" << std::endl;
473  return true;
474  }
475 
476  if (accession_resolver_.exists(evidence))
477  {
478  return digestion_.isValidProduct(AASequence::fromString(accession_resolver_.getValue(evidence).sequence), evidence.getStart(), evidence.getEnd() - evidence.getStart(),
479  ignore_missed_cleavages_, methionine_cleavage_);
480  }
481  else
482  {
483  if (evidence.getProteinAccession().empty())
484  {
485  OPENMS_LOG_WARN << "Peptide accession not available! Skipping Evidence." << std::endl;
486  }
487  else
488  {
489  OPENMS_LOG_WARN << "Peptide accession '" << evidence.getProteinAccession() << "' not found in fasta file!" << std::endl;
490  }
491  return true;
492  }
493  }
494 
496  {
497  IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*this, peptides);
498  }
499  };
500 
502 
503 
506 
/// Predicate: does an identification contain no hits at all?
template<class IdentificationType>
struct HasNoHits {
  using argument_type = IdentificationType; // for use as a predicate

  /// @return true if the hit container of @p id is empty
  bool operator()(const IdentificationType& id) const
  {
    const auto& hits = id.getHits();
    return hits.empty();
  }
};
517 
519 
520 
523 
525  struct HasRTInRange;
526 
528  struct HasMZInRange;
529 
531 
532 
539 
/// Removes all elements of @p items for which @p pred returns true
/// (the relative order of the remaining elements is kept).
template<class Container, class Predicate>
static void removeMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_removed = std::remove_if(items.begin(), items.end(), pred);
  items.erase(first_removed, items.end());
}
546 
/// Keeps only those elements of @p items for which @p pred returns true
/// (the relative order of the kept elements is preserved).
template<class Container, class Predicate>
static void keepMatchingItems(Container& items, const Predicate& pred)
{
  const auto first_rejected = std::remove_if(items.begin(), items.end(), std::not_fn(pred));
  items.erase(first_rejected, items.end());
}
553 
/// Moves all elements of @p items for which @p pred returns true to the end of @p target.
/// std::partition is used, so the relative order of the elements is not guaranteed.
template<class Container, class Predicate>
static void moveMatchingItems(Container& items, const Predicate& pred, Container& target)
{
  // non-matching elements first, matching ones in [first_match, end)
  const auto first_match = std::partition(items.begin(), items.end(), std::not_fn(pred));
  const auto last_match = items.end();
  std::move(first_match, last_match, std::back_inserter(target));
  items.erase(first_match, last_match);
}
562 
/// Applies removeMatchingItems() with @p pred to the hits of every identification in @p items.
template<class IDContainer, class Predicate>
static void removeMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  std::for_each(items.begin(), items.end(), [&pred](auto& identification) {
    removeMatchingItems(identification.getHits(), pred);
  });
}
572 
/// Applies keepMatchingItems() with @p pred to the hits of every identification in @p items.
template<class IDContainer, class Predicate>
static void keepMatchingItemsUnroll(IDContainer& items, const Predicate& pred)
{
  std::for_each(items.begin(), items.end(), [&pred](auto& identification) {
    keepMatchingItems(identification.getHits(), pred);
  });
}
582 
/// Keeps only the peptide hits matching @p pred, in the peptide identifications of
/// every element of @p prot_and_pep_ids and in its unassigned peptide identifications.
///
/// The predicate is taken by const reference (like in the other filter helpers),
/// so temporaries can be passed as well.
template<class MapType, class Predicate>
static void keepMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
592 
/// Removes the peptide hits matching @p pred, in the peptide identifications of
/// every element of @p prot_and_pep_ids and in its unassigned peptide identifications.
///
/// The predicate is taken by const reference (like in the other filter helpers),
/// so temporaries can be passed as well.
template<class MapType, class Predicate>
static void removeMatchingPeptideHits(MapType& prot_and_pep_ids, const Predicate& pred)
{
  for (auto& feat : prot_and_pep_ids)
  {
    removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
  }
  removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
}
602 
603  template<IsFeatureOrConsensusMap MapType, class Predicate>
604  static void removeMatchingPeptideIdentifications(MapType& prot_and_pep_ids, Predicate& pred)
605  {
606  for (auto& feat : prot_and_pep_ids)
607  {
608  removeMatchingItems(feat.getPeptideIdentifications(), pred);
609  }
610  removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
611  }
612 
613  // Specialization for PeptideIdentificationList
614  template<class Predicate>
616  {
617  removeMatchingItems(pep_ids, pred);
618  }
619 
621 
622 
625 
627  template<class IdentificationType>
628  static Size countHits(const std::vector<IdentificationType>& ids)
629  {
630  Size counter = 0;
631  for (typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
632  {
633  counter += id_it->getHits().size();
634  }
635  return counter;
636  }
637 
640  {
641  Size counter = 0;
642  for (const auto& id : ids)
643  {
644  counter += id.getHits().size();
645  }
646  return counter;
647  }
648 
650  static void filterHitsByRank(PeptideIdentificationList& ids, Size min_rank, Size max_rank)
651  {
652  std::vector<PeptideIdentification>& vec = ids.getData();
653  filterHitsByRank(vec, min_rank, max_rank);
654  }
655 
657  static void removeHitsMatchingProteins(PeptideIdentificationList& ids, const std::set<String>& accessions)
658  {
659  std::vector<PeptideIdentification>& vec = ids.getData();
660  removeHitsMatchingProteins(vec, accessions);
661  }
662 
664  static void keepHitsMatchingProteins(PeptideIdentificationList& ids, const std::set<String>& accessions)
665  {
666  std::vector<PeptideIdentification>& vec = ids.getData();
667  keepHitsMatchingProteins(vec, accessions);
668  }
669 
671  static bool getBestHit(PeptideIdentificationList& ids, bool assume_sorted, PeptideHit& best_hit)
672  {
673  std::vector<PeptideIdentification>& vec = ids.getData();
674  return getBestHit(vec, assume_sorted, best_hit);
675  }
676 
690  template<class IdentificationType>
691  static bool getBestHit(const std::vector<IdentificationType>& identifications, bool assume_sorted, typename IdentificationType::HitType& best_hit)
692  {
693  if (identifications.empty())
694  return false;
695 
696  typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
697  typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
698 
699  for (typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
700  {
701  if (id_it->getHits().empty())
702  continue;
703 
704  if (best_id_it == identifications.end()) // no previous "best" hit
705  {
706  best_id_it = id_it;
707  best_hit_it = id_it->getHits().begin();
708  }
709  else if (best_id_it->getScoreType() != id_it->getScoreType())
710  {
711  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Can't compare scores of different types", best_id_it->getScoreType() + "/" + id_it->getScoreType());
712  }
713 
714  bool higher_better = best_id_it->isHigherScoreBetter();
715  for (typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
716  {
717  if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
718  {
719  best_hit_it = hit_it;
720  }
721  if (assume_sorted)
722  break; // only consider the first hit
723  }
724  }
725 
726  if (best_id_it == identifications.end())
727  {
728  return false; // no hits in any IDs
729  }
730 
731  best_hit = *best_hit_it;
732  return true;
733  }
734 
742  static void extractPeptideSequences(const PeptideIdentificationList& peptides, std::set<String>& sequences, bool ignore_mods = false);
743 
749  static std::map<String, std::vector<ProteinHit>> extractUnassignedProteins(ConsensusMap& cmap);
750 
756  template<class EvidenceFilter>
757  static void FilterPeptideEvidences(EvidenceFilter& filter, PeptideIdentificationList& peptides)
758  {
759  for (PeptideIdentificationList::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
760  {
761  for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
762  {
763  std::vector<PeptideEvidence> evidences;
764  remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
765  hit_it->setPeptideEvidences(evidences);
766  }
767  }
768  }
769 
771 
772 
777  static void removeUnreferencedProteins(ConsensusMap& cmap, bool include_unassigned);
778 
780  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, const PeptideIdentificationList& peptides);
783 
791  static void updateProteinReferences(PeptideIdentificationList& peptides, const std::vector<ProteinIdentification>& proteins, bool remove_peptides_without_reference = false);
792 
800  static void updateProteinReferences(ConsensusMap& cmap, bool remove_peptides_without_reference = false);
801 
809  static void updateProteinReferences(ConsensusMap& cmap, const ProteinIdentification& ref_run, bool remove_peptides_without_reference = false);
810 
819  static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups, const std::vector<ProteinHit>& hits);
820 
827  static void removeUngroupedProteins(const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
829 
830 
833 
835  template<IsPeptideOrProteinIdentification IdentificationType>
836  static void removeEmptyIdentifications(std::vector<IdentificationType>& ids)
837  {
838  struct HasNoHits<IdentificationType> empty_filter;
839  removeMatchingItems(ids, empty_filter);
840  }
841 
847  template<class IdentificationType>
848  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score)
849  {
850  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
851  {
852  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
853  keepMatchingItems(id_it->getHits(), score_filter);
854  }
855  }
856 
870  template<class IdentificationType>
871  static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
872  {
873  IDScoreSwitcherAlgorithm switcher;
874  bool at_least_one_found = false;
875  for (IdentificationType& id : ids)
876  {
877  if (switcher.isScoreType(id.getScoreType(), score_type))
878  {
879  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
880  keepMatchingItems(id.getHits(), score_filter);
881  }
882  else
883  {
884  // If one assumes they are all the same in the vector, this could be done in the beginning.
885  auto result = switcher.findScoreType<IdentificationType>(id, score_type);
886  if (!result.score_name.empty())
887  {
888  String metaval = result.score_name;
889  if (switcher.isScoreTypeHigherBetter(score_type))
890  {
891  struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
892  keepMatchingItems(id.getHits(), score_filter);
893  }
894  else
895  {
896  struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
897  keepMatchingItems(id.getHits(), score_filter);
898  }
899  at_least_one_found = true;
900  }
901  }
902  }
903  if (!at_least_one_found) OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
904  }
905 
912  static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
913 
919  template<class IdentificationType>
920  static void filterHitsByScore(IdentificationType& id, double threshold_score)
921  {
922  struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
923  keepMatchingItems(id.getHits(), score_filter);
924  }
925 
931  template<class IdentificationType>
932  static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
933  {
934  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
935  {
936  id_it->sort();
937  if (n < id_it->getHits().size())
938  id_it->getHits().resize(n);
939  }
940  }
941 
950  static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
951  {
952  std::vector<PeptideIdentification>& vec = pep_ids.getData();
953  keepNBestHits(vec, n);
954  }
955 
970  template<class IdentificationType>
971  static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
972  {
973  for (auto& id : ids)
974  {
975  auto& hits = id.getHits();
976  if (hits.empty()) continue;
977 
978  id.sort(); // Ensure hits are properly sorted
979 
980  // ignore max_rank?
981  if (max_rank < min_rank) max_rank = hits.size();
982 
983  Size rank = 1;
984  double last_score = hits.front().getScore();
985 
986  // Remove hits not within [min_rank, max_rank], while computing rank on the fly
987  hits.erase(
988  std::remove_if(hits.begin(), hits.end(),
989  [&](const auto& hit) {
990  if (hit.getScore() != last_score)
991  {
992  ++rank;
993  last_score = hit.getScore();
994  }
995  return rank < min_rank || rank > max_rank;
996  }),
997  hits.end()
998  );
999  }
1000  }
1001 
1009  template<class IdentificationType>
1010  static void removeDecoyHits(std::vector<IdentificationType>& ids)
1011  {
1012  struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1013  for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1014  {
1015  removeMatchingItems(id_it->getHits(), decoy_filter);
1016  }
1017  }
1018 
1026  template<class IdentificationType>
1027  static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1028  {
1029  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1030  for (auto& id_it : ids)
1031  {
1032  removeMatchingItems(id_it.getHits(), acc_filter);
1033  }
1034  }
1035 
1043  template<IsPeptideOrProteinIdentification IdentificationType>
1044  static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1045  {
1046  struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1047  keepMatchingItems(id.getHits(), acc_filter);
1048  }
1049 
1057  template<class IdentificationType>
1058  static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1059  {
1060  for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1061  }
1062 
1064 
1065 
1068 
1075  static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1076 
1085  static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1086 
1095  static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1096 
1098  static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1099 
1101  static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1102 
1114  static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1115 
1116 
1123  template<class Filter>
1124  static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1125 
1137  static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1138 
1140  static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1141 
1142  static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1143 
1145  static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1146 
1154  static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1155 
1163  static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1164 
1166  static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1167 
1174  static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1175 
1177 
1178 
1181 
1183  static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1184  double peptide_threshold_score,
1185  double protein_threshold_score)
1186  {
1187  // filter protein hits:
1188  filterHitsByScore(annotated_data.getProteinIdentifications(),
1189  protein_threshold_score);
1190  // don't remove empty protein IDs - they contain search meta data and may
1191  // be referenced by peptide IDs (via run ID)
1192 
1193  // filter peptide hits:
1194  for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1195  {
1196  filterHitsByScore(peptide_id, peptide_threshold_score);
1197  }
1198  updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1199  }
1200 
1202  static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1203  {
1204  // don't filter the protein hits by "N best" here - filter the peptides
1205  // and update the protein hits!
1206  PeptideIdentificationList all_peptides; // IDs from all spectra
1207  // filter peptide hits:
1208  for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1209  {
1210  // Create a temporary vector with a single PeptideIdentification
1211  PeptideIdentificationList temp_vec = {peptide_id};
1212  keepNBestHits(temp_vec, n);
1213  // Copy back the filtered hits
1214  if (!temp_vec.empty())
1215  {
1216  peptide_id = temp_vec[0];
1217  }
1218  else
1219  {
1220  peptide_id.getHits().clear();
1221  }
1222 
1223  // Since we're working with individual PeptideIdentifications, we don't need to remove empty ones
1224  // but we still need to update protein references
1225  temp_vec = {peptide_id};
1226  updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1227  all_peptides.push_back(peptide_id);
1228  }
1229  // update protein hits:
1230  removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1231  }
1232 
1235  static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1236 
1238  template<class MapType>
1239  static void keepNBestPeptideHits(MapType& map, Size n)
1240  {
1241  // The rank predicate needs annotated ranks, not sure if they are always updated. Use the following instead,
1242  // which sorts Hits first.
1243  for (auto& feat : map)
1244  {
1245  keepNBestHits(feat.getPeptideIdentifications(), n);
1246  }
1247  keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1248  }
1249 
1250  template<class MapType>
1251  static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1252  {
1253  const auto pred = HasNoHits<PeptideIdentification>();
1254  removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1255  }
1256 
1258  static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1259  {
1260  annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1261  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1262  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1263  }
1264 
1265  static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1266  {
1267  annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1268  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1269  keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1270  }
1271 
1272  // TODO allow skipping unassigned?
1273  template<class MapType>
1274  static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1275  {
1276  const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1277 
1278  RunToSequenceToChargeToPepHitP best_peps_per_run;
1279  for (const auto& idrun : prot_ids)
1280  {
1281  best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1282  }
1283 
1284  for (auto& feat : prot_and_pep_ids)
1285  {
1286  annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1287  }
1288 
1289  annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1290  }
1291 
1292  template<class MapType>
1293  static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1294  {
1295  annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1296  HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1297  keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1298  }
1299 
1302  static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1303  Size nr_best_spectrum)
1304  {
1305  RunToSequenceToChargeToPepHitP best_peps_per_run;
1306  for (const auto& id : prot_ids)
1307  {
1308  best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1309  }
1310  annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1311  }
1312 
1316  static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1317  Size nr_best_spectrum)
1318  {
1319  for (auto& pep : pep_ids)
1320  {
1321  SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1322  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1323  }
1324  }
1325 
1329  static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1330  {
1331  SequenceToChargeToPepHitP best_pep;
1332  for (auto& pep : pep_ids)
1333  {
1334  annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1335  }
1336  }
1337 
1342  static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1343  {
1344  bool higher_score_better = pep.isHigherScoreBetter();
1345  // make sure that first = best hit
1346  pep.sort();
1347 
1348  auto pepIt = pep.getHits().begin();
1349  auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1350  for (; pepIt != pepItEnd; ++pepIt)
1351  {
1352  PeptideHit& hit = *pepIt;
1353 
1354  String lookup_seq;
1355  if (ignore_mods)
1356  {
1357  lookup_seq = hit.getSequence().toUnmodifiedString();
1358  }
1359  else
1360  {
1361  lookup_seq = hit.getSequence().toString();
1362  }
1363 
1364  int lookup_charge = 0;
1365  if (!ignore_charges)
1366  {
1367  lookup_charge = hit.getCharge();
1368  }
1369 
1370  // try to insert
1371  auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1372  auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1373 
1374  PeptideHit*& p = it_inserted_chg.first->second; // now this gets either the old one if already present, or this
1375  if (!it_inserted_chg.second) // was already present -> possibly update
1376  {
1377  if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1378  {
1379  p->setMetaValue("best_per_peptide", 0);
1380  hit.setMetaValue("best_per_peptide", 1);
1381  p = &hit;
1382  }
1383  else // note that this was def. not the best
1384  {
1385  // TODO if it is only about filtering, we can omit writing this metavalue (absence = false)
1386  hit.setMetaValue("best_per_peptide", 0);
1387  }
1388  }
1389  else // newly inserted -> first for that sequence (and optionally charge)
1390  {
1391  hit.setMetaValue("best_per_peptide", 1);
1392  }
1393  }
1394  }
1395 
1398  AnnotatedMSRun& experiment,
1399  const std::vector<FASTAFile::FASTAEntry>& proteins)
1400  {
1401  std::set<String> accessions;
1402  for (auto it = proteins.begin(); it != proteins.end(); ++it)
1403  {
1404  accessions.insert(it->identifier);
1405  }
1406 
1407  // filter protein hits:
1408  keepHitsMatchingProteins(experiment.getProteinIdentifications(), accessions);
1409 
1410  // filter peptide hits:
1411  // std::pair<OpenMS::MSSpectrum&, OpenMS::PeptideIdentification&>
1412  for (auto [spectrum, peptide_id] : experiment)
1413  {
1414  if (spectrum.getMSLevel() == 2)
1415  {
1416  keepHitsMatchingProteins(peptide_id, accessions);
1417  }
1418  }
1419  removeEmptyIdentifications(experiment.getPeptideIdentifications());
1420  }
1421 
1423 
1424 
1427 
1438 
1451 
1457  static void removeDecoys(IdentificationData& id_data);
1459 
1460  // Specific overloads for PeptideIdentificationList to ensure correct template resolution
1462  {
1463  removeDecoyHits(ids.getData());
1464  }
1465 
1466  static void filterHitsByScore(PeptideIdentificationList& ids, double threshold_score)
1467  {
1468  filterHitsByScore(ids.getData(), threshold_score);
1469  }
1470 
1471  static void removeUnreferencedProteins(std::vector<ProteinIdentification>& proteins, PeptideIdentificationList& ids)
1472  {
1473  removeUnreferencedProteins(proteins, ids.getData());
1474  }
1475  };
1476 
1477 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro for logging a warning, i.e. a piece of information which should be read by the user.
Definition: LogStream.h:444
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Class for storing MS run data with peptide and protein identifications.
Definition: AnnotatedMSRun.h:36
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition: AnnotatedMSRun.h:69
A container for consensus elements.
Definition: ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
typename VecMember::iterator iterator
Definition: ExposedVector.h:68
iterator begin() noexcept
Definition: ExposedVector.h:104
const VecMember & getData() const
read-only access to the underlying data
Definition: ExposedVector.h:328
iterator end() noexcept
Definition: ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:413
Int max_cleavages_
Definition: IDFilter.h:417
EnzymaticDigestion & digestion_
Definition: IDFilter.h:415
PeptideHit argument_type
Definition: IDFilter.h:420
Int min_cleavages_
Definition: IDFilter.h:416
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:432
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:442
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:421
static Int disabledValue()
Definition: IDFilter.h:425
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:64
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:657
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:848
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:556
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e.g. spectrum).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:75
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:584
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:542
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:74
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition: IDFilter.h:1461
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:836
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protein IDs)
Definition: IDFilter.h:575
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:664
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:549
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition: IDFilter.h:1397
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:594
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:650
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:639
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:671
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protein IDs)
Definition: IDFilter.h:565
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition: IDFilter.h:1466
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:604
static void updateProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:757
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:628
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition: IDFilter.h:1471
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:691
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition: IDFilter.h:615
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:73
This class is used to switch identification scores within identification or consensus feature maps.
Definition: IDScoreSwitcherAlgorithm.h:42
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:139
bool isScoreType(const String &score_name, const ScoreType &type) const
Checks if the given score name corresponds to a specific score type.
Definition: IDScoreSwitcherAlgorithm.h:75
ScoreSearchResult findScoreType(const IDType &id, ScoreType score_type) const
Searches for a general score type (e.g. PEP, QVAL) in an identification data structure.
Definition: IDScoreSwitcherAlgorithm.h:176
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:49
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available, the UNKNOWN_POSITION constant is returned.
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-terminus).
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random Asp-Pro (D|P) cleavage.
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
concept IsFeatureOrConsensusMap
Definition: IDFilter.h:40
concept IsPeptideOrProteinIdentification
Definition: IDFilter.h:36
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifier.
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is the peptide evidence a digestion product of some protein?
Definition: IDFilter.h:454
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:463
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:458
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition: IDFilter.h:495
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:468
bool ignore_missed_cleavages_
Definition: IDFilter.h:460
PeptideEvidence argument_type
Definition: IDFilter.h:455
ProteaseDigestion & digestion_
Definition: IDFilter.h:459
bool methionine_cleavage_
Definition: IDFilter.h:461
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:328
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:330
GetMatchingItems()
Definition: IDFilter.h:341
ItemMap items
Definition: IDFilter.h:331
HitType argument_type
Definition: IDFilter.h:329
bool exists(const HitType &hit) const
Definition: IDFilter.h:350
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:360
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:333
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:345
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:355
Is this a decoy hit?
Definition: IDFilter.h:216
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:240
HitType argument_type
Definition: IDFilter.h:217
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:226
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:86
bool operator()(const HitType &hit) const
Definition: IDFilter.h:96
double score
Definition: IDFilter.h:89
HitType argument_type
Definition: IDFilter.h:87
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:92
bool higher_score_better
Definition: IDFilter.h:90
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:255
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:260
HitType argument_type
Definition: IDFilter.h:256
const std::unordered_set< String > & accessions
Definition: IDFilter.h:258
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:265
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:280
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:275
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:292
HitType argument_type
Definition: IDFilter.h:293
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:301
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:316
const std::set< String > & accessions
Definition: IDFilter.h:295
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:297
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:311
Does a meta value of this hit have at most the given value?
Definition: IDFilter.h:135
bool operator()(const HitType &hit) const
Definition: IDFilter.h:145
HasMaxMetaValue(const String &key_, const double &value_)
Definition: IDFilter.h:141
HitType argument_type
Definition: IDFilter.h:136
String key
Definition: IDFilter.h:138
double value
Definition: IDFilter.h:139
Is a meta value with given key and value set on this hit?
Definition: IDFilter.h:112
bool operator()(const HitType &hit) const
Definition: IDFilter.h:122
DataValue value
Definition: IDFilter.h:116
HitType argument_type
Definition: IDFilter.h:113
HasMetaValue(const String &key_, const DataValue &value_)
Definition: IDFilter.h:118
String key
Definition: IDFilter.h:115
Predicate to check if a HitType object has a minimum meta value.
Definition: IDFilter.h:163
bool operator()(const HitType &hit) const
Operator() function to check if a HitType object has a minimum meta value.
Definition: IDFilter.h:187
HitType argument_type
Definition: IDFilter.h:164
String key
Definition: IDFilter.h:166
HasMinMetaValue(const String &key_, const double &value_)
Constructor for HasMinMetaValue.
Definition: IDFilter.h:175
double value
Definition: IDFilter.h:167
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:509
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:512
IdentificationType argument_type
Definition: IDFilter.h:510
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20