24 #include <OpenMS/config.h>
30 #include <unordered_set>
37 std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
41 std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
85 template<
class HitType>
92 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
98 if (higher_score_better)
100 return hit.getScore() >= score;
102 return hit.getScore() <= score;
111 template<
class HitType>
129 return found == value;
134 template<
class HitType>
150 return double(found) <= value;
161 template<
class HitType>
194 return static_cast<double>(found) >= value;
214 template<
class HitType>
227 target_decoy(
"target_decoy",
"decoy"),
228 is_decoy(
"isDecoy",
"true")
245 return target_decoy(hit) || is_decoy(hit);
254 template<
class HitType>
261 accessions(accessions_)
269 if (accessions.count(it) > 0)
291 template<
class HitType>
305 if (accessions.count(it) > 0)
327 template<
class HitType,
class Entry>
335 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
337 items[getKey(*rec_it)] = &(*rec_it);
352 return items.count(getHitKey(hit)) > 0;
362 if (!exists(evidence))
364 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
366 return *(items.find(getHitKey(evidence))->second);
381 struct HasMinPeptideLength;
387 struct HasLowMZError;
394 struct HasMatchingModification;
401 struct HasMatchingSequence;
404 struct HasNoEvidence;
434 const auto& fun = [&](
const Int missed_cleavages) {
435 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
436 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
437 return max_filter || min_filter;
444 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
464 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
476 if (accession_resolver_.
exists(evidence))
479 ignore_missed_cleavages_, methionine_cleavage_);
485 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
497 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
508 template<
class IdentificationType>
514 return id.getHits().empty();
541 template<
class Container,
class Predicate>
544 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
548 template<
class Container,
class Predicate>
551 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
555 template<
class Container,
class Predicate>
558 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
559 std::move(part, items.end(), std::back_inserter(target));
560 items.erase(part, items.end());
564 template<
class IDContainer,
class Predicate>
567 for (
auto& item : items)
569 removeMatchingItems(item.getHits(), pred);
574 template<
class IDContainer,
class Predicate>
577 for (
auto& item : items)
579 keepMatchingItems(item.getHits(), pred);
583 template<
class MapType,
class Predicate>
586 for (
auto& feat : prot_and_pep_ids)
588 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
590 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
593 template<
class MapType,
class Predicate>
596 for (
auto& feat : prot_and_pep_ids)
598 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
600 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
603 template<IsFeatureOrConsensusMap MapType,
class Predicate>
606 for (
auto& feat : prot_and_pep_ids)
608 removeMatchingItems(feat.getPeptideIdentifications(), pred);
610 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
614 template<
class Predicate>
617 removeMatchingItems(pep_ids, pred);
627 template<
class IdentificationType>
631 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
633 counter += id_it->getHits().size();
642 for (
const auto&
id : ids)
644 counter +=
id.getHits().size();
652 std::vector<PeptideIdentification>& vec = ids.
getData();
653 filterHitsByRank(vec, min_rank, max_rank);
659 std::vector<PeptideIdentification>& vec = ids.
getData();
660 removeHitsMatchingProteins(vec, accessions);
666 std::vector<PeptideIdentification>& vec = ids.
getData();
667 keepHitsMatchingProteins(vec, accessions);
673 std::vector<PeptideIdentification>& vec = ids.
getData();
674 return getBestHit(vec, assume_sorted, best_hit);
690 template<
class IdentificationType>
691 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
693 if (identifications.empty())
696 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
697 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
699 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
701 if (id_it->getHits().empty())
704 if (best_id_it == identifications.end())
707 best_hit_it = id_it->getHits().begin();
709 else if (best_id_it->getScoreType() != id_it->getScoreType())
711 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
714 bool higher_better = best_id_it->isHigherScoreBetter();
715 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
717 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
719 best_hit_it = hit_it;
726 if (best_id_it == identifications.end())
731 best_hit = *best_hit_it;
756 template<
class Ev
idenceFilter>
761 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
763 std::vector<PeptideEvidence> evidences;
764 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
765 hit_it->setPeptideEvidences(evidences);
819 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
827 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
835 template<IsPept
ideOrProteinIdentification IdentificationType>
838 struct HasNoHits<IdentificationType> empty_filter;
839 removeMatchingItems(ids, empty_filter);
847 template<
class IdentificationType>
850 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
852 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
853 keepMatchingItems(id_it->getHits(), score_filter);
870 template<class IdentificationType>
871 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
874 bool at_least_one_found =
false;
875 for (IdentificationType&
id : ids)
877 if (switcher.
isScoreType(
id.getScoreType(), score_type))
879 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
880 keepMatchingItems(id.getHits(), score_filter);
885 auto result = switcher.
findScoreType<IdentificationType>(id, score_type);
886 if (!result.score_name.empty())
888 String metaval = result.score_name;
891 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
892 keepMatchingItems(id.getHits(), score_filter);
896 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
897 keepMatchingItems(id.getHits(), score_filter);
899 at_least_one_found = true;
903 if (!at_least_one_found)
OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
912 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
919 template<class IdentificationType>
920 static void filterHitsByScore(IdentificationType& id, double threshold_score)
922 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
923 keepMatchingItems(id.getHits(), score_filter);
931 template<class IdentificationType>
932 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
934 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
937 if (n < id_it->getHits().size())
938 id_it->getHits().resize(n);
950 static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
952 std::vector<PeptideIdentification>& vec = pep_ids.getData();
953 keepNBestHits(vec, n);
970 template<class IdentificationType>
971 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
975 auto& hits = id.getHits();
976 if (hits.empty()) continue;
981 if (max_rank < min_rank) max_rank = hits.size();
984 double last_score = hits.front().getScore();
988 std::remove_if(hits.begin(), hits.end(),
989 [&](const auto& hit) {
990 if (hit.getScore() != last_score)
993 last_score = hit.getScore();
995 return rank < min_rank || rank > max_rank;
1009 template<class IdentificationType>
1010 static void removeDecoyHits(std::vector<IdentificationType>& ids)
1012 struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1013 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1015 removeMatchingItems(id_it->getHits(), decoy_filter);
1026 template<class IdentificationType>
1027 static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1029 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1030 for (auto& id_it : ids)
1032 removeMatchingItems(id_it.getHits(), acc_filter);
1043 template<IsPeptideOrProteinIdentification IdentificationType>
1044 static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1046 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1047 keepMatchingItems(id.getHits(), acc_filter);
1057 template<class IdentificationType>
1058 static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1060 for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1075 static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1085 static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1095 static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1098 static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1101 static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1114 static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1123 template<class Filter>
1124 static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1137 static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1140 static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1142 static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1145 static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1154 static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1163 static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1166 static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1174 static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1183 static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1184 double peptide_threshold_score,
1185 double protein_threshold_score)
1188 filterHitsByScore(annotated_data.getProteinIdentifications(),
1189 protein_threshold_score);
1194 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1196 filterHitsByScore(peptide_id, peptide_threshold_score);
1198 updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1202 static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1206 PeptideIdentificationList all_peptides;
1208 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1211 PeptideIdentificationList temp_vec = {peptide_id};
1212 keepNBestHits(temp_vec, n);
1214 if (!temp_vec.empty())
1216 peptide_id = temp_vec[0];
1220 peptide_id.getHits().clear();
1225 temp_vec = {peptide_id};
1226 updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1227 all_peptides.push_back(peptide_id);
1230 removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1235 static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1238 template<class MapType>
1239 static void keepNBestPeptideHits(MapType& map, Size n)
1243 for (auto& feat : map)
1245 keepNBestHits(feat.getPeptideIdentifications(), n);
1247 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1250 template<class MapType>
1251 static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1253 const auto pred = HasNoHits<PeptideIdentification>();
1254 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1258 static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1260 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1261 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1262 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1265 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1267 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1268 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1269 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1273 template<class MapType>
1274 static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1276 const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1278 RunToSequenceToChargeToPepHitP best_peps_per_run;
1279 for (const auto& idrun : prot_ids)
1281 best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1284 for (auto& feat : prot_and_pep_ids)
1286 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1289 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1292 template<class MapType>
1293 static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1295 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1296 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1297 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1302 static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1303 Size nr_best_spectrum)
1305 RunToSequenceToChargeToPepHitP best_peps_per_run;
1306 for (const auto& id : prot_ids)
1308 best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1310 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1316 static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1317 Size nr_best_spectrum)
1319 for (auto& pep : pep_ids)
1321 SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1322 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1329 static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1331 SequenceToChargeToPepHitP best_pep;
1332 for (auto& pep : pep_ids)
1334 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1342 static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1344 bool higher_score_better = pep.isHigherScoreBetter();
1348 auto pepIt = pep.getHits().begin();
1349 auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1350 for (; pepIt != pepItEnd; ++pepIt)
1352 PeptideHit& hit = *pepIt;
1357 lookup_seq = hit.getSequence().toUnmodifiedString();
1361 lookup_seq = hit.getSequence().toString();
1364 int lookup_charge = 0;
1365 if (!ignore_charges)
1367 lookup_charge = hit.getCharge();
1371 auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1372 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1374 PeptideHit*& p = it_inserted_chg.first->second;
1375 if (!it_inserted_chg.second)
1377 if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1379 p->setMetaValue(
"best_per_peptide", 0);
1380 hit.setMetaValue(
"best_per_peptide", 1);
1386 hit.setMetaValue(
"best_per_peptide", 0);
1391 hit.setMetaValue(
"best_per_peptide", 1);
1399 const std::vector<FASTAFile::FASTAEntry>& proteins)
1401 std::set<String> accessions;
1402 for (
auto it = proteins.begin(); it != proteins.end(); ++it)
1404 accessions.insert(it->identifier);
1412 for (
auto [spectrum, peptide_id] : experiment)
1414 if (spectrum.getMSLevel() == 2)
1416 keepHitsMatchingProteins(peptide_id, accessions);
1463 removeDecoyHits(ids.
getData());
1468 filterHitsByScore(ids.
getData(), threshold_score);
1473 removeUnreferencedProteins(proteins, ids.
getData());
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Class for storing MS run data with peptide and protein identifications.
Definition: AnnotatedMSRun.h:36
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition: AnnotatedMSRun.h:69
A container for consensus elements.
Definition: ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
typename VecMember::iterator iterator
Definition: ExposedVector.h:68
iterator begin() noexcept
Definition: ExposedVector.h:104
const VecMember & getData() const
read-only access to the underlying data
Definition: ExposedVector.h:328
iterator end() noexcept
Definition: ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:413
Int max_cleavages_
Definition: IDFilter.h:417
EnzymaticDigestion & digestion_
Definition: IDFilter.h:415
PeptideHit argument_type
Definition: IDFilter.h:420
Int min_cleavages_
Definition: IDFilter.h:416
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:432
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:442
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:421
static Int disabledValue()
Definition: IDFilter.h:425
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:64
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:657
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:848
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:556
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e.g. spectrum).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:75
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:584
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:542
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:74
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition: IDFilter.h:1461
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:836
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protein IDs).
Definition: IDFilter.h:575
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:664
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:549
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition: IDFilter.h:1397
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:594
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:650
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:639
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:671
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protein IDs).
Definition: IDFilter.h:565
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition: IDFilter.h:1466
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:604
static void updateProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:757
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:628
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition: IDFilter.h:1471
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:691
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition: IDFilter.h:615
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:73
This class is used to switch identification scores within identification or consensus feature maps.
Definition: IDScoreSwitcherAlgorithm.h:42
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:139
bool isScoreType(const String &score_name, const ScoreType &type) const
Checks if the given score name corresponds to a specific score type.
Definition: IDScoreSwitcherAlgorithm.h:75
ScoreSearchResult findScoreType(const IDType &id, ScoreType score_type) const
Searches for a general score type (e.g. PEP, QVAL) in an identification data structure.
Definition: IDScoreSwitcherAlgorithm.h:176
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:49
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available, the UNKNOWN_POSITION constant is returned.
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-terminus).
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D|P (Asp-Pro) cleavage.
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
concept IsFeatureOrConsensusMap
Definition: IDFilter.h:40
concept IsPeptideOrProteinIdentification
Definition: IDFilter.h:36
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifier.
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:454
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:463
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:458
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition: IDFilter.h:495
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:468
bool ignore_missed_cleavages_
Definition: IDFilter.h:460
PeptideEvidence argument_type
Definition: IDFilter.h:455
ProteaseDigestion & digestion_
Definition: IDFilter.h:459
bool methionine_cleavage_
Definition: IDFilter.h:461
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:328
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:330
GetMatchingItems()
Definition: IDFilter.h:341
ItemMap items
Definition: IDFilter.h:331
HitType argument_type
Definition: IDFilter.h:329
bool exists(const HitType &hit) const
Definition: IDFilter.h:350
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:360
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:333
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:345
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:355
Is this a decoy hit?
Definition: IDFilter.h:216
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:240
HitType argument_type
Definition: IDFilter.h:217
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:226
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:86
bool operator()(const HitType &hit) const
Definition: IDFilter.h:96
double score
Definition: IDFilter.h:89
HitType argument_type
Definition: IDFilter.h:87
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:92
bool higher_score_better
Definition: IDFilter.h:90
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:255
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:260
HitType argument_type
Definition: IDFilter.h:256
const std::unordered_set< String > & accessions
Definition: IDFilter.h:258
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:265
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:280
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:275
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:292
HitType argument_type
Definition: IDFilter.h:293
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:301
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:316
const std::set< String > & accessions
Definition: IDFilter.h:295
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:297
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:311
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:509
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:512
IdentificationType argument_type
Definition: IDFilter.h:510
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi_index_containers.
Definition: MetaData.h:20