24 #include <OpenMS/config.h>
30 #include <unordered_set>
37 std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
41 std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
85 template<
class HitType>
92 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
98 if (higher_score_better)
100 return hit.getScore() >= score;
102 return hit.getScore() <= score;
111 template<
class HitType>
129 return found == value;
134 template<
class HitType>
150 return double(found) <= value;
161 template<
class HitType>
194 return static_cast<double>(found) >= value;
214 template<
class HitType>
227 target_decoy(
"target_decoy",
"decoy"),
228 is_decoy(
"isDecoy",
"true")
245 return target_decoy(hit) || is_decoy(hit);
254 template<
class HitType>
261 accessions(accessions_)
269 if (accessions.count(it) > 0)
291 template<
class HitType>
305 if (accessions.count(it) > 0)
327 template<
class HitType,
class Entry>
335 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
337 items[getKey(*rec_it)] = &(*rec_it);
352 return items.count(getHitKey(hit)) > 0;
362 if (!exists(evidence))
364 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
366 return *(items.find(getHitKey(evidence))->second);
381 struct HasMinPeptideLength;
387 struct HasLowMZError;
394 struct HasMatchingModification;
401 struct HasMatchingSequence;
404 struct HasNoEvidence;
434 const auto& fun = [&](
const Int missed_cleavages) {
435 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
436 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
437 return max_filter || min_filter;
444 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
464 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
476 if (accession_resolver_.
exists(evidence))
479 ignore_missed_cleavages_, methionine_cleavage_);
485 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
497 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
508 template<
class IdentificationType>
514 return id.getHits().empty();
541 template<
class Container,
class Predicate>
544 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
548 template<
class Container,
class Predicate>
551 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
555 template<
class Container,
class Predicate>
558 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
559 std::move(part, items.end(), std::back_inserter(target));
560 items.erase(part, items.end());
564 template<
class IDContainer,
class Predicate>
567 for (
auto& item : items)
569 removeMatchingItems(item.getHits(), pred);
574 template<
class IDContainer,
class Predicate>
577 for (
auto& item : items)
579 keepMatchingItems(item.getHits(), pred);
583 template<
class MapType,
class Predicate>
586 for (
auto& feat : prot_and_pep_ids)
588 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
590 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
593 template<
class MapType,
class Predicate>
596 for (
auto& feat : prot_and_pep_ids)
598 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
600 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
603 template<IsFeatureOrConsensusMap MapType,
class Predicate>
606 for (
auto& feat : prot_and_pep_ids)
608 removeMatchingItems(feat.getPeptideIdentifications(), pred);
610 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
614 template<
class Predicate>
617 removeMatchingItems(pep_ids, pred);
627 template<
class IdentificationType>
631 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
633 counter += id_it->getHits().size();
642 for (
const auto&
id : ids)
644 counter +=
id.getHits().size();
652 std::vector<PeptideIdentification>& vec = ids.
getData();
653 filterHitsByRank(vec, min_rank, max_rank);
659 std::vector<PeptideIdentification>& vec = ids.
getData();
660 removeHitsMatchingProteins(vec, accessions);
666 std::vector<PeptideIdentification>& vec = ids.
getData();
667 keepHitsMatchingProteins(vec, accessions);
673 std::vector<PeptideIdentification>& vec = ids.
getData();
674 return getBestHit(vec, assume_sorted, best_hit);
690 template<
class IdentificationType>
691 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
693 if (identifications.empty())
696 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
697 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
699 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
701 if (id_it->getHits().empty())
704 if (best_id_it == identifications.end())
707 best_hit_it = id_it->getHits().begin();
709 else if (best_id_it->getScoreType() != id_it->getScoreType())
711 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
714 bool higher_better = best_id_it->isHigherScoreBetter();
715 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
717 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
719 best_hit_it = hit_it;
726 if (best_id_it == identifications.end())
731 best_hit = *best_hit_it;
756 template<
class Ev
idenceFilter>
761 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
763 std::vector<PeptideEvidence> evidences;
764 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
765 hit_it->setPeptideEvidences(evidences);
819 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
827 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
835 template<IsPept
ideOrProteinIdentification IdentificationType>
838 struct HasNoHits<IdentificationType> empty_filter;
839 removeMatchingItems(ids, empty_filter);
847 template<
class IdentificationType>
850 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
852 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
853 keepMatchingItems(id_it->getHits(), score_filter);
870 template<class IdentificationType>
871 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
874 bool at_least_one_found =
false;
875 for (IdentificationType&
id : ids)
877 if (switcher.
isScoreType(
id.getScoreType(), score_type))
879 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
880 keepMatchingItems(id.getHits(), score_filter);
886 if (!metaval.empty())
890 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
891 keepMatchingItems(id.getHits(), score_filter);
895 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
896 keepMatchingItems(id.getHits(), score_filter);
898 at_least_one_found = true;
902 if (!at_least_one_found)
OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
911 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
918 template<class IdentificationType>
919 static void filterHitsByScore(IdentificationType& id, double threshold_score)
921 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
922 keepMatchingItems(id.getHits(), score_filter);
930 template<class IdentificationType>
931 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
933 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
936 if (n < id_it->getHits().size())
937 id_it->getHits().resize(n);
949 static void keepNBestHits(PeptideIdentificationList& pep_ids, Size n)
951 std::vector<PeptideIdentification>& vec = pep_ids.getData();
952 keepNBestHits(vec, n);
969 template<class IdentificationType>
970 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
974 auto& hits = id.getHits();
975 if (hits.empty()) continue;
980 if (max_rank < min_rank) max_rank = hits.size();
983 double last_score = hits.front().getScore();
987 std::remove_if(hits.begin(), hits.end(),
988 [&](const auto& hit) {
989 if (hit.getScore() != last_score)
992 last_score = hit.getScore();
994 return rank < min_rank || rank > max_rank;
1008 template<class IdentificationType>
1009 static void removeDecoyHits(std::vector<IdentificationType>& ids)
1011 struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
1012 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
1014 removeMatchingItems(id_it->getHits(), decoy_filter);
1025 template<class IdentificationType>
1026 static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
1028 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1029 for (auto& id_it : ids)
1031 removeMatchingItems(id_it.getHits(), acc_filter);
1042 template<IsPeptideOrProteinIdentification IdentificationType>
1043 static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
1045 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
1046 keepMatchingItems(id.getHits(), acc_filter);
1056 template<class IdentificationType>
1057 static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
1059 for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1074 static void keepBestPeptideHits(PeptideIdentificationList& peptides, bool strict = false);
1084 static void filterPeptidesByLength(PeptideIdentificationList& peptides, Size min_length, Size max_length = UINT_MAX);
1094 static void filterPeptidesByCharge(PeptideIdentificationList& peptides, Int min_charge, Int max_charge);
1097 static void filterPeptidesByRT(PeptideIdentificationList& peptides, double min_rt, double max_rt);
1100 static void filterPeptidesByMZ(PeptideIdentificationList& peptides, double min_mz, double max_mz);
1113 static void filterPeptidesByMZError(PeptideIdentificationList& peptides, double mass_error, bool unit_ppm);
1122 template<class Filter>
1123 static void filterPeptideEvidences(Filter& filter, PeptideIdentificationList& peptides);
1136 static void filterPeptidesByRTPredictPValue(PeptideIdentificationList& peptides, const String& metavalue_key, double threshold = 0.05);
1139 static void removePeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1141 static void removePeptidesWithMatchingRegEx(PeptideIdentificationList& peptides, const String& regex);
1144 static void keepPeptidesWithMatchingModifications(PeptideIdentificationList& peptides, const std::set<String>& modifications);
1153 static void removePeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& bad_peptides, bool ignore_mods = false);
1162 static void keepPeptidesWithMatchingSequences(PeptideIdentificationList& peptides, const PeptideIdentificationList& good_peptides, bool ignore_mods = false);
1165 static void keepUniquePeptidesPerProtein(PeptideIdentificationList& peptides);
1173 static void removeDuplicatePeptideHits(PeptideIdentificationList& peptides, bool seq_only = false);
1182 static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1183 double peptide_threshold_score,
1184 double protein_threshold_score)
1187 filterHitsByScore(annotated_data.getProteinIdentifications(),
1188 protein_threshold_score);
1193 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1195 filterHitsByScore(peptide_id, peptide_threshold_score);
1197 updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1201 static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1205 PeptideIdentificationList all_peptides;
1207 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1210 PeptideIdentificationList temp_vec = {peptide_id};
1211 keepNBestHits(temp_vec, n);
1213 if (!temp_vec.empty())
1215 peptide_id = temp_vec[0];
1219 peptide_id.getHits().clear();
1224 temp_vec = {peptide_id};
1225 updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1226 all_peptides.push_back(peptide_id);
1229 removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1234 static void keepNBestSpectra(PeptideIdentificationList& peptides, Size n);
1237 template<class MapType>
1238 static void keepNBestPeptideHits(MapType& map, Size n)
1242 for (auto& feat : map)
1244 keepNBestHits(feat.getPeptideIdentifications(), n);
1246 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1249 template<class MapType>
1250 static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1252 const auto pred = HasNoHits<PeptideIdentification>();
1253 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1257 static void keepBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1259 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1260 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1261 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1264 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1266 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1267 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1268 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1272 template<class MapType>
1273 static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1275 const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1277 RunToSequenceToChargeToPepHitP best_peps_per_run;
1278 for (const auto& idrun : prot_ids)
1280 best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1283 for (auto& feat : prot_and_pep_ids)
1285 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1288 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1291 template<class MapType>
1292 static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1294 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1295 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1296 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1301 static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1302 Size nr_best_spectrum)
1304 RunToSequenceToChargeToPepHitP best_peps_per_run;
1305 for (const auto& id : prot_ids)
1307 best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1309 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1315 static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges,
1316 Size nr_best_spectrum)
1318 for (auto& pep : pep_ids)
1320 SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1321 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1328 static void annotateBestPerPeptide(PeptideIdentificationList& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1330 SequenceToChargeToPepHitP best_pep;
1331 for (auto& pep : pep_ids)
1333 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1341 static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1343 bool higher_score_better = pep.isHigherScoreBetter();
1347 auto pepIt = pep.getHits().begin();
1348 auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1349 for (; pepIt != pepItEnd; ++pepIt)
1351 PeptideHit& hit = *pepIt;
1356 lookup_seq = hit.getSequence().toUnmodifiedString();
1360 lookup_seq = hit.getSequence().toString();
1363 int lookup_charge = 0;
1364 if (!ignore_charges)
1366 lookup_charge = hit.getCharge();
1370 auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1371 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1373 PeptideHit*& p = it_inserted_chg.first->second;
1374 if (!it_inserted_chg.second)
1376 if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1378 p->setMetaValue(
"best_per_peptide", 0);
1379 hit.setMetaValue(
"best_per_peptide", 1);
1385 hit.setMetaValue(
"best_per_peptide", 0);
1390 hit.setMetaValue(
"best_per_peptide", 1);
1398 const std::vector<FASTAFile::FASTAEntry>& proteins)
1400 std::set<String> accessions;
1401 for (
auto it = proteins.begin(); it != proteins.end(); ++it)
1403 accessions.insert(it->identifier);
1411 for (
auto [spectrum, peptide_id] : experiment)
1413 if (spectrum.getMSLevel() == 2)
1415 keepHitsMatchingProteins(peptide_id, accessions);
1462 removeDecoyHits(ids.
getData());
1467 filterHitsByScore(ids.
getData(), threshold_score);
1472 removeUnreferencedProteins(proteins, ids.
getData());
#define OPENMS_LOG_WARN
Macro to use when a warning, i.e. a piece of information which should be read by the user, is to be logged.
Definition: LogStream.h:444
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Class for storing MS run data with peptide and protein identifications.
Definition: AnnotatedMSRun.h:36
PeptideIdentificationList & getPeptideIdentifications()
Get all peptide identifications for all spectra.
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identification.
Definition: AnnotatedMSRun.h:69
A container for consensus elements.
Definition: ConsensusMap.h:68
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
typename VecMember::iterator iterator
Definition: ExposedVector.h:68
iterator begin() noexcept
Definition: ExposedVector.h:104
const VecMember & getData() const
read-only access to the underlying data
Definition: ExposedVector.h:328
iterator end() noexcept
Definition: ExposedVector.h:108
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:413
Int max_cleavages_
Definition: IDFilter.h:417
EnzymaticDigestion & digestion_
Definition: IDFilter.h:415
PeptideHit argument_type
Definition: IDFilter.h:420
Int min_cleavages_
Definition: IDFilter.h:416
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:432
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:442
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:421
static Int disabledValue()
Definition: IDFilter.h:425
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:64
static void removeHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:657
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:848
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:556
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:75
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:584
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:542
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:74
static void removeDecoyHits(PeptideIdentificationList &ids)
Definition: IDFilter.h:1460
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:836
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:575
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void keepHitsMatchingProteins(PeptideIdentificationList &ids, const std::set< String > &accessions)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:664
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:549
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition: IDFilter.h:1396
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:594
static void filterHitsByRank(PeptideIdentificationList &ids, Size min_rank, Size max_rank)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:650
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static Size countHits(const PeptideIdentificationList &ids)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:639
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(PeptideIdentificationList &ids, bool assume_sorted, PeptideHit &best_hit)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: IDFilter.h:671
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:565
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void filterHitsByScore(PeptideIdentificationList &ids, double threshold_score)
Definition: IDFilter.h:1465
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:604
static void updateProteinReferences(PeptideIdentificationList &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, PeptideIdentificationList &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:757
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:628
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, PeptideIdentificationList &ids)
Definition: IDFilter.h:1470
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:691
static void extractPeptideSequences(const PeptideIdentificationList &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeMatchingPeptideIdentifications(PeptideIdentificationList &pep_ids, Predicate &pred)
Definition: IDFilter.h:615
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void removeUnreferencedProteins(ProteinIdentification &proteins, const PeptideIdentificationList &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:73
This class is used to switch identification scores within identification or consensus feature maps.
Definition: IDScoreSwitcherAlgorithm.h:41
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
Searches for a specified score type within an identification object and its meta values.
Definition: IDScoreSwitcherAlgorithm.h:540
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:138
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score name corresponds to a specific score type.
Definition: IDScoreSwitcherAlgorithm.h:74
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:49
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Container for peptide identifications from multiple spectra.
Definition: PeptideIdentificationList.h:66
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:51
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
concept IsFeatureOrConsensusMap
Definition: IDFilter.h:40
concept IsPeptideOrProteinIdentification
Definition: IDFilter.h:36
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:454
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:463
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:458
void filterPeptideEvidences(PeptideIdentificationList &peptides)
Definition: IDFilter.h:495
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:468
bool ignore_missed_cleavages_
Definition: IDFilter.h:460
PeptideEvidence argument_type
Definition: IDFilter.h:455
ProteaseDigestion & digestion_
Definition: IDFilter.h:459
bool methionine_cleavage_
Definition: IDFilter.h:461
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:328
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:330
GetMatchingItems()
Definition: IDFilter.h:341
ItemMap items
Definition: IDFilter.h:331
HitType argument_type
Definition: IDFilter.h:329
bool exists(const HitType &hit) const
Definition: IDFilter.h:350
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:360
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:333
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:345
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:355
Is this a decoy hit?
Definition: IDFilter.h:216
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:240
HitType argument_type
Definition: IDFilter.h:217
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:226
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:86
bool operator()(const HitType &hit) const
Definition: IDFilter.h:96
double score
Definition: IDFilter.h:89
HitType argument_type
Definition: IDFilter.h:87
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:92
bool higher_score_better
Definition: IDFilter.h:90
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:255
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:260
HitType argument_type
Definition: IDFilter.h:256
const std::unordered_set< String > & accessions
Definition: IDFilter.h:258
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:265
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:280
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:275
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:292
HitType argument_type
Definition: IDFilter.h:293
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:301
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:316
const std::set< String > & accessions
Definition: IDFilter.h:295
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:297
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:311
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:509
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:512
IdentificationType argument_type
Definition: IDFilter.h:510
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20