23 #include <OpenMS/config.h>
29 #include <unordered_set>
36 std::is_same_v<T, PeptideIdentification> || std::is_same_v<T, ProteinIdentification>;
40 std::is_same_v<T, FeatureMap> || std::is_same_v<T, ConsensusMap>;
84 template<
class HitType>
91 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
97 if (higher_score_better)
99 return hit.getScore() >= score;
101 return hit.getScore() <= score;
110 template<
class HitType>
128 return found == value;
133 template<
class HitType>
149 return double(found) <= value;
160 template<
class HitType>
193 return static_cast<double>(found) >= value;
213 template<
class HitType>
226 target_decoy(
"target_decoy",
"decoy"),
227 is_decoy(
"isDecoy",
"true")
244 return target_decoy(hit) || is_decoy(hit);
253 template<
class HitType>
260 accessions(accessions_)
268 if (accessions.count(it) > 0)
290 template<
class HitType>
304 if (accessions.count(it) > 0)
326 template<
class HitType,
class Entry>
334 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
336 items[getKey(*rec_it)] = &(*rec_it);
351 return items.count(getHitKey(hit)) > 0;
361 if (!exists(evidence))
363 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
365 return *(items.find(getHitKey(evidence))->second);
380 struct HasMinPeptideLength;
386 struct HasLowMZError;
393 struct HasMatchingModification;
400 struct HasMatchingSequence;
403 struct HasNoEvidence;
433 const auto& fun = [&](
const Int missed_cleavages) {
434 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
435 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
436 return max_filter || min_filter;
443 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
463 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
475 if (accession_resolver_.
exists(evidence))
478 ignore_missed_cleavages_, methionine_cleavage_);
484 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
496 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
507 template<
class IdentificationType>
513 return id.getHits().empty();
540 template<
class Container,
class Predicate>
543 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
547 template<
class Container,
class Predicate>
550 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
554 template<
class Container,
class Predicate>
557 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
558 std::move(part, items.end(), std::back_inserter(target));
559 items.erase(part, items.end());
563 template<
class IDContainer,
class Predicate>
566 for (
auto& item : items)
568 removeMatchingItems(item.getHits(), pred);
573 template<
class IDContainer,
class Predicate>
576 for (
auto& item : items)
578 keepMatchingItems(item.getHits(), pred);
582 template<
class MapType,
class Predicate>
585 for (
auto& feat : prot_and_pep_ids)
587 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
589 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
592 template<
class MapType,
class Predicate>
595 for (
auto& feat : prot_and_pep_ids)
597 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
599 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
602 template<IsFeatureOrConsensusMap MapType,
class Predicate>
605 for (
auto& feat : prot_and_pep_ids)
607 removeMatchingItems(feat.getPeptideIdentifications(), pred);
609 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
619 template<
class IdentificationType>
623 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
625 counter += id_it->getHits().size();
643 template<
class IdentificationType>
644 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
646 if (identifications.empty())
649 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
650 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
652 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
654 if (id_it->getHits().empty())
657 if (best_id_it == identifications.end())
660 best_hit_it = id_it->getHits().begin();
662 else if (best_id_it->getScoreType() != id_it->getScoreType())
664 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
667 bool higher_better = best_id_it->isHigherScoreBetter();
668 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
670 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
672 best_hit_it = hit_it;
679 if (best_id_it == identifications.end())
684 best_hit = *best_hit_it;
695 static void extractPeptideSequences(
const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences,
bool ignore_mods =
false);
709 template<
class Ev
idenceFilter>
712 for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
714 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
716 std::vector<PeptideEvidence> evidences;
717 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
718 hit_it->setPeptideEvidences(evidences);
744 static void updateProteinReferences(std::vector<PeptideIdentification>& peptides,
const std::vector<ProteinIdentification>& proteins,
bool remove_peptides_without_reference =
false);
772 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
780 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
788 template<IsPept
ideOrProteinIdentification IdentificationType>
791 struct HasNoHits<IdentificationType> empty_filter;
792 removeMatchingItems(ids, empty_filter);
800 template<
class IdentificationType>
803 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
805 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
806 keepMatchingItems(id_it->getHits(), score_filter);
823 template<class IdentificationType>
824 static void filterHitsByScore(std::vector<IdentificationType>& ids, double threshold_score, IDScoreSwitcherAlgorithm::ScoreType score_type)
827 bool at_least_one_found =
false;
828 for (IdentificationType&
id : ids)
830 if (switcher.
isScoreType(
id.getScoreType(), score_type))
832 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
833 keepMatchingItems(id.getHits(), score_filter);
839 if (!metaval.empty())
843 struct HasMinMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
844 keepMatchingItems(id.getHits(), score_filter);
848 struct HasMaxMetaValue<typename IdentificationType::HitType> score_filter(metaval, threshold_score);
849 keepMatchingItems(id.getHits(), score_filter);
851 at_least_one_found = true;
855 if (!at_least_one_found)
OPENMS_LOG_WARN << String("Warning: No hit with the given score_type found. All hits removed.") << std::endl;
864 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
871 template<class IdentificationType>
872 static void filterHitsByScore(IdentificationType& id, double threshold_score)
874 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
875 keepMatchingItems(id.getHits(), score_filter);
883 template<class IdentificationType>
884 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
886 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
889 if (n < id_it->getHits().size())
890 id_it->getHits().resize(n);
908 template<class IdentificationType>
909 static void filterHitsByRank(std::vector<IdentificationType>& ids, Size min_rank, Size max_rank)
913 auto& hits = id.getHits();
914 if (hits.empty()) continue;
919 if (max_rank < min_rank) max_rank = hits.size();
922 double last_score = hits.front().getScore();
926 std::remove_if(hits.begin(), hits.end(),
927 [&](const auto& hit) {
928 if (hit.getScore() != last_score)
931 last_score = hit.getScore();
933 return rank < min_rank || rank > max_rank;
947 template<class IdentificationType>
948 static void removeDecoyHits(std::vector<IdentificationType>& ids)
950 struct HasDecoyAnnotation<typename IdentificationType::HitType> decoy_filter;
951 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
953 removeMatchingItems(id_it->getHits(), decoy_filter);
964 template<class IdentificationType>
965 static void removeHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String> accessions)
967 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
968 for (auto& id_it : ids)
970 removeMatchingItems(id_it.getHits(), acc_filter);
981 template<IsPeptideOrProteinIdentification IdentificationType>
982 static void keepHitsMatchingProteins(IdentificationType& id, const std::set<String>& accessions)
984 struct HasMatchingAccession<typename IdentificationType::HitType> acc_filter(accessions);
985 keepMatchingItems(id.getHits(), acc_filter);
995 template<class IdentificationType>
996 static void keepHitsMatchingProteins(std::vector<IdentificationType>& ids, const std::set<String>& accessions)
998 for (auto& id_it : ids) keepHitsMatchingProteins(id_it, accessions);
1013 static void keepBestPeptideHits(std::vector<PeptideIdentification>& peptides, bool strict = false);
1023 static void filterPeptidesByLength(std::vector<PeptideIdentification>& peptides, Size min_length, Size max_length = UINT_MAX);
1033 static void filterPeptidesByCharge(std::vector<PeptideIdentification>& peptides, Int min_charge, Int max_charge);
1036 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides, double min_rt, double max_rt);
1039 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides, double min_mz, double max_mz);
1052 static void filterPeptidesByMZError(std::vector<PeptideIdentification>& peptides, double mass_error, bool unit_ppm);
1061 template<class Filter>
1062 static void filterPeptideEvidences(Filter& filter, std::vector<PeptideIdentification>& peptides);
1075 static void filterPeptidesByRTPredictPValue(std::vector<PeptideIdentification>& peptides, const String& metavalue_key, double threshold = 0.05);
1078 static void removePeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
1080 static void removePeptidesWithMatchingRegEx(std::vector<PeptideIdentification>& peptides, const String& regex);
1083 static void keepPeptidesWithMatchingModifications(std::vector<PeptideIdentification>& peptides, const std::set<String>& modifications);
1092 static void removePeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& bad_peptides, bool ignore_mods = false);
1101 static void keepPeptidesWithMatchingSequences(std::vector<PeptideIdentification>& peptides, const std::vector<PeptideIdentification>& good_peptides, bool ignore_mods = false);
1104 static void keepUniquePeptidesPerProtein(std::vector<PeptideIdentification>& peptides);
1112 static void removeDuplicatePeptideHits(std::vector<PeptideIdentification>& peptides, bool seq_only = false);
1121 static void filterHitsByScore(AnnotatedMSRun& annotated_data,
1122 double peptide_threshold_score,
1123 double protein_threshold_score)
1126 filterHitsByScore(annotated_data.getProteinIdentifications(),
1127 protein_threshold_score);
1132 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1134 filterHitsByScore(peptide_id, peptide_threshold_score);
1136 updateProteinReferences(annotated_data.getPeptideIdentifications(), annotated_data.getProteinIdentifications());
1140 static void keepNBestHits(AnnotatedMSRun& annotated_data, Size n)
1144 std::vector<PeptideIdentification> all_peptides;
1146 for (PeptideIdentification& peptide_id : annotated_data.getPeptideIdentifications())
1149 std::vector<PeptideIdentification> temp_vec = {peptide_id};
1150 keepNBestHits(temp_vec, n);
1152 if (!temp_vec.empty())
1154 peptide_id = temp_vec[0];
1158 peptide_id.getHits().clear();
1163 temp_vec = {peptide_id};
1164 updateProteinReferences(temp_vec, annotated_data.getProteinIdentifications());
1165 all_peptides.push_back(peptide_id);
1168 removeUnreferencedProteins(annotated_data.getProteinIdentifications(), all_peptides);
1173 static void keepNBestSpectra(std::vector<PeptideIdentification>& peptides, Size n);
1176 template<class MapType>
1177 static void keepNBestPeptideHits(MapType& map, Size n)
1181 for (auto& feat : map)
1183 keepNBestHits(feat.getPeptideIdentifications(), n);
1185 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1188 template<class MapType>
1189 static void removeEmptyIdentifications(MapType& prot_and_pep_ids)
1191 const auto pred = HasNoHits<PeptideIdentification>();
1192 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1196 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1198 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1199 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1200 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1203 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1205 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1206 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1207 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1211 template<class MapType>
1212 static void annotateBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1214 const auto& prot_ids = prot_and_pep_ids.getProteinIdentifications();
1216 RunToSequenceToChargeToPepHitP best_peps_per_run;
1217 for (const auto& idrun : prot_ids)
1219 best_peps_per_run[idrun.getIdentifier()] = SequenceToChargeToPepHitP();
1222 for (auto& feat : prot_and_pep_ids)
1224 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1227 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1230 template<class MapType>
1231 static void keepBestPerPeptidePerRun(MapType& prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1233 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1234 HasMetaValue<PeptideHit> best_per_peptide {"best_per_peptide", 1};
1235 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1240 static void annotateBestPerPeptidePerRun(const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1241 Size nr_best_spectrum)
1243 RunToSequenceToChargeToPepHitP best_peps_per_run;
1244 for (const auto& id : prot_ids)
1246 best_peps_per_run[id.getIdentifier()] = SequenceToChargeToPepHitP();
1248 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1254 static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP& best_peps_per_run, std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges,
1255 Size nr_best_spectrum)
1257 for (auto& pep : pep_ids)
1259 SequenceToChargeToPepHitP& best_pep = best_peps_per_run[pep.getIdentifier()];
1260 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1267 static void annotateBestPerPeptide(std::vector<PeptideIdentification>& pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1269 SequenceToChargeToPepHitP best_pep;
1270 for (auto& pep : pep_ids)
1272 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1280 static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP& best_pep, PeptideIdentification& pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
1282 bool higher_score_better = pep.isHigherScoreBetter();
1286 auto pepIt = pep.getHits().begin();
1287 auto pepItEnd = nr_best_spectrum == 0 || pep.getHits().size() <= nr_best_spectrum ? pep.getHits().end() : pep.getHits().begin() + nr_best_spectrum;
1288 for (; pepIt != pepItEnd; ++pepIt)
1290 PeptideHit& hit = *pepIt;
1295 lookup_seq = hit.getSequence().toUnmodifiedString();
1299 lookup_seq = hit.getSequence().toString();
1302 int lookup_charge = 0;
1303 if (!ignore_charges)
1305 lookup_charge = hit.getCharge();
1309 auto it_inserted = best_pep.emplace(std::move(lookup_seq), ChargeToPepHitP());
1310 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1312 PeptideHit*& p = it_inserted_chg.first->second;
1313 if (!it_inserted_chg.second)
1315 if ((higher_score_better && (hit.getScore() > p->getScore())) || (!higher_score_better && (hit.getScore() < p->getScore())))
1317 p->setMetaValue(
"best_per_peptide", 0);
1318 hit.setMetaValue(
"best_per_peptide", 1);
1324 hit.setMetaValue(
"best_per_peptide", 0);
1329 hit.setMetaValue(
"best_per_peptide", 1);
1337 const std::vector<FASTAFile::FASTAEntry>& proteins)
1339 std::set<String> accessions;
1340 for (
auto it = proteins.begin(); it != proteins.end(); ++it)
1342 accessions.insert(it->identifier);
1350 for (
auto [spectrum, peptide_id] : experiment)
1352 if (spectrum.getMSLevel() == 2)
1354 keepHitsMatchingProteins(peptide_id, accessions);
#define OPENMS_LOG_WARN
Macro to use when a warning (a piece of information that should be read by the user) should be logged.
Definition: LogStream.h:444
String toUnmodifiedString() const
Returns the peptide as a string without any modifications (e.g., "PEPTIDER").
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Class for storing MS run data with peptide and protein identifications.
Definition: AnnotatedMSRun.h:34
std::vector< ProteinIdentification > & getProteinIdentifications()
Get the protein identifications.
Definition: AnnotatedMSRun.h:67
std::vector< PeptideIdentification > & getPeptideIdentifications()
Get all peptide identifications for all spectra.
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:412
Int max_cleavages_
Definition: IDFilter.h:416
EnzymaticDigestion & digestion_
Definition: IDFilter.h:414
PeptideHit argument_type
Definition: IDFilter.h:419
Int min_cleavages_
Definition: IDFilter.h:415
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:431
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:441
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:420
static Int disabledValue()
Definition: IDFilter.h:424
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:63
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:801
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:555
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation.
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:74
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:583
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:541
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:73
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:789
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. a vector of Peptide- or ProteinIdentifications).
Definition: IDFilter.h:574
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:548
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void keepHitsMatchingProteins(AnnotatedMSRun &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters AnnotatedMSRun according to the given proteins.
Definition: IDFilter.h:1335
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:593
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:710
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. a vector of Peptide- or ProteinIdentifications).
Definition: IDFilter.h:564
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:603
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:620
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:644
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:72
This class is used to switch identification scores within identification or consensus feature maps.
Definition: IDScoreSwitcherAlgorithm.h:40
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
Searches for a specified score type within an identification object and its meta values.
Definition: IDScoreSwitcherAlgorithm.h:524
bool isScoreTypeHigherBetter(ScoreType score_type)
Determines whether a higher score type is better given a ScoreType enum.
Definition: IDScoreSwitcherAlgorithm.h:137
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score name corresponds to a specific score type.
Definition: IDScoreSwitcherAlgorithm.h:73
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:49
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
Get the position in the protein (starting at 0 for the N-terminus). If not available, UNKNOWN_POSITION is returned.
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
Get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-terminus).
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for N-terminal protein cleavage and random Asp/Pro (D|P) cleavage.
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type, e.g. used for a variable that can hold the result of size().
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
concept IsFeatureOrConsensusMap
Definition: IDFilter.h:39
concept IsPeptideOrProteinIdentification
Definition: IDFilter.h:35
FASTA entry type (identifier, description and sequence). The first String corresponds to the identifier.
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:453
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:462
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:457
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:494
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:467
bool ignore_missed_cleavages_
Definition: IDFilter.h:459
PeptideEvidence argument_type
Definition: IDFilter.h:454
ProteaseDigestion & digestion_
Definition: IDFilter.h:458
bool methionine_cleavage_
Definition: IDFilter.h:460
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:327
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:329
GetMatchingItems()
Definition: IDFilter.h:340
ItemMap items
Definition: IDFilter.h:330
HitType argument_type
Definition: IDFilter.h:328
bool exists(const HitType &hit) const
Definition: IDFilter.h:349
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:359
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:332
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:344
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:354
Is this a decoy hit?
Definition: IDFilter.h:215
bool operator()(const HitType &hit) const
Operator to check if a HitType object has decoy annotation.
Definition: IDFilter.h:239
HitType argument_type
Definition: IDFilter.h:216
HasDecoyAnnotation()
Default constructor.
Definition: IDFilter.h:225
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:85
bool operator()(const HitType &hit) const
Definition: IDFilter.h:95
double score
Definition: IDFilter.h:88
HitType argument_type
Definition: IDFilter.h:86
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:91
bool higher_score_better
Definition: IDFilter.h:89
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:254
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:259
HitType argument_type
Definition: IDFilter.h:255
const std::unordered_set< String > & accessions
Definition: IDFilter.h:257
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:264
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:279
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:274
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:291
HitType argument_type
Definition: IDFilter.h:292
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:300
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:315
const std::set< String > & accessions
Definition: IDFilter.h:294
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:296
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:310
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:508
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:511
IdentificationType argument_type
Definition: IDFilter.h:509
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20