22 #include <unordered_map>
25 #include <boost/function.hpp>
26 #include <boost/blank.hpp>
27 #include <boost/serialization/strong_typedef.hpp>
28 #include <boost/graph/adjacency_list.hpp>
29 #include <boost/graph/depth_first_search.hpp>
30 #include <boost/graph/filtered_graph.hpp>
31 #include <boost/graph/properties.hpp>
32 #include <boost/variant.hpp>
33 #include <boost/variant/detail/hash_variant.hpp>
34 #include <boost/variant/static_visitor.hpp>
38 struct ScoreToTgtDecLabelPairs;
63 #pragma clang diagnostic push
64 #pragma clang diagnostic ignored "-Wextra-semi"
88 #pragma clang diagnostic pop
93 typedef boost::variant<ProteinHit*, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit*>
IDPointer;
94 typedef boost::variant<const ProteinHit*, const ProteinGroup*, const PeptideCluster*, const Peptide, const RunIndex, const Charge, const PeptideHit*>
IDPointerConst;
99 typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer>
Graph;
101 typedef boost::adjacency_list <boost::setS, boost::vecS, boost::undirectedS, IDPointer>
GraphConst;
103 typedef boost::graph_traits<Graph>::vertex_descriptor
vertex_t;
104 typedef boost::graph_traits<Graph>::edge_descriptor
edge_t;
112 public boost::default_dfs_visitor
116 : gs(vgs), curr_v(0), next_v(0), m()
119 template <
typename Vertex,
typename Graph >
123 next_v = boost::add_vertex(tg[u], gs.back());
127 template <
typename Vertex,
typename Graph >
133 template <
typename Edge,
typename Graph >
136 if (m.find(e.m_target) == m.end())
138 next_v = boost::add_vertex(tg[e.m_target], gs.back());
139 m[e.m_target] = next_v;
143 next_v = m[e.m_target];
146 boost::add_edge(m[e.m_source], next_v, gs.back());
152 std::map<vertex_t, vertex_t>
m;
158 public boost::static_visitor<OpenMS::String>
189 return "rep" +
String(ri);
194 return "chg" +
String(chg);
201 template<
class CharT>
203 public boost::static_visitor<>
218 stream_ << prot->
getAccession() <<
": " << prot << std::endl;
223 stream_ <<
"PG" << std::endl;
228 stream_ <<
"PepClust" << std::endl;
233 stream_ << peptide << std::endl;
238 stream_ <<
"rep" << ri << std::endl;
243 stream_ <<
"chg" << chg << std::endl;
253 public boost::static_visitor<>
269 pg.
score = posterior;
284 public boost::static_visitor<double>
316 public boost::static_visitor<std::pair<double,bool>>
339 return {-1.0,
false};
345 std::vector<PeptideIdentification>& idedSpectra,
348 bool best_psms_annotated,
349 const std::optional<const ExperimentalDesign>& ed = std::optional<const ExperimentalDesign>());
355 bool use_unassigned_ids,
356 bool best_psms_annotated,
357 const std::optional<const ExperimentalDesign>& ed = std::optional<const ExperimentalDesign>());
433 bool stop_at_first, std::vector<vertex_t>& result);
444 bool stop_at_first, std::vector<vertex_t>& result);
459 struct SequenceToReplicateChargeVariantHierarchy;
489 #ifdef INFERENCE_BENCH
491 std::vector<std::tuple<vertex_t, vertex_t, unsigned long, double>> sizes_and_times_{1};
509 Size nrPrefractionationGroups_ = 0;
534 std::vector<PeptideIdentification>& idedSpectra,
536 bool best_psms_annotated =
false);
541 bool use_unassigned_ids,
542 bool best_psms_annotated =
false);
548 const std::unordered_map<std::string, ProteinHit*>& accession_map,
550 bool best_psms_annotated);
554 std::unordered_map<unsigned, unsigned>& indexToPrefractionationGroup,
556 std::unordered_map<std::string, ProteinHit*>& accession_map,
570 bool use_unassigned_ids,
574 std::vector<PeptideIdentification>& idedSpectra,
582 template<
class NodeType>
585 Graph::adjacency_iterator adjIt, adjIt_end;
586 boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
587 for (;adjIt != adjIt_end; ++adjIt)
589 if (graph[*adjIt].type() ==
typeid(NodeType))
591 result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
593 else if (graph[*adjIt].which() > graph[start].which())
595 getDownstreamNodes(*adjIt, graph, result);
600 template<
class NodeType>
603 Graph::adjacency_iterator adjIt, adjIt_end;
604 boost::tie(adjIt, adjIt_end) = boost::adjacent_vertices(start, graph);
605 for (;adjIt != adjIt_end; ++adjIt)
607 if (graph[*adjIt].type() ==
typeid(NodeType))
609 result.emplace_back(boost::get<NodeType>(graph[*adjIt]));
611 else if (graph[*adjIt].which() < graph[start].which())
613 getUpstreamNodes(*adjIt, graph, result);
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
A container for consensus elements.
Definition: ConsensusMap.h:66
String toString(bool full_precision=true) const
Conversion to String full_precision Controls number of fractional digits for all double types or list...
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:219
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:285
double operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:288
double operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:293
double operator()(T &) const
Definition: IDBoostGraph.h:305
double operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:298
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:317
std::pair< double, bool > operator()(T &) const
Definition: IDBoostGraph.h:337
std::pair< double, bool > operator()(ProteinGroup &pg) const
Definition: IDBoostGraph.h:330
std::pair< double, bool > operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:320
std::pair< double, bool > operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:325
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type creates a label e....
Definition: IDBoostGraph.h:159
OpenMS::String operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:182
OpenMS::String operator()(const Charge &chg) const
Definition: IDBoostGraph.h:192
OpenMS::String operator()(const PeptideHit *pep) const
Definition: IDBoostGraph.h:162
OpenMS::String operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:172
OpenMS::String operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:187
OpenMS::String operator()(const ProteinHit *prot) const
Definition: IDBoostGraph.h:167
OpenMS::String operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:177
Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type prints the address...
Definition: IDBoostGraph.h:204
void operator()(const Charge &chg) const
Definition: IDBoostGraph.h:241
std::basic_ostream< CharT > stream_
Definition: IDBoostGraph.h:246
PrintAddressVisitor(std::basic_ostream< CharT > stream)
Definition: IDBoostGraph.h:207
void operator()(const PeptideCluster &) const
Definition: IDBoostGraph.h:226
void operator()(const RunIndex &ri) const
Definition: IDBoostGraph.h:236
void operator()(PeptideHit *pep) const
Definition: IDBoostGraph.h:211
void operator()(ProteinHit *prot) const
Definition: IDBoostGraph.h:216
void operator()(const Peptide &peptide) const
Definition: IDBoostGraph.h:231
void operator()(const ProteinGroup &) const
Definition: IDBoostGraph.h:221
Visits nodes in the boost graph (either ptrs to an ID Object or some lightweight surrogates) and depe...
Definition: IDBoostGraph.h:254
void operator()(T &, double) const
Definition: IDBoostGraph.h:274
void operator()(PeptideHit *pep, double posterior) const
Definition: IDBoostGraph.h:257
void operator()(ProteinGroup &pg, double posterior) const
Definition: IDBoostGraph.h:267
void operator()(ProteinHit *prot, double posterior) const
Definition: IDBoostGraph.h:262
A boost dfs visitor that copies connected components into a vector of graphs.
Definition: IDBoostGraph.h:113
std::map< vertex_t, vertex_t > m
A mapping from old node id to new node id to not duplicate existing ones in the new graph.
Definition: IDBoostGraph.h:152
void start_vertex(Vertex u, const Graph &tg)
Definition: IDBoostGraph.h:120
void examine_edge(Edge e, const Graph &tg)
Definition: IDBoostGraph.h:134
dfs_ccsplit_visitor(Graphs &vgs)
Definition: IDBoostGraph.h:115
vertex_t curr_v
Definition: IDBoostGraph.h:150
void discover_vertex(Vertex, const Graph &)
Definition: IDBoostGraph.h:128
Graphs & gs
Definition: IDBoostGraph.h:149
Creates and maintains a boost graph based on the OpenMS ID datastructures.
Definition: IDBoostGraph.h:57
void addPeptideIDWithAssociatedProteins_(PeptideIdentification &spectrum, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, const std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms, bool best_psms_annotated)
Used during building.
const ProteinIdentification & getProteinIDs()
Returns the underlying protein identifications for viewing.
void buildGraphWithRunInfo_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, const ExperimentalDesign &ed)
void buildGraph_(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool best_psms_annotated=false)
boost::graph_traits< Graph >::vertex_descriptor vertex_t
Definition: IDBoostGraph.h:103
BOOST_STRONG_TYPEDEF(boost::blank, PeptideCluster)
placeholder for peptides with the same parent proteins or protein groups
BOOST_STRONG_TYPEDEF(int, Charge)
in which charge state a PSM was observed
std::unordered_map< vertex_t, Size > pepHitVtx_to_run_
Definition: IDBoostGraph.h:503
boost::variant< const ProteinHit *, const ProteinGroup *, const PeptideCluster *, const Peptide, const RunIndex, const Charge, const PeptideHit * > IDPointerConst
Definition: IDBoostGraph.h:94
void addPeptideAndAssociatedProteinsWithRunInfo_(PeptideIdentification &spectrum, std::unordered_map< unsigned, unsigned > &indexToPrefractionationGroup, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms)
std::vector< Graph > Graphs
Definition: IDBoostGraph.h:100
double score
Definition: IDBoostGraph.h:75
IDBoostGraph(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_run_info, bool use_unassigned_ids, bool best_psms_annotated, const std::optional< const ExperimentalDesign > &ed=std::optional< const ExperimentalDesign >())
void getDownstreamNodes(const vertex_t &start, const Graph &graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:583
ProteinIdentification & protIDs_
Definition: IDBoostGraph.h:457
void getUpstreamNodes(const vertex_t &start, const Graph graph, std::vector< NodeType > &result)
Definition: IDBoostGraph.h:601
void computeConnectedComponents()
Splits the initialized graph into connected components and clears it.
void clusterIndistProteinsAndPeptides()
void getProteinGroupScoresAndHitchhikingTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
int tgts
Definition: IDBoostGraph.h:74
Size getNrConnectedComponents()
Zero means the graph was not split yet.
void resolveGraphPeptideCentric_(Graph &fg, bool removeAssociationsInData)
see equivalent public method
void getUpstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all upstream nodes from a (set of) start nodes that are lower than or equal to a given leve...
const Graph & getComponent(Size cc)
Returns a specific connected component of the graph as a graph itself.
void applyFunctorOnCCsST(const std::function< void(Graph &)> &functor)
Do something on connected components, single-threaded (your functor object has to inherit from std::function...
Graph g
the initial boost Graph (will be cleared when split into CCs)
Definition: IDBoostGraph.h:459
void annotateIndistProteins_(const Graph &fg, bool addSingletons)
internal function to annotate the underlying ID structures based on the given Graph
void clusterIndistProteinsAndPeptidesAndExtendGraph()
std::set< IDBoostGraph::vertex_t > PeptideNodeSet
Definition: IDBoostGraph.h:107
std::set< IDBoostGraph::vertex_t > ProteinNodeSet
Definition: IDBoostGraph.h:106
void buildGraphWithRunInfo_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, const ExperimentalDesign &ed)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > GraphConst
Definition: IDBoostGraph.h:101
void calculateAndAnnotateIndistProteins(bool addSingletons=true)
static void printGraph(std::ostream &out, const Graph &fg)
Prints a graph (component or if not split, the full graph) in graphviz (i.e. dot) format.
void calculateAndAnnotateIndistProteins_(const Graph &fg, bool addSingletons)
boost::graph_traits< Graph >::edge_descriptor edge_t
Definition: IDBoostGraph.h:104
BOOST_STRONG_TYPEDEF(String, Peptide)
a (currently unmodified) peptide sequence
void annotateIndistProteins(bool addSingletons=true)
BOOST_STRONG_TYPEDEF(Size, RunIndex)
in which run a PSM was observed
void resolveGraphPeptideCentric(bool removeAssociationsInData=true)
IDBoostGraph(ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool use_run_info, bool best_psms_annotated, const std::optional< const ExperimentalDesign > &ed=std::optional< const ExperimentalDesign >())
Constructors.
void getProteinGroupScoresAndTgtFraction(ScoreToTgtDecLabelPairs &scores_and_tgt_fraction)
void buildGraph_(ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, bool best_psms_annotated=false)
void getProteinScores_(ScoreToTgtDecLabelPairs &scores_and_tgt)
boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > Graph
Definition: IDBoostGraph.h:99
void getDownstreamNodesNonRecursive(std::queue< vertex_t > &q, const Graph &graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
Searches for all downstream nodes from a (set of) start nodes that are higher than or equal to a given l...
boost::variant< ProteinHit *, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit * > IDPointer
Definition: IDBoostGraph.h:93
void applyFunctorOnCCs(const std::function< unsigned long(Graph &, unsigned int)> &functor)
Do something on connected components (your functor object has to inherit from std::function or be a lambda)
Graphs ccs_
the Graph split into connected components
Definition: IDBoostGraph.h:486
vertex_t addVertexWithLookup_(const IDPointer &ptr, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map)
indistinguishable protein groups (size, nr targets, score)
Definition: IDBoostGraph.h:72
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition: PeptideHit.h:50
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
Int getCharge() const
returns the charge of the peptide
void setScore(double score)
sets the PSM score
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition: PeptideIdentification.h:63
Representation of a protein hit.
Definition: ProteinHit.h:34
double getScore() const
returns the score of the protein hit
void setScore(const double score)
sets the score of the protein hit
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Definition: IDScoreGetterSetter.h:31