5 #ifndef YMIR_EM_ALGORITHM_H 6 #define YMIR_EM_ALGORITHM_H 9 #include "statisticalinferencealgorithm.h" 29 virtual std::vector<prob_t> statisticalInference(
const ClonesetView &repertoire,
32 ErrorMode error_mode = NO_ERRORS)
const {
33 cout <<
"Statistical inference on a PAM:\t" << model.name() << endl;
34 cout <<
"\tMurugan EM-algorithm.";
35 if (error_mode == COMPUTE_ERRORS) {
36 cout <<
"\t(with sequence errors)";
38 std::cout << std::endl;
40 if (!algo_param.check(
"niter") && !algo_param.check(
"sample")) {
41 return std::vector<prob_t>();
45 size_t sample = algo_param[
"sample"].asUInt();
46 ClonesetView rep_nonc = repertoire.noncoding().sample(sample);
47 cout <<
"Number of noncoding clonotypes:\t" << (size_t) rep_nonc.size() << endl;
49 std::vector<prob_t> logLvec;
53 new_param_vec.
fill(1);
54 new_param_vec.set_error_prob(.0003);
58 MAAGRepertoire maag_rep = model.
buildGraphs(rep_nonc, SAVE_METADATA, error_mode, NUCLEOTIDE,
true);
60 vector<prob_t> prob_vec;
62 prob_t prev_ll = 0, cur_ll = 0;
64 cout <<
"Computing full assembling probabilities..." << endl;
65 vector<bool> good_clonotypes;
66 size_t removed, zero_prob, no_alignments;
67 this->filterOut(rep_nonc, maag_rep, prob_vec, good_clonotypes, removed, zero_prob, no_alignments);
69 cout << endl <<
"Initial data summary:" << endl;
70 prob_summary(prob_vec);
72 prev_ll = loglikelihood(prob_vec);
73 logLvec.push_back(prev_ll);
75 std::chrono::system_clock::time_point tp1, tp2;
77 for (
size_t iter = 1; iter <= algo_param[
"niter"].asUInt(); ++iter) {
78 cout << endl <<
"Iteration:\t" << (size_t) iter << endl;
80 new_param_vec.
fill(0);
82 tp1 = std::chrono::system_clock::now();
83 for (
size_t i = 0; i < maag_rep.size(); ++i) {
84 if ((i+1) % 25000 == 0) {
85 cout <<
"Processed " << (int) i <<
" / " << (
int) maag_rep.size() <<
" MAAGs.\t" << (std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) - std::chrono::system_clock::to_time_t(tp1)) <<
" sec." << endl;
87 if (good_clonotypes[i]) {
88 if(!this->updateTempVec(fb, maag_rep[i], new_param_vec, error_mode)) {
89 cout <<
"bad maag:\t" << (int) i << endl;
94 this->updateModel(model, new_param_vec, maag_rep, prob_vec, prev_ll, removed, error_mode);
96 std::cout << new_param_vec.error_prob() << std::endl;
98 logLvec.push_back(prev_ll);
101 cout << endl <<
"Done. Resulting loglikelihood:\t" << loglikelihood(prob_vec) << endl << endl;
112 ErrorMode error_mode)
const 114 if (!fb.process(maag, error_mode)) {
118 while (!fb.is_empty()) {
119 event_pair_t ep = fb.nextEvent();
120 new_param_vec[ep.first] += ep.second;
122 if (std::isnan(ep.second)) {
127 if (error_mode) { new_param_vec.set_error_prob(new_param_vec.error_prob() + fb.err_prob()); }
130 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 0)] += fb.VJ_nuc_probs()[0];
131 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 1)] += fb.VJ_nuc_probs()[1];
132 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 2)] += fb.VJ_nuc_probs()[2];
133 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 3)] += fb.VJ_nuc_probs()[3];
136 for (
auto prev_nuc = 0; prev_nuc < 4; ++prev_nuc) {
137 for (
auto next_nuc = 0; next_nuc < 4; ++next_nuc, ++k) {
138 new_param_vec[new_param_vec.
event_index(VDJ_VAR_DIV_INS_NUC, prev_nuc, next_nuc)] += fb.VD_nuc_probs()[k];
143 for (
auto prev_nuc = 0; prev_nuc < 4; ++prev_nuc) {
144 for (
auto next_nuc = 0; next_nuc < 4; ++next_nuc, ++k) {
145 new_param_vec[new_param_vec.
event_index(VDJ_DIV_JOI_INS_NUC, prev_nuc, next_nuc)] += fb.DJ_nuc_probs()[k];
156 MAAGRepertoire &maag_rep,
157 vector<prob_t> &prob_vec,
160 ErrorMode error_mode)
const 163 new_param_vec.set_error_prob(.0003);
170 for (
size_t i = 0; i < maag_rep.size(); ++i) {
171 prob_vec[i] = maag_rep[i].fullProbability();
173 prob_summary(prob_vec, prev_ll);
174 prev_ll = loglikelihood(prob_vec);
181 #endif //YMIR_EM_ALGORITHM_H
Interface for algorithms for statistical inference of assembling model parameters.
Definition: statisticalinferencealgorithm.h:46
event_ind_t event_index(EventClass event_class, event_ind_t event_family, event_ind_t event_index) const
Definition: modelparametervector.h:261
Definition: maagforwardbackwardalgorithm.h:20
Multi-Alignment Assembly Graph - basic class for representing all possible generation scenarios of a ...
Definition: maag.h:46
const ModelParameterVector & event_probabilities() const
Access to vector of probabilities.
Definition: probabilisticassemblingmodel.h:185
void updateEventProbabilities(MAAGRepertoire *repertoire, bool verbose=true)
Update event probabilities in the given MAAG repertoire with new ones.
Definition: probabilisticassemblingmodel.h:143
Definition: statisticalinferencealgorithm.h:49
void normaliseEventFamilies()
Normalise each event family to have sum equal to 1.
Definition: modelparametervector.h:316
MAAGRepertoire buildGraphs(const ClonesetView &repertoire, MetadataMode save_metadata, ErrorMode error_mode, SequenceType sequence_type=NUCLEOTIDE, bool verbose=true) const
Build a set of MAAGs from the given cloneset.
Definition: probabilisticassemblingmodel.h:126
Class for storing parameters of assembling statistical model. Note: event with index 0 (zero) is "nul...
Definition: modelparametervector.h:68
Definition: probabilisticassemblingmodel.h:41
Definition: repertoire.h:51
void fill(prob_t val=0)
Fill the vector with the given value.
Definition: modelparametervector.h:377
void updateModelParameterVector(const ModelParameterVector &vec)
Set a new event probabilities vector to this model.
Definition: probabilisticassemblingmodel.h:199
Implementation of the EM-algorithm for statistical inference of assembling model parameters. Classic version described in (Murugan et al 2012)
Definition: em_algorithm.h:23