5 #ifndef YMIR_EM_ALGORITHM_H     6 #define YMIR_EM_ALGORITHM_H     9 #include "statisticalinferencealgorithm.h"    29         virtual std::vector<prob_t> statisticalInference(
const ClonesetView &repertoire,
    32                                                          ErrorMode error_mode = NO_ERRORS)
 const {
    33             cout << 
"Statistical inference on a PAM:\t" << model.name() << endl;
    34             cout << 
"\tMurugan EM-algorithm.";
    35             if (error_mode == COMPUTE_ERRORS) {
    36                 cout << 
"\t(with sequence errors)";
    38             std::cout << std::endl;
    40             if (!algo_param.check(
"niter") && !algo_param.check(
"sample")) {
    41                 return std::vector<prob_t>();
    45             size_t sample = algo_param[
"sample"].asUInt();
    46             ClonesetView rep_nonc = repertoire.noncoding().sample(sample);
    47             cout << 
"Number of noncoding clonotypes:\t" << (size_t) rep_nonc.size() << endl;
    49             std::vector<prob_t> logLvec;
    53             new_param_vec.
fill(1);
    54             new_param_vec.set_error_prob(.0003);
    58             MAAGRepertoire maag_rep = model.
buildGraphs(rep_nonc, SAVE_METADATA, error_mode, NUCLEOTIDE, 
true);
    60             vector<prob_t> prob_vec;
    62             prob_t prev_ll = 0, cur_ll = 0;
    64             cout << 
"Computing full assembling probabilities..." << endl;
    65             vector<bool> good_clonotypes;
    66             size_t removed, zero_prob, no_alignments;
    67             this->filterOut(rep_nonc, maag_rep, prob_vec, good_clonotypes, removed, zero_prob, no_alignments);
    69             cout << endl << 
"Initial data summary:" << endl;
    70             prob_summary(prob_vec);
    72             prev_ll = loglikelihood(prob_vec);
    73             logLvec.push_back(prev_ll);
    75             std::chrono::system_clock::time_point tp1, tp2;
    77             for (
size_t iter = 1; iter <= algo_param[
"niter"].asUInt(); ++iter) {
    78                 cout << endl << 
"Iteration:\t" << (size_t) iter << endl;
    80                 new_param_vec.
fill(0);
    82                 tp1 = std::chrono::system_clock::now();
    83                 for (
size_t i = 0; i < maag_rep.size(); ++i) {
    84                     if ((i+1) % 25000 == 0) {
    85                         cout << 
"Processed " << (int) i << 
" / " << (
int) maag_rep.size() << 
" MAAGs.\t" << (std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()) - std::chrono::system_clock::to_time_t(tp1)) << 
" sec." << endl;
    87                     if (good_clonotypes[i]) {
    88                         if(!this->updateTempVec(fb, maag_rep[i], new_param_vec, error_mode)) {
    89                             cout << 
"bad maag:\t" << (int) i << endl;
    94                 this->updateModel(model, new_param_vec, maag_rep, prob_vec, prev_ll, removed, error_mode);
    96                 std::cout << new_param_vec.error_prob() << std::endl;
    98                 logLvec.push_back(prev_ll);
   101             cout << endl << 
"Done. Resulting loglikelihood:\t" << loglikelihood(prob_vec) << endl << endl;
   112                            ErrorMode error_mode)
 const   114             if (!fb.process(maag, error_mode)) {
   118             while (!fb.is_empty()) {
   119                 event_pair_t ep = fb.nextEvent();
   120                 new_param_vec[ep.first] += ep.second;
   122                 if (std::isnan(ep.second)) {
   127             if (error_mode) { new_param_vec.set_error_prob(new_param_vec.error_prob() + fb.err_prob()); }
   130                 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 0)] += fb.VJ_nuc_probs()[0];
   131                 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 1)] += fb.VJ_nuc_probs()[1];
   132                 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 2)] += fb.VJ_nuc_probs()[2];
   133                 new_param_vec[new_param_vec.
event_index(VJ_VAR_JOI_INS_NUC, 0, 3)] += fb.VJ_nuc_probs()[3];
   136                 for (
auto prev_nuc = 0; prev_nuc < 4; ++prev_nuc) {
   137                     for (
auto next_nuc = 0; next_nuc < 4; ++next_nuc, ++k) {
   138                         new_param_vec[new_param_vec.
event_index(VDJ_VAR_DIV_INS_NUC, prev_nuc, next_nuc)] += fb.VD_nuc_probs()[k];
   143                 for (
auto prev_nuc = 0; prev_nuc < 4; ++prev_nuc) {
   144                     for (
auto next_nuc = 0; next_nuc < 4; ++next_nuc, ++k) {
   145                         new_param_vec[new_param_vec.
event_index(VDJ_DIV_JOI_INS_NUC, prev_nuc, next_nuc)] += fb.DJ_nuc_probs()[k];
   156                          MAAGRepertoire &maag_rep,
   157                          vector<prob_t> &prob_vec,
   160                          ErrorMode error_mode)
 const   163             new_param_vec.set_error_prob(.0003);
   170             for (
size_t i = 0; i < maag_rep.size(); ++i) {
   171                 prob_vec[i] = maag_rep[i].fullProbability();
   173             prob_summary(prob_vec, prev_ll);
   174             prev_ll = loglikelihood(prob_vec);
   181 #endif //YMIR_EM_ALGORITHM_H 
Interface for algorithms for statistical inference of assembling model parameters. 
Definition: statisticalinferencealgorithm.h:46
 
event_ind_t event_index(EventClass event_class, event_ind_t event_family, event_ind_t event_index) const 
Definition: modelparametervector.h:261
 
Definition: maagforwardbackwardalgorithm.h:20
 
Multi-Alignment Assembly Graph - basic class for representing all possible generation scenarios of a ...
Definition: maag.h:46
 
const ModelParameterVector & event_probabilities() const 
Access to vector of probabilities. 
Definition: probabilisticassemblingmodel.h:185
 
void updateEventProbabilities(MAAGRepertoire *repertoire, bool verbose=true)
Update event probabilities in the given MAAG repertoire with new ones. 
Definition: probabilisticassemblingmodel.h:143
 
Definition: statisticalinferencealgorithm.h:49
 
void normaliseEventFamilies()
Normalise each event family to have sum equal to 1. 
Definition: modelparametervector.h:316
 
MAAGRepertoire buildGraphs(const ClonesetView &repertoire, MetadataMode save_metadata, ErrorMode error_mode, SequenceType sequence_type=NUCLEOTIDE, bool verbose=true) const 
Build a set of MAAGs from the given cloneset. 
Definition: probabilisticassemblingmodel.h:126
 
Class for storing parameters of assembling statistical model. Note: event with index 0 (zero) is "nul...
Definition: modelparametervector.h:68
 
Definition: probabilisticassemblingmodel.h:41
 
Definition: repertoire.h:51
 
void fill(prob_t val=0)
Fill the vector with the given value. 
Definition: modelparametervector.h:377
 
void updateModelParameterVector(const ModelParameterVector &vec)
Set a new event probabilities vector to this model. 
Definition: probabilisticassemblingmodel.h:199
 
Implementation of the EM-algorithm for statistical inference of assembling model parameters. Classic version described in (Murugan et al 2012) 
Definition: em_algorithm.h:23