Ymir  .9
Fast C++ tool for computation of assembling probabilities, statistical inference of assembling statistical model and generation of artificial sequences of T-cell receptors data.
statisticalinferencealgorithm.h
1 /*
2  * Ymir <imminfo.github.io/ymir>
3  *
4  * This file is part of Ymir, a fast C++ tool for computation of assembling
5  * probabilities, statistical inference of assembling statistical model
6  * and generation of artificial sequences of T-cell receptors data.
7  *
8  *
9  * Copyright 2015 Vadim Nazarov <vdn at mailbox dot com>
10  *
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  *
15  * http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing, software
18  * distributed under the License is distributed on an "AS IS" BASIS,
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  * See the License for the specific language governing permissions and
21  * limitations under the License.
22  */
23 
24 #ifndef _STATISTICALINFERENCEALGORITHM_H
25 #define _STATISTICALINFERENCEALGORITHM_H
26 
27 
28 #include <chrono>
29 #include <ctime>
30 
31 #include "probabilisticassemblingmodel.h"
32 #include "maagforwardbackwardalgorithm.h"
33 #include "tools.h"
34 
35 
36 namespace ymir {
37 
38  class StatisticalInferenceAlgorithm;
39 
40 
47  public:
48 
50 
51 
53 
54 
55  bool check(const string& param_name) const {
56  if (_json.get(param_name, "__NA__").asString() == "__NA__") {
57  cout << "Obligatory parameter '" << param_name << "' hasn't been found, please re-run the algorithm with the supplied parameter." << endl;
58  return false;
59  }
60  return true;
61  }
62 
63 
64  Json::Value get(const string& param_name, const Json::Value& default_value) const {
65  return _json.get(param_name, default_value).asString();
66  }
67 
68 
69  AlgorithmParameters& set(const string& param_name, const Json::Value& value) {
70  _json[param_name] = value;
71  return *this;
72  }
73 
74 
75  const Json::Value& operator[](const string& param_name) const { return _json[param_name]; }
76 
77 
78  private:
79 
80  Json::Value _json;
81 
82  };
83 
84 
85  virtual ~StatisticalInferenceAlgorithm() { }
86 
87  virtual std::vector<prob_t> statisticalInference(const ClonesetView &repertoire,
89  const AlgorithmParameters &algo_param = AlgorithmParameters(),
90  ErrorMode error_mode = NO_ERRORS) const = 0;
91 
92 
93  void filterOut(const ClonesetView &rep_nonc,
94  const MAAGRepertoire &maag_rep,
95  vector<prob_t> &prob_vec,
96  vector<bool> &good_clonotypes,
97  size_t &removed,
98  size_t &zero_prob,
99  size_t &no_alignments) const
100  {
101  good_clonotypes.resize(maag_rep.size(), true);
102  prob_vec.resize(maag_rep.size(), 0);
103  removed = 0;
104  zero_prob = 0;
105  no_alignments = 0;
106 
107 #ifdef USE_OMP
108 #pragma omp parallel for
109 #endif
110  for (size_t i = 0; i < maag_rep.size(); ++i) {
111  prob_vec[i] = maag_rep[i].fullProbability();
112  }
113 
114  for (size_t i = 0; i < maag_rep.size(); ++i) {
115  if (rep_nonc[i].is_good()) {
116  if (std::isnan(prob_vec[i]) || (std::abs(prob_vec[i]) < 1e-80) || (std::abs(prob_vec[i]) >= 1)) {
117  good_clonotypes[i] = false;
118  ++removed;
119  ++zero_prob;
120  }
121  } else {
122  good_clonotypes[i] = false;
123  ++removed;
124  ++no_alignments;
125  }
126  }
127 
128  if (removed) {
129  cout << "Removed " << (int) removed
130  << " error-probability clonotypes. Check your minimal Diversity gene length to align and other parameters to make sure it won't happen again in the future." << endl;
131  cout << "\tZero probabilities:\t" << (int) zero_prob << std::endl;
132  cout << "\tBad alignments:\t" << (int) no_alignments << " (replaces with zeros)" << std::endl;
133  } else {
134  cout << "No clonotypes with error probabilities has been found. It's good in case you don't know." << std::endl;
135  }
136  }
137 
138  };
139 
140 }
141 
142 #endif
Definition: aligner.h:37
Interface for algorithms for statistical inference of assembling model parameters.
Definition: statisticalinferencealgorithm.h:46
Definition: statisticalinferencealgorithm.h:49
Definition: probabilisticassemblingmodel.h:41
Definition: repertoire.h:51