Ymir  .9
Fast\C++toolforcomputationofassemblingprobabilities,statisticalinferenceofassemblingstatisticalmodelandgenerationofartificialsequencesofT-cellreceptorsdata.
modelparametervector.h
1 /*
2  * Ymir <imminfo.github.io/ymir>
3  *
4  * This file is part of Ymir, a fast C++ tool for computation of assembling
5  * probabilities, statistical inference of assembling statistical model
6  * and generation of artificial sequences of T-cell receptors data.
7  *
8  *
9  * Copyright 2015 Vadim Nazarov <vdn at mailbox dot com>
10  *
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  *
15  * http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing, software
18  * distributed under the License is distributed on an "AS IS" BASIS,
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  * See the License for the specific language governing permissions and
21  * limitations under the License.
22  */
23 
24 #ifndef _MODELPARAMETERVECTOR_H_
25 #define _MODELPARAMETERVECTOR_H_
26 
27 
28 #include <vector>
29 
30 #include "types.h"
31 
32 
33 using namespace std;
34 
35 
36 namespace ymir {
37 
38 
69  public:
70 
71 
83  ModelParameterVector(Recombination vec_type,
84  const vector<prob_t>& param_vec,
85  const vector<event_ind_t>& lens_vec,
86  const vector<event_ind_t>& event_classes,
87  const vector<seq_len_t>& event_family_col_numbers,
88  const vector<prob_t>& laplace_vec = vector<prob_t>(),
89  prob_t err_prob = 0,
90  bool do_normalise = true,
91  const vector<seq_len_t>& d_genes_min_len = vector<seq_len_t>())
92  : _err_prob(err_prob)
93  {
94  _recomb = vec_type;
95 
96  _vec = vector<prob_t>();
97  _vec.reserve(param_vec.size() + 1);
98  _vec.push_back(0);
99  _vec.insert(_vec.end(), param_vec.begin(), param_vec.end());
100 
101  _event_classes = vector<event_ind_t>();
102  _event_classes.reserve(event_classes.size() + 1);
103  _event_classes.push_back(0);
104  for (event_ind_t i = 0; i < event_classes.size(); ++i) {
105  _event_classes.push_back(event_classes[i] + 1);
106  }
107 
108  _event_family_col_numbers = vector<seq_len_t>();
109  _event_family_col_numbers.reserve(event_family_col_numbers.size() + 1);
110  _event_family_col_numbers.push_back(0);
111  for (event_ind_t i = 0; i < event_family_col_numbers.size(); ++i) {
112  _event_family_col_numbers.push_back(event_family_col_numbers[i]);
113  }
114 
115  _edges.reserve(lens_vec.size() + 4);
116  _edges.push_back(0);
117  _edges.push_back(1);
118  for (event_ind_t i = 0; i < lens_vec.size(); ++i) {
119  _edges.push_back(_edges[i + 1] + lens_vec[i]);
120  }
121  _edges.push_back(_vec.size());
122  _event_classes.push_back(_edges.size() - 1);
123 
124  _laplace = vector<prob_t>();
125  if (laplace_vec.size()) {
126  _laplace.reserve(laplace_vec.size() + 1);
127  _laplace.push_back(0);
128  for (event_ind_t i = 0; i < laplace_vec.size(); ++i) {
129  _laplace.push_back(laplace_vec[i]);
130  }
131  } else {
132  // Laplace correction equal to zero if vector is not supplied.
133  _laplace.resize(_edges.size() - 2, 0);
134  }
135 
136  _d_genes_min_len = vector<seq_len_t>();
137  if (vec_type == VDJ_RECOMB) {
138  if (d_genes_min_len.size()) {
139  _d_genes_min_len = d_genes_min_len;
140  } else {
141  _d_genes_min_len.resize(this->eventClassSize(VDJ_DIV_DEL), DEFAULT_DIV_GENE_MIN_LEN);
142  }
143  }
144 
145  if (do_normalise) {
146  this->normaliseEventFamilies();
147  }
148  }
149 
150 
151  bool operator==(const ModelParameterVector &other) const {
152  if ((_recomb != other._recomb)
153  || (_err_prob != other._err_prob)
154  || (_vec.size() != other._vec.size())
155  || (_edges.size() != other._edges.size())
156  || (_event_classes.size() != other._event_classes.size())
157  || (_event_family_col_numbers.size() != other._event_family_col_numbers.size())
158  || (_laplace.size() != other._laplace.size())
159  || (_d_genes_min_len.size() != other._d_genes_min_len.size())) {
160  return false;
161  }
162 
163  for (size_t i = 0; i < _vec.size(); ++i) { if (_vec[i] != other._vec[i]) { return false; } }
164  for (size_t i = 0; i < _edges.size(); ++i) { if (_edges[i] != other._edges[i]) { return false; } }
165  for (size_t i = 0; i < _event_classes.size(); ++i) { if (_event_classes[i] != other._event_classes[i]) { return false; } }
166  for (size_t i = 0; i < _event_family_col_numbers.size(); ++i) { if (_event_family_col_numbers[i] != other._event_family_col_numbers[i]) { return false; } }
167  for (size_t i = 0; i < _laplace.size(); ++i) { if (_laplace[i] != other._laplace[i]) { return false; } }
168  for (size_t i = 0; i < _d_genes_min_len.size(); ++i) { /* cout << (int) _d_genes_min_len[i] << ":" << (int) other._d_genes_min_len[i] << endl; */ if (_d_genes_min_len[i] != other._d_genes_min_len[i]) { return false; } }
169 
170  return true;
171  }
172 
173 
174  Recombination recombination() const { return _recomb; }
175 
176 
177  //============= VECTOR INDICES ACCESS =============//
178 
179 
180  event_ind_t eventFamilySize(event_ind_t event_family) const {
181 #ifndef DNDEBUG
182  if (event_family >= _edges.size() || event_family + 1 >= _edges.size()) {
183  throw(std::runtime_error("Index is out of bounds."));
184  }
185 #endif
186  return _edges[event_family + 1] - _edges[event_family];
187  }
188 
189  event_ind_t eventFamilySize(event_ind_t event_class, event_ind_t event_family) const {
190 #ifndef DNDEBUG
191  if (event_class >= _event_classes.size()) {
192  throw(std::runtime_error("Index is out of bounds."));
193  }
194 #endif
195  return eventFamilySize(_event_classes[event_class] + event_family);
196  }
197 
198  event_ind_t eventClassSize(event_ind_t event_class) const {
199 #ifndef DNDEBUG
200  if (event_class + 1 >= _event_classes.size()
201  || _event_classes[event_class + 1] >= _edges.size()
202  || event_class >= _event_classes.size()
203  || _event_classes[event_class] >= _edges.size()) {
204  throw(std::runtime_error("Index is out of bounds."));
205  }
206 #endif
207  return _edges[_event_classes[event_class + 1]] - _edges[_event_classes[event_class]];
208  }
209 
210 
220  prob_t& operator[] (event_ind_t gl_event_index) {
222 #ifndef DNDEBUG
223  if (gl_event_index >= _vec.size()) {
224  throw(std::runtime_error("Index " + std::to_string(gl_event_index) + " is out of bounds."));
225  }
226 #endif
227  return _vec[gl_event_index];
228  }
229 
230  prob_t operator[] (event_ind_t gl_event_index) const {
231 #ifndef DNDEBUG
232  if (gl_event_index >= _vec.size()) {
233  throw(std::runtime_error("Index " + std::to_string(gl_event_index) + " is out of bounds."));
234  }
235 #endif
236  return _vec[gl_event_index];
237  }
238 
239  prob_t operator() (event_ind_t event_family, event_ind_t loc_event_index) const {
240 #ifndef DNDEBUG
241  if (event_family >= _edges.size() || (_edges[event_family] + loc_event_index) >= _vec.size()) {
242  throw(std::runtime_error("Index is out of bounds."));
243  }
244 #endif
245  return _vec[_edges[event_family] + loc_event_index];
246  }
248 
249  vector<prob_t>::const_iterator get_iterator(event_ind_t i) const {
250 #ifndef DNDEBUG
251  if (i > _vec.size()) { throw(std::runtime_error("Index is out of bounds.")); }
252 #endif
253  return _vec.begin() + i;
254  }
255 
256 
261  event_ind_t event_index(EventClass event_class, event_ind_t event_family, event_ind_t event_index) const {
262 #ifndef DNDEBUG
263  if (!(_edges[_event_classes[event_class] + event_family] + event_index < _edges[_event_classes[event_class] + event_family + 1])) {
264  throw(std::runtime_error("Index " + std::to_string(event_class) + ":" + std::to_string(event_family) + ":" + std::to_string(event_index) + " is out of bounds."));
265 
266  }
267 #endif
268  return _edges[_event_classes[event_class] + event_family] + event_index;
269  }
270 
271  event_ind_t event_index(EventClass event_class, event_ind_t event_family, event_ind_t event_row, event_ind_t event_column) const {
272 #ifndef DNDEBUG
273  if (event_class >= _event_classes.size()) {
274  throw(std::runtime_error("Index is out of bounds."));
275  }
276 
277  if (_event_classes[event_class] + event_family >= _edges.size()) {
278  throw(std::runtime_error("Index is out of bounds."));
279  }
280 
281  if (_event_classes[event_class] + event_family >= _event_family_col_numbers.size()) {
282  throw(std::runtime_error("Index is out of bounds."));
283  }
284 
285  if ((_edges[_event_classes[event_class] + event_family]
286  + event_row * _event_family_col_numbers[_event_classes[event_class] + event_family] + event_column) >= _vec.size()) {
287  throw(std::runtime_error("Index is out of bounds."));
288  }
289 #endif
290  return _edges[_event_classes[event_class] + event_family]
291  + event_row * _event_family_col_numbers[_event_classes[event_class] + event_family] + event_column;
292  }
293 
294  prob_t event_prob(EventClass event_class, event_ind_t event_family, event_ind_t event_index) const {
295 #ifndef DNDEBUG
296  if (this->event_index(event_class, event_family, event_index) >= _vec.size()) {
297  throw(std::runtime_error("Index " + std::to_string(event_class) + ":" + std::to_string(event_family) + ":" + std::to_string(event_index) + " is out of bounds."));
298  }
299 #endif
300  return _vec[this->event_index(event_class, event_family, event_index)];
301  }
302 
303  prob_t event_prob(EventClass event_class, event_ind_t event_family, event_ind_t event_row, event_ind_t event_column) const {
304 #ifndef DNDEBUG
305  if (this->event_index(event_class, event_family, event_row, event_column) >= _vec.size()) {
306  throw(std::runtime_error("Index is out of bounds."));
307  }
308 #endif
309  return _vec[this->event_index(event_class, event_family, event_row, event_column)];
310  }
311 
312 
317  prob_t prob_sum;
318  for (event_ind_t i = 2; i < _edges.size(); ++i) {
319  prob_sum = _laplace[i-1] * (_edges[i] - _edges[i-1]);
320  for (event_ind_t j = _edges[i-1]; j < _edges[i]; ++j) {
321  prob_sum += _vec[j];
322  }
323 
324  if (prob_sum) {
325  for (event_ind_t j = _edges[i-1]; j < _edges[i]; ++j) {
326  _vec[j] = (_vec[j] + _laplace[i-1]) / prob_sum;
327  }
328  }
329  }
330  }
331 
332  void normaliseEventClass(EventClass event_class) {
333  event_ind_t event_family = _event_classes[event_class];
334 
335  prob_t prob_sum = _laplace[event_family] * (_edges[event_family + 1] - _edges[event_family]);
336  for (event_ind_t j = _edges[event_family]; j < _edges[event_family + 1]; ++j) {
337  prob_sum += _vec[j];
338  }
339 
340  if (prob_sum) {
341  for (event_ind_t j = _edges[event_family]; j < _edges[event_family + 1]; ++j) {
342  _vec[j] = (_vec[j] + _laplace[event_family]) / prob_sum;
343  }
344  }
345  }
346 
347 
351  event_ind_t max_VJ_ins_len() const {
352  return this->eventFamilySize(VJ_VAR_JOI_INS_LEN, 0) - 1;
353  }
354  event_ind_t max_VD_ins_len() const {
355  return this->eventFamilySize(VDJ_VAR_DIV_INS_LEN, 0) - 1;
356  }
357  event_ind_t max_DJ_ins_len() const {
358  return this->eventFamilySize(VDJ_DIV_JOI_INS_LEN, 0) - 1;
359  }
360 
361 
365  inline seq_len_t D_min_len(event_ind_t d_index) const { return _d_genes_min_len[d_index - 1]; }
366 
367 
368  inline prob_t error_prob() const { return _err_prob; }
369 
370 
371  inline void set_error_prob(prob_t val) { _err_prob = val; }
372 
373 
377  void fill(prob_t val = 0) {
378  _vec[0] = 0;
379  for (size_t i = 1; i < _vec.size(); ++i) {
380  _vec[i] = val;
381  }
382  _err_prob = val;
383 // _err_prob = (val > 1e-15) ? .1 : 0;
384  }
385 
386 
387  void familyFill(event_ind_t event_family, prob_t val = 0) {
388  for (event_ind_t i = _edges[_event_classes[event_family]]; i < _edges[_event_classes[event_family + 1]]; ++i) {
389  _vec[i] = val;
390  }
391  }
392 
393 
394  size_t families() const { return _event_classes.size(); }
395 
396 
397  size_t size() const { return _vec.size(); }
398 
399 
400  seq_len_t n_columns(EventClass event_class, event_ind_t event_family = 0) const {
401 #ifndef DNDEBUG
402  if (event_class >= _event_classes.size()) {
403  throw(std::runtime_error("Wrong event class " + std::to_string(event_class)));
404  }
405  if ((_event_classes[event_class] + event_family) >= _event_family_col_numbers.size()) {
406  throw(std::runtime_error("Index " + std::to_string(_event_classes[event_class] + event_family) + " is out of bounds."));
407  }
408 #endif
409  return _event_family_col_numbers[_event_classes[event_class] + event_family];
410  }
411 
412  private:
413 
414  prob_t _err_prob;
415  vector<prob_t> _vec;
416  vector<event_ind_t> _edges;
417  vector<event_ind_t> _event_classes;
418  vector<seq_len_t> _event_family_col_numbers;
419  vector<prob_t> _laplace;
420  vector<seq_len_t> _d_genes_min_len;
421  Recombination _recomb;
422 
427 
428  };
429 }
430 
431 #endif
Definition: aligner.h:37
event_ind_t event_index(EventClass event_class, event_ind_t event_family, event_ind_t event_index) const
Definition: modelparametervector.h:261
void normaliseEventFamilies()
Normalise each event family to have sum equal to 1.
Definition: modelparametervector.h:316
Class for storing parameters of assembling statistical model. Note: event with index 0 (zero) is "nul...
Definition: modelparametervector.h:68
ModelParameterVector(Recombination vec_type, const vector< prob_t > &param_vec, const vector< event_ind_t > &lens_vec, const vector< event_ind_t > &event_classes, const vector< seq_len_t > &event_family_col_numbers, const vector< prob_t > &laplace_vec=vector< prob_t >(), prob_t err_prob=0, bool do_normalise=true, const vector< seq_len_t > &d_genes_min_len=vector< seq_len_t >())
Definition: modelparametervector.h:83
void fill(prob_t val=0)
Fill the vector with the given value.
Definition: modelparametervector.h:377