Ymir  .9
Fast\C++toolforcomputationofassemblingprobabilities,statisticalinferenceofassemblingstatisticalmodelandgenerationofartificialsequencesofT-cellreceptorsdata.
maag.h
1 /*
2  * Ymir <imminfo.github.io/ymir>
3  *
4  * This file is part of Ymir, a fast C++ tool for computation of assembling
5  * probabilities, statistical inference of assembling statistical model
6  * and generation of artificial sequences of T-cell receptors data.
7  *
8  *
9  * Copyright 2015 Vadim Nazarov <vdn at mailbox dot com>
10  *
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  *
15  * http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing, software
18  * distributed under the License is distributed on an "AS IS" BASIS,
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  * See the License for the specific language governing permissions and
21  * limitations under the License.
22  */
23 
24 #ifndef _MAAG_H_
25 #define _MAAG_H_
26 
27 #define VJ_CHAIN_SIZE 4
28 #define VDJ_CHAIN_SIZE 7
29 
30 
31 #include "multimatrixchain.h"
32 
33 
34 namespace ymir {
35 
36  class MAAG;
37 
38 
45 // class MAAG : public ProbMMC {
46  class MAAG : protected ProbMMC {
47 
48  friend class MAAGForwardBackwardAlgorithm; // ):
49  friend class MAAGBuilder; // )):
50 
51  public:
52 
56  MAAG()
57  : _recomb(UNDEF_RECOMB),
58  _events(nullptr),
59  _errors(nullptr),
60  _sequence(nullptr),
61  _seq_poses(nullptr),
62  _n_poses(0),
63  _seq_type(NUCLEOTIDE),
64  _err_prob(0)
65  {
66  _values.reserve(1);
67  }
68 
72  MAAG(const MAAG &other)
73  : ProbMMC(other),
74  _recomb(other._recomb),
75  _n_poses(other._n_poses),
76  _seq_type(other._seq_type),
77  _events(other._events ? new EventIndMMC(*other._events) : nullptr),
78  _errors(other._errors ? new ErrMMC(*other._errors) : nullptr),
79  _err_prob(other._err_prob)
80  {
81  if (other._seq_poses) {
82  _seq_poses.reset(new seq_len_t[_n_poses]);
83  std::copy(other._seq_poses.get(), other._seq_poses.get() + _n_poses, _seq_poses.get());
84  } else {
85  _seq_poses = nullptr;
86  }
87 
88  if (other._sequence) { _sequence.reset(new std::string(*other._sequence)); }
89  else { _sequence = nullptr; }
90  }
91 
92 
93  MAAG(MAAG &&other) {
94  std::swap(_recomb, other._recomb);
95 
96  _chain.swap(other._chain);
97  _values.swap(other._values);
98 
99  _events.swap(other._events);
100 
101  _errors.swap(other._errors);
102  std::swap(_err_prob, other._err_prob);
103 
104  std::swap(_n_poses, other._n_poses);
105  _seq_poses.swap(other._seq_poses);
106 
107  _sequence.swap(other._sequence);
108  std::swap(_seq_type, other._seq_type);
109  }
110 
111 
116 // MAAG(ProbMMC &prob_mmc)
117 // : _recomb(prob_mmc.chainSize() == VJ_CHAIN_SIZE ? VJ_RECOMB : VDJ_RECOMB), // TODO: fix this
118 // _events(nullptr),
119 // _sequence(nullptr),
120 // _seq_poses(nullptr),
121 // _n_poses(0),
122 // _seq_type(NUCLEOTIDE)
123 // {
124 // this->swap(prob_mmc);
125 // }
126 //
127 //
128 // MAAG(ProbMMC &prob_mmc, ErrMMC &err_mmc)
129 // : MAAG(prob_mmc)
130 // {
131 // _errors.reset(new ErrMMC());
132 // _errors.get()->swap(err_mmc);
133 // }
134 
135 
139 // MAAG(ProbMMC &prob_mmc,
140 // EventIndMMC &eventind_mcc,
141 // const std::string &sequence,
142 // unique_ptr<seq_len_t[]> &seq_poses,
143 // seq_len_t n_poses,
144 // SequenceType seq_type)
145 // : _recomb(prob_mmc.chainSize() == VJ_CHAIN_SIZE ? VJ_RECOMB : VDJ_RECOMB),
146 // _n_poses(n_poses),
147 // _seq_type(seq_type)
148 // {
149 // this->swap(prob_mmc);
150 //
151 // _events.reset(new EventIndMMC());
152 // _events.get()->swap(eventind_mcc);
153 //
154 // _sequence.reset(new std::string(sequence));
155 //
156 // _seq_poses.swap(seq_poses);
157 // }
158 //
159 //
160 // MAAG(ProbMMC &prob_mmc,
161 // EventIndMMC &eventind_mcc,
162 // ErrMMC &err_mmc,
163 // const std::string &sequence,
164 // unique_ptr<seq_len_t[]> &seq_poses,
165 // seq_len_t n_poses,
166 // SequenceType seq_type)
167 // : MAAG(prob_mmc, eventind_mcc, sequence, seq_poses, n_poses, seq_type)
168 // {
169 // _errors.reset(new ErrMMC());
170 // _errors.get()->swap(err_mmc);
171 // }
172 
173 
177  virtual ~MAAG() { }
178 
179 
180  MAAG& operator= (const MAAG &other) {
181  _recomb = other._recomb;
182  _chain = other._chain;
183  _values = other._values;
184 
185  if (other._errors) {
186  _errors.reset(new ErrMMC(*other._errors));
187  } else {
188  _errors.reset();
189  }
190  _err_prob = other._err_prob;
191 
192  if (other._events) {
193  _events.reset(new EventIndMMC(*other._events));
194  }
195  else { _events.reset(); }
196 
197  _n_poses = other._n_poses;
198 
199  if (other._seq_poses) {
200  _seq_poses.reset(new seq_len_t[_n_poses]);
201  std::copy(other._seq_poses.get(), other._seq_poses.get() + _n_poses, _seq_poses.get());
202  } else {
203  _seq_poses = nullptr;
204  }
205 
206  if (other._sequence) {
207  _sequence.reset(new std::string(*other._sequence));
208  }
209  else { _sequence.reset(); }
210 
211  _seq_type = other._seq_type;
212 
213  return *this;
214  }
215 
216 
217  MAAG& operator=(MAAG &&other) {
218  std::swap(_recomb, other._recomb);
219 
220  _chain.swap(other._chain);
221  _values.swap(other._values);
222 
223  _events.swap(other._events);
224 
225  _errors.swap(other._errors);
226  std::swap(_err_prob, other._err_prob);
227 
228  std::swap(_n_poses, other._n_poses);
229  _seq_poses.swap(other._seq_poses);
230 
231  _sequence.swap(other._sequence);
232  std::swap(_seq_type, other._seq_type);
233 
234  return *this;
235  }
236 
237 
248  prob_t fullProbability(event_ind_t v_index, event_ind_t j_index) const {
250  if (_recomb == VJ_RECOMB) {
251  // P(Vi, Ji) * P(#dels | Vi) * P(V-J insertion seq) * P(#dels | Ji)
252  return (matrix(0, 0)(v_index, j_index) * // P(Vi & Ji)
253  matrix(1, v_index) * // P(#dels | Vi)
254  matrix(2, 0) * // P(V-J insertion seq)
255  matrix(3, j_index))(0, 0); // P(#dels | Ji)
256 
257  } else {
258  return 0;
259  }
260  }
261 
262  prob_t fullProbability(event_ind_t v_index, event_ind_t d_index, event_ind_t j_index) const {
263  if (_recomb == VDJ_RECOMB) {
264  // P(Vi) * P(#dels | Vi) * P(V-D3' insertion seq) * P(D5'-D3' deletions | Di) * P(D5'-J insertion seq) * P(#dels | Ji) * P(Ji & Di)
265  return (matrix(0, v_index) * // P(Vi)
266  matrix(1, v_index) * // P(#dels | Vi)
267  matrix(2, 0) * // P(V-D3' insertion seq)
268  matrix(3, d_index) * // P(D5'-D3' deletions | Di)
269  matrix(4, 0) * // P(D5'-J insertion seq)
270  matrix(5, j_index) * // P(#dels | Ji)
271  matrix(6, 0)(j_index, d_index))(0, 0); // P(Ji & Di)
272  } else {
273  return 0;
274  }
275  }
276 
277  prob_t fullProbability(MAAGComputeProbAction action = SUM_PROBABILITY) const {
278  // choose the max full probability from all possible recombinations of V(D)J gene segment indices
279  if (_recomb == UNDEF_RECOMB) {
280  return 0;
281  }
282 
283  if (action == MAX_PROBABILITY) {
284  prob_t max_prob = 0, cur_prob = 0;
285  if (_recomb == VJ_RECOMB) {
286  for (event_ind_t v_index = 0; v_index < this->nVar(); ++v_index) {
287  for (event_ind_t j_index = 0; j_index < this->nJoi(); ++j_index) {
288  cur_prob = this->fullProbability(v_index, j_index);
289  if (cur_prob > max_prob) { max_prob = cur_prob; }
290  }
291  }
292  } else {
293  for (event_ind_t v_index = 0; v_index < this->nVar(); ++v_index) {
294  for (event_ind_t d_index = 0; d_index < this->nDiv(); ++d_index) {
295  for (event_ind_t j_index = 0; j_index < this->nJoi(); ++j_index) {
296  cur_prob = this->fullProbability(v_index, d_index, j_index);
297  if (cur_prob > max_prob) { max_prob = cur_prob; }
298  }
299  }
300  }
301  }
302  return max_prob;
303  }
304  // compute the sum of full probabilities of all possible recombinations of V(D)J gene segment indices
305  else {
306  prob_t sum_prob = 0;
307  if (_recomb == VJ_RECOMB) {
308  for (event_ind_t v_index = 0; v_index < this->nVar(); ++v_index) {
309  for (event_ind_t j_index = 0; j_index < this->nJoi(); ++j_index) {
310  sum_prob += this->fullProbability(v_index, j_index);
311  }
312  }
313  } else {
314  for (event_ind_t v_index = 0; v_index < this->nVar(); ++v_index) {
315  for (event_ind_t d_index = 0; d_index < this->nDiv(); ++d_index) {
316  for (event_ind_t j_index = 0; j_index < this->nJoi(); ++j_index) {
317  sum_prob += this->fullProbability(v_index, d_index, j_index);
318  }
319  }
320  }
321  }
322  return sum_prob;
323  }
324  }
326 
327 
333  event_ind_t nVar() const { return (_chain.size() == VJ_CHAIN_SIZE) ? this->nodeRows(VJ_VAR_JOI_GEN_I) : this->nodeSize(VDJ_VAR_GEN_I); }
335 
336  event_ind_t nJoi() const { return (_chain.size() == VJ_CHAIN_SIZE) ? this->nodeColumns(VJ_VAR_JOI_GEN_I) : this->nodeSize(VDJ_JOI_DEL_I); }
337 
338  event_ind_t nDiv() const { return (_chain.size() == VJ_CHAIN_SIZE) ? 0 : _chain[VDJ_DIV_DEL_I].size(); }
340 
341 
353  prob_t event_probability(node_ind_t node_i, matrix_ind_t mat_i, dim_t row, dim_t col) const {
355  return (*this)(node_i, mat_i, row, col);
356  }
357 
358  event_ind_t event_index(node_ind_t node_i, matrix_ind_t mat_i, dim_t row, dim_t col) const {
359  return _events ? (*_events)(node_i, mat_i, row, col) : 0;
360  };
361 
362  error_num_t errors(node_ind_t node_i, matrix_ind_t mat_i, dim_t row, dim_t col) const {
363  return _errors ? (*_errors)(node_i, mat_i, row, col) : 0;
364  };
366 
367 
368  seq_len_t position(seq_len_t i) const { return _seq_poses[i]; }
369 
370 
377  bool save(const std::string &filepath) const {
379  std::ofstream ofs(filepath);
380  return this->save(ofs);
381  }
382 
383  bool save(std::ostream &stream) const {
384  return false;
385  }
386 
387  bool load(const std::string &filepath) {
388  std::ifstream ifs(filepath);
389  return this->load(ifs);
390  }
391 
392  bool load(std::istream &stream) {
393  return false;
394  }
396 
397 
401  Recombination recombination() const { return _recomb; }
403 
404  bool is_vj() const { return _recomb == VJ_RECOMB; }
405 
406  bool is_vdj() const { return _recomb == VDJ_RECOMB; }
407 
408 // bool is_vd2j() const { return _recomb == VD2J_RECOMB; }
410 
411 
412  bool has_events() const { return (bool) _events; }
413 
414 
415  bool has_errors() const { return (bool) _errors; }
416 
417  prob_t error_prob() const { return _err_prob; }
418 
419 
420  const std::string& sequence() const {
421 #ifndef DNDEBUG
422  if (!_sequence) { throw(std::runtime_error("Access to a MAAG sequence when it's a nullptr!")); }
423 #endif
424  return *_sequence;
425  }
426 
427 
428  seq_len_t n_poses() const { return _n_poses; }
429 
430 
431  SequenceType sequence_type() const { return _seq_type; }
432 
433  dim_t rows(node_ind_t node_i) const { return this->nodeRows(node_i); }
434 
435  dim_t cols(node_ind_t node_i) const { return this->nodeColumns(node_i); }
436 
437 
438  protected:
439 
440 // pEventIndMMC _events; /** Matrix of indices of events for each edge. */
441 //
442 // unique_ptr<seq_len_t[]> _seq_poses; /** Vector of the initial clonotype sequence's positions for each vertex. */
443 // seq_len_t _n_poses;
444 //
445 // unique_ptr<sequence_t> _sequence; /** Nucleotide or amino acid CDR3 sequence. */
446 // SequenceType _seq_type;
447 //
448 // pErrMMC _errors; /** Matrix of number of errors for each scenario event position. */
449 // prob_t _err_prob; /** Probability of a error. */
450 //
451 // Recombination _recomb;
452 
453  prob_t _err_prob;
455  pEventIndMMC _events;
456  pErrMMC _errors;
457  unique_ptr<seq_len_t[]> _seq_poses;
458  unique_ptr<sequence_t> _sequence;
460  SequenceType _seq_type;
461  Recombination _recomb;
462 
463  seq_len_t _n_poses;
464 
465  };
466 
467 }
468 
469 
470 //typedef MAAG<VJ_RECOMB, NO_METADATA, NO_ERRORS> MAAG_VJ_NM_NE;
471 //
472 //typedef MAAG<VJ_RECOMB, NO_METADATA, COMPUTE_ERRORS> MAAG_VJ_NM_ER;
473 //
474 //typedef MAAG<VJ_RECOMB, SAVE_METADATA, NO_ERRORS> MAAG_VJ_MD_NE;
475 //
476 //typedef MAAG<VJ_RECOMB, SAVE_METADATA, COMPUTE_ERRORS> MAAG_VJ_MD_ER;
477 //
478 //typedef MAAG<VDJ_RECOMB, NO_METADATA, NO_ERRORS> MAAG_VDJ_NM_NE;
479 //
480 //typedef MAAG<VDJ_RECOMB, NO_METADATA, COMPUTE_ERRORS> MAAG_VDJ_NM_ER;
481 //
482 //typedef MAAG<VDJ_RECOMB, SAVE_METADATA, NO_ERRORS> MAAG_VDJ_MD_NE;
483 //
484 //typedef MAAG<VDJ_RECOMB, SAVE_METADATA, COMPUTE_ERRORS> MAAG_VDJ_MD_ER;
485 
486 #endif
Definition: maagbuilder.h:56
bool save(const std::string &filepath) const
Save the graph to the harddrive and load a previously saved graph.
Definition: maag.h:378
pEventIndMMC _events
Definition: maag.h:455
unique_ptr< sequence_t > _sequence
Definition: maag.h:458
Definition: aligner.h:37
prob_t event_probability(node_ind_t node_i, matrix_ind_t mat_i, dim_t row, dim_t col) const
Access to event indices, event probabilities and mismatches in the underlying matrices of the MAAG...
Definition: maag.h:354
prob_t fullProbability(event_ind_t v_index, event_ind_t j_index) const
Compute and return the full assembling probability of this sequence (i.e., with all gene segments ali...
Definition: maag.h:249
virtual ~MAAG()
Special swap constructor for MAAGs that will be used only for computation of the full probability...
Definition: maag.h:177
Definition: maagforwardbackwardalgorithm.h:20
SequenceType _seq_type
Definition: maag.h:460
Multi-Alignment Assembly Graph - basic class for representing all possible generation scenarios of a ...
Definition: maag.h:46
MAAG()
Default constructor.
Definition: maag.h:56
uint8_t node_ind_t
Node index type.
Definition: multimatrixchain.h:91
seq_len_t dim_t
Type of dimensions of matrices (rows and columns).
Definition: multimatrixchain.h:108
event_ind_t nVar() const
Get the number of aligned gene segments.
Definition: maag.h:334
Class for storing lists of matrices, where one node in the list (called "chain") could contain more t...
Definition: multimatrixchain.h:39
uint8_t matrix_ind_t
Matrix index type.
Definition: multimatrixchain.h:99
pErrMMC _errors
Definition: maag.h:456
unique_ptr< seq_len_t[]> _seq_poses
Definition: maag.h:457