Ymir  .9
Fast\C++toolforcomputationofassemblingprobabilities,statisticalinferenceofassemblingstatisticalmodelandgenerationofartificialsequencesofT-cellreceptorsdata.
alignment_matrix.h
1 //
2 // Created by Vadim N. on 14/02/2016.
3 //
4 
5 #ifndef YMIR_ALIGNMENT_MATRIX_H
6 #define YMIR_ALIGNMENT_MATRIX_H
7 
8 
9 #include <algorithm>
10 #include <string>
11 #include <vector>
12 
13 #include "nogap_alignment_vector.h"
14 #include "gapped_alignment_vector.h"
15 
16 
17 namespace ymir {
18 
22  template <typename AlignmentType>
24 
25  public:
26 
27  typedef std::vector<bool> bit_storage_t;
28 
29 
30  typedef std::vector<alignment_score_t> score_storage_t;
31 
32 
33  static const seq_len_t default_nrows = 80;
34 
35 
36  static const seq_len_t default_ncols = 20;
37 
38 
40  : _gene(0),
41  _nrow(default_nrows),
42  _ncol(default_ncols),
43  _starts(_nrow * _ncol, false),
44  _matrix(_nrow * _ncol, 0)
45  {
46  }
47 
48 
49  AlignmentMatrixBase(seg_index_t gene,
50  const sequence_t &pattern,
51  const sequence_t &text)
52  : _gene(gene),
53  _nrow(pattern.size() + 1),
54  _ncol(text.size() + 1),
55  _starts(_nrow * _ncol, false),
56  _matrix(_nrow * _ncol, 0)
57  {
58  }
59 
60 
61  void reinit(seg_index_t gene,
62  const sequence_t &pattern,
63  const sequence_t &text)
64  {
65  _gene = gene;
66  _nrow = pattern.size() + 1;
67  _ncol = text.size() + 1;
68  _starts.resize(_nrow * _ncol);
69  std::fill(_starts.begin(), _starts.end(), false);
70  _matrix.resize(_nrow * _ncol);
71  std::fill(_matrix.begin(), _matrix.end(), 0);
72  }
73 
74 
75  void setStart(seq_len_t row, seq_len_t col) { _starts[index(row, col)] = true; }
76 
77 
78  alignment_score_t getBestAlignment(AlignmentType *vec, const sequence_t &pattern, const sequence_t &text) const;
79 
80 
84  alignment_score_t score(seq_len_t row, seq_len_t col) const { return _matrix[index(row, col)]; }
86 
87  alignment_score_t& score(seq_len_t row, seq_len_t col) { return _matrix[index(row, col)]; }
89 
90  private:
91 
92  seq_len_t _nrow, _ncol;
93  seg_index_t _gene;
94  bit_storage_t _starts;
95  score_storage_t _matrix;
96 
97 
98  size_t index(seq_len_t row, seq_len_t col) const { return row * _ncol + col; }
99 
100  };
101 
102 
104 
105 
107 
108 
109  template <>
110  alignment_score_t SWNGAlignmentMatrix::getBestAlignment(NoGapAlignmentVector *vec,
111  const sequence_t &pattern,
112  const sequence_t &text) const
113  {
114  AlignmentVectorBase::events_storage_t bitvec;
115 
116  // Find maximum score.
117  seq_len_t max_i = 0, max_j = 0;
118  alignment_score_t max_score = -1;
119  for (seq_len_t i = 0; i < _nrow; ++i) {
120  for (seq_len_t j = 0; j < _ncol; ++j) {
121  if (score(i, j) > max_score) {
122  max_i = i;
123  max_j = j;
124  max_score = score(i, j);
125  }
126  }
127  }
128 
129  // traverse back to the pattern/text start
130  seq_len_t cur_i = max_i, cur_j = max_j;
131  while (cur_i != 1 && cur_j != 1) {
132  --cur_i;
133  --cur_j;
134  }
135 
136  seq_len_t start_i = cur_i, start_j = cur_j;
137 
138  // traverse forward to the very end of pattern/text
139  while (cur_i < _nrow && cur_j < _ncol) {
140  bitvec.push_back(pattern[cur_i - 1] != text[cur_j - 1]);
141  ++cur_i;
142  ++cur_j;
143  }
144 
145  vec->addAlignment(_gene, start_i, start_j, bitvec);
146 
147  return score(max_i, max_j);
148  }
149 
150 
151  template <>
152  alignment_score_t SWAlignmentMatrix::getBestAlignment(GappedAlignmentVector *vec,
153  const sequence_t &pattern,
154  const sequence_t &text) const
155  {
156  AlignmentVectorBase::events_storage_t bitvec;
157 
158  // Find maximum score.
159  seq_len_t max_i = 0, max_j = 0;
160  alignment_score_t max_score = -1;
161  for (seq_len_t i = 0; i < _nrow; ++i) {
162  for (seq_len_t j = 0; j < _ncol; ++j) {
163  if (score(i, j) > max_score) {
164  max_i = i;
165  max_j = j;
166  max_score = score(i, j);
167  }
168  }
169  }
170 
171  // Traceback to the start, storing alignment events.
172  seq_len_t cur_i = max_i, cur_j = max_j, max_index = 0;
173  std::array<alignment_score_t, 3> score_arr;
174  while (!_starts[index(cur_i, cur_j)]) {
175  score_arr[0] = score(cur_i - 1, cur_j - 1);
176  score_arr[1] = cur_j > 0 ? score(cur_i, cur_j - 1) : -1;
177  score_arr[2] = cur_i > 0 ? score(cur_i - 1, cur_j) : -1;
178  max_index = std::distance(score_arr.begin(), std::max_element(score_arr.begin(), score_arr.end()));
179  switch (max_index) {
180  case 0:
181 // std::cout << (int) cur_i << ":" << (int) cur_j << std::endl;
182  pattern[cur_i - 1] == text[cur_j - 1] ? add_match(&bitvec) : add_mismatch(&bitvec);
183  --cur_i;
184  --cur_j;
185  break;
186 
187  case 1:
188  add_ins(&bitvec);
189  --cur_j;
190  break;
191 
192  case 2:
193  add_del(&bitvec);
194  --cur_i;
195  break;
196 
197  default:
198  break;
199  }
200  }
201 
202  add_match(&bitvec);
203  // reverse
204  AlignmentVectorBase::events_storage_t bitvec2;
205 
206  for (int i = bitvec.size() / 2 - 1; i >= 0; --i) {
207  bitvec2.push_back(bitvec[i*2]);
208  bitvec2.push_back(bitvec[i*2 + 1]);
209  }
210  vec->addAlignment(_gene, cur_i, cur_j, bitvec2);
211 
212  return score(max_i, max_j);
213  }
214 
215 }
216 
217 #endif //YMIR_ALIGNMENT_MATRIX_H
Definition: aligner.h:37
Definition: nogap_alignment_vector.h:37
void addAlignment(seg_index_t id, seq_len_t p_start, seq_len_t t_start, seq_len_t size)
Add a new alignment to the vector.
Definition: nogap_alignment_vector.h:55
Definition: alignment_matrix.h:23
Definition: gapped_alignment_vector.h:52