5 #ifndef YMIR_INSERTIONMODEL_H 6 #define YMIR_INSERTIONMODEL_H 17 class AbstractInsertionModel;
18 class MonoNucInsertionModel;
19 class DiNucInsertionModel;
32 typedef unique_ptr<prob_t[]> prob_array_t;
43 _arr(
new prob_t[size])
51 this->updateProbabilities(start);
57 _arr(
new prob_t[other._size]),
60 this->updateProbabilities(other._arr.get());
68 if (_arr.get() != other._arr.get()) {
69 _arr.reset(
new prob_t[_size]);
70 this->updateProbabilities(other._arr.get());
86 char first_char = NULL_CHAR,
87 bool with_errors =
false)
const = 0;
90 seq_len_t sequence_len,
91 char first_char = NULL_CHAR,
92 bool with_errors =
false)
const = 0;
94 virtual prob_t
nucProbability(std::string::const_reverse_iterator start,
95 seq_len_t sequence_len,
96 char first_char = NULL_CHAR,
97 bool with_errors =
false)
const = 0;
104 virtual prob_t aaProbability(
const std::string& sequence,
106 seq_len_t first_nuc_pos,
107 seq_len_t last_nuc_pos,
108 char first_char = NULL_CHAR)
const = 0;
110 virtual prob_t aaProbability(std::string::const_iterator start,
111 seq_len_t first_nuc_pos,
112 seq_len_t last_nuc_pos,
113 char first_char = NULL_CHAR)
const = 0;
115 virtual prob_t aaProbability(std::string::const_reverse_iterator start,
116 seq_len_t first_nuc_pos,
117 seq_len_t last_nuc_pos,
118 char first_char = NULL_CHAR)
const = 0;
125 virtual sequence_t generate(seq_len_t len, std::default_random_engine &rg,
char first_char = NULL_CHAR,
bool reverse =
false)
const = 0;
131 prob_t operator[](uint8_t index)
const {
return _arr[index]; }
136 prob_t err_prob()
const {
return _err_prob; }
156 void updateProbabilities(std::vector<prob_t>::const_iterator start) {
158 for (uint8_t i = 0; i < _size; ++i, ++start) {
163 void updateProbabilities(prob_t *start) {
164 for (uint8_t i = 0; i < _size; ++i) {
165 _arr[i] = *(start + i);
169 void updateProbabilities(std::vector<prob_t>::const_iterator start, prob_t err_prob) {
170 this->updateProbabilities(start);
171 _err_prob = err_prob;
174 void updateProbabilities(prob_t *start, prob_t err_prob) {
175 this->updateProbabilities(start);
176 _err_prob = err_prob;
201 this->updateProbabilities(start);
212 AbstractInsertionModel::operator=(other);
223 char first_char = NULL_CHAR,
224 bool with_errors =
false)
const 226 return this->
nucProbability(sequence.cbegin(), sequence.size(), first_char, with_errors);
230 seq_len_t sequence_len,
231 char first_char = NULL_CHAR,
232 bool with_errors =
false)
const 239 for (seq_len_t i = 0; i < sequence_len; ++i, ++start) {
240 res *= _arr[nuc_hash(*start)];
243 for (seq_len_t i = 0; i < sequence_len; ++i, ++start) {
253 seq_len_t sequence_len,
254 char first_char = NULL_CHAR,
255 bool with_errors =
false)
const 262 for (seq_len_t i = 0; i < sequence_len; ++i, ++start) {
263 res *= _arr[nuc_hash(*start)];
266 for (seq_len_t i = 0; i < sequence_len; ++i, ++start) {
279 prob_t aaProbability(
const std::string& sequence,
281 char first_char = NULL_CHAR)
const 283 return this->aaProbability(sequence.cbegin(), sequence.size(), first_char);
286 prob_t aaProbability(std::string::const_iterator start,
287 seq_len_t sequence_len,
288 char first_char = NULL_CHAR)
const 290 throw(std::runtime_error(
"not implemented yet"));
293 prob_t aaProbability(std::string::const_reverse_iterator start,
294 seq_len_t sequence_len,
295 char first_char = NULL_CHAR)
const 297 throw(std::runtime_error(
"not implemented yet"));
302 sequence_t generate(seq_len_t len, std::default_random_engine &rg,
char first_char = NULL_CHAR,
bool reverse =
false)
const {
303 std::string res =
"";
305 std::discrete_distribution<int> distr;
306 distr = std::discrete_distribution<int>(_arr.get(), _arr.get() + 4);
308 for (seq_len_t i = 0; i < len; ++i) {
309 res += inv_nuc_hash(distr(rg));
340 this->updateProbabilities(start);
347 this->updateProbabilitiesMatrix(mat);
358 AbstractInsertionModel::operator=(other);
369 prob_t
nucProbability(
const std::string& sequence,
char first_char = NULL_CHAR,
bool with_errors =
false)
const {
370 return this->
nucProbability(sequence.cbegin(), sequence.size(), first_char, with_errors);
373 prob_t
nucProbability(std::string::const_iterator start, seq_len_t sequence_len,
char first_char = NULL_CHAR,
bool with_errors =
false)
const {
377 auto tmp1 = start, tmp2 = start + 1;
378 auto next = start + 1;
380 res = (first_char == NULL_CHAR) ? .25 : (*
this)(nuc_hash(first_char), nuc_hash(*start));
381 for (seq_len_t i = 1; i < sequence_len; ++i, ++start, ++next) {
382 res *= (*this)(nuc_hash(*start), nuc_hash(*next));
386 res = (first_char == NULL_CHAR) ? .25 : (err2 + (1 - err2) * (*this)(nuc_hash(first_char), nuc_hash(*start)));
387 for (seq_len_t i = 1; i < sequence_len; ++i, ++start, ++next) {
388 res *= (err2 + (1 - err2) * (*
this)(nuc_hash(*start), nuc_hash(*next)));
396 prob_t
nucProbability(std::string::const_reverse_iterator start, seq_len_t sequence_len,
char first_char = NULL_CHAR,
bool with_errors =
false)
const {
400 auto tmp1 = start, tmp2 = start + 1;
401 auto next = start + 1;
403 res = (first_char == NULL_CHAR) ? .25 : (*
this)(nuc_hash(first_char), nuc_hash(*start));
404 for (seq_len_t i = 1; i < sequence_len; ++i, ++start, ++next) {
405 res *= (*this)(nuc_hash(*start), nuc_hash(*next));
409 res = (first_char == NULL_CHAR) ? .25 : (err2 + (1 - err2) * (*this)(nuc_hash(first_char), nuc_hash(*start)));
410 for (seq_len_t i = 1; i < sequence_len; ++i, ++start, ++next) {
411 res *= (err2 + (1 - err2) * (*
this)(nuc_hash(*start), nuc_hash(*next)));
422 prob_t aaProbability(
const std::string& sequence,
423 char first_char = NULL_CHAR)
const 425 return this->aaProbability(sequence.cbegin(), sequence.size(), first_char);
428 prob_t aaProbability(std::string::const_iterator start,
429 seq_len_t sequence_len,
430 char first_char = NULL_CHAR)
const 432 throw(std::runtime_error(
"not implemented yet"));
435 prob_t aaProbability(std::string::const_reverse_iterator start,
436 seq_len_t sequence_len,
437 char first_char = NULL_CHAR)
const 439 throw(std::runtime_error(
"not implemented yet"));
444 sequence_t generate(seq_len_t len, std::default_random_engine &rg,
char first_char = NULL_CHAR,
bool reverse =
false)
const {
445 std::string res =
"";
447 std::discrete_distribution<int> distrs[] = {
448 std::discrete_distribution<int>(_arr.get(), _arr.get() + 4),
449 std::discrete_distribution<int>(_arr.get() + 4, _arr.get() + 8),
450 std::discrete_distribution<int>(_arr.get() + 8, _arr.get() + 12),
451 std::discrete_distribution<int>(_arr.get() + 12, _arr.get() + 16)
454 if (first_char == NULL_CHAR) {
455 res = inv_nuc_hash(distrs[std::discrete_distribution<int>{.25, .25, .25, .25}(rg)](rg));
457 res = inv_nuc_hash(distrs[nuc_hash(first_char)](rg));
459 for (seq_len_t i = 1; i < len; ++i) {
460 res += inv_nuc_hash(distrs[nuc_hash(res[i - 1])](rg));
463 if (reverse) { std::reverse(res.begin(), res.end()); }
470 prob_t operator()(uint8_t row, uint8_t col)
const {
return _arr[4*row + col]; }
475 for (uint8_t i = 0; i < 4; ++i) {
476 for (uint8_t j = 0; j < 4; ++j) {
477 _arr[4*i + j] = mat(i, j);
482 void updateProbabilitiesMatrix(
const event_matrix_t& mat, prob_t err_prob) {
483 this->updateProbabilitiesMatrix(mat);
494 #endif //YMIR_INSERTIONMODEL_H
Class for representing VJ / VD / DJ insertions models - either a mono-nucleotide or a di-nucleotide m...
Definition: insertionmodel.h:27
Simple matrix class.
Definition: matrix.h:21
prob_t _err_prob
Definition: insertionmodel.h:142
virtual prob_t nucProbability(const std::string &sequence, char first_char=NULL_CHAR, bool with_errors=false) const =0
Probability of the given nucleotide sequence.
prob_t nucProbability(const std::string &sequence, char first_char=NULL_CHAR, bool with_errors=false) const
Probability of the given nucleotide sequence.
Definition: insertionmodel.h:369
prob_t nucProbability(const std::string &sequence, char first_char=NULL_CHAR, bool with_errors=false) const
Probability of the given nucleotide sequence.
Definition: insertionmodel.h:222
Definition: insertionmodel.h:321
Definition: insertionmodel.h:183