24 #ifndef _MAAGBUILDER_H_ 25 #define _MAAGBUILDER_H_ 27 #define DEFAULT_SEQ_POSES_RESERVE 300 29 #define VARIABLE_GENES_MATRIX_INDEX 0 30 #define VARIABLE_DELETIONS_MATRIX_INDEX 1 31 #define JOINING_DELETIONS_VJ_MATRIX_INDEX 3 32 #define JOINING_GENES_VDJ_MATRIX_INDEX 6 33 #define JOINING_DELETIONS_VDJ_MATRIX_INDEX 5 34 #define DIVERSITY_GENES_MATRIX_INDEX 3 35 #define VarJoi_INSERTIONS_MATRIX_INDEX 2 36 #define VarDiv_INSERTIONS_MATRIX_INDEX 2 37 #define DivJoi_INSERTIONS_MATRIX_INDEX 4 40 #include "clonotype.h" 41 #include "genesegment.h" 42 #include "insertionmodel.h" 44 #include "modelparametervector.h" 45 #include "repertoire.h" 80 *(_param_vec.get()) = param_vec;
96 MetadataMode metadata_mode,
98 SequenceType seq_type = NUCLEOTIDE)
const {
99 if (clonotype.is_good())
104 vector<seq_len_t> seq_poses;
105 seq_poses.reserve(DEFAULT_SEQ_POSES_RESERVE);
107 auto resize_size = 0, e_resize_size = 0;
108 switch (clonotype.recombination()) {
110 resize_size = VJ_CHAIN_SIZE;
115 resize_size = VDJ_CHAIN_SIZE;
121 check_and_throw(
false,
"MAAGBuilder: unknown recombination type.");
125 probs.resize(resize_size);
127 events.resize(resize_size);
130 errors.resize(e_resize_size);
133 this->
buildVariable(clonotype, probs, events, errors, seq_poses, metadata_mode, error_mode);
134 this->
buildJoining(clonotype, probs, events, errors, seq_poses, metadata_mode, error_mode);
135 if (clonotype.recombination() == VJ_RECOMB) {
136 this->
buildVJinsertions(clonotype, probs, events, seq_poses, metadata_mode, error_mode);
137 }
else if (clonotype.recombination() == VDJ_RECOMB) {
138 this->
buildDiversity(clonotype, probs, events, errors, seq_poses, metadata_mode, error_mode);
139 this->
buildVDinsertions(clonotype, probs, events, seq_poses, metadata_mode, error_mode);
140 this->
buildDJinsertions(clonotype, probs, events, seq_poses, metadata_mode, error_mode);
172 maag._recomb = clonotype.recombination();
173 maag.
_seq_type = clonotype.sequence_type();
184 unique_ptr<seq_len_t[]> seq_poses_arr(
new seq_len_t[seq_poses.size()]);
185 copy(seq_poses.begin(), seq_poses.end(), seq_poses_arr.get());
188 maag._n_poses = seq_poses.size();
200 MetadataMode metadata_mode,
201 ErrorMode error_mode,
202 SequenceType seq_type = NUCLEOTIDE,
203 bool verbose =
true)
const 208 std::cout <<
"Building " << (size_t) cloneset.size() <<
" MAAGs..." << std::endl;
209 verbose_step = cloneset.size() / 10;
213 res.resize(cloneset.size());
215 #if OMP_THREADS == -1 216 #pragma omp parallel for 218 #pragma omp parallel for num_threads(OMP_THREADS) 221 for (
size_t i = 0; i < cloneset.size(); ++i) {
222 res[i] = this->
build(cloneset[i], metadata_mode, error_mode, seq_type);
225 if (verbose && (i+1) % verbose_step == 0 && (i+1) != cloneset.size()) {
226 cout <<
"[" << (int) ((100*(i+1)) / cloneset.size()) <<
"%] "<<
"Built " << (
int) (i+1) <<
" / " << (
int) (cloneset.size()) <<
" MAAGs." << endl;
232 cout <<
"[100%] Built " << (int) (cloneset.size()) <<
" MAAGs." << endl;
251 ErrorMode error_mode,
252 SequenceType seq_type = NUCLEOTIDE,
253 MAAGComputeProbAction action = SUM_PROBABILITY)
const {
258 ErrorMode error_mode,
259 SequenceType seq_type = NUCLEOTIDE,
260 MAAGComputeProbAction action = SUM_PROBABILITY,
261 bool verbose =
true)
const 264 res.reserve(cloneset.size());
268 std::cout <<
"Computing assembling probabilities on " << (size_t) cloneset.size() <<
" clonotypes." << std::endl;
269 verbose_step = cloneset.size() / 10;
273 #if OMP_THREADS == -1 274 #pragma omp parallel for 276 #pragma omp parallel for num_threads(OMP_THREADS) 279 for (
size_t i = 0; i < cloneset.size(); ++i) {
280 res.push_back(
buildAndCompute(cloneset[i], error_mode, seq_type, action));
283 if (verbose && (i+1) % verbose_step == 0 && (i+1) != cloneset.size()) {
284 std::cout <<
"[" << (int) ((100*(i+1)) / cloneset.size()) <<
"%] " <<
"Computed " << (
int) (i+1) <<
" / " << (
size_t) cloneset.size() <<
" assembling probabilities." << std::endl;
290 std::cout <<
"[100%] Computed " << (size_t) cloneset.size() <<
" assembling probabilities." << std::endl;
306 if (maag->has_events()) {
307 vector<seq_len_t> seq_poses_vec(maag->
_seq_poses.get(), maag->
_seq_poses.get() + maag->_n_poses);
308 for (
int node_i = 0; node_i < maag->chainSize(); ++node_i) {
310 if (maag->is_vj() && node_i == VarJoi_INSERTIONS_MATRIX_INDEX) {
311 MonoNucInsertionModel im(_param_vec->get_iterator(_param_vec->event_index(VJ_VAR_JOI_INS_NUC, 0, 0)));
313 seq_len_t v_vertices = maag->nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX),
314 j_vertices = maag->nodeRows(JOINING_DELETIONS_VJ_MATRIX_INDEX);
320 VarJoi_INSERTIONS_MATRIX_INDEX,
321 _param_vec->event_index(VJ_VAR_JOI_INS_LEN, 0, 0),
322 _param_vec->max_VJ_ins_len(),
328 v_vertices + j_vertices - 1,
332 }
else if (maag->is_vdj() && node_i == VarDiv_INSERTIONS_MATRIX_INDEX) {
333 DiNucInsertionModel im(_param_vec->get_iterator(_param_vec->event_index(VDJ_VAR_DIV_INS_NUC, 0, 0)));
335 seq_len_t v_vertices = maag->nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX),
336 d3_vertices = maag->nodeRows(DIVERSITY_GENES_MATRIX_INDEX);
342 VarDiv_INSERTIONS_MATRIX_INDEX,
343 _param_vec->event_index(VDJ_VAR_DIV_INS_LEN, 0, 0),
344 _param_vec->max_VD_ins_len(),
350 v_vertices + d3_vertices - 1,
354 }
else if (maag->is_vdj() && node_i == DivJoi_INSERTIONS_MATRIX_INDEX) {
355 DiNucInsertionModel im(_param_vec->get_iterator(_param_vec->event_index(VDJ_DIV_JOI_INS_NUC, 0, 0)));
357 seq_len_t v_vertices = maag->nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX),
358 d3_vertices = maag->nodeRows(DIVERSITY_GENES_MATRIX_INDEX),
359 d5_vertices = maag->nodeColumns(DIVERSITY_GENES_MATRIX_INDEX),
360 j_vertices = maag->nodeRows(JOINING_DELETIONS_VDJ_MATRIX_INDEX);
366 DivJoi_INSERTIONS_MATRIX_INDEX,
367 _param_vec->event_index(VDJ_DIV_JOI_INS_LEN, 0, 0),
368 _param_vec->max_DJ_ins_len(),
371 v_vertices + d3_vertices,
372 v_vertices + d3_vertices + d5_vertices - 1,
373 v_vertices + d3_vertices + d5_vertices,
374 v_vertices + d3_vertices + d5_vertices + j_vertices - 1,
380 if (!maag->has_errors()) {
381 for (
int mat_i = 0; mat_i < maag->nodeSize(node_i); ++mat_i) {
382 for (
int row_i = 0; row_i < maag->nodeRows(node_i); ++row_i) {
383 for (
int col_i = 0; col_i < maag->nodeColumns(node_i); ++col_i) {
384 (*maag)(node_i, mat_i, row_i, col_i) =
385 (*_param_vec)[maag->event_index(node_i, mat_i, row_i, col_i)];
392 err_node_i = node_i == JOINING_DELETIONS_VJ_MATRIX_INDEX ? 1 : 0;
394 if (node_i != VARIABLE_DELETIONS_MATRIX_INDEX) {
395 if (node_i == JOINING_DELETIONS_VDJ_MATRIX_INDEX) {
403 if (node_i != 0 && (maag->is_vdj() && node_i != JOINING_GENES_VDJ_MATRIX_INDEX)) {
404 for (
int mat_i = 0; mat_i < maag->nodeSize(node_i); ++mat_i) {
405 for (
int row_i = 0; row_i < maag->nodeRows(node_i); ++row_i) {
406 for (
int col_i = 0; col_i < maag->nodeColumns(node_i); ++col_i) {
407 if (maag->errors(err_node_i, mat_i, row_i, col_i)) {
408 (*maag)(node_i, mat_i, row_i, col_i) =
409 (*_param_vec)[maag->event_index(node_i, mat_i, row_i, col_i)]
410 * _param_vec->error_prob() * maag->errors(err_node_i, mat_i, row_i, col_i);
412 (*maag)(node_i, mat_i, row_i, col_i) = (*_param_vec)[maag->event_index(node_i, mat_i, row_i, col_i)];
418 for (
int mat_i = 0; mat_i < maag->nodeSize(node_i); ++mat_i) {
419 for (
int row_i = 0; row_i < maag->nodeRows(node_i); ++row_i) {
420 for (
int col_i = 0; col_i < maag->nodeColumns(node_i); ++col_i) {
421 (*maag)(node_i, mat_i, row_i, col_i) = (*_param_vec)[maag->event_index(node_i, mat_i, row_i, col_i)];
437 std::cout <<
"Updating " << (size_t) repertoire->size() <<
" MAAGs..." << std::endl;
438 verbose_step = repertoire->size() / 10;
442 #if OMP_THREADS == -1 443 #pragma omp parallel for 445 #pragma omp parallel for num_threads(OMP_THREADS) 448 for (
size_t i = 0; i < repertoire->size(); ++i) {
452 if (verbose && (i+1) % verbose_step == 0 && (i+1) != repertoire->size()) {
453 cout <<
"[" << (int) ((100*(i+1)) / repertoire->size()) <<
"%] " <<
"Updated " << (
size_t) (i+1) <<
" / " << (
size_t) repertoire->size() <<
" MAAGs." << endl;
459 cout <<
"[100%] Updated " << (int) (repertoire->size()) <<
" MAAGs." << endl;
467 unique_ptr<ModelParameterVector> _param_vec;
468 unique_ptr<VDJRecombinationGenes> _genes;
490 vector<seq_len_t> &seq_poses,
491 MetadataMode metadata_mode,
492 ErrorMode error_mode)
const 496 seg_index_t v_num = clonotype.
nVar(), j_num = clonotype.nJoi();
497 for (
int v_index = 0; v_index < v_num; ++v_index) {
498 len = std::max(len, clonotype.getVarLen(v_index));
502 seq_len_t v_len, v_gene, v_start, v_end;
504 probs.initNode(VARIABLE_DELETIONS_MATRIX_INDEX, v_num, 1, len + 1);
506 events.initNode(VARIABLE_DELETIONS_MATRIX_INDEX, v_num, 1, len + 1);
509 errors.initNode(0, v_num, 1, len + 1);
512 if (clonotype.recombination() == VJ_RECOMB) {
513 probs.initNode(VARIABLE_GENES_MATRIX_INDEX, 1, v_num, j_num);
515 events.initNode(VARIABLE_GENES_MATRIX_INDEX, 1, v_num, j_num);
517 }
else if (clonotype.recombination() == VDJ_RECOMB) {
518 probs.initNode(VARIABLE_GENES_MATRIX_INDEX, v_num, 1, 1);
520 events.initNode(VARIABLE_GENES_MATRIX_INDEX, v_num, 1, 1);
524 EventClass V_DEL = clonotype.recombination() == VJ_RECOMB ? VJ_VAR_DEL : VDJ_VAR_DEL;
525 for (seg_index_t v_index = 0; v_index < v_num; ++v_index) {
528 v_gene = clonotype.
getVar(v_index);
530 if (clonotype.recombination() == VJ_RECOMB) {
531 for (seg_index_t j_index = 0; j_index < j_num; ++j_index) {
532 probs(VARIABLE_GENES_MATRIX_INDEX, 0, v_index, j_index)
533 = _param_vec->event_prob(VJ_VAR_JOI_GEN, 0, v_gene - 1, clonotype.getJoi(j_index) - 1);
535 }
else if (clonotype.recombination() == VDJ_RECOMB) {
536 probs(VARIABLE_GENES_MATRIX_INDEX, v_index, 0, 0) = _param_vec->event_prob(VDJ_VAR_GEN, 0, v_gene - 1);
540 v_len = _genes->V()[v_gene].sequence.size();
542 v_end = clonotype.getVarGeneEnd(v_index);
544 for (seq_len_t i = 0; i < v_end - v_start + 2; ++i) {
545 probs(VARIABLE_DELETIONS_MATRIX_INDEX, v_index, 0, i) = _param_vec->event_prob(V_DEL, v_gene - 1, (1 + v_len) - (v_start + i));
549 if (clonotype.recombination() == VJ_RECOMB) {
551 for (seg_index_t j_index = 0; j_index < j_num; ++j_index) {
552 events(VARIABLE_GENES_MATRIX_INDEX, 0, v_index, j_index)
553 = _param_vec->event_index(VJ_VAR_JOI_GEN, 0, v_gene - 1, clonotype.getJoi(j_index) - 1);
555 }
else if (clonotype.recombination() == VDJ_RECOMB) {
556 events(VARIABLE_GENES_MATRIX_INDEX, v_index, 0, 0) = _param_vec->event_index(VDJ_VAR_GEN, 0, v_gene - 1);
559 for (seq_len_t i = 0; i < v_end - v_start + 2; ++i) {
560 events(VARIABLE_DELETIONS_MATRIX_INDEX, v_index, 0, i) = _param_vec->event_index(V_DEL, v_gene - 1, (1 + v_len) - (v_start + i));
565 errors(0, v_index, 0, 0) = 0;
566 for (seq_len_t i = 1; i <= clonotype.getVarLen(v_index); ++i) {
567 errors(0, v_index, 0, i) = errors(0, v_index, 0, i-1) + clonotype.isVarMismatch(v_index, i);
568 if (errors(0, v_index, 0, i)) {
569 probs(VARIABLE_DELETIONS_MATRIX_INDEX, v_index, 0, i) *= errors(0, v_index, 0, i) * _param_vec->error_prob();
575 for (seq_len_t i = 0; i <= len; ++i) { seq_poses.push_back(i); }
592 vector<seq_len_t> &seq_poses,
593 MetadataMode metadata_mode,
594 ErrorMode error_mode)
const 596 int J_index_dels = JOINING_DELETIONS_VJ_MATRIX_INDEX,
597 J_index_genes = JOINING_GENES_VDJ_MATRIX_INDEX;
598 if (clonotype.recombination() == VDJ_RECOMB) {
599 J_index_dels = JOINING_DELETIONS_VDJ_MATRIX_INDEX;
603 seg_index_t j_num = clonotype.nJoi();
604 seq_len_t len = 0, seq_global_start_pos = (seq_len_t) -1;
605 for (
int j_index = 0; j_index < j_num; ++j_index) {
606 len = std::max(len, clonotype.getJoiLen(j_index));
607 seq_global_start_pos = std::min(seq_global_start_pos, clonotype.getJoiSeqStart(j_index));
611 probs.initNode(J_index_dels, j_num, len + 1, 1);
613 events.initNode(J_index_dels, j_num, len + 1, 1);
616 errors.initNode(errors.chainSize() - 1, j_num, len + 1, 1);
620 if (clonotype.recombination() == VDJ_RECOMB) {
621 probs.initNode(J_index_genes, 1, j_num, clonotype.nDiv());
623 events.initNode(J_index_genes, 1, j_num, clonotype.nDiv());
628 seq_len_t j_len, j_gene, j_start, j_end, shift;
630 EventClass J_DEL = clonotype.recombination() == VJ_RECOMB ? VJ_JOI_DEL : VDJ_JOI_DEL;
631 for (seg_index_t j_index = 0; j_index < j_num; ++j_index) {
633 j_gene = clonotype.getJoi(j_index);
634 j_len = _genes->J()[j_gene].sequence.size();
636 if (clonotype.recombination() == VDJ_RECOMB) {
637 for (seg_index_t d_index = 0; d_index < clonotype.nDiv(); ++d_index) {
638 probs(J_index_genes, 0, j_index, d_index)
639 = _param_vec->event_prob(VDJ_JOI_DIV_GEN, 0, j_gene - 1, clonotype.getDiv(d_index) - 1);
644 j_start = clonotype.getJoiGeneStart(j_index);
645 j_end = clonotype.getJoiGeneEnd(j_index);
646 shift = clonotype.getJoiSeqStart(j_index) - seq_global_start_pos;
648 for (seq_len_t i = 0; i < clonotype.getJoiLen(j_index) + 1; ++i) {
649 probs(J_index_dels, j_index, i + shift, 0) = _param_vec->event_prob(J_DEL, j_gene - 1, j_start + i - 1);
653 if (clonotype.recombination() == VDJ_RECOMB) {
654 for (seg_index_t d_index = 0; d_index < clonotype.nDiv(); ++d_index) {
655 events(J_index_genes, 0, j_index, d_index)
656 = _param_vec->event_index(VDJ_JOI_DIV_GEN, 0, j_gene - 1, clonotype.getDiv(d_index) - 1);
660 for (seq_len_t i = 0; i < clonotype.getJoiLen(j_index) + 1; ++i) {
661 events(J_index_dels, j_index, i + shift, 0) = _param_vec->event_index(J_DEL, j_gene - 1, j_start + i - 1);
666 errors(errors.chainSize() - 1, j_index, len, 0) = 0;
667 for (seq_len_t i = 1; i <= clonotype.getJoiLen(j_index); ++i) {
668 errors(errors.chainSize() - 1, j_index, len - i, 0)
669 = errors(errors.chainSize() - 1, j_index, len + 1 - i, 0)
670 + clonotype.isJoiMismatch(j_index, clonotype.getJoiLen(j_index) + 1 - i);
671 if (errors(errors.chainSize() - 1, j_index, len - i, 0)) {
672 probs(J_index_dels, j_index, len - i, 0) *= errors(errors.chainSize() - 1, j_index, len - i, 0) * _param_vec->error_prob();
678 for (seq_len_t i = clonotype.
sequence().size() - len + 1; i <= clonotype.
sequence().size() + 1; ++i) {
679 seq_poses.push_back(i);
697 vector<seq_len_t> &seq_poses,
698 MetadataMode metadata_mode,
699 ErrorMode error_mode)
const 704 seq_len_t seq_arr_size = clonotype.
sequence().size() + 1;
705 unique_ptr<seq_len_t[]> seq_row(
new seq_len_t[seq_arr_size]);
706 std::fill(seq_row.get(), seq_row.get() + seq_arr_size, 0);
709 unique_ptr<seq_len_t[]> seq_col(
new seq_len_t[seq_arr_size]);
710 std::fill(seq_col.get(), seq_col.get() + seq_arr_size, 0);
712 for (seg_index_t d_index = 0; d_index < clonotype.nDiv(); ++d_index) {
713 min_D_len = _param_vec->D_min_len(clonotype.getDiv(d_index));
716 seq_len_t d_seq_start = clonotype.getDivSeqStart(d_index, j),
717 d_seq_end = clonotype.getDivSeqEnd(d_index, j);
720 for (seq_len_t i = d_seq_start; i <= d_seq_end - min_D_len + 1; ++i) {
724 for (seq_len_t i = d_seq_start + min_D_len - (seq_len_t) 1; i <= d_seq_end; ++i) {
731 seq_len_t seq_ind = 1;
732 for (seq_len_t i = 0; i < seq_arr_size; ++i) {
734 seq_row[i] = seq_ind;
741 for (seq_len_t i = 0; i < seq_arr_size; ++i) {
743 seq_col[i] = seq_ind;
749 seq_len_t last_max_seq_start = 0, last_max_seq_end = 0, seq_row_ind = 0, seq_col_ind = 0;
751 seq_len_t seq_row_nonzeros = 0, seq_col_nonzeros = 0;
752 for (seq_len_t i = 0; i < seq_arr_size; ++i) {
753 seq_row_nonzeros += seq_row[i] != 0;
754 seq_col_nonzeros += seq_col[i] != 0;
757 probs.initNode(DIVERSITY_GENES_MATRIX_INDEX, clonotype.nDiv(), seq_row_nonzeros, seq_col_nonzeros);
759 events.initNode(DIVERSITY_GENES_MATRIX_INDEX, clonotype.nDiv(), seq_row_nonzeros, seq_col_nonzeros);
762 errors.initNode(1, clonotype.nDiv(), seq_row_nonzeros, seq_col_nonzeros);
766 seg_index_t d_index, d_gene;
768 seq_len_t d_seq_start, d_seq_end, d_gene_start, d_gene_end;
770 for (seg_index_t d_index = 0; d_index < clonotype.nDiv(); ++d_index) {
771 d_gene = clonotype.getDiv(d_index);
772 d_len = _genes->D()[d_gene].sequence.size();
773 min_D_len = _param_vec->D_min_len(d_gene);
777 d_seq_start = clonotype.getDivSeqStart(d_index, j);
778 d_seq_end = clonotype.getDivSeqEnd(d_index, j);
779 d_gene_start = clonotype.getDivGeneStart(d_index, j);
780 d_gene_end = clonotype.getDivGeneEnd(d_index, j);
782 for (seq_len_t left_pos = d_seq_start; left_pos <= d_seq_end - min_D_len + 1; ++left_pos) {
783 for (seq_len_t right_pos = left_pos + min_D_len - 1; right_pos <= d_seq_end; ++right_pos) {
784 probs(DIVERSITY_GENES_MATRIX_INDEX, d_index, seq_row[left_pos] - 1, seq_col[right_pos] - 1)
785 = _param_vec->event_prob(VDJ_DIV_DEL,
787 d_gene_start + left_pos - d_seq_start,
788 d_len - (d_gene_end - (d_seq_end - right_pos)));
790 events(DIVERSITY_GENES_MATRIX_INDEX, d_index, seq_row[left_pos] - 1, seq_col[right_pos] - 1)
791 = _param_vec->event_index(VDJ_DIV_DEL,
793 d_gene_start + left_pos - d_seq_start,
794 d_len - (d_gene_end - (d_seq_end - right_pos)));
798 errors(1, d_index, seq_row[left_pos] - 1, seq_col[right_pos] - 1) =
799 clonotype.numDivMismatches(d_index, j,
800 d_gene_start + left_pos - d_seq_start,
801 d_gene_end - (d_seq_end - right_pos));
803 if (errors(1, d_index, seq_row[left_pos] - 1, seq_col[right_pos] - 1)) {
804 probs(DIVERSITY_GENES_MATRIX_INDEX, d_index, seq_row[left_pos] - 1, seq_col[right_pos] - 1)
805 *= _param_vec->error_prob() * errors(1, d_index, seq_row[left_pos] - 1, seq_col[right_pos] - 1);
814 vector<seq_len_t> D35_poses;
815 D35_poses.reserve(seq_row_nonzeros + seq_col_nonzeros + 2);
816 for (seq_len_t i = 1; i < seq_arr_size; ++i) {
if (seq_row[i]) { D35_poses.push_back(i); } }
817 for (seq_len_t i = 1; i < seq_arr_size; ++i) {
if (seq_col[i]) { D35_poses.push_back(i); } }
820 seq_poses.reserve(seq_poses.size() + D35_poses.size() + 2);
821 seq_poses.insert(seq_poses.begin() + probs.nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX), D35_poses.begin(), D35_poses.end());
837 const vector<seq_len_t> &seq_poses,
838 MetadataMode metadata_mode,
839 ErrorMode error_mode)
const 841 MonoNucInsertionModel mc(_param_vec->get_iterator(_param_vec->event_index(VJ_VAR_JOI_INS_NUC, 0, 0)), error_mode ? _param_vec->error_prob() : 0);
843 seq_len_t v_vertices = probs.nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX),
844 j_vertices = probs.nodeRows(JOINING_DELETIONS_VJ_MATRIX_INDEX);
846 probs.initNode(VarJoi_INSERTIONS_MATRIX_INDEX, 1, v_vertices, j_vertices);
849 events.initNode(VarJoi_INSERTIONS_MATRIX_INDEX, 1, v_vertices, j_vertices);
856 VarJoi_INSERTIONS_MATRIX_INDEX,
857 _param_vec->event_index(VJ_VAR_JOI_INS_LEN, 0, 0),
858 _param_vec->max_VJ_ins_len(),
864 v_vertices + j_vertices - 1,
882 const vector<seq_len_t> &seq_poses,
883 MetadataMode metadata_mode,
884 ErrorMode error_mode)
const 886 DiNucInsertionModel mc(_param_vec->get_iterator(_param_vec->event_index(VDJ_VAR_DIV_INS_NUC, 0, 0)), error_mode ? _param_vec->error_prob() : 0);
888 seq_len_t v_vertices = probs.nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX),
889 d3_vertices = probs.nodeRows(DIVERSITY_GENES_MATRIX_INDEX);
891 probs.initNode(VarDiv_INSERTIONS_MATRIX_INDEX, 1, v_vertices, d3_vertices);
894 events.initNode(VarDiv_INSERTIONS_MATRIX_INDEX, 1, v_vertices, d3_vertices);
901 VarDiv_INSERTIONS_MATRIX_INDEX,
902 _param_vec->event_index(VDJ_VAR_DIV_INS_LEN, 0, 0),
903 _param_vec->max_VD_ins_len(),
909 v_vertices + d3_vertices - 1,
927 const vector<seq_len_t> &seq_poses,
928 MetadataMode metadata_mode,
929 ErrorMode error_mode)
const 931 DiNucInsertionModel mc(_param_vec->get_iterator(_param_vec->event_index(VDJ_DIV_JOI_INS_NUC, 0, 0)), error_mode ? _param_vec->error_prob() : 0);
933 seq_len_t v_vertices = probs.nodeColumns(VARIABLE_DELETIONS_MATRIX_INDEX),
934 d3_vertices = probs.nodeRows(DIVERSITY_GENES_MATRIX_INDEX),
935 d5_vertices = probs.nodeColumns(DIVERSITY_GENES_MATRIX_INDEX),
936 j_vertices = probs.nodeRows(JOINING_DELETIONS_VDJ_MATRIX_INDEX);
938 probs.initNode(DivJoi_INSERTIONS_MATRIX_INDEX, 1, d5_vertices, j_vertices);
941 events.initNode(DivJoi_INSERTIONS_MATRIX_INDEX, 1, d5_vertices, j_vertices);
948 DivJoi_INSERTIONS_MATRIX_INDEX,
949 _param_vec->event_index(VDJ_DIV_JOI_INS_LEN, 0, 0),
950 _param_vec->max_DJ_ins_len(),
953 v_vertices + d3_vertices,
954 v_vertices + d3_vertices + d5_vertices - 1,
955 v_vertices + d3_vertices + d5_vertices,
956 v_vertices + d3_vertices + d5_vertices + j_vertices - 1,
982 const vector<seq_len_t> &seq_poses,
984 event_ind_t null_insertion,
986 MetadataMode metadata_mode,
988 seq_len_t left_vertices_start,
989 seq_len_t left_vertices_end,
990 seq_len_t right_vertices_start,
991 seq_len_t right_vertices_end,
993 bool reversed =
false)
const 997 char last_char = NULL_CHAR;
999 for (
size_t left_vertex_i = left_vertices_start; left_vertex_i <= left_vertices_end; ++left_vertex_i) {
1000 for (
size_t right_vertex_i = right_vertices_start; right_vertex_i <= right_vertices_end; ++right_vertex_i) {
1001 insertion_len = seq_poses[right_vertex_i] - seq_poses[left_vertex_i] - 1;
1002 good_insertion = (insertion_len >= 0) && (insertion_len <= max_size);
1003 if (good_insertion) {
1005 if (seq_poses[left_vertex_i] == 0) {
1006 last_char = NULL_CHAR;
1008 last_char = sequence[seq_poses[left_vertex_i] - 1];
1011 probs(ins_node_index, 0, left_vertex_i - left_vertices_start, right_vertex_i - right_vertices_start)
1012 = mc.
nucProbability(sequence.cbegin() + seq_poses[left_vertex_i],
1016 * (*_param_vec)[null_insertion + insertion_len];
1018 if (seq_poses[right_vertex_i] == sequence.size() + 1) {
1019 last_char = NULL_CHAR;
1021 last_char = sequence[seq_poses[right_vertex_i] - 1];
1024 probs(ins_node_index, 0, left_vertex_i - left_vertices_start, right_vertex_i - right_vertices_start)
1025 = mc.
nucProbability(sequence.crbegin() + (sequence.size() - seq_poses[right_vertex_i] + 1),
1029 * (*_param_vec)[null_insertion + insertion_len];
1032 if (metadata_mode) {
1033 events(ins_node_index, 0, left_vertex_i - left_vertices_start, right_vertex_i - right_vertices_start)
1034 = null_insertion + insertion_len;
1061 #endif //_MAAGBUILDER_H_ Definition: maagbuilder.h:56
pEventIndMMC _events
Definition: maag.h:455
unique_ptr< sequence_t > _sequence
Definition: maag.h:458
prob_t fullProbability(event_ind_t v_index, event_ind_t j_index) const
Compute and return the full assembling probability of this sequence (i.e., with all gene segments ali...
Definition: maag.h:249
prob_t buildAndCompute(const Clonotype &clonotype, ErrorMode error_mode, SequenceType seq_type=NUCLEOTIDE, MAAGComputeProbAction action=SUM_PROBABILITY) const
Compute generation probabilities without building the full information about MAAGs.
Definition: maagbuilder.h:250
Class for representing VJ / VD / DJ insertions models - either a mono-nucleotide or a di-nucleotide m...
Definition: insertionmodel.h:27
Definition: clonotype.h:46
SequenceType _seq_type
Definition: maag.h:460
MAAGBuilder()
Private default constructor.
Definition: maagbuilder.h:474
Definition: genesegment.h:265
void buildDJinsertions(const Clonotype &clonotype, ProbMMC &probs, EventIndMMC &events, const vector< seq_len_t > &seq_poses, MetadataMode metadata_mode, ErrorMode error_mode) const
Build probability and events matrices for Diversity-Joining gene segments insertions.
Definition: maagbuilder.h:924
Multi-Alignment Assembly Graph - basic class for representing all possible generation scenarios of a ...
Definition: maag.h:46
seq_len_t numDivAlignments(seg_index_t index) const
Get the number of D gene alignments for the given D gene.
Definition: vdj_alignment.h:209
MAAG()
Default constructor.
Definition: maag.h:56
void updateEventProbabilities(MAAG *maag) const
Replace event probabilities in the given MAAGs if they have stored event indices. ...
Definition: maagbuilder.h:305
uint8_t node_ind_t
Node index type.
Definition: multimatrixchain.h:91
Class for storing parameters of assembling statistical model. Note: event with index 0 (zero) is "nul...
Definition: modelparametervector.h:68
void buildInsertions(const string &sequence, ProbMMC &probs, EventIndMMC &events, const vector< seq_len_t > &seq_poses, ProbMMC::node_ind_t ins_node_index, event_ind_t null_insertion, seq_len_t max_size, MetadataMode metadata_mode, bool error_mode, seq_len_t left_vertices_start, seq_len_t left_vertices_end, seq_len_t right_vertices_start, seq_len_t right_vertices_end, const AbstractInsertionModel &mc, bool reversed=false) const
General function for building insertions.
Definition: maagbuilder.h:979
virtual prob_t nucProbability(const std::string &sequence, char first_char=NULL_CHAR, bool with_errors=false) const =0
Probability of the given nucleotide sequence.
seg_index_t nVar() const
Get the number of alignments for the specific gene.
Definition: vdj_alignment.h:100
void buildDiversity(const Clonotype &clonotype, ProbMMC &probs, EventIndMMC &events, ErrMMC &errors, vector< seq_len_t > &seq_poses, MetadataMode metadata_mode, ErrorMode error_mode) const
Build probability and events matrices for Diversity gene segments.
Definition: maagbuilder.h:693
seq_len_t getVarGeneStart(seg_index_t vgene) const
Get alignments for the specific gene.
Definition: vdj_alignment.h:124
void buildVJinsertions(const Clonotype &clonotype, ProbMMC &probs, EventIndMMC &events, const vector< seq_len_t > &seq_poses, MetadataMode metadata_mode, ErrorMode error_mode) const
Build probability and events matrices for Variable-Joining gene segments insertions.
Definition: maagbuilder.h:834
seg_index_t getVar(size_t index) const
Get the index of the aligned gene segment for the specific gene.
Definition: vdj_alignment.h:112
Class for storing lists of matrices, where one node in the list (called "chain") could contain more t...
Definition: multimatrixchain.h:39
MAAGBuilder(const ModelParameterVector ¶m_vec, const VDJRecombinationGenes &genes)
Constructor for the builder from given vector with event probabilities and gene segments.
Definition: maagbuilder.h:66
MAAG build(const Clonotype &clonotype, MetadataMode metadata_mode, ErrorMode error_mode, SequenceType seq_type=NUCLEOTIDE) const
Build MAAGs from the given clonotypes.
Definition: maagbuilder.h:95
void buildVariable(const Clonotype &clonotype, ProbMMC &probs, EventIndMMC &events, ErrMMC &errors, vector< seq_len_t > &seq_poses, MetadataMode metadata_mode, ErrorMode error_mode) const
Build probability and events matrices for Variable gene segments.
Definition: maagbuilder.h:486
Definition: repertoire.h:51
void buildJoining(const Clonotype &clonotype, ProbMMC &probs, EventIndMMC &events, ErrMMC &errors, vector< seq_len_t > &seq_poses, MetadataMode metadata_mode, ErrorMode error_mode) const
Build probability and events matrices for Joining gene segments.
Definition: maagbuilder.h:588
pErrMMC _errors
Definition: maag.h:456
Definition: insertionmodel.h:321
void buildVDinsertions(const Clonotype &clonotype, ProbMMC &probs, EventIndMMC &events, const vector< seq_len_t > &seq_poses, MetadataMode metadata_mode, ErrorMode error_mode) const
Build probability and events matrices for Variable-Diversity gene segments insertions.
Definition: maagbuilder.h:879
unique_ptr< seq_len_t[]> _seq_poses
Definition: maag.h:457
Definition: insertionmodel.h:183
const sequence_t & sequence() const
Get the sequence of this Clonotype.
Definition: clonotype.h:90