5 #ifndef _YMIR_CLONOTYPEASSEMBLER_H_ 6 #define _YMIR_CLONOTYPEASSEMBLER_H_ 11 #include "insertionmodel.h" 12 #include "clonotype_builder.h" 25 : _param_vec(param_vec), _genes(genes)
36 _param_vec = param_vec;
43 Cloneset generate(
size_t count = 1,
bool verbose =
true)
const {
44 std::vector<Clonotype> vec;
47 unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
48 std::default_random_engine rg(seed);
50 if (_param_vec.recombination() == VJ_RECOMB) {
52 std::cout <<
"Generating " << (size_t) count <<
" sequences." << std::endl;
55 for (
size_t clonotype_i = 0; clonotype_i < count; ++clonotype_i) {
56 vec.push_back(this->generate_vj(rg));
58 if (verbose && (clonotype_i + 1) % 50000 == 0) {
59 cout <<
"Generated " << (size_t) (clonotype_i + 1) <<
"/" << (size_t) count <<
" sequences." << endl;
64 cout <<
"Generated " << (size_t) count <<
"/" << (
size_t) count <<
" sequences." << endl;
67 else if (_param_vec.recombination() == VDJ_RECOMB) {
69 std::cout <<
"Generating " << (size_t) count <<
" sequences." << std::endl;
72 for (
size_t clonotype_i = 0; clonotype_i < count; ++clonotype_i) {
73 vec.push_back(this->generate_vdj(rg));
75 if (verbose && (clonotype_i + 1) % 50000 == 0) {
76 cout <<
"Generated " << (size_t) (clonotype_i + 1) <<
"/" << (size_t) count <<
" sequences." << endl;
81 cout <<
"Generated " << (size_t) count <<
"/" << (
size_t) count <<
" sequences." << endl;
84 std::cout <<
"Unrecognised recombination type of the input model." << std::endl;
96 Clonotype generate_vj(std::default_random_engine &rg)
const {
100 std::discrete_distribution<event_ind_t> vj_genes(_param_vec.get_iterator(_param_vec.
event_index(VJ_VAR_JOI_GEN, 0, 0)),
101 _param_vec.get_iterator(_param_vec.
event_index(VJ_VAR_JOI_GEN, 0, 0)
102 + _param_vec.eventClassSize(VJ_VAR_JOI_GEN)));
103 event_ind_t v_j = vj_genes(rg);
105 seg_index_t vgene = v_j / _param_vec.n_columns(VJ_VAR_JOI_GEN) + 1;
107 seg_index_t jgene = v_j - (v_j / _param_vec.n_columns(VJ_VAR_JOI_GEN)) * (_param_vec.n_columns(VJ_VAR_JOI_GEN)) + 1;
109 seq_len_t v_del_num = std::discrete_distribution<event_ind_t>(_param_vec.get_iterator(_param_vec.
event_index(VJ_VAR_DEL, vgene - 1, 0)),
110 _param_vec.get_iterator(_param_vec.
event_index(VJ_VAR_DEL, vgene - 1, 0)
111 + _param_vec.eventFamilySize(VJ_VAR_DEL, vgene - 1)))(rg);
113 builder.
addVarAlignment(vgene, 1, 1, _genes.V()[vgene].sequence.size() - v_del_num);
115 seq_len_t ins_len = std::discrete_distribution<seq_len_t>(_param_vec.get_iterator(_param_vec.
event_index(VJ_VAR_JOI_INS_LEN, 0, 0)),
116 _param_vec.get_iterator(_param_vec.
event_index(VJ_VAR_JOI_INS_LEN, 0, 0)
117 + _param_vec.eventClassSize(VJ_VAR_JOI_INS_LEN)))(rg);
119 seq_len_t j_del_num = std::discrete_distribution<event_ind_t>(_param_vec.get_iterator(_param_vec.
event_index(VJ_JOI_DEL, jgene - 1, 0)),
120 _param_vec.get_iterator(_param_vec.
event_index(VJ_JOI_DEL, jgene - 1, 0)
121 + _param_vec.eventFamilySize(VJ_JOI_DEL, jgene - 1)))(rg);
123 builder.addJoiAlignment(jgene, 1, _genes.V()[vgene].sequence.size() - v_del_num + ins_len + 1, _genes.J()[jgene].sequence.size() - j_del_num);
125 builder.setSequence(_genes.V()[vgene].sequence.substr(0, _genes.V()[vgene].sequence.size() - v_del_num)
126 + mc_vj.generate(ins_len, rg)
127 + _genes.J()[jgene].sequence.substr(j_del_num));
128 builder.setNucleotideSeq();
133 Clonotype generate_vdj(std::default_random_engine &rg)
const {
138 seq_len_t ins_len_vd = std::discrete_distribution<seq_len_t>(_param_vec.get_iterator(_param_vec.
event_index(VDJ_VAR_DIV_INS_LEN, 0, 0)),
139 _param_vec.get_iterator(_param_vec.
event_index(VDJ_VAR_DIV_INS_LEN, 0, 0)
140 + _param_vec.eventClassSize(VDJ_VAR_DIV_INS_LEN)))(rg);
141 seq_len_t ins_len_dj = std::discrete_distribution<seq_len_t>(_param_vec.get_iterator(_param_vec.
event_index(VDJ_DIV_JOI_INS_LEN, 0, 0)),
142 _param_vec.get_iterator(_param_vec.
event_index(VDJ_DIV_JOI_INS_LEN, 0, 0)
143 + _param_vec.eventClassSize(VDJ_DIV_JOI_INS_LEN)))(rg);
145 seg_index_t vgene = std::discrete_distribution<event_ind_t>(_param_vec.get_iterator(_param_vec.
event_index(VDJ_VAR_GEN, 0, 0)),
146 _param_vec.get_iterator(_param_vec.
event_index(VDJ_VAR_GEN, 0, 0)
147 + _param_vec.eventClassSize(VDJ_VAR_GEN)))(rg) + 1;
149 std::discrete_distribution<event_ind_t> jd_genes(_param_vec.get_iterator(_param_vec.
event_index(VDJ_JOI_DIV_GEN, 0, 0)),
150 _param_vec.get_iterator(_param_vec.
event_index(VDJ_JOI_DIV_GEN, 0, 0)
151 + _param_vec.eventClassSize(VDJ_JOI_DIV_GEN)));
152 event_ind_t j_d = jd_genes(rg);
154 seg_index_t jgene = j_d / _param_vec.n_columns(VDJ_JOI_DIV_GEN) + 1;
156 seg_index_t dgene = j_d - (j_d / _param_vec.n_columns(VDJ_JOI_DIV_GEN)) * _param_vec.n_columns(VDJ_JOI_DIV_GEN) + 1;
158 seq_len_t v_del_num = std::discrete_distribution<event_ind_t>(_param_vec.get_iterator(_param_vec.
event_index(VDJ_VAR_DEL, vgene - 1, 0)),
159 _param_vec.get_iterator(_param_vec.
event_index(VDJ_VAR_DEL, vgene - 1, 0)
160 + _param_vec.eventFamilySize(VDJ_VAR_DEL, vgene - 1)))(rg);
161 std::string vgene_del = _genes.V()[vgene].sequence.substr(0, _genes.V()[vgene].sequence.size() - v_del_num);
165 event_ind_t d_del_index = std::discrete_distribution<event_ind_t>(_param_vec.get_iterator(_param_vec.
event_index(VDJ_DIV_DEL, dgene - 1, 0)),
166 _param_vec.get_iterator(_param_vec.
event_index(VDJ_DIV_DEL, dgene - 1, 0)
167 + _param_vec.eventFamilySize(VDJ_DIV_DEL, dgene - 1)))(rg);
168 seq_len_t d5_del_num = d_del_index / _param_vec.n_columns(VDJ_DIV_DEL, 0);
169 seq_len_t d3_del_num = d_del_index - (d_del_index / _param_vec.n_columns(VDJ_DIV_DEL, 0)) * _param_vec.n_columns(VDJ_DIV_DEL, 0);
170 std::string dgene_del = _genes.D()[dgene].sequence.substr(d5_del_num, _genes.D()[dgene].sequence.size() - d3_del_num - d5_del_num);
172 builder.addDivAlignment(dgene,
174 vgene_del.size() + ins_len_vd + 1,
177 seq_len_t j_del_num = std::discrete_distribution<event_ind_t>(_param_vec.get_iterator(_param_vec.
event_index(VDJ_JOI_DEL, jgene - 1, 0)),
178 _param_vec.get_iterator(_param_vec.
event_index(VDJ_JOI_DEL, jgene - 1, 0)
179 + _param_vec.eventFamilySize(VDJ_JOI_DEL, jgene - 1)))(rg);
180 std::string jgene_del = _genes.J()[jgene].sequence.substr(j_del_num);
182 builder.addJoiAlignment(jgene, 1, vgene_del.size() + ins_len_vd + ins_len_dj + dgene_del.size() + 1, jgene_del.size());
184 char last_v_char = vgene_del.size() ? vgene_del[vgene_del.size() - 1] : NULL_CHAR;
185 char first_j_char = jgene_del.size() ? jgene_del[0]: NULL_CHAR;
187 builder.setSequence(vgene_del
188 + mc_vd.generate(ins_len_vd, rg, last_v_char)
190 + mc_dj.generate(ins_len_dj, rg, first_j_char,
true)
193 builder.setNucleotideSeq();
201 #endif //_YMIR_CLONOTYPEASSEMBLER_H_ VDJAlignmentBuilder & addVarAlignment(seg_index_t vseg, seq_len_t vstart, seq_len_t seqstart, seq_len_t alignment_len)
Add singular alignments to the builder.
Definition: vdj_alignment_builder.h:86
event_ind_t event_index(EventClass event_class, event_ind_t event_family, event_ind_t event_index) const
Definition: modelparametervector.h:261
Definition: clonotype.h:46
Definition: genesegment.h:265
Class for storing parameters of assembling statistical model. Note: event with index 0 (zero) is "nul...
Definition: modelparametervector.h:68
Definition: clonotype_builder.h:37
Definition: repertoire.h:171
Clonotype buildClonotype()
Build clone alignment structure with stored information.
Definition: clonotype_builder.h:55
Definition: insertionmodel.h:321
Definition: insertionmodel.h:183
Definition: clonotypeassembler.h:17