5 #ifndef YMIR_MODEL_PARSER_H 6 #define YMIR_MODEL_PARSER_H 10 #include "genesegment.h" 19 ModelParser(
const std::string &model_path, Json::Value config, ModelBehaviour behav)
20 : _model_path(model_path),
33 return this->parseGeneSegments()
34 && this->makeEventProbabilitiesVector();
38 virtual bool parseGeneSegments() = 0;
41 bool makeEventProbabilitiesVector() {
42 if (_behaviour == EMPTY) {
43 return this->createEventProbabilitiesFromScratch();
45 return this->parseEventProbabilitiesFromFiles();
50 void swap_genes(unique_ptr<VDJRecombinationGenes> &ptr) {
54 void swap_parameters(unique_ptr<ModelParameterVector> &ptr) {
61 std::string _model_path;
62 ModelBehaviour _behaviour;
63 unique_ptr<VDJRecombinationGenes> _genes;
64 unique_ptr<ModelParameterVector> _param_vec;
// Builds the model parameter vector without reading probability files:
// the subclass fills a set of containers, those containers are converted
// into the parameter vector, the event families are normalised, and a
// std::runtime_error is thrown if the conversion reported failure.
68 bool createEventProbabilitiesFromScratch() {
69 vector<AbstractTDContainer*> containers;
// Ten slots, all null until the subclass stores a container in them.
70 containers.resize(10,
nullptr);
// Subclass hook (pure virtual in this class).
72 this->createContainers(containers);
// The one-argument overload also deletes the containers before returning.
74 bool is_ok = this->makeModelParameterVector(containers);
// NOTE(review): in this listing the normalisation call appears before the
// !is_ok check below; lines may be missing here. Confirm that _param_vec
// is not dereferenced when makeModelParameterVector() failed to set it.
77 _param_vec->normaliseEventFamilies();
// Failure here is treated as a programming error rather than a bad input.
79 if (!is_ok) {
throw(runtime_error(
"WRONG EMPTY VECTOR CREATING SUBROUTINE!!!")); }
85 virtual void createContainers(vector<AbstractTDContainer*> &containers) = 0;
// Reads every entry of the "probtables" object in the model configuration,
// loads the table each entry points to with read_textdata(), passes it to
// the subclass through parseDataContainer(), and finally converts the
// collected containers into the model parameter vector.
88 bool parseEventProbabilitiesFromFiles() {
// Falls back to the string "no-prob" when the config has no "probtables".
// NOTE(review): presumably a string value reports size() == 0, so the loop
// below is skipped in that case - confirm against the JsonCpp docs.
89 Json::Value pt = _config.get(
"probtables",
"no-prob");
92 string element =
"", err_message =
"";
93 vector<AbstractTDContainer*> containers;
// Ten slots, all null; parseDataContainer() decides which slot a table fills.
94 containers.resize(10,
nullptr);
97 for (Json::ArrayIndex i = 0; i < pt.size(); ++i) {
// element is the i-th key of "probtables"; note that getMemberNames() is
// called again on every iteration.
98 element = pt.getMemberNames()[i];
// Per-table options read from the config: "file" (appended to _model_path),
// "type", "skip.first.column" (defaults to true) and "laplace" (defaults
// to .0). The remaining arguments of this call are not visible here.
99 container = read_textdata(_model_path + pt[element][
"file"].asString(),
100 pt[element][
"type"].asString(),
101 pt[element].
get(
"skip.first.column",
true).asBool(),
102 pt[element].
get(
"laplace", .0).asDouble(),
// Subclass hook: validates the table and stores it in `containers`.
105 this->parseDataContainer(element, container, containers);
108 return this->makeModelParameterVector(containers);
// NOTE(review): the condition guarding this message is not visible in this
// listing; from the text it is presumably the "no probtables" path.
111 std::cout <<
"[ERROR] No information about probability events in the model .json file found." << std::endl;
116 virtual void parseDataContainer(
const string &element,
AbstractTDContainer *container, vector<AbstractTDContainer*> &containers) = 0;
119 bool makeModelParameterVector(vector<AbstractTDContainer*> &containers) {
121 vector<prob_t> event_probs;
122 vector<event_ind_t> event_lengths;
123 vector<event_ind_t> event_classes;
124 vector<seq_len_t> event_col_num;
125 vector<prob_t> laplace;
126 vector<seq_len_t> min_D_len_vec;
128 bool is_ok = this->makeModelParameterVector(containers, event_probs, event_lengths, event_classes, event_col_num, laplace, min_D_len_vec);
131 for (
size_t i = 0; i < containers.size(); ++i) {
132 if (containers[i]) {
delete containers[i]; }
139 virtual bool makeModelParameterVector(vector<AbstractTDContainer*> &containers,
140 vector<prob_t> &event_probs,
141 vector<event_ind_t> &event_lengths,
142 vector<event_ind_t> &event_classes,
143 vector<seq_len_t> &event_col_num,
144 vector<prob_t> &laplace,
145 vector<seq_len_t> &min_D_len_vec) = 0;
151 bool findGenes(
const vector<string> &names,
const GeneSegmentAlphabet &gsa,
string &err_message)
const {
152 unordered_set<string> nameset;
154 for (
size_t i = 0; i < names.size(); ++i) {
155 if (gsa[names[i]].index == 0) {
156 err_message =
"ERROR: can't find '" + names[i] +
"' in gene segments.";
159 nameset.insert(names[i]);
162 for (
size_t i = 1; i <= gsa.max(); ++i) {
163 if (nameset.count(gsa[i].allele) == 0) {
164 err_message =
"ERROR: can't find '" + gsa[i].allele +
"' in this file.";
176 vector<seg_index_t> arrangeNames(
const vector<string> &names,
const GeneSegmentAlphabet &gsa)
const {
177 vector<seg_index_t> res;
178 res.resize(names.size(), 0);
180 for (
size_t i = 0; i < names.size(); ++i) { res[i] = gsa[names[i]].index; }
188 vector<prob_t> &event_probs,
189 vector<event_ind_t> &event_lengths,
190 vector<event_ind_t> &event_classes,
191 vector<seq_len_t> &event_col_num,
192 vector<prob_t> &laplace)
const {
193 vector<seg_index_t> name_order = this->arrangeNames(container->row_names(), gsa);
194 vector<prob_t> prob_data;
195 prob_data.resize(container->data(0).size(), 0);
196 for (
size_t i = 0; i < name_order.size(); ++i) {
197 prob_data[name_order[i] - 1] = container->data(0)[i];
199 event_probs.insert(event_probs.end(),
202 event_lengths.push_back(prob_data.size());
203 event_col_num.push_back(0);
204 laplace.push_back(container->laplace());
206 event_classes.push_back(0);
213 vector<prob_t> &event_probs,
214 vector<event_ind_t> &event_lengths,
215 vector<event_ind_t> &event_classes,
216 vector<seq_len_t> &event_col_num,
217 vector<prob_t> &laplace,
218 seg_index_t prev_class_size)
const {
219 vector<seg_index_t> name_order_row = this->arrangeNames(container->row_names(), gsa_row);
220 vector<seg_index_t> name_order_column = this->arrangeNames(container->column_names(), gsa_column);
221 vector<prob_t> prob_data = container->data(0), sorted_prob_data;
222 sorted_prob_data.resize(prob_data.size(), 0);
223 for (
size_t i = 0; i < name_order_row.size(); ++i) {
224 for (
size_t j = 0; j < name_order_column.size(); ++j) {
225 sorted_prob_data[(name_order_row[i] - 1) * container->n_columns() + (name_order_column[j] - 1)] = prob_data[i * container->n_columns() + j];
228 event_probs.insert(event_probs.end(),
229 sorted_prob_data.begin(),
230 sorted_prob_data.end());
231 event_lengths.push_back(sorted_prob_data.size());
232 event_col_num.push_back(container->n_columns());
233 laplace.push_back(container->laplace());
235 if (prev_class_size) {
236 event_classes.push_back(event_classes[event_classes.size() - 1] + prev_class_size);
238 event_classes.push_back(0);
245 vector<prob_t> &event_probs,
246 vector<event_ind_t> &event_lengths,
247 vector<event_ind_t> &event_classes,
248 vector<seq_len_t> &event_col_num,
249 vector<prob_t> &laplace,
250 seg_index_t prev_class_size)
const {
251 vector<seg_index_t> name_order = this->arrangeNames(container->column_names(), gsa);
252 vector<prob_t> prob_data;
253 for (
size_t i = 0; i < name_order.size(); ++i) {
256 for (; i+1 != name_order[j] ; ++j) {}
257 prob_data = container->data(j);
260 if (prob_data.size() < gsa[name_order[j]].sequence.size() + 1) {
261 prob_data.resize(gsa[name_order[j]].sequence.size() + 1, 0);
265 if (prob_data.size() > gsa[name_order[j]].sequence.size() + 1) {
266 prob_data.resize(gsa[name_order[j]].sequence.size() + 1);
269 event_probs.insert(event_probs.end(),
272 event_lengths.push_back(prob_data.size());
273 event_col_num.push_back(0);
274 laplace.push_back(container->laplace());
276 event_classes.push_back(event_classes[event_classes.size() - 1] + prev_class_size);
282 vector<prob_t> &event_probs,
283 vector<event_ind_t> &event_lengths,
284 vector<event_ind_t> &event_classes,
285 vector<seq_len_t> &event_col_num,
286 vector<prob_t> &laplace,
287 seg_index_t prev_class_size)
const {
288 vector<seg_index_t> name_order = this->arrangeNames(container->row_names(), gsa);
289 vector<prob_t> prob_data;
290 for (
size_t i = 0; i < name_order.size(); ++i) {
293 for (; i+1 != name_order[j] ; ++j) {}
294 prob_data = container->data(j);
296 if (prob_data.size() > gsa[name_order[j]].sequence.size() + 1) {
297 vector<prob_t> new_prob_data((gsa[name_order[j]].sequence.size() + 1) * (gsa[name_order[j]].sequence.size() + 1));
298 for (
auto row_i = 0; row_i < gsa[name_order[j]].sequence.size() + 1; ++row_i) {
299 for (
auto col_i = 0; col_i < gsa[name_order[j]].sequence.size() + 1; ++col_i) {
300 new_prob_data[row_i * (gsa[name_order[j]].sequence.size() + 1) + col_i] = prob_data[row_i * (gsa[name_order[j]].sequence.size() + 1) + col_i];
303 prob_data = new_prob_data;
306 event_probs.insert(event_probs.end(),
309 event_lengths.push_back(prob_data.size());
310 event_col_num.push_back(container->metadata(j));
311 laplace.push_back(container->laplace());
313 event_classes.push_back(event_classes[event_classes.size() - 1] + prev_class_size);
318 vector<prob_t> &event_probs,
319 vector<event_ind_t> &event_lengths,
320 vector<event_ind_t> &event_classes,
321 vector<seq_len_t> &event_col_num,
322 vector<prob_t> &laplace,
323 seg_index_t prev_class_size,
324 seq_len_t max_ins_len = 0)
const {
325 vector<prob_t> prob_data;
326 for (
size_t i = 0; i < container->n_columns(); ++i) {
327 prob_data = container->data(i);
328 if (max_ins_len) { prob_data.resize(max_ins_len); }
329 event_probs.insert(event_probs.end(),
332 event_lengths.push_back(prob_data.size());
333 event_col_num.push_back(0);
334 laplace.push_back(container->laplace());
335 event_classes.push_back(event_classes[event_classes.size() - 1] + prev_class_size);
346 VJModelParser(
const std::string &model_path, Json::Value config, ModelBehaviour behav)
// VJ variant: reads the "variable" and "joining" entries under "segments"
// in the model configuration, resolves their files relative to
// _model_path, and appends palindromic nucleotides to both segment sets.
// NOTE(review): several statements of this function (early exits, the
// construction of _genes, the return) are not visible in this listing.
352 bool parseGeneSegments() {
353 cout <<
"\tV gene seg.: ";
// An absent or empty "variable" entry is reported as an error.
354 if (_config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).size() == 0) {
355 cout <<
"ERROR: no gene segments file in the model's .json." << endl;
359 cout <<
"OK" << endl;
// Path of the V segments file: _model_path + segments.variable.file.
361 string v_path = _model_path + _config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).get(
"file",
"").asString();
363 cout <<
"\tJ gene seg.: ";
// Same check for the "joining" entry.
364 if (_config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).size() == 0) {
365 cout <<
"ERROR: no gene segments file in the model's .json." << endl;
369 cout <<
"OK" << endl;
// Path of the J segments file: _model_path + segments.joining.file.
371 string j_path = _model_path + _config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).get(
"file",
"").asString();
// Palindromic nucleotide counts come from "P.nuc.3'" and "P.nuc.5'",
// each defaulting to 0 when the key is absent.
378 _genes->appendPalindromicNucleotides(VARIABLE,
379 _config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).get(
"P.nuc.3'", 0).asUInt(),
380 _config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).get(
"P.nuc.5'", 0).asUInt());
381 _genes->appendPalindromicNucleotides(JOINING,
382 _config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).get(
"P.nuc.3'", 0).asUInt(),
383 _config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).get(
"P.nuc.5'", 0).asUInt());
391 virtual void createContainers(vector<AbstractTDContainer*> &containers) {
395 container =
new TDMatrix(
true, _config.get(
"probtables", Json::Value()).get(
"v.j", Json::Value()).get(
"laplace", .0).asDouble());
396 for (
auto i = 1; i <= _genes->V().max(); ++i){
397 container->addRowName(_genes->V()[i].allele);
399 for (
auto i = 1; i <= _genes->J().max(); ++i){
400 container->addColumnName(_genes->J()[i].allele);
402 container->addDataVector(vector<prob_t>(_genes->V().max() * _genes->J().max()));
403 containers[VJ_VAR_JOI_GEN] = container;
406 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"v.del", Json::Value()).get(
"laplace", .0).asDouble());
407 for (
auto i = 1; i <= _genes->V().max(); ++i) {
408 container->addColumnName(_genes->V()[i].allele);
409 container->addDataVector(vector<prob_t>(_genes->V()[i].sequence.size() + 1));
411 containers[VJ_VAR_DEL] = container;
414 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"j.del", Json::Value()).get(
"laplace", .0).asDouble());
415 for (
auto i = 1; i <= _genes->J().max(); ++i){
416 container->addColumnName(_genes->J()[i].allele);
417 container->addDataVector(vector<prob_t>(_genes->J()[i].sequence.size() + 1));
419 containers[VJ_JOI_DEL] = container;
422 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"laplace", .0).asDouble());
423 container->addColumnName(
"VJ ins len");
424 container->addDataVector(vector<prob_t>(_config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"max.len", DEFAULT_MAX_INS_LENGTH).asUInt64() + 1));
425 containers[VJ_VAR_JOI_INS_LEN] = container;
428 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"ins.nucl", Json::Value()).get(
"laplace", .0).asDouble());
429 container->addColumnName(
"VJ nucs");
430 container->addDataVector(vector<prob_t>(4));
431 containers[VJ_VAR_JOI_INS_NUC] = container;
435 virtual void parseDataContainer(
const string &element,
AbstractTDContainer *container, vector<AbstractTDContainer*> &containers) {
436 std::string err_message =
"NOT FOUND";
437 if (element ==
"v.j") {
439 && container->file_exists()
440 && this->findGenes(container->column_names(), _genes->J(), err_message)
441 && this->findGenes(container->row_names(), _genes->V(), err_message))
443 containers[VJ_VAR_JOI_GEN] = container;
447 cout <<
"\tV-J gene pairs: " << err_message << endl;
449 else if (element ==
"v.del") {
451 && container->file_exists()
452 && this->findGenes(container->column_names(), _genes->V(), err_message))
454 containers[VJ_VAR_DEL] = container;
458 cout <<
"\tV delet. num.: " << err_message << endl;
460 else if (element ==
"j.del") {
462 && container->file_exists()
463 && this->findGenes(container->column_names(), _genes->J(), err_message))
465 containers[VJ_JOI_DEL] = container;
469 cout <<
"\tJ delet. num.: " << err_message << endl;
471 else if (element ==
"ins.len") {
472 if (container && container->file_exists()) {
473 if (container->n_columns() != 1) {
475 ss <<
"ERROR: wrong number of columns (expected: 1, got: " << (int) container->n_columns() <<
")";
476 err_message = ss.str();
478 containers[VJ_VAR_JOI_INS_LEN] = container;
483 cout <<
"\tVJ ins. len.: " << err_message << endl;
485 else if (element ==
"ins.nucl") {
486 if (container && container->file_exists()) {
487 if (container->n_rows() != 4 || container->n_columns() != 1) {
489 ss <<
"ERROR: wrong number of columns and rows (expected: 4 X 1, got: " << (int) container->n_rows() <<
" X " << (int) container->n_columns() <<
")";
490 err_message = ss.str();
492 containers[VJ_VAR_JOI_INS_NUC] = container;
497 cout <<
"\tVJ ins. nuc.: " << err_message << endl;
499 else { cerr <<
"Unrecognised element in \'probtables\'" <<
":\n\t" << element << endl; }
503 virtual bool makeModelParameterVector(vector<AbstractTDContainer*> &containers,
504 vector<prob_t> &event_probs,
505 vector<event_ind_t> &event_lengths,
506 vector<event_ind_t> &event_classes,
507 vector<seq_len_t> &event_col_num,
508 vector<prob_t> &laplace,
509 vector<seq_len_t> &min_D_len_vec)
513 if (containers[VJ_VAR_JOI_GEN]
514 && containers[VJ_VAR_DEL]
515 && containers[VJ_JOI_DEL]
516 && containers[VJ_VAR_JOI_INS_LEN]
517 && containers[VJ_VAR_JOI_INS_NUC]) {
519 this->addGenes(containers[VJ_VAR_JOI_GEN],
529 this->addDels(containers[VJ_VAR_DEL],
538 this->addDels(containers[VJ_JOI_DEL],
545 containers[VJ_VAR_DEL]->n_columns());
547 this->addIns(containers[VJ_VAR_JOI_INS_LEN],
553 containers[VJ_JOI_DEL]->n_columns(),
554 _config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"max.len", DEFAULT_MAX_INS_LENGTH).asUInt64() + 1);
556 this->addIns(containers[VJ_VAR_JOI_INS_NUC],
564 _param_vec.reset(
new ModelParameterVector(VJ_RECOMB, event_probs, event_lengths, event_classes, event_col_num, laplace, _config.get(
"errors", 0).asDouble()));
578 VDJModelParser(
const std::string &model_path, Json::Value config, ModelBehaviour behav)
// VDJ variant: reads the "variable", "joining" and "diversity" entries
// under "segments" in the model configuration, builds the
// VDJRecombinationGenes object from the three files, records the minimal
// D gene length, appends palindromic nucleotides, and returns whether all
// three segment sets were loaded (vok && jok && dok).
// NOTE(review): some statements (e.g. what follows each error message) are
// not visible in this listing.
584 bool parseGeneSegments() {
585 cout <<
"\tV gene seg.: ";
// An absent or empty "variable" entry is reported as an error.
586 if (_config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).size() == 0) {
587 cout <<
"ERROR: no gene segments file in the model's .json." << endl;
591 cout <<
"OK" << endl;
// Path of the V segments file: _model_path + segments.variable.file.
593 string v_path = _model_path + _config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).get(
"file",
"").asString();
595 cout <<
"\tJ gene seg.: ";
// Same check for the "joining" entry.
596 if (_config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).size() == 0) {
597 cout <<
"ERROR: no gene segments file in the model's .json." << endl;
601 cout <<
"OK" << endl;
// Path of the J segments file: _model_path + segments.joining.file.
603 string j_path = _model_path + _config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).get(
"file",
"").asString();
// NOTE(review): only dok receives an initialiser in this declaration; vok
// and jok start out indeterminate. They are handed by address to the
// VDJRecombinationGenes constructor below and read in the
// "if (vok && jok && dok)" test and the final return. Confirm that every
// path reaching those reads has assigned them first (in particular the
// path where the "diversity" entry is missing).
605 bool vok, jok, dok =
true;
607 cout <<
"\tD gene seg.: ";
// Same check for the "diversity" entry.
608 if (_config.get(
"segments", Json::Value(
"")).get(
"diversity", Json::Value(
"")).size() == 0) {
609 cout <<
"ERROR: no gene segments file in the model's .json." << endl;
// Path of the D segments file: _model_path + segments.diversity.file.
613 string d_path = _model_path + _config.get(
"segments", Json::Value(
"")).get(
"diversity", Json::Value(
"")).get(
"file",
"").asString();
// Loads all three segment sets; the three flags are passed as out-pointers.
614 _genes.reset(
new VDJRecombinationGenes(
"VDJ.V", v_path,
"VDJ.J", j_path,
"VDJ.D", d_path, &vok, &jok, &dok));
// Minimal D gene length: segments.diversity."min.len", falling back to
// DEFAULT_DIV_GENE_MIN_LEN.
615 _min_D_len = _config.get(
"segments", Json::Value(
"")).get(
"diversity", Json::Value(
"")).get(
"min.len", DEFAULT_DIV_GENE_MIN_LEN).asUInt();
616 cout <<
"OK" << endl;
// Palindromic nucleotides are appended only when all three flags are set.
// Counts come from "P.nuc.3'" and "P.nuc.5'", each defaulting to 0.
619 if (vok && jok && dok) {
620 _genes->appendPalindromicNucleotides(VARIABLE,
621 _config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).get(
"P.nuc.3'", 0).asUInt(),
622 _config.get(
"segments", Json::Value(
"")).get(
"variable", Json::Value(
"")).get(
"P.nuc.5'", 0).asUInt());
623 _genes->appendPalindromicNucleotides(JOINING,
624 _config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).get(
"P.nuc.3'", 0).asUInt(),
625 _config.get(
"segments", Json::Value(
"")).get(
"joining", Json::Value(
"")).get(
"P.nuc.5'", 0).asUInt());
// The D segments get the same treatment when _genes reports is_vdj().
626 if (_genes->is_vdj()) {
627 _genes->appendPalindromicNucleotides(DIVERSITY,
628 _config.get(
"segments", Json::Value(
"")).get(
"diversity", Json::Value(
"")).get(
"P.nuc.3'", 0).asUInt(),
629 _config.get(
"segments", Json::Value(
"")).get(
"diversity", Json::Value(
"")).get(
"P.nuc.5'", 0).asUInt());
633 return vok && jok && dok;
638 virtual void createContainers(vector<AbstractTDContainer*> &containers) {
642 container =
new TDVector(
true, _config.get(
"probtables", Json::Value()).get(
"v", Json::Value()).get(
"laplace", .0).asDouble());
643 container->addDataVector(vector<prob_t>());
644 for (
auto i = 1; i <= _genes->V().max(); ++i) {
645 container->addRowName(_genes->V()[i].allele);
646 container->addDataValue(1);
648 containers[VDJ_VAR_GEN] = container;
649 cout <<
"\tV genes prob.: " <<
"CREATED" << endl;
652 container =
new TDMatrix(
true, _config.get(
"probtables", Json::Value()).get(
"j.d", Json::Value()).get(
"laplace", .0).asDouble());
653 for (
auto i = 1; i <= _genes->J().max(); ++i) {
654 container->addRowName(_genes->J()[i].allele);
656 for (
auto i = 1; i <= _genes->D().max(); ++i) {
657 container->addColumnName(_genes->D()[i].allele);
659 container->addDataVector(vector<prob_t>(_genes->D().max() * _genes->J().max()));
660 containers[VDJ_JOI_DIV_GEN] = container;
661 cout <<
"\tJ-D gene pairs: " <<
"CREATED" << endl;
664 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"v.del", Json::Value()).get(
"laplace", .0).asDouble());
665 for (
auto i = 1; i <= _genes->V().max(); ++i) {
666 container->addColumnName(_genes->V()[i].allele);
667 container->addDataVector(vector<prob_t>(_genes->V()[i].sequence.size() + 1));
669 containers[VDJ_VAR_DEL] = container;
670 cout <<
"\tV delet. num.: " <<
"CREATED" << endl;;
673 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"j.del", Json::Value()).get(
"laplace", .0).asDouble());
674 for (
auto i = 1; i <= _genes->J().max(); ++i) {
675 container->addColumnName(_genes->J()[i].allele);
676 container->addDataVector(vector<prob_t>(_genes->J()[i].sequence.size() + 1));
678 containers[VDJ_JOI_DEL] = container;
679 cout <<
"\tJ delet. num.: " <<
"CREATED" << endl;
682 container =
new TDMatrixList(
true, _config.get(
"probtables", Json::Value()).get(
"d.del", Json::Value()).get(
"laplace", .0).asDouble());
683 for (
auto i = 1; i <= _genes->D().max(); ++i) {
684 container->addColumnName(_genes->D()[i].allele);
685 container->addDataVector(vector<prob_t>( (_genes->D()[i].sequence.size() + 1) * (_genes->D()[i].sequence.size() + 1) ));
686 container->addRowName(_genes->D()[i].allele);
687 container->addMetadata(_genes->D()[i].sequence.size() + 1);
689 containers[VDJ_DIV_DEL] = container;
690 cout <<
"\tD delet. num.: " <<
"CREATED" << endl;
693 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"laplace", .0).asDouble());
694 container->addColumnName(
"VD ins");
695 container->addColumnName(
"DJ ins");
696 container->addDataVector(vector<prob_t>(_config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"max.len", DEFAULT_MAX_INS_LENGTH).asUInt64() + 1));
697 container->addDataVector(vector<prob_t>(_config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"max.len", DEFAULT_MAX_INS_LENGTH).asUInt64() + 1));
698 containers[VDJ_VAR_DIV_INS_LEN] = container;
699 cout <<
"\tVD/DJ ins. len.: " <<
"CREATED" << endl;
702 container =
new TDVectorList(
true, _config.get(
"probtables", Json::Value()).get(
"ins.nucl", Json::Value()).get(
"laplace", .0).asDouble());
703 for (
auto i = 0; i < 8; ++i) {
704 container->addColumnName(
"VD/DJ nucs");
705 container->addDataVector(vector<prob_t>(4));
707 containers[VDJ_VAR_DIV_INS_NUC] = container;
708 cout <<
"\tVD/DJ ins. nuc.: " <<
"CREATED" << endl;
712 virtual void parseDataContainer(
const string &element,
AbstractTDContainer *container, vector<AbstractTDContainer*> &containers) {
713 std::string err_message =
"NOT FOUND";
714 if (element ==
"v") {
715 if (container && container->file_exists()) {
716 containers[VDJ_VAR_GEN] = container;
719 cout <<
"\tV genes prob.: " << err_message << endl;
721 else if (element ==
"j.d") {
723 && container->file_exists()
724 && this->findGenes(container->column_names(), _genes->D(), err_message)
725 && this->findGenes(container->row_names(), _genes->J(), err_message))
727 containers[VDJ_JOI_DIV_GEN] = container;
731 cout <<
"\tJ-D gene pairs: " << err_message << endl;
733 else if (element ==
"v.del") {
735 && container->file_exists()
736 && this->findGenes(container->column_names(), _genes->V(), err_message))
738 containers[VDJ_VAR_DEL] = container;
742 cout <<
"\tV delet. num.: " << err_message << endl;;
744 else if (element ==
"j.del") {
746 && container->file_exists()
747 && this->findGenes(container->column_names(), _genes->J(), err_message))
749 containers[VDJ_JOI_DEL] = container;
753 cout <<
"\tJ delet. num.: " << err_message << endl;
755 else if (element ==
"d.del") {
756 if (container && container->file_exists()) {
757 containers[VDJ_DIV_DEL] = container;
761 cout <<
"\tD delet. num.: " << err_message << endl;
763 else if (element ==
"ins.len") {
764 if (container && container->file_exists()) {
765 if (container->n_columns() != 2) {
767 ss <<
"ERROR: wrong number of columns (expected: 2, got: " << (int) container->n_columns() <<
")";
768 err_message = ss.str();
770 containers[VDJ_VAR_DIV_INS_LEN] = container;
775 cout <<
"\tVD/DJ ins. len.: " << err_message << endl;
777 else if (element ==
"ins.nucl") {
778 if (container && container->file_exists()) {
779 if (container->n_rows() != 4 || container->n_columns() != 8) {
781 ss <<
"ERROR: wrong number of columns and rows (expected: 4 X 8, got: " << (int) container->n_rows() <<
" X " << (int) container->n_columns() <<
")";
782 err_message = ss.str();
784 containers[VDJ_VAR_DIV_INS_NUC] = container;
789 cout <<
"\tVD/DJ ins. nuc.: " << err_message << endl;
791 else { std::cout <<
"Unrecognised element in \'probtables\'" <<
":\n\t" << element << std::endl; }
795 virtual bool makeModelParameterVector(vector<AbstractTDContainer*> &containers,
796 vector<prob_t> &event_probs,
797 vector<event_ind_t> &event_lengths,
798 vector<event_ind_t> &event_classes,
799 vector<seq_len_t> &event_col_num,
800 vector<prob_t> &laplace,
801 vector<seq_len_t> &min_D_len_vec)
805 if (containers[VDJ_VAR_GEN]
806 && containers[VDJ_JOI_DIV_GEN]
807 && containers[VDJ_VAR_DEL]
808 && containers[VDJ_JOI_DEL]
809 && containers[VDJ_DIV_DEL]
810 && containers[VDJ_VAR_DIV_INS_LEN]
811 && containers[VDJ_VAR_DIV_INS_NUC]) {
813 this->addGenes(containers[VDJ_VAR_GEN],
821 this->addGenes(containers[VDJ_JOI_DIV_GEN],
831 this->addDels(containers[VDJ_VAR_DEL],
840 this->addDels(containers[VDJ_JOI_DEL],
847 containers[VDJ_VAR_DEL]->n_columns());
849 this->addDels2D(containers[VDJ_DIV_DEL],
856 containers[VDJ_JOI_DEL]->n_columns());
858 this->addIns(containers[VDJ_VAR_DIV_INS_LEN],
864 containers[VDJ_DIV_DEL]->n_rows(),
865 _config.get(
"probtables", Json::Value()).get(
"ins.len", Json::Value()).get(
"max.len", DEFAULT_MAX_INS_LENGTH).asUInt64() + 1);
867 this->addIns(containers[VDJ_VAR_DIV_INS_NUC],
875 for (seg_index_t i = 1; i <= _genes->D().max(); ++i) { min_D_len_vec.push_back(_min_D_len); }
876 _param_vec.reset(
new ModelParameterVector(VDJ_RECOMB, event_probs, event_lengths, event_classes, event_col_num, laplace, _config.get(
"errors", 0).asDouble(),
true, min_D_len_vec));
892 #endif //YMIR_MODEL_PARSER_H
Definition: model_parser.h:15
Definition: textdata.h:540
Vector of gene segments.
Definition: textdata.h:138
Definition: genesegment.h:265
Definition: model_parser.h:574
Definition: textdata.h:28
Definition: textdata.h:401
Definition: model_parser.h:343
Class for storing parameters of assembling statistical model. Note: event with index 0 (zero) is "nul...
Definition: modelparametervector.h:68
List of std::vectors for deletions and insertions.
Definition: textdata.h:258
Definition: genesegment.h:44