9 #ifndef YMIR_TEXTDATA_H 10 #define YMIR_TEXTDATA_H 31 AbstractTDContainer(CONTAINER_TYPE ctype,
bool skip_first_column, prob_t laplace,
const std::string &filepath) {
32 _colnames.reserve(40);
33 _rownames.reserve(40);
36 _skip_first_column = skip_first_column;
/**
 * \brief Append a single value to one of the stored data vectors.
 *
 * The value is stored as `value + _laplace`.
 *
 * \param value Value to append.
 * \param i     Index of the target vector inside `_data` (defaults to 0).
 *              The index is not range-checked here, so `_data[i]` must
 *              already exist when this is called.
 */
54 virtual void addDataValue(prob_t value,
size_t i = 0) { _data[i].push_back(value + _laplace); }
/**
 * \brief Append a new data vector built from `vec`.
 *
 * A fresh empty vector is pushed onto `_data`, then every element of `vec`
 * is appended to it as `vec[i] + _laplace`.
 *
 * \param vec Values to copy into the new vector.
 */
57 virtual void addDataVector(
const std::vector<prob_t> &vec) {
58 _data.push_back(std::vector<prob_t>());
// NOTE(review): `auto i = 0` deduces int, which is then compared with the
// unsigned vec.size().
59 for (
auto i = 0; i < vec.size(); ++i) {
60 _data[_data.size() - 1].push_back(vec[i] + _laplace);
/**
 * \brief Append a new data vector copied from the iterator range [start, end).
 *
 * A fresh empty vector is pushed onto `_data` and the range is inserted at
 * its end.
 *
 * NOTE(review): unlike the `const std::vector<prob_t>&` overload, the
 * statements visible here store the values as-is and do not add `_laplace`.
 * Confirm whether that difference is intentional.
 *
 * \param start Iterator to the first value to copy.
 * \param end   Iterator one past the last value to copy.
 */
65 virtual void addDataVector(std::vector<prob_t>::const_iterator start, std::vector<prob_t>::const_iterator end) {
66 _data.push_back(std::vector<prob_t>());
67 _data[_data.size() - 1].insert(_data[_data.size() - 1].end(), start, end);
/**
 * \brief Get the stored `_laplace` value.
 *
 * This is the amount added to each value by addDataValue() and by the
 * vector overload of addDataVector().
 *
 * \return The current `_laplace` value.
 */
71 prob_t laplace()
const {
return _laplace; }
/**
 * \brief Number of row names currently stored.
 *
 * \return `_rownames.size()`, converted to `seg_index_t`.
 *         NOTE(review): confirm `seg_index_t` is wide enough for the
 *         expected number of rows.
 */
74 seg_index_t n_rows()
const {
return _rownames.size(); }
/**
 * \brief Number of column names currently stored.
 *
 * \return `_colnames.size()`, converted to `seg_index_t`.
 *         NOTE(review): confirm `seg_index_t` is wide enough for the
 *         expected number of columns.
 */
77 seg_index_t n_columns()
const {
return _colnames.size(); }
/**
 * \brief Append a row name to the end of the row-name list.
 *
 * \param name Row name to store (copied).
 */
80 void addRowName(
const std::string& name) { _rownames.push_back(name); }
/**
 * \brief Append a column name to the end of the column-name list.
 *
 * \param name Column name to store (copied).
 */
83 void addColumnName(
const std::string& name) { _colnames.push_back(name); }
/**
 * \brief Append one metadata entry.
 *
 * \param data Value pushed onto the end of `_metadata`.
 */
86 void addMetadata(seq_len_t data) { _metadata.push_back(data); }
/**
 * \brief Read-only access to the stored row names.
 *
 * \return Const reference to the internal row-name vector.
 */
89 const std::vector<std::string>& row_names()
const {
return _rownames; }
/**
 * \brief Read-only access to the stored column names.
 *
 * \return Const reference to the internal column-name vector.
 */
92 const std::vector<std::string>& column_names()
const {
return _colnames; }
/**
 * \brief Get one metadata entry by position.
 *
 * \param i Index into `_metadata`; not range-checked.
 * \return The i-th metadata value.
 */
95 seq_len_t metadata(
size_t i)
const {
return _metadata[i]; }
/**
 * \brief Pure virtual: read the container's contents.
 *
 * \param err_message Output string. The implementations in this file assign
 *                    an "ERROR: ..." text to it when the file cannot be
 *                    opened or a line has an unexpected number of elements.
 * \return Presumably true on success and false on failure; the return
 *         statements of the implementations are not visible here, so
 *         TODO confirm.
 */
98 virtual bool read(std::string &err_message) = 0;
/**
 * \brief Pure virtual: write the container's contents to a file.
 *
 * \param filepath Path of the output file.
 * \return Presumably true on success and false on failure; the return
 *         statements of the implementations are not visible here, so
 *         TODO confirm.
 */
101 virtual bool write(
const std::string& filepath) = 0;
/**
 * \brief Read-only access to one of the stored data vectors.
 *
 * \param i Index into `_data`; not range-checked.
 * \return Const reference to the i-th data vector.
 */
104 const std::vector<prob_t>& data(
size_t i)
const {
return _data[i]; }
/**
 * \brief Get the container kind stored in `_type`.
 *
 * \return The stored CONTAINER_TYPE value.
 */
107 CONTAINER_TYPE type()
const {
return _type; }
/**
 * \brief Get the `_file_exists` flag.
 *
 * The read() implementations in this file clear the flag together with
 * setting the "can't open file" error message. Where the flag is set to
 * true is not visible here.
 *
 * \return Current value of `_file_exists`.
 */
110 bool file_exists()
const {
return _file_exists; }
// Numeric payload: a list of value vectors, exposed one at a time via data(i).
114 std::vector< std::vector<prob_t> > _data;
// Column names, appended by addColumnName().
115 std::vector<std::string> _colnames;
// Row names, appended by addRowName().
116 std::vector<std::string> _rownames;
// _skip_first_column: the readers initialise their field counter as
// `!_skip_first_column`, so when it is true the first tab-separated field of
// a line is treated as a row name rather than a value.
// _file_exists: cleared by the readers when the file cannot be opened.
118 bool _skip_first_column, _file_exists;
// Container kind returned by type().
119 CONTAINER_TYPE _type;
// Extra per-container values appended by addMetadata(). The matrix writers in
// this file use an entry as the column count of a row-major flattened matrix.
120 std::vector<seq_len_t> _metadata;
// Path of the backing file; used in the "can't open file" error messages.
121 std::string _filepath;
125 _colnames.reserve(40);
126 _rownames.reserve(40);
129 _skip_first_column =
true;
141 TDVector(
bool skip_first_column =
true, prob_t laplace = 0,
const std::string &filepath =
"")
148 bool read(std::string &err_message) {
154 std::stringstream line_stream;
155 std::string line, word;
156 std::vector<prob_t> word_vec;
158 bool skip_col_num_check =
true, empty_line =
false;
162 if (line[0] !=
'\n' && line[0] != 0) {
164 line_stream.str(line);
165 int i = !_skip_first_column;
167 if (skip_col_num_check) {
168 getline(line_stream, word,
'\t');
169 this->addColumnName(
"Probability");
170 _data.push_back(std::vector<prob_t>());
171 _data[0].reserve(60);
173 while (!line_stream.eof()) {
174 getline(line_stream, word,
'\t');
177 if (i) { word_vec.push_back(stod(word)); }
178 else { this->addRowName(word); }
189 if (!skip_col_num_check) {
190 if (word_vec.size() == _colnames.size()) {
191 for (
size_t i = 0; i < word_vec.size(); ++i) {
192 _data[i].push_back(word_vec[i]);
195 std::stringstream ss;
196 ss <<
"ERROR: number of elements doesn't match the number of columns in the line " <<
199 (
int) _colnames.size() <<
201 (int) word_vec.size() <<
")";
202 err_message = ss.str();
206 skip_col_num_check =
false;
219 _file_exists =
false;
220 err_message =
"ERROR: can't open file [" + _filepath +
"]";
225 bool write(
const std::string& filepath) {
232 ofs << _colnames[0] <<
'\t' << _colnames[1] << std::endl;
234 for (
auto i = 0; i < _data[0].size(); ++i) {
235 ofs << _rownames[i] <<
'\t' << _data[0][i] << std::endl;
243 _file_exists =
false;
261 TDVectorList(
bool skip_first_column =
true, prob_t laplace = 0,
const std::string &filepath =
"")
268 bool read(std::string &err_message) {
274 std::stringstream line_stream;
275 std::string line, word;
276 std::vector<prob_t> word_vec;
278 bool skip_col_num_check =
true, empty_line =
false;
281 if (line[0] !=
'\n' && line[0] != 0) {
283 line_stream.str(line);
284 int i = !_skip_first_column;
286 if (skip_col_num_check) {
287 while (!line_stream.eof()) {
288 getline(line_stream, word,
'\t');
290 this->addColumnName(word);
291 _data.push_back(std::vector<prob_t>());
292 _data[_data.size() - 1].reserve(40);
297 while (!line_stream.eof()) {
298 getline(line_stream, word,
'\t');
301 if (i) { word_vec.push_back(stod(word)); }
302 else { this->addRowName(word); }
313 if (!skip_col_num_check) {
314 if (word_vec.size() == _colnames.size()) {
315 for (
size_t i = 0; i < word_vec.size(); ++i) {
316 _data[i].push_back(word_vec[i]);
319 std::stringstream ss;
320 ss <<
"ERROR: number of elements doesn't match the number of columns in the line " <<
323 (
int) _colnames.size() <<
325 (int) word_vec.size() <<
")";
326 err_message = ss.str();
330 skip_col_num_check =
false;
343 _file_exists =
false;
344 err_message =
"ERROR: can't open file [" + _filepath +
"]";
349 bool write(
const std::string& filepath) {
355 seq_len_t max_len = _data[0].size();
356 for (
auto i = 1; i < _data.size(); ++i) { max_len = std::max(max_len, (seq_len_t) _data[i].size()); }
358 for (
auto i = 0; i < _colnames.size(); ++i) {
360 if (i < _colnames.size() - 1) {
367 for (
auto pos_i = 0; pos_i < max_len; ++pos_i) {
368 if (pos_i < _rownames.size()) {
369 ofs << _rownames[pos_i] <<
'\t';
374 for (
auto vec_i = 0; vec_i < _data.size(); ++vec_i) {
375 if (pos_i < _data[vec_i].size()) {
376 ofs << _data[vec_i][pos_i];
380 if (vec_i < _data.size() - 1) { ofs <<
'\t'; }
390 _file_exists =
false;
404 TDMatrix(
bool skip_first_column =
true, prob_t laplace = 0,
const std::string &filepath =
"")
411 virtual void addDataVector(
const std::vector<prob_t> &vec) {
413 _data.push_back(std::vector<prob_t>());
415 _data[0].insert(_data[0].end(), vec.begin(), vec.end());
419 bool read(std::string &err_message) {
425 _data.push_back(std::vector<prob_t>());
426 std::stringstream line_stream;
427 std::string line, word;
428 std::vector<prob_t> word_vec;
430 bool skip_col_num_check =
true, empty_line =
false;
433 if (line[0] !=
'\n' && line[0] != 0) {
435 line_stream.str(line);
436 int i = !_skip_first_column;
438 if (skip_col_num_check) {
439 while (!line_stream.eof()) {
440 getline(line_stream, word,
'\t');
441 if (i) { this->addColumnName(word); }
445 while (!line_stream.eof()) {
446 getline(line_stream, word,
'\t');
449 if (i) { word_vec.push_back(stod(word)); }
450 else { this->addRowName(word); }
461 if (!skip_col_num_check) {
462 if (word_vec.size() == _colnames.size()) {
463 _data[0].insert(_data[0].end(), word_vec.begin(), word_vec.end());
465 std::stringstream ss;
466 ss <<
"ERROR: number of elements doesn't match the number of columns in the line " <<
469 (
int) _colnames.size() <<
471 (int) word_vec.size() <<
")";
472 err_message = ss.str();
476 skip_col_num_check =
false;
489 _file_exists =
false;
490 err_message =
"ERROR: can't open file [" + _filepath +
"]";
495 bool write(
const std::string& filepath) {
501 for (
auto i = 0; i < _colnames.size(); ++i) {
503 if (i < _colnames.size() - 1) {
510 for (
size_t row_i = 0; row_i < _data[0].size() / _metadata[0]; ++row_i) {
511 ofs << _rownames[row_i] <<
'\t';
512 for (
auto col_i = 0; col_i < _metadata[0]; ++col_i) {
513 ofs << _data[0][row_i * _metadata[0] + col_i];
514 if (col_i < _metadata[0] - 1) { ofs <<
'\t'; }
524 _file_exists =
false;
544 TDMatrixList(
bool skip_first_column =
true, prob_t laplace = 0,
const std::string &filepath =
"")
551 bool read(std::string &err_message) {
557 std::stringstream line_stream;
558 std::string line, word;
559 std::vector<prob_t> word_vec;
560 int line_num = 1, matrix_num = 0;
561 bool skip_col_num_check =
true, in_matrix =
true, empty_line =
false;
562 _data.push_back(std::vector<prob_t>());
565 if (line[0] !=
'\n' && line[0] != 0) {
567 line_stream.str(line);
568 int i = !_skip_first_column;
570 if (skip_col_num_check) {
571 while (!line_stream.eof()) {
572 getline(line_stream, word,
'\t');
573 if (i) { this->addColumnName(word); }
574 else { this->addRowName(word); }
577 _metadata.push_back(i - 1);
579 while (!line_stream.eof()) {
580 getline(line_stream, word,
'\t');
583 if (i) { word_vec.push_back(stod(word)); }
591 _data.push_back(std::vector<prob_t>());
593 skip_col_num_check =
true;
597 if (!skip_col_num_check) {
598 if (word_vec.size() == _metadata[matrix_num]) {
599 _data[matrix_num].insert(_data[matrix_num].end(), word_vec.begin(), word_vec.end());
601 std::stringstream ss;
602 ss <<
"ERROR: number of elements doesn't match the number of columns in the line " <<
605 (
int) _colnames.size() <<
607 (int) word_vec.size() <<
")";
608 err_message = ss.str();
612 skip_col_num_check =
false;
625 _file_exists =
false;
626 err_message =
"ERROR: can't open file [" + _filepath +
"]";
631 bool write(
const std::string& filepath) {
637 for (
auto mat_i = 0; mat_i < _data.size(); ++mat_i) {
638 ofs << _rownames[mat_i] <<
'\t';
639 for (
auto i = 0; i < _metadata[mat_i]; ++i) {
640 ofs << std::to_string(i);
641 if (i < _metadata[mat_i] - 1) { ofs <<
"\t"; }
645 for (
size_t row_i = 0; row_i < _data[mat_i].size() / _metadata[mat_i]; ++row_i) {
646 ofs << std::to_string(row_i) <<
'\t';
647 for (
auto col_i = 0; col_i < _metadata[mat_i]; ++col_i) {
648 ofs << _data[mat_i][row_i * _metadata[mat_i] + col_i];
649 if (col_i < _metadata[mat_i] - 1) { ofs <<
'\t'; }
662 _file_exists =
false;
679 const std::string& filetype,
680 bool skip_first_column,
682 std::string& err_message) {
685 if (filetype ==
"matrix") {
686 container =
new TDMatrix(skip_first_column, laplace, filepath);
687 }
else if (filetype ==
"vector.list") {
688 container =
new TDVectorList(skip_first_column, laplace, filepath);
689 }
else if (filetype ==
"matrix.list") {
690 container =
new TDMatrixList(skip_first_column, laplace, filepath);
691 }
else if (filetype ==
"vector") {
692 container =
new TDVector(skip_first_column, laplace, filepath);
694 if (filetype ==
"") {
695 err_message =
"ERROR: no file type for [" + filepath +
"]";
697 err_message =
"ERROR: unrecognised file type for [" + filepath +
"]";
702 if (container->file_exists()) {
703 container->read(err_message);
706 err_message =
"ERROR: can't open or read the file [" + filepath +
"]";
713 #endif //YMIR_TEXTDATA_H
Definition: textdata.h:540
Vector of gene segments.
Definition: textdata.h:138
Definition: textdata.h:28
Definition: textdata.h:401
List of std::vectors for deletions and insertions.
Definition: textdata.h:258