24 #ifndef _GENESEGMENT_H 25 #define _GENESEGMENT_H 37 class GeneSegmentAlphabet;
38 class VDJRecombinationGenes;
53 std::string orig_sequence;
57 const std::string& sequence_,
59 : allele(allele_), sequence(sequence_), orig_sequence(sequence_), index(index_) {
73 const std::string& name,
74 const std::string& filepath,
75 bool *is_ok =
nullptr)
78 bool res = this->read(filepath);
82 _gene_segment = gene_segment;
87 const std::string& name,
88 const std::vector<std::string>& alleles,
89 const std::vector<std::string>& sequences) {
92 if (alleles.size() != sequences.size()) {
93 std::cout <<
"Gene segment alphabet [" << this->_name <<
"] warning:" << std::endl <<
"\talleles and sequences vectors have different sizes" << std::endl;
96 size_t min_size = std::min(alleles.size(), sequences.size());
97 this->_vec.reserve(min_size);
99 for (
size_t i = 0; i < min_size; i++) {
100 this->addGeneSegment(alleles[i], sequences[i], i + 1);
103 _gene_segment = gene_segment;
107 const std::string& name()
const {
return this->_name; }
110 const GeneSegment& operator[] (seg_index_t index)
const {
112 check_and_throw(index == 0 || index > _vec.size(),
113 "Gene segment alphabet [" + _name +
"] error:\tindex " + std::to_string(index) +
"is out of bounds");
115 return _vec[index - 1];
118 const GeneSegment& operator[] (
const std::string& name)
const {
120 check_and_throw(_map.find(name) == _map.end(),
121 "Gene segment alphabet [" + _name +
"] error:\tcan't find allele " + name);
123 return _vec[_map.at(name)];
127 seg_index_t size()
const {
return this->_vec.size(); }
130 seg_index_t max()
const {
return this->_vec.size(); }
141 std::string seq_5_end =
"", seq_3_end =
"";
142 for (
auto i = 0; i < _vec.size(); ++i) {
144 for (
auto left_i = 0; left_i < from_5_end; ++ left_i) {
145 seq_5_end = complement(_vec[i].orig_sequence[left_i]) + seq_5_end;
149 for (
auto right_i = 0; right_i < from_3_end; ++right_i) {
150 seq_3_end = seq_3_end + complement(_vec[i].orig_sequence[_vec[i].orig_sequence.size() - 1 - right_i]);
153 _vec[i].sequence.reserve(from_5_end + _vec[i].orig_sequence.size() + from_3_end + 2);
154 _vec[i].sequence = seq_5_end + _vec[i].orig_sequence + seq_3_end;
162 bool write(
const std::string& filepath)
const {
163 if (filepath.empty()) {
164 std::cout <<
"Gene segment alphabet [" << this->_name <<
"] error:" << std::endl <<
"\tinput file name is an empty string" << std::endl;
172 ofs << this->_name <<
'\t' <<
"Sequences" << std::endl;
173 for (seg_index_t i = 0; i < this->_vec.size(); ++i) {
174 ofs << _vec[i].allele <<
'\t' << _vec[i].orig_sequence;
175 if (i != _vec.size() - 1) {
188 bool read(
const std::string& filepath) {
189 if (filepath.empty()) {
190 std::cout <<
"Gene segment alphabet [" << this->_name <<
"] error:" << std::endl <<
"\tinput file name is an empty string" << std::endl;
198 std::cout <<
"Gene segment alphabet [" << this->_name <<
"] reading input file [" << filepath <<
"]" << std::endl;
199 std::stringstream line_stream;
200 std::string line, gene_name, gene_seq;
202 seg_index_t cur_index = 1;
205 if (line[0] !=
'\n' && line.size() > 3) {
206 line_stream.str(line);
209 getline(line_stream, gene_name,
'\t');
211 getline(line_stream, gene_seq,
'\t');
212 this->addGeneSegment(gene_name, gene_seq, cur_index);
222 std::cout <<
"Gene segment alphabet [" << this->_name <<
"] error:" << std::endl <<
"\tinput file [" << filepath <<
"] not found" << std::endl;
231 seq_len_t maxLength()
const {
233 for (
auto i = 1; i < _vec.size(); ++i) {
234 res = std::max(res, (seq_len_t) _vec[i].sequence.size());
240 GeneSegments gene_segment()
const {
return _gene_segment; }
245 std::unordered_map<std::string, seg_index_t> _map;
246 std::vector<GeneSegment> _vec;
247 GeneSegments _gene_segment;
250 void addGeneSegment(
const std::string& name,
const std::string& seq, seg_index_t index) {
251 if (name.empty() || seq.empty()) {
252 std::cout <<
"Gene segment alphabet [" << this->_name <<
"] warning:" << std::endl <<
"\tempty string for name or sequence. Skipping." << std::endl;
254 this->_map[name] = index - 1;
255 this->_vec.push_back(
GeneSegment(name, seq, index));
267 typedef unique_ptr<GeneSegmentAlphabet> GSA_ptr;
278 const std::string& j_segments_name,
const std::string& j_segments_file,
279 bool *is_V_ok =
nullptr,
bool *is_J_ok =
nullptr)
283 std::cout <<
"Loaded " << std::to_string(_V->size()) <<
" V alleles." << std::endl;
284 std::cout <<
"Loaded " << std::to_string(_J->size()) <<
" J alleles." << std::endl;
289 const std::string& j_segments_name,
const std::string& j_segments_file,
290 const std::string& d_segments_name,
const std::string& d_segments_file,
291 bool *is_V_ok =
nullptr,
bool *is_J_ok =
nullptr,
bool *is_D_ok =
nullptr)
296 std::cout <<
"Loaded " << std::to_string(_V->size()) <<
" V alleles." << std::endl;
297 std::cout <<
"Loaded " << std::to_string(_D->size()) <<
" D alleles." << std::endl;
298 std::cout <<
"Loaded " << std::to_string(_J->size()) <<
" J alleles." << std::endl;
302 VDJRecombinationGenes(
const std::string& V_name,
const std::vector<std::string>& V_alleles,
const std::vector<std::string>& V_sequences,
303 const std::string& J_name,
const std::vector<std::string>& J_alleles,
const std::vector<std::string>& J_sequences)
307 std::cout <<
"Loaded " << std::to_string(_V->size()) <<
" V alleles." << std::endl;
308 std::cout <<
"Loaded " << std::to_string(_J->size()) <<
" J alleles." << std::endl;
312 VDJRecombinationGenes(
const std::string& V_name,
const std::vector<std::string>& V_alleles,
const std::vector<std::string>& V_sequences,
313 const std::string& J_name,
const std::vector<std::string>& J_alleles,
const std::vector<std::string>& J_sequences,
314 const std::string& D_name,
const std::vector<std::string>& D_alleles,
const std::vector<std::string>& D_sequences)
319 std::cout <<
"Loaded " << std::to_string(_V->size()) <<
" V alleles." << std::endl;
320 std::cout <<
"Loaded " << std::to_string(_D->size()) <<
" D alleles." << std::endl;
321 std::cout <<
"Loaded " << std::to_string(_J->size()) <<
" J alleles." << std::endl;
325 VDJRecombinationGenes(
const VDJRecombinationGenes &other)
335 VDJRecombinationGenes& operator=(
const VDJRecombinationGenes &other) {
349 bool is_vdj()
const {
return (
bool) _D; }
358 check_and_throw(!(
bool) _V,
"V pointer is null");
365 check_and_throw(!(
bool) _J,
"J pointer is null");
372 check_and_throw(!(
bool) _D,
"D pointer is null");
383 if (gene == VARIABLE) {
384 this->_V->appendPalindromicNucleotides(from_5_end, from_3_end);
385 }
else if (gene == JOINING) {
386 this->_J->appendPalindromicNucleotides(from_5_end, from_3_end);
387 }
else if (gene == DIVERSITY) {
388 this->_D->appendPalindromicNucleotides(from_5_end, from_3_end);
393 bool write(
const std::string& v_segments_file =
"vsegments.txt",
394 const std::string& j_segments_file =
"jsegments.txt",
395 const std::string& d_segments_file =
"dsegments.txt")
const 397 if (this->_V->write(v_segments_file)) {
398 if (this->_J->write(j_segments_file)) {
399 if (!this->is_vdj()) {
402 return this->_D->write(d_segments_file);
Definition: genesegment.h:265
Definition: genesegment.h:50
void appendPalindromicNucleotides(seq_len_t from_5_end, seq_len_t from_3_end)
Append P(alindromic)-nucleotides to all gene segment sequences from 3'end, 5'end or both...
Definition: genesegment.h:140
Definition: genesegment.h:44
void appendPalindromicNucleotides(GeneSegments gene, seq_len_t from_5_end=4, seq_len_t from_3_end=4)
Definition: genesegment.h:382
bool write(const std::string &filepath) const
Save segment alphabet to the given file as a tab-separated table with 3 columns.
Definition: genesegment.h:162