24 #ifndef _GENESEGMENT_H    25 #define _GENESEGMENT_H    37     class GeneSegmentAlphabet;
    38     class VDJRecombinationGenes;
    53             std::string orig_sequence;
    57                         const std::string& sequence_,
    59                     : allele(allele_), sequence(sequence_), orig_sequence(sequence_), index(index_) {
    73                             const std::string& name, 
    74                             const std::string& filepath, 
    75                             bool *is_ok = 
nullptr)
    78             bool res = this->read(filepath);
    82             _gene_segment = gene_segment;
    87                             const std::string& name,       
    88                             const std::vector<std::string>& alleles, 
    89                             const std::vector<std::string>& sequences) {
    92             if (alleles.size() != sequences.size()) {
    93                 std::cout << 
"Gene segment alphabet [" << this->_name << 
"] warning:" << std::endl << 
"\talleles and sequences vectors have different sizes" << std::endl;
    96             size_t min_size = std::min(alleles.size(), sequences.size());
    97             this->_vec.reserve(min_size);
    99             for (
size_t i = 0; i < min_size; i++) {
   100                 this->addGeneSegment(alleles[i], sequences[i], i + 1);
   103             _gene_segment = gene_segment;
   107         const std::string& name()
 const { 
return this->_name; }
   110         const GeneSegment& operator[] (seg_index_t index)
 const {
   112             check_and_throw(index == 0 || index > _vec.size(), 
   113                             "Gene segment alphabet [" + _name + 
"] error:\tindex " + std::to_string(index) + 
"is out of bounds");
   115             return _vec[index - 1];
   118         const GeneSegment& operator[] (
const std::string& name)
 const {
   120             check_and_throw(_map.find(name) == _map.end(), 
   121                             "Gene segment alphabet [" + _name + 
"] error:\tcan't find allele " + name);
   123             return _vec[_map.at(name)];
   127         seg_index_t size()
 const { 
return this->_vec.size(); }
   130         seg_index_t max()
 const { 
return this->_vec.size(); }
   141             std::string seq_5_end = 
"", seq_3_end = 
"";
   142             for (
auto i = 0; i < _vec.size(); ++i) {
   144                 for (
auto left_i = 0; left_i < from_5_end; ++ left_i) {
   145                     seq_5_end = complement(_vec[i].orig_sequence[left_i]) + seq_5_end;
   149                 for (
auto right_i = 0; right_i < from_3_end; ++right_i) {
   150                     seq_3_end = seq_3_end + complement(_vec[i].orig_sequence[_vec[i].orig_sequence.size() - 1 - right_i]);
   153                 _vec[i].sequence.reserve(from_5_end + _vec[i].orig_sequence.size() + from_3_end + 2);
   154                 _vec[i].sequence = seq_5_end + _vec[i].orig_sequence + seq_3_end;
   162         bool write(
const std::string& filepath)
 const {
   163             if (filepath.empty()) {
   164                 std::cout << 
"Gene segment alphabet [" << this->_name << 
"] error:" << std::endl << 
"\tinput file name is an empty string" << std::endl;
   172                 ofs << this->_name << 
'\t' << 
"Sequences" << std::endl;
   173                 for (seg_index_t i = 0; i < this->_vec.size(); ++i) {
   174                     ofs << _vec[i].allele << 
'\t' << _vec[i].orig_sequence;
   175                     if (i != _vec.size() - 1) {
   188         bool read(
const std::string& filepath) {
   189             if (filepath.empty()) {
   190                 std::cout << 
"Gene segment alphabet [" << this->_name << 
"] error:" << std::endl << 
"\tinput file name is an empty string" << std::endl;
   198                 std::cout << 
"Gene segment alphabet [" << this->_name << 
"] reading input file [" << filepath << 
"]" << std::endl;
   199                 std::stringstream line_stream;
   200                 std::string line, gene_name, gene_seq;
   202                 seg_index_t cur_index = 1;
   205                     if (line[0] != 
'\n' && line.size() > 3) {
   206                         line_stream.str(line);
   209                             getline(line_stream, gene_name, 
'\t');
   211                             getline(line_stream, gene_seq, 
'\t');
   212                             this->addGeneSegment(gene_name, gene_seq, cur_index);
   222                 std::cout << 
"Gene segment alphabet [" << this->_name << 
"] error:" << std::endl << 
"\tinput file [" << filepath << 
"] not found" << std::endl;
   231         seq_len_t maxLength()
 const {
   233             for (
auto i = 1; i < _vec.size(); ++i) { 
   234                 res = std::max(res, (seq_len_t) _vec[i].sequence.size());
   240         GeneSegments gene_segment()
 const { 
return _gene_segment; }
   245         std::unordered_map<std::string, seg_index_t> _map;
   246         std::vector<GeneSegment> _vec;
   247         GeneSegments _gene_segment;
   250         void addGeneSegment(
const std::string& name, 
const std::string& seq, seg_index_t index) {
   251             if (name.empty() || seq.empty()) {
   252                 std::cout << 
"Gene segment alphabet [" << this->_name << 
"] warning:" << std::endl << 
"\tempty string for name or sequence. Skipping." << std::endl;
   254                 this->_map[name] = index - 1;
   255                 this->_vec.push_back(
GeneSegment(name, seq, index));
   267         typedef unique_ptr<GeneSegmentAlphabet> GSA_ptr;
   278                               const std::string& j_segments_name, 
const std::string& j_segments_file,
   279                               bool *is_V_ok = 
nullptr, 
bool *is_J_ok = 
nullptr)
   283             std::cout << 
"Loaded " << std::to_string(_V->size()) << 
" V alleles." << std::endl;
   284             std::cout << 
"Loaded " << std::to_string(_J->size()) << 
" J alleles." << std::endl;
   289                               const std::string& j_segments_name, 
const std::string& j_segments_file,
   290                               const std::string& d_segments_name, 
const std::string& d_segments_file,
   291                               bool *is_V_ok = 
nullptr, 
bool *is_J_ok = 
nullptr, 
bool *is_D_ok = 
nullptr)
   296             std::cout << 
"Loaded " << std::to_string(_V->size()) << 
" V alleles." << std::endl;
   297             std::cout << 
"Loaded " << std::to_string(_D->size()) << 
" D alleles." << std::endl;
   298             std::cout << 
"Loaded " << std::to_string(_J->size()) << 
" J alleles." << std::endl;
   302         VDJRecombinationGenes(
const std::string& V_name, 
const std::vector<std::string>& V_alleles, 
const std::vector<std::string>& V_sequences,
   303                               const std::string& J_name, 
const std::vector<std::string>& J_alleles, 
const std::vector<std::string>& J_sequences)
   307             std::cout << 
"Loaded " << std::to_string(_V->size()) << 
" V alleles." << std::endl;
   308             std::cout << 
"Loaded " << std::to_string(_J->size()) << 
" J alleles." << std::endl;
   312         VDJRecombinationGenes(
const std::string& V_name, 
const std::vector<std::string>& V_alleles, 
const std::vector<std::string>& V_sequences,
   313                               const std::string& J_name, 
const std::vector<std::string>& J_alleles, 
const std::vector<std::string>& J_sequences,
   314                               const std::string& D_name, 
const std::vector<std::string>& D_alleles, 
const std::vector<std::string>& D_sequences)
   319             std::cout << 
"Loaded " << std::to_string(_V->size()) << 
" V alleles." << std::endl;
   320             std::cout << 
"Loaded " << std::to_string(_D->size()) << 
" D alleles." << std::endl;
   321             std::cout << 
"Loaded " << std::to_string(_J->size()) << 
" J alleles." << std::endl;
   325        VDJRecombinationGenes(
const VDJRecombinationGenes &other)
   335        VDJRecombinationGenes& operator=(
const VDJRecombinationGenes &other) {
   349         bool is_vdj()
 const { 
return (
bool) _D; }
   358             check_and_throw(!(
bool) _V, 
"V pointer is null");
   365             check_and_throw(!(
bool) _J, 
"J pointer is null");
   372             check_and_throw(!(
bool) _D, 
"D pointer is null");
   383             if (gene == VARIABLE) {
   384                 this->_V->appendPalindromicNucleotides(from_5_end, from_3_end);
   385             } 
else if (gene == JOINING) {
   386                 this->_J->appendPalindromicNucleotides(from_5_end, from_3_end);
   387             } 
else if (gene == DIVERSITY) {
   388                 this->_D->appendPalindromicNucleotides(from_5_end, from_3_end);
   393         bool write(
const std::string& v_segments_file = 
"vsegments.txt",
   394                    const std::string& j_segments_file = 
"jsegments.txt",
   395                    const std::string& d_segments_file = 
"dsegments.txt")
 const   397             if (this->_V->write(v_segments_file)) {
   398                 if (this->_J->write(j_segments_file)) {
   399                     if (!this->is_vdj()) {
   402                         return this->_D->write(d_segments_file);
 
Definition: genesegment.h:265
 
Definition: genesegment.h:50
 
void appendPalindromicNucleotides(seq_len_t from_5_end, seq_len_t from_3_end)
Append P(alindromic)-nucleotides to all gene segment sequences from 3'end, 5'end or both...
Definition: genesegment.h:140
 
Definition: genesegment.h:44
 
void appendPalindromicNucleotides(GeneSegments gene, seq_len_t from_5_end=4, seq_len_t from_3_end=4)
Definition: genesegment.h:382
 
bool write(const std::string &filepath) const 
Save segment alphabet to the given file as a tab-separated table with 3 columns. 
Definition: genesegment.h:162