Ymir  .9
Fast C++ tool for computation of assembling probabilities, statistical inference of assembling statistical model and generation of artificial sequences of T-cell receptors data.
types.h
1 /*
2  * Ymir <imminfo.github.io/ymir>
3  *
4  * This file is part of Ymir, a fast C++ tool for computation of assembling
5  * probabilities, statistical inference of assembling statistical model
6  * and generation of artificial sequences of T-cell receptors data.
7  *
8  *
9  * Copyright 2015 Vadim Nazarov <vdn at mailbox dot com>
10  *
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  *
15  * http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing, software
18  * distributed under the License is distributed on an "AS IS" BASIS,
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  * See the License for the specific language governing permissions and
21  * limitations under the License.
22  */
23 
24 #ifndef _TYPES_H
25 #define _TYPES_H
26 
27 
28 #include <bitset>
29 #include <cmath>
30 #include <fstream>
31 #include <iostream>
32 #include <sstream>
33 #include <tuple>
34 #include <vector>
35 
36 #include "jsoncpp.cpp"
37 
38 #ifdef USE_OMP
39 #include "omp.h"
40 #endif
41 
42 #include "matrix.h"
43 #include "codon_table.h"
44 
45 //#include "Eigen/Dense"
46 
47 //#include "tools.h"
48 
49 //#define MPFR
50 //#include "gmp.h"
51 //#include "mpfr.h"
52 //#include <Eigen/unsupported/Eigen/MPRealSupport>
53 //using namespace mpfr;
54 //using namespace Eigen;
55 
56 
57 namespace ymir {
58 
59  #define DEFAULT_DIV_GENE_MIN_LEN 3
60 
61  #define NULL_CHAR '_'
62 
63  #define DEFAULT_MAX_INS_LENGTH 65
64 
65  #ifndef DNDEBUG
66  #define YDEBUG
67  #endif
68 
69  #define DEFAULT_AWE_V_RESERVE_SIZE 60
70  #define DEFAULT_AWE_D_RESERVE_SIZE 4000
71  #define DEFAULT_AWE_J_RESERVE_SIZE 60
72 
79  #ifdef MPFR
80 
86  typedef mpreal prob_t;
87  #else
88 
93 // typedef double prob_t;
94  typedef long double prob_t;
95  #endif
96 
97 
98  typedef std::string sequence_t;
99 
100 
106  typedef uint16_t event_ind_t;
107 
108 
109  typedef uint16_t seq_len_t;
110 
111 
117 // typedef Eigen::Matrix<prob_t, Eigen::Dynamic, Eigen::Dynamic> event_matrix_t;
118  typedef Matrix<prob_t, seq_len_t> event_matrix_t;
119 
120 
121  typedef uint8_t seg_index_t;
122 
123 
124  using std::unique_ptr;
125 
126 
127  typedef std::pair<event_ind_t, prob_t> event_pair_t;
128 
129 
133  enum MAAGComputeProbAction {
134  MAX_PROBABILITY,
135  SUM_PROBABILITY
136  };
137 
138 
139  enum GeneSegments {
140  UNDEF_GENE,
141  VARIABLE,
142  JOINING,
143  DIVERSITY
144  };
145 
146 
150  enum Recombination {
151  UNDEF_RECOMB,
152  VJ_RECOMB,
153  VDJ_RECOMB
154  };
155 
156 
160  enum ModelBehaviour {
161  PREDEFINED,
162  EMPTY
163  };
164 
165 
169  enum MetadataMode {
170  NO_METADATA = 0,
171  SAVE_METADATA = 1
172  };
173 
174 
178  enum ErrorMode {
179  NO_ERRORS = 0,
180  COMPUTE_ERRORS = 1
181  };
182 
183 
187  enum SequenceType {
188  UNDEF_SEQ_TYPE = 0,
189  NUCLEOTIDE = 1,
190  AMINOACID = 2
191  };
192 
193 
197  enum EventClass {
198  NULL_EVENT = 0,
199 
200  VJ_VAR_JOI_GEN = 1,
201  VJ_VAR_DEL = 2,
202  VJ_JOI_DEL = 3,
203  VJ_VAR_JOI_INS_LEN = 4,
204  VJ_VAR_JOI_INS_NUC = 5,
205  VJ_VAR_JOI_INS_NUC_A = 5,
206  VJ_VAR_JOI_INS_NUC_C = 6,
207  VJ_VAR_JOI_INS_NUC_G = 7,
208  VJ_VAR_JOI_INS_NUC_T = 8,
209  VJ_ERROR_RATE = 9,
210 
211  VDJ_VAR_GEN = 1,
212  VDJ_JOI_DIV_GEN = 2,
213  VDJ_VAR_DEL = 3,
214  VDJ_JOI_DEL = 4,
215  VDJ_DIV_DEL = 5,
216  VDJ_VAR_DIV_INS_LEN = 6,
217  VDJ_DIV_JOI_INS_LEN = 7,
218  VDJ_VAR_DIV_INS_NUC = 8,
219  VDJ_VAR_DIV_INS_NUC_A = 8,
220  VDJ_VAR_DIV_INS_NUC_C = 9,
221  VDJ_VAR_DIV_INS_NUC_G = 10,
222  VDJ_VAR_DIV_INS_NUC_T = 11,
223  VDJ_DIV_JOI_INS_NUC = 12,
224  VDJ_DIV_JOI_INS_NUC_A = 12,
225  VDJ_DIV_JOI_INS_NUC_C = 13,
226  VDJ_DIV_JOI_INS_NUC_G = 14,
227  VDJ_DIV_JOI_INS_NUC_T = 15,
228  VDJ_ERROR_RATE = 16
229  };
230 
231 
232  enum MAAGNodeEventIndex {
233  VJ_VAR_JOI_GEN_I = 0,
234  VJ_VAR_DEL_I = 1,
235  VJ_VAR_JOI_INS_I = 2,
236  VJ_JOI_DEL_I = 3,
237 
238  VDJ_VAR_GEN_I = 0,
239  VDJ_VAR_DEL_I = 1,
240  VDJ_VAR_DIV_INS_I = 2,
241  VDJ_DIV_DEL_I = 3,
242  VDJ_DIV_JOI_INS_I = 4,
243  VDJ_JOI_DEL_I = 5,
244  VDJ_JOI_DIV_GEN_I = 6
245  };
246 
247 
248  enum InsertionModelType {
249  MONO_NUCLEOTIDE,
250  DI_NUCLEOTIDE
251  };
252 
253 
259  struct CodonTable {
260 
261  struct Codons {
262 
263  Codons(std::pair<std::unordered_multimap<char, std::string>::const_iterator, std::unordered_multimap<char, std::string>::const_iterator> it)
264  : _begin(it.first), _end(it.second), _current(it.first)
265  {}
266 
267 
268  std::string next() {
269  std::string res = _current->second;
270  ++_current;
271  return res;
272  }
273 
274 
275  bool end() const { return _current == _end; }
276 
277  protected:
278  std::unordered_multimap<char, std::string>::const_iterator _begin, _end, _current;
279 
280  Codons() {}
281 
282  };
283 
284  CodonTable() {
285  _codons = {
286  {'A', "GCT"}, {'A', "GCC"}, {'A', "GCA"}, {'A', "GCG"},
287  {'L', "TTA"}, {'L', "TTG"}, {'L', "CTT"}, {'L', "CTC"}, {'L', "CTA"}, {'L', "CTG"},
288  {'R', "CGT"}, {'R', "CGC"}, {'R', "CGA"}, {'R', "CGG"}, {'R', "AGA"}, {'R', "AGG"},
289  {'K', "AAA"}, {'K', "AAG"},
290  {'N', "AAT"}, {'N', "AAC"},
291  {'M', "ATG"},
292  {'D', "GAT"}, {'D', "GAC"},
293  {'F', "TTT"}, {'F', "TTC"},
294  {'C', "TGT"}, {'C', "TGC"},
295  {'P', "CCT"}, {'P', "CCC"}, {'P', "CCA"}, {'P', "CCG"},
296  {'Q', "CAA"}, {'Q', "CAG"},
297  {'S', "TCT"}, {'S', "TCC"}, {'S', "TCA"}, {'S', "TCG"}, {'S', "AGT"}, {'S', "AGC"},
298  {'E', "GAA"}, {'E', "GAG"},
299  {'T', "ACT"}, {'T', "ACC"}, {'T', "ACA"}, {'T', "ACG"},
300  {'G', "GGT"}, {'G', "GGC"}, {'G', "GGA"}, {'G', "GGG"},
301  {'W', "TGG"},
302  {'H', "CAT"}, {'H', "CAC"},
303  {'Y', "TAT"}, {'Y', "TAC"},
304  {'I', "ATT"}, {'I', "ATC"}, {'I', "ATA"},
305  {'V', "GTT"}, {'V', "GTC"}, {'V', "GTA"}, {'V', "GTG"},
306  {'*', "TAA"}, {'*', "TGA"}, {'*', "TAG"}
307  };
308  }
309 
310  Codons codons(char aminoacid) const { return Codons(_codons.equal_range(aminoacid)); }
311 
312 
313 
314  protected:
315  std::unordered_multimap<char, std::string> _codons;
316  };
317 
318 
319  typedef std::pair<std::string*, uint> codons_t;
320  codons_t codons(char aminoacid) {
321  switch (aminoacid) {
322  default: return codons_t(nullptr, 0);
323  }
324  }
325 
326 }
327 
328 #endif
Definition: types.h:261
Definition: aligner.h:37
A struct for representing nucleotide codons for amino acids.
Definition: codon_table.h:17