Mercurial > repos > bitlab > bitlab
comparison gecko/src/structs.h @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ee6b15b409e5 | 1:35af401890c0 |
---|---|
1 #ifndef STRUCTS_H | |
2 #define STRUCTS_H | |
3 | |
4 #include <inttypes.h> | |
5 //Constants and structs required for the dotplot workflow | |
6 #define MAXLID 200 | |
7 #define MAXLS 1000000000 | |
8 #define READBUF 50000000 //50 MB (decimal: 50,000,000) | |
9 | |
10 //Struct for the words program: a word packed into 8 bytes | |
11 typedef struct { | |
12 //Each letter is stored using 2 bits, | |
13 //so there are 4 letters per byte and a | |
14 //maximum of 32 letters in 'b' (8 bytes) | |
15 unsigned char b[8]; | |
16 } word; | |
17 | |
18 //Struct for the words and sort programs | |
19 typedef struct { | |
20 //Word compressed in binary format (see 'word') | |
21 word w; | |
22 //Occurrence position in the sequence | |
23 uint64_t pos; | |
24 //For multiple-sequence files this field | |
25 //indicates which sequence the word | |
26 //occurs in | |
27 uint64_t seq; | |
28 } wentry; | |
29 | |
30 //Struct for the w2hd program: one hash (dictionary) entry per word | |
31 typedef struct { | |
32 //Word compressed in binary format (see 'word') | |
33 word w; | |
34 //Occurrence position in the sequence. NOTE(review): same wording as wentry.pos; confirm this is not an offset into the positions file | |
35 uint64_t pos; | |
36 //Number of occurrences inside the | |
37 //sequence. This is used to know the | |
38 //number of locations stored in the | |
39 //positions file | |
40 uint64_t num; | |
41 } hashentry; | |
42 | |
43 //Struct for the w2hd program: one word location | |
44 typedef struct { | |
45 //Occurrence position in the sequence | |
46 uint64_t pos; | |
47 //For multiple-sequence files this field | |
48 //indicates which sequence the word | |
49 //occurs in | |
50 uint64_t seq; | |
51 } location; | |
52 | |
53 //Struct for the hits, sortHits and filterHits programs | |
54 typedef struct { | |
55 //Diagonal where the hit is located. | |
56 //This value is calculated as: | |
57 //posX - posY (stored signed, so it may be negative) | |
58 int64_t diag; | |
59 //Occurrence position in sequence X | |
60 uint64_t posX; | |
61 //Occurrence position in sequence Y | |
62 uint64_t posY; | |
63 //For multiple-sequence files this field | |
64 //indicates which sequence of the X file | |
65 //the word occurs in | |
66 uint64_t seqX; | |
67 //For multiple-sequence files this field | |
68 //indicates which sequence of the Y file | |
69 //the word occurs in | |
70 uint64_t seqY; | |
71 } hit; | |
72 | |
73 //Struct for the FragHits, af2png and leeFrag programs: one fragment record | |
74 struct FragFile { | |
75 //Diagonal where the fragment is located. | |
76 //This value is calculated as: | |
77 //posX - posY | |
78 int64_t diag; | |
79 //Start position in sequence X | |
80 uint64_t xStart; | |
81 //Start position in sequence Y | |
82 uint64_t yStart; | |
83 //End position in sequence X | |
84 uint64_t xEnd; | |
85 //End position in sequence Y | |
86 uint64_t yEnd; | |
87 //Fragment length. | |
88 //For an ungapped alignment it is: | |
89 //xEnd-xStart+1 | |
90 uint64_t length; | |
91 //Number of identities in the | |
92 //fragment | |
93 uint64_t ident; | |
94 //Score of the fragment. This | |
95 //depends on the score matrix | |
96 //used | |
97 uint64_t score; | |
98 //Percentage of similarity. This | |
99 //is calculated as score/scoreMax, | |
100 //where scoreMax is the maximum | |
101 //possible score | |
102 float similarity; | |
103 //Sequence number in the 'X' file | |
104 uint64_t seqX; | |
105 //Sequence number in the 'Y' file | |
106 uint64_t seqY; | |
107 //Synteny block id | |
108 int64_t block; | |
109 //'f' for the forward strand and 'r' for the reverse strand | |
110 char strand; | |
111 }; | |
112 | |
113 //Struct for the leeSeqDB function: an identifier buffer of MAXLID+1 chars and a char pointer 'datos' (Spanish for 'data'; presumably the sequence contents - confirm against leeSeqDB) | |
114 struct Sequence{ | |
115 char ident[MAXLID+1]; | |
116 char *datos; | |
117 }; | |
118 | |
119 //Struct for one tuple (entry) of the reads index. NOTE(review): 'id' holds MAXLID chars while Sequence.ident holds MAXLID+1; confirm which size is intended | |
120 struct rIndex2 { | |
121 char id[MAXLID]; | |
122 uint64_t rNumber; | |
123 uint64_t rLen; | |
124 uint64_t rLmasked; //Masked positions | |
125 uint64_t nonACGT; //Non-ACGT symbols (N's) | |
126 uint64_t pos; //Start position of the sequence | |
127 uint64_t Lac; //Accumulated length | |
128 }; | |
129 | |
130 #endif |