1
|
1 #ifndef STRUCTS_H
|
|
2 #define STRUCTS_H
|
|
3
|
|
4 #include <inttypes.h>
|
|
5 //Structs required for the dotplot workflow
|
|
// Size used for identifier buffers in this header: struct Sequence reserves
// MAXLID + 1 chars for its identifier and struct rIndex2 reserves MAXLID.
#define MAXLID 200
// NOTE(review): not referenced anywhere in this header; presumably the
// maximum sequence length -- confirm against the code that uses it.
#define MAXLS 1000000000
// 50 MB; presumably the size of the buffer used when reading input files.
#define READBUF 50000000

// Struct for the words program.
typedef struct {
    // Packed word: each letter is stored using 2 bits, which gives
    // 4 letters per byte and a maximum of 32 letters in 'b'.
    unsigned char b[8];
} word;

18 //Struct for words and sort program
|
|
19 typedef struct {
|
|
20 //Word compressed in binary format
|
|
21 word w;
|
|
22 //Ocurrence position in the sequence
|
|
23 uint64_t pos;
|
|
24 //For multiple sequence files this var
|
|
25 //reflects in what sequence occurs the
|
|
26 //word
|
|
27 uint64_t seq;
|
|
28 } wentry;
|
|
29
|
|
30 //Struct for w2hd program
|
|
31 typedef struct {
|
|
32 //Word compressed in binary format
|
|
33 word w;
|
|
34 //Ocurrence position in the sequence
|
|
35 uint64_t pos;
|
|
36 //Number of ocurrences inside the
|
|
37 //sequence. This is used to know the
|
|
38 //number of locations stored in the
|
|
39 //positions file
|
|
40 uint64_t num;
|
|
41 } hashentry;
|
|
42
|
|
// Struct for the w2hd program.
typedef struct {
    // Occurrence position in the sequence.
    uint64_t pos;
    // For multiple-sequence files: the sequence in which the word occurs.
    uint64_t seq;
} location;

// Struct for the hits, sortHits and filterHits programs.
typedef struct {
    // Diagonal where the hit is located.
    // This value is calculated as: posX - posY
    int64_t diag;
    // Occurrence position in sequence X.
    uint64_t posX;
    // Occurrence position in sequence Y.
    uint64_t posY;
    // For multiple-sequence files: the sequence of the X file in which
    // the word occurs.
    uint64_t seqX;
    // For multiple-sequence files: the sequence of the Y file in which
    // the word occurs.
    uint64_t seqY;
} hit;

// Struct for the FragHits, af2png and leeFrag programs.
struct FragFile {
    // Diagonal where the fragment is located.
    // This value is calculated as: posX - posY
    int64_t diag;
    // Start position in sequence X.
    uint64_t xStart;
    // Start position in sequence Y.
    uint64_t yStart;
    // End position in sequence X.
    uint64_t xEnd;
    // End position in sequence Y.
    uint64_t yEnd;
    // Fragment length.
    // For an ungapped alignment this is: xEnd - xStart + 1
    uint64_t length;
    // Number of identities in the fragment.
    uint64_t ident;
    // Score of the fragment. This depends on the score matrix used.
    uint64_t score;
    // Percentage of similarity. This is calculated as score / scoreMax,
    // where scoreMax is the maximum score possible.
    float similarity;
    // Sequence number in the 'X' file.
    uint64_t seqX;
    // Sequence number in the 'Y' file.
    uint64_t seqY;
    // Synteny block id.
    int64_t block;
    // 'f' for the forward strand and 'r' for the reverse strand.
    char strand;
};

113 //Struct for leeSeqDB function
|
|
114 struct Sequence{
|
|
115 char ident[MAXLID+1];
|
|
116 char *datos;
|
|
117 };
|
|
118
|
|
119 //Struct for reads index tuple
|
|
120 struct rIndex2 {
|
|
121 char id[MAXLID];
|
|
122 uint64_t rNumber;
|
|
123 uint64_t rLen;
|
|
124 uint64_t rLmasked; //Masked positions
|
|
125 uint64_t nonACGT; //N's
|
|
126 uint64_t pos; //Start position of sequence
|
|
127 uint64_t Lac; //Accumulated length
|
|
128 };
|
|
129
|
|
130 #endif
|