comparison gecko/src/structs.h @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
comparison
equal deleted inserted replaced
0:ee6b15b409e5 1:35af401890c0
1 #ifndef STRUCTS_H
2 #define STRUCTS_H
3
4 #include <inttypes.h>
5 // Constants and record structs required by the programs of the dotplot workflow
6 #define MAXLID 200 // size basis of the identifier buffers in struct Sequence and struct rIndex2
7 #define MAXLS 1000000000 // NOTE(review): not used in this header; presumably a maximum sequence length - confirm
8 #define READBUF 50000000 //50MB
9
10 // Struct for the words program: one word packed into 8 bytes
11 typedef struct {
12 // Each letter is stored using 2 bits,
13 // so there are 4 letters per byte and a
14 // maximum of 32 letters in 'b'
15 unsigned char b[8];
16 } word;
17
18 // Struct for the words and sort programs: one occurrence of a word
19 typedef struct {
20 // Word compressed in binary format
21 word w;
22 // Occurrence position in the sequence
23 uint64_t pos;
24 // For multiple-sequence files this field
25 // records in which sequence the word
26 // occurs
27 uint64_t seq;
28 } wentry;
29
30 // Struct for the w2hd program: a word plus its occurrence count
31 typedef struct {
32 // Word compressed in binary format
33 word w;
34 // Occurrence position in the sequence
35 uint64_t pos;
36 // Number of occurrences inside the
37 // sequence. This is used to know the
38 // number of locations stored in the
39 // positions file
40 uint64_t num;
41 } hashentry;
42
43 // Struct for the w2hd program: where a word occurs
44 typedef struct {
45 // Occurrence position in the sequence
46 uint64_t pos;
47 // For multiple-sequence files this field
48 // records in which sequence the word
49 // occurs
50 uint64_t seq;
51 } location;
52
53 // Struct for the hits, sortHits and filterHits programs
54 typedef struct {
55 // Diagonal where the hit is located.
56 // This value is calculated as:
57 // posX - posY
58 int64_t diag;
59 // Occurrence position in sequence X
60 uint64_t posX;
61 // Occurrence position in sequence Y
62 uint64_t posY;
63 // For multiple-sequence files this field
64 // records in which sequence of the X file
65 // the word occurs
66 uint64_t seqX;
67 // For multiple-sequence files this field
68 // records in which sequence of the Y file
69 // the word occurs
70 uint64_t seqY;
71 } hit;
72
73 // Struct for the FragHits, af2png and leeFrag programs: one fragment
74 struct FragFile {
75 // Diagonal where the fragment is located.
76 // This value is calculated as:
77 // posX - posY
78 int64_t diag;
79 // Start position in sequence X
80 uint64_t xStart;
81 // Start position in sequence Y
82 uint64_t yStart;
83 // End position in sequence X
84 uint64_t xEnd;
85 // End position in sequence Y
86 uint64_t yEnd;
87 // Fragment length.
88 // For an ungapped alignment it is:
89 // xEnd-xStart+1
90 uint64_t length;
91 // Number of identities in the
92 // fragment
93 uint64_t ident;
94 // Score of the fragment. This
95 // depends on the score matrix
96 // used
97 uint64_t score;
98 // Percentage of similarity. This
99 // is calculated as score/scoreMax,
100 // where scoreMax is the maximum
101 // score possible
102 float similarity;
103 // Sequence number in the 'X' file
104 uint64_t seqX;
105 // Sequence number in the 'Y' file
106 uint64_t seqY;
107 // Synteny block id
108 int64_t block;
109 // 'f' for the forward strand and 'r' for the reverse strand
110 char strand;
111 };
112
113 // Struct for the leeSeqDB function
114 struct Sequence{
115 char ident[MAXLID+1]; // identifier buffer of MAXLID+1 chars (presumably MAXLID chars plus a terminating NUL - confirm)
116 char *datos; // "datos" is Spanish for "data"; presumably points to the sequence contents - confirm against leeSeqDB
117 };
118
119 // Struct for a reads index tuple
120 struct rIndex2 {
121 char id[MAXLID]; // NOTE(review): MAXLID chars, whereas Sequence.ident has MAXLID+1 - confirm ids always fit here
122 uint64_t rNumber; // presumably the read number - confirm
123 uint64_t rLen; // presumably the read length - confirm
124 uint64_t rLmasked; // Masked positions
125 uint64_t nonACGT; // N's
126 uint64_t pos; // Start position of sequence
127 uint64_t Lac; // Accumulated length
128 };
129
130 #endif