Mercurial > repos > bitlab > bitlab
diff gecko/src/structs.h @ 1:35af401890c0 draft
Uploaded
author | bitlab |
---|---|
date | Thu, 13 Dec 2018 07:59:25 -0500 |
parents | |
children |
line wrap: on
line diff
// gecko/src/structs.h
// Reconstructed from the unified diff "--- /dev/null  +++ b/gecko/src/structs.h
// @@ -0,0 +1,130 @@": the leading '+' markers are dropped and every
// declaration/comment is placed on its own line so the header compiles.
#ifndef STRUCTS_H
#define STRUCTS_H

#include <inttypes.h>

//Structs required for the dotplot workflow

//Basis for the identifier buffer sizes used below
//(Sequence::ident and rIndex2::id)
#define MAXLID 200
//NOTE(review): not used inside this header; presumably the
//maximum sequence length - confirm against its users
#define MAXLS 1000000000
#define READBUF 50000000 //50MB

//Struct for words program
typedef struct {
    //Each letter is stored using 2 bits.
    //There are 4 letters per byte, so the
    //8 bytes of 'b' hold a maximum of 32
    //letters
    unsigned char b[8];
} word;

//Struct for words and sort program
typedef struct {
    //Word compressed in binary format
    word w;
    //Occurrence position in the sequence
    uint64_t pos;
    //For multiple sequence files this var
    //tells in which sequence the word
    //occurs
    uint64_t seq;
} wentry;

//Struct for w2hd program
typedef struct {
    //Word compressed in binary format
    word w;
    //Occurrence position in the sequence
    uint64_t pos;
    //Number of occurrences inside the
    //sequence. This is used to know the
    //number of locations stored in the
    //positions file
    uint64_t num;
} hashentry;

//Struct for w2hd program
typedef struct {
    //Occurrence position in the sequence
    uint64_t pos;
    //For multiple sequence files this var
    //tells in which sequence the word
    //occurs
    uint64_t seq;
} location;

//Struct for hits, sortHits and filterHits programs
typedef struct {
    //Diagonal where the hit is located.
    //This value is calculated as:
    //posX - posY
    //(signed, unlike the positions)
    int64_t diag;
    //Occurrence position in sequence X
    uint64_t posX;
    //Occurrence position in sequence Y
    uint64_t posY;
    //For multiple sequence files this var
    //tells in which sequence of the X file
    //the word occurs
    uint64_t seqX;
    //For multiple sequence files this var
    //tells in which sequence of the Y file
    //the word occurs
    uint64_t seqY;
} hit;

//Struct for FragHits, af2png and leeFrag programs
struct FragFile {
    //Diagonal where the frag is located.
    //This value is calculated as:
    //posX - posY
    int64_t diag;
    //Start position in sequence X
    uint64_t xStart;
    //Start position in sequence Y
    uint64_t yStart;
    //End position in sequence X
    uint64_t xEnd;
    //End position in sequence Y
    uint64_t yEnd;
    //Fragment length.
    //For an ungapped alignment it is:
    //xEnd-xStart+1
    uint64_t length;
    //Number of identities in the
    //fragment
    uint64_t ident;
    //Score of the fragment. This
    //depends on the score matrix
    //used
    uint64_t score;
    //Percentage of similarity. This
    //is calculated as score/scoreMax,
    //where scoreMax is the maximum
    //score possible
    float similarity;
    //Sequence number in the 'X' file
    uint64_t seqX;
    //Sequence number in the 'Y' file
    uint64_t seqY;
    //Synteny block id
    int64_t block;
    //'f' for the forward strand and 'r' for the reverse
    char strand;
};

//Struct for leeSeqDB function
struct Sequence{
    //Identifier buffer: MAXLID characters plus one extra byte
    char ident[MAXLID+1];
    //NOTE(review): presumably the sequence data ("datos");
    //allocation and ownership are not visible in this header
    char *datos;
};

//Struct for reads index tuple
struct rIndex2 {
    //NOTE(review): unlike Sequence::ident, no extra byte is
    //reserved here. The size is left untouched because changing
    //it would alter the struct layout - verify how records of
    //this type are stored before resizing
    char id[MAXLID];
    //NOTE(review): presumably the read number - confirm
    uint64_t rNumber;
    //NOTE(review): presumably the read length - confirm
    uint64_t rLen;
    uint64_t rLmasked; //Masked positions
    uint64_t nonACGT;  //N's
    uint64_t pos;      //Start position of sequence
    uint64_t Lac;      //Accumulated length
};

#endif