diff gecko/src/structs.h @ 1:35af401890c0 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 07:59:25 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/structs.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,130 @@
+#ifndef STRUCTS_H
+#define STRUCTS_H
+
+#include <inttypes.h>
+//Structs required for the dotplot workflow
+#define MAXLID 200
+#define MAXLS 1000000000
+#define READBUF 50000000 //50MB
+
+//Struct for words program
+typedef struct {
+	//Each letter is stored using 2 bits
+	//We have 4 letters per byte and a
+	//maximum of 32 in 'b'
+    unsigned char b[8];
+} word;
+
+//Struct for words and sort program
+typedef struct {
+	//Word compressed in binary format
+    word w;
+    //Ocurrence position in the sequence
+    uint64_t pos;
+    //For multiple sequence files this var
+    //reflects in what sequence occurs the
+    //word
+    uint64_t seq;
+} wentry;
+
+//Struct for w2hd program
+typedef struct {
+	//Word compressed in binary format
+    word w;
+    //Ocurrence position in the sequence
+    uint64_t pos;
+    //Number of ocurrences inside the
+    //sequence. This is used to know the
+    //number of locations stored in the
+    //positions file
+    uint64_t num;
+} hashentry;
+
+//Struct for w2hd program
+typedef struct {
+    //Ocurrence position in the sequence
+    uint64_t pos;
+    //For multiple sequence files this var
+    //reflects in what sequence occurs the
+    //word
+    uint64_t seq;
+} location;
+
+//Struct for hits, sortHits and filterHits programs
+typedef struct {
+	//Diagonal where the hit is located
+	//This value is calculated as:
+	//posX - posY
+	int64_t diag;
+    //Ocurrence position in sequence X
+    uint64_t posX;
+    //Ocurrence position in sequence Y
+    uint64_t posY;
+    //For multiple sequence files this var
+    //reflects in what sequence of X file
+    //occurs the word
+    uint64_t seqX;
+    //For multiple sequence files this var
+    //reflects in what sequence of Y file
+    //occurs the word
+    uint64_t seqY;
+} hit;
+
+//Struct for FragHits, af2png and leeFrag programs
+struct FragFile {
+	//Diagonal where the frag is located
+	//This value is calculated as:
+	//posX - posY
+    int64_t diag;
+    //Start position in sequence X
+    uint64_t xStart;
+    //Start position in Sequence Y
+    uint64_t yStart;
+    //End position in Sequence X
+    uint64_t xEnd;
+    //End position in Sequence Y
+    uint64_t yEnd;
+    //Fragment Length
+    //For ungaped aligment is:
+    //xEnd-xStart+1
+    uint64_t length;
+    //Number of identities in the
+    //fragment
+    uint64_t ident;
+    //Score of the fragment. This
+    //depends on the score matrix
+    //used
+    uint64_t score;
+    //Percentage of similarity. This
+    //is calculated as score/scoreMax
+    //Where score max is the maximum
+    //score possible
+    float similarity;
+    //sequence number in the 'X' file
+    uint64_t seqX;
+    //sequence number in the 'Y' file
+    uint64_t seqY;
+    //synteny block id
+    int64_t block;
+    //'f' for the forward strain and 'r' for the reverse
+    char strand;
+};
+
+//Struct for leeSeqDB function
+struct Sequence{
+    char ident[MAXLID+1];
+    char *datos;
+};
+
+//Struct for reads index tuple
+struct rIndex2 {
+    char id[MAXLID];
+    uint64_t  rNumber;
+    uint64_t  rLen;
+    uint64_t  rLmasked; //Masked positions
+    uint64_t  nonACGT;  //N's
+    uint64_t  pos;      //Start position of sequence
+    uint64_t  Lac;      //Accumulated length
+};
+
+#endif