comparison mrfast-2.1.0.4/MrFAST.h @ 0:7b3dc85dc7fd

Uploaded mrfast source tarball
author calkan
date Tue, 21 Feb 2012 10:29:47 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7b3dc85dc7fd
1 /*
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * Redistributions of source code must retain the above copyright notice, this list
9 * of conditions and the following disclaimer.
10 * - Redistributions in binary form must reproduce the above copyright notice, this
11 * list of conditions and the following disclaimer in the documentation and/or other
12 * materials provided with the distribution.
13 * - Neither the names of the University of Washington, Simon Fraser University,
14 * nor the names of its contributors may be
15 * used to endorse or promote products derived from this software without specific
16 * prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31
32 /*
33 Authors:
34 Farhad Hormozdiari
35 Faraz Hach
36 Can Alkan
37 Emails:
38 farhadh AT uw DOT edu
39 fhach AT cs DOT sfu DOT ca
40 calkan AT uw DOT edu
41 */
42
43
44
45 #ifndef __MR_FAST__
46 #define __MR_FAST__
47
48 #include "Reads.h"
49
50 #define MAP_CHUNKS 15 /* Dimension of every per-slot array in MappingLocations (md, mdSize, cigar, cigarSize, err, loc). */
51 #define MAX_CIGAR_SIZE 100 /* Byte capacity of each md/cigar char buffer declared in this header. */
52
53
54 // Pair is used for pre-processing and for building the read index table.
55 typedef struct
56 {
57 int hv; // NOTE(review): presumably a hash value (cf. msfHashVal) -- confirm
58 //char hv[50];
59 int readNumber; // NOTE(review): presumably the index of the read this entry refers to -- confirm
60 } Pair;
61
/* ReadIndexTable: associates an int `hv` with a pointer to unsigned int data. */
62 typedef struct
63 {
64 int hv; // same name and type as Pair.hv; NOTE(review): presumably the hash value keying this entry -- confirm
65 unsigned int *seqInfo; // NOTE(review): layout, length and ownership of the pointed-to data are not visible in this header
66 } ReadIndexTable;
67
68
/*
 * FullMappingInfo: loc/dir/err/score values plus three fixed-size text
 * buffers (md, chr, cigar) and explicit size fields for md and cigar.
 * The field list is identical to BestFullMappingInfo.
 */
69 typedef struct
70 {
71 int loc; // NOTE(review): presumably the mapping position -- confirm
72 char dir; // NOTE(review): presumably the strand/direction; the encoding is not visible here
73 int err; // NOTE(review): presumably the error (edit-distance) count -- confirm
74 float score;
75 char md[MAX_CIGAR_SIZE]; // MAX_CIGAR_SIZE-byte buffer; NOTE(review): presumably the SAM MD string
76 char chr[20]; // 20-byte buffer; NOTE(review): presumably the reference/chromosome name -- confirm names always fit
77 char cigar[MAX_CIGAR_SIZE]; // MAX_CIGAR_SIZE-byte buffer; NOTE(review): presumably the CIGAR string
78 int cigarSize; // NOTE(review): presumably the used length of `cigar` -- confirm
79 int mdSize; // NOTE(review): presumably the used length of `md` -- confirm
80 } FullMappingInfo;
81
/*
 * BestFullMappingInfo: declared as a separate type, but its fields (names,
 * types, order) are identical to FullMappingInfo.
 * NOTE(review): presumably holds the best mapping found for a read -- confirm.
 */
82 typedef struct
83 {
84 int loc; // NOTE(review): presumably the mapping position -- confirm
85 char dir; // NOTE(review): presumably the strand/direction; the encoding is not visible here
86 int err; // NOTE(review): presumably the error (edit-distance) count -- confirm
87 float score;
88 char md[MAX_CIGAR_SIZE]; // MAX_CIGAR_SIZE-byte buffer; NOTE(review): presumably the SAM MD string
89 char chr[20]; // 20-byte buffer; NOTE(review): presumably the reference/chromosome name
90 char cigar[MAX_CIGAR_SIZE]; // MAX_CIGAR_SIZE-byte buffer; NOTE(review): presumably the CIGAR string
91 int cigarSize; // NOTE(review): presumably the used length of `cigar` -- confirm
92 int mdSize; // NOTE(review): presumably the used length of `md` -- confirm
93 } BestFullMappingInfo;
94
/*
 * MappingLocations: self-referential node (`next` points to another
 * struct lc) carrying MAP_CHUNKS slots in parallel arrays.
 * NOTE(review): presumably index i of every array describes the same
 * mapping -- confirm against the code that fills these nodes.
 */
95 typedef struct lc
96 {
97 char md[MAP_CHUNKS][MAX_CIGAR_SIZE]; // one MAX_CIGAR_SIZE-byte buffer per slot
98 int mdSize[MAP_CHUNKS]; // NOTE(review): presumably the used length of md[i]
99
100 char cigar[MAP_CHUNKS][MAX_CIGAR_SIZE]; // one MAX_CIGAR_SIZE-byte buffer per slot
101 int cigarSize[MAP_CHUNKS]; // NOTE(review): presumably the used length of cigar[i]
102
103 int err[MAP_CHUNKS];
104 int loc[MAP_CHUNKS];
105 struct lc *next; // link to another node of the same type
106 } MappingLocations;
107
/* MappingInfo: an int `size` together with a pointer to a MappingLocations node. */
108 typedef struct inf
109 {
110 int size; // NOTE(review): presumably the number of mappings stored in the chain -- confirm
111 MappingLocations *next; // NOTE(review): presumably the first node of the chain -- confirm
112 } MappingInfo;
113
/*
 * FILE_STRUCT: a stdio stream paired with a 400-byte name buffer.
 * NOTE(review): FILE comes from <stdio.h>, but the only include in this
 * header is "Reads.h" -- confirm that Reads.h provides it.
 */
114 typedef struct
115 {
116 FILE * fp;
117 char name[400]; // NOTE(review): presumably the path/name associated with `fp` -- confirm
118 } FILE_STRUCT;
119
/* FullMappingInfoLink: a FullMappingInfo pointer together with an int `size`. */
120 typedef struct
121 {
122 FullMappingInfo *mi; // NOTE(review): presumably an array of `size` records; ownership is not visible here
123 int size;
124 } FullMappingInfoLink;
125
126
/*
 * extraCaching: three 200-byte char buffers plus an int `err`.
 * NOTE(review): presumably caches the result of one read-vs-reference
 * verification -- confirm against the users of this type.
 */
127 typedef struct
128 {
129 char readString[200];
130 char ref[200];
131 int err;
132 char matrix[200]; // NOTE(review): same name as the `matrix` parameter of editDistance()/generateCigar(); relationship unconfirmed
133 } extraCaching;
134
/*
 * Global long long counters. These are declarations only (`extern`); the
 * definitions live in another translation unit.
 * NOTE(review): what each counter tallies is inferred from its name only.
 */
135 extern long long verificationCnt;
136 extern long long mappingCnt;
137 extern long long mappedSeqCnt;
138 extern long long completedSeqCnt;
139
/*
 * Initialisation and finalisation entry points (prototypes only).
 * NOTE(review): the empty `()` parameter lists below leave the parameters
 * unspecified in pre-C23 C; `(void)` would state "no arguments" explicitly
 * if that is what the definitions take.
 */
140 void initFAST( Read *seqList, // `Read` is not declared in this header; NOTE(review): presumably provided by "Reads.h"
141 int seqListSize,
142 int *samplingLocs,
143 int samplingLocsSize,
144 char *fileName);
145
146 void initVerifiedLocs();
147 void initLookUpTable();
148 void initBestMapping();
149 void initBestConcordantDiscordant(int readNumber);
150
151 void finalizeFAST();
152 void finalizeBestSingleMapping();
153 void finalizeBestConcordantDiscordant();
154 void finalizeOEAReads(char *); // unnamed char * parameter; NOTE(review): its meaning is not visible in this header
155
156
/*
 * Mapping and paired-end output entry points (prototypes only).
 * mapSingleEndSeq and mapPairedEndSeqList share the same parameter list.
 * NOTE(review): presumably `l1` is an array of `s1` entries -- confirm.
 */
157 int mapAllSingleEndSeq(); // NOTE(review): the meaning of the int return value is not visible in this header
158 void mapSingleEndSeq(unsigned int *l1, int s1, int readNumber, int readSegment, int direction);
159 void mapPairedEndSeqList(unsigned int *l1, int s1, int readNumber, int readSegment, int direction);
160
161 void mapPairedEndSeq();
162
163 void outputPairedEnd();
164 void outputPairedEndDiscPP();
165
166
/* Full-mapping-info accessors and trans-chromosomal output (prototypes only). */
167 void outputPairFullMappingInfo(FILE *fp, int readNumber);
168 void setPairFullMappingInfo(int readNumber, FullMappingInfo mi1, FullMappingInfo mi2); // both records are passed by value
/* NOTE(review): `dir` and `score` are int here, while FullMappingInfo declares `char dir` and `float score` -- confirm the conversions are intended. */
169 void setFullMappingInfo(int readNumber, int loc, int dir, int err, int score, char *md, char * refName, char *cigar);
170
171 void outputAllTransChromosal();
172 void outputTransChromosal(char *fileName1, char *fileName2, FILE * fp_out);
173
/*
 * String generators and the hash function (prototypes only).
 * Each generate* function takes an input buffer, its length, and a
 * non-const char * as the last parameter.
 * NOTE(review): presumably the last parameter is a caller-provided output
 * buffer; the required capacity is not visible here (cf. MAX_CIGAR_SIZE).
 */
174 void generateSNPSAM(char *matrix, int matrixLength, char *outputSNP);
175 void generateCigar(char *matrix, int matrixLength, char *cigar);
176 void generateCigarFromMD(char *mistmatch, int mismatchLength, char *cigar); // `mistmatch` (sic) is the declared parameter name
177
178 int msfHashVal(char *seq); // NOTE(review): presumably returns the hash stored in Pair.hv / ReadIndexTable.hv -- confirm
179
/*
 * Eight functions sharing one signature: (char *a, int lena, char *b, int lenb) -> int.
 * NOTE(review): the names suggest SSE2 edit-distance kernels in forward and
 * backward variants; the meaning of the 2 / 4 / G / Extention suffixes and
 * of the return value is not visible in this header.
 */
180 int backwardEditDistance2SSE2(char *a, int lena, char *b,int lenb);
181 int forwardEditDistance2SSE2(char *a, int lena, char *b,int lenb);
182
183 int forwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb);
184 int backwardEditDistanceSSE2G(char *a, int lena, char *b,int lenb);
185
186 int forwardEditDistance4SSE2(char *a, int lena, char *b,int lenb);
187 int backwardEditDistance4SSE2(char *a, int lena, char *b,int lenb);
188
189 int forwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb);
190 int backwardEditDistanceSSE2Extention(char *a, int lena, char *b,int lenb);
191
192
193 /***********************************/
194
/*
 * editDistance and the four verifySingleEndEditDistance* variants
 * (prototypes only). The four verify functions share one parameter list.
 * NOTE(review): `matrix` and `map_location` are non-const pointers and are
 * presumably written by the callee; the meaning of the int return value is
 * not visible in this header.
 */
195 int editDistance(int refIndex, char *seq, int seqLength, char *matrix);
196
197 int verifySingleEndEditDistance(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
198 char *matrix, int *map_location, short *seqHashValue);
199
200 int verifySingleEndEditDistance2(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
201 char *matrix, int *map_location, short *seqHashValue);
202
203 int verifySingleEndEditDistance4(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
204 char *matrix, int *map_location, short *seqHashValue);
205
206 int verifySingleEndEditDistanceExtention(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
207 char *matrix, int *map_location, short *seqHashValue);
208
209 #endif