/*
 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice, this list
 *   of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or other
 *   materials provided with the distribution.
 * - Neither the names of the University of Washington, Simon Fraser University,
 *   nor the names of its contributors may be
 *   used to endorse or promote products derived from this software without specific
 *   prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Authors:
 *     Farhad Hormozdiari
 *     Faraz Hach
 *     Can Alkan
 * Emails:
 *     farhadh AT uw DOT edu
 *     fhach AT cs DOT sfu DOT ca
 *     calkan AT uw DOT edu
 */

#ifndef __MR_FAST__
#define __MR_FAST__

#include <stdio.h>

#include "Reads.h"

/* Number of mapping entries stored in each MappingLocations node. */
#define MAP_CHUNKS 15
/* Capacity, in bytes, of every per-mapping CIGAR and MD buffer in this header. */
#define MAX_CIGAR_SIZE 100

/*
 * Pair is used during pre-processing to build the read index table.
 * It ties one value (hv) to the number of the read it belongs to.
 */
typedef struct
{
    int hv;         /* presumably a hash value of (part of) the read -- confirm against the indexer */
    int readNumber; /* number of the read this entry refers to */
} Pair;

/*
 * One entry of the read index table: a value (hv) together with a pointer
 * to an array of unsigned ints describing the associated sequences.
 */
typedef struct
{
    int hv;                /* presumably the hash value this entry is keyed on -- confirm */
    unsigned int *seqInfo; /* array layout and ownership are not visible in this header */
} ReadIndexTable;

69 typedef struct
|
|
70 {
|
|
71 int loc;
|
|
72 char dir;
|
|
73 int err;
|
|
74 float score;
|
|
75 char md[MAX_CIGAR_SIZE];
|
|
76 char chr[20];
|
|
77 char cigar[MAX_CIGAR_SIZE];
|
|
78 int cigarSize;
|
|
79 int mdSize;
|
|
80 } FullMappingInfo;
|
|
81
|
|
82 typedef struct
|
|
83 {
|
|
84 int loc;
|
|
85 char dir;
|
|
86 int err;
|
|
87 float score;
|
|
88 char md[MAX_CIGAR_SIZE];
|
|
89 char chr[20];
|
|
90 char cigar[MAX_CIGAR_SIZE];
|
|
91 int cigarSize;
|
|
92 int mdSize;
|
|
93 } BestFullMappingInfo;
|
|
94
|
|
95 typedef struct lc
|
|
96 {
|
|
97 char md[MAP_CHUNKS][MAX_CIGAR_SIZE];
|
|
98 int mdSize[MAP_CHUNKS];
|
|
99
|
|
100 char cigar[MAP_CHUNKS][MAX_CIGAR_SIZE];
|
|
101 int cigarSize[MAP_CHUNKS];
|
|
102
|
|
103 int err[MAP_CHUNKS];
|
|
104 int loc[MAP_CHUNKS];
|
|
105 struct lc *next;
|
|
106 } MappingLocations;
|
|
107
|
|
108 typedef struct inf
|
|
109 {
|
|
110 int size;
|
|
111 MappingLocations *next;
|
|
112 } MappingInfo;
|
|
113
|
|
/*
 * A stdio stream bundled with a name buffer of 400 bytes.
 */
typedef struct
{
    FILE *fp;       /* the stream; who opens/closes it is not visible in this header */
    char name[400]; /* presumably the path/name the stream was opened with -- confirm */
} FILE_STRUCT;

120 typedef struct
|
|
121 {
|
|
122 FullMappingInfo *mi;
|
|
123 int size;
|
|
124 } FullMappingInfoLink;
|
|
125
|
|
126
|
|
/*
 * Cache record holding three fixed 200-byte buffers and an error value.
 * NOTE(review): the name suggests it memoizes an earlier alignment result;
 * confirm against the code that reads and writes it.
 */
typedef struct
{
    char readString[200]; /* read sequence buffer */
    char ref[200];        /* reference sequence buffer */
    int err;              /* error value stored with the cached entry */
    char matrix[200];     /* matrix string buffer */
} extraCaching;

/*
 * Global counters, declared here and defined in exactly one .c file.
 * What each one counts is not visible in this header; the names suggest
 * verifications performed, mappings found, sequences mapped and sequences
 * completed -- confirm against the definitions.
 */
extern long long verificationCnt;
extern long long mappingCnt;
extern long long mappedSeqCnt;
extern long long completedSeqCnt;

140 void initFAST( Read *seqList,
|
|
141 int seqListSize,
|
|
142 int *samplingLocs,
|
|
143 int samplingLocsSize,
|
|
144 char *fileName);
|
|
145
|
|
/*
 * Further initialization routines (defined elsewhere).
 * NOTE(review): the empty parameter lists leave the arguments unchecked by
 * the compiler; check each definition before tightening any of them to (void).
 */
void initVerifiedLocs();
void initLookUpTable();
void initBestMapping();
void initBestConcordantDiscordant(int readNumber);

/*
 * Finalization routines (defined elsewhere); presumably the counterparts of
 * the init* functions -- confirm against the definitions.
 * NOTE(review): empty parameter lists are unchecked; verify the definitions
 * before changing them to (void).
 */
void finalizeFAST();
void finalizeBestSingleMapping();
void finalizeBestConcordantDiscordant();
void finalizeOEAReads(char *);

/*
 * Mapping entry points (defined elsewhere).
 *
 * mapSingleEndSeq and mapPairedEndSeqList share one signature:
 *   l1 / s1      array of unsigned ints and its size (presumably candidate
 *                locations -- confirm)
 *   readNumber   number of the read being mapped
 *   readSegment  segment of the read being considered
 *   direction    direction flag; encoding not visible in this header
 *
 * The meaning of mapAllSingleEndSeq's int result is not visible here.
 */
int mapAllSingleEndSeq();
void mapSingleEndSeq(unsigned int *l1, int s1, int readNumber, int readSegment, int direction);
void mapPairedEndSeqList(unsigned int *l1, int s1, int readNumber, int readSegment, int direction);

void mapPairedEndSeq();

/*
 * Paired-end output routines (defined elsewhere).
 * Where the output goes is not visible in this header.
 */
void outputPairedEnd();
void outputPairedEndDiscPP();

167 void outputPairFullMappingInfo(FILE *fp, int readNumber);
|
|
168 void setPairFullMappingInfo(int readNumber, FullMappingInfo mi1, FullMappingInfo mi2);
|
|
169 void setFullMappingInfo(int readNumber, int loc, int dir, int err, int score, char *md, char * refName, char *cigar);
|
|
170
|
|
/*
 * Trans-chromosomal output routines (defined elsewhere).
 * outputTransChromosal takes two file names and an output stream; how the
 * two files are combined is not visible in this header.
 */
void outputAllTransChromosal();
void outputTransChromosal(char *fileName1, char *fileName2, FILE *fp_out);

/*
 * String generators (defined elsewhere). Each takes an input string with an
 * explicit length and a caller-supplied output buffer; no output capacity is
 * passed, so the caller must provide a buffer that is large enough
 * (presumably MAX_CIGAR_SIZE bytes for cigar -- confirm).
 */
void generateSNPSAM(char *matrix, int matrixLength, char *outputSNP);
void generateCigar(char *matrix, int matrixLength, char *cigar);
void generateCigarFromMD(char *mismatch, int mismatchLength, char *cigar);

/*
 * Takes a sequence string and returns an int (defined elsewhere);
 * presumably the hash value stored in the hv fields -- confirm.
 */
int msfHashVal(char *seq);

/*
 * Edit-distance kernels (defined elsewhere). All share one signature: two
 * strings, each with an explicit length, and an int result.
 * NOTE(review): the names suggest SSE2 implementations specialised by error
 * threshold (2, 4, G, Extention) and scan direction; the meaning of the
 * result is not visible in this header.
 */
int backwardEditDistance2SSE2(char *a, int lena, char *b, int lenb);
int forwardEditDistance2SSE2(char *a, int lena, char *b, int lenb);

int forwardEditDistanceSSE2G(char *a, int lena, char *b, int lenb);
int backwardEditDistanceSSE2G(char *a, int lena, char *b, int lenb);

int forwardEditDistance4SSE2(char *a, int lena, char *b, int lenb);
int backwardEditDistance4SSE2(char *a, int lena, char *b, int lenb);

int forwardEditDistanceSSE2Extention(char *a, int lena, char *b, int lenb);
int backwardEditDistanceSSE2Extention(char *a, int lena, char *b, int lenb);


/***********************************/

/*
 * Takes a reference index, a sequence with its length, and a matrix buffer;
 * returns an int (defined elsewhere).
 * NOTE(review): matrix is non-const and has no capacity argument, so it is
 * presumably an output buffer sized by the caller -- confirm.
 */
int editDistance(int refIndex, char *seq, int seqLength, char *matrix);

/*
 * Single-end verification routines (defined elsewhere). All four variants
 * share one signature:
 *
 *   refIndex           index into the reference
 *   lSeq / lSeqLength  left sequence and its length
 *   rSeq / rSeqLength  right sequence and its length
 *   segLength          segment length
 *   matrix             matrix buffer (presumably an output -- confirm)
 *   map_location       pointer to an int (presumably receives the mapping
 *                      location -- confirm)
 *   seqHashValue       array of short hash values
 *
 * The meaning of the int result is not visible in this header.
 */
int verifySingleEndEditDistance(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
                                char *matrix, int *map_location, short *seqHashValue);

int verifySingleEndEditDistance2(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
                                 char *matrix, int *map_location, short *seqHashValue);

int verifySingleEndEditDistance4(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
                                 char *matrix, int *map_location, short *seqHashValue);

int verifySingleEndEditDistanceExtention(int refIndex, char *lSeq, int lSeqLength, char *rSeq, int rSeqLength, int segLength,
                                         char *matrix, int *map_location, short *seqHashValue);

#endif /* __MR_FAST__ */