annotate mrsfast-2.3.0.2/Reads.c @ 0:ec628ba33878 default tip

Uploaded source code for mrsFAST
author calkan
date Tue, 21 Feb 2012 10:39:28 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
1 /*
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
2 * Copyright (c) <2008 - 2009>, University of Washington, Simon Fraser University
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
3 * All rights reserved.
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
4 *
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
5 * Redistribution and use in source and binary forms, with or without modification,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
6 * are permitted provided that the following conditions are met:
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
7 *
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
8 * Redistributions of source code must retain the above copyright notice, this list
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
9 * of conditions and the following disclaimer.
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
10 * - Redistributions in binary form must reproduce the above copyright notice, this
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
11 * list of conditions and the following disclaimer in the documentation and/or other
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
12 * materials provided with the distribution.
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
13 * - Neither the name of the <ORGANIZATION> nor the names of its contributors may be
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
14 * used to endorse or promote products derived from this software without specific
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
15 * prior written permission.
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
27 */
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
28
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
29 /*
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
30 * Author : Faraz Hach
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
31 * Email : fhach AT cs DOT sfu
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
32 * Last Update : 2009-12-08
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
33 */
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
34
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
35
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
36 #include <stdio.h>
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
37 #include <stdlib.h>
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
38 #include <string.h>
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
39 #include <ctype.h>
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
40 #include <zlib.h>
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
41 #include "Common.h"
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
42 #include "Reads.h"
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
43
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
44
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
45
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Plain-text stream for the first reads file: opened by readAllReads() when the input is not compressed and read by readFirstSeqTXT(). */
46 FILE *_r_fp1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Plain-text stream for the second reads file: readAllReads() opens it from fileName2 for paired-end input with a second file, otherwise aliases it to _r_fp1; read by readSecondSeqTXT(). */
47 FILE *_r_fp2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* gzip stream for the first reads file: opened by readAllReads() when the input is compressed and read by readFirstSeqGZ(). */
48 gzFile _r_gzfp1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* gzip stream read by readSecondSeqGZ(). NOTE(review): the visible part of readAllReads() stores the second compressed handle in _r_fp2 and never assigns this variable - confirm it is set before readSecondSeqGZ() runs. */
49 gzFile _r_gzfp2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Not referenced in the visible part of this file; presumably the module-level list of loaded reads - TODO confirm. */
50 Read *_r_seq;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Not referenced in the visible part of this file; presumably the number of entries in _r_seq - TODO confirm. */
51 int _r_seqCnt;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Not referenced in the visible part of this file; purpose cannot be determined from here - TODO confirm against its users. */
52 int *_r_samplingLocs;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
53
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
54 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Line reader for the first input stream. readAllReads() points it at readFirstSeqTXT for plain input or readFirstSeqGZ for compressed input, then calls it with a destination buffer and treats a NULL result as "no more lines". */
55 char *(*readFirstSeq)(char *);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
/* Line reader for the second (mate) input stream. readAllReads() points it at readSecondSeqTXT or readSecondSeqGZ, matching the choice made for readFirstSeq. */
56 char *(*readSecondSeq)(char *);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
57 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
58 char *readFirstSeqTXT( char *seq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
59 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
60 return fgets(seq, SEQ_MAX_LENGTH, _r_fp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
61 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
62
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
63 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
64 char *readSecondSeqTXT( char *seq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
65 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
66 return fgets(seq, SEQ_MAX_LENGTH, _r_fp2);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
67 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
68 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
69 char *readFirstSeqGZ( char *seq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
70 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
71 return gzgets(_r_gzfp1, seq, SEQ_MAX_LENGTH);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
72 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
73
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
74 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
75 char *readSecondSeqGZ( char *seq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
76 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
77 return gzgets(_r_gzfp2, seq, SEQ_MAX_LENGTH);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
78 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
79 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
80 int readAllReads(char *fileName1,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
81 char *fileName2,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
82 int compressed,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
83 unsigned char *fastq,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
84 unsigned char pairedEnd,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
85 Read **seqList,
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
86 unsigned int *seqListSize)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
87 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
88 double startTime=getTime();
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
89
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
90 char seq1[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
91 char rseq1[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
92 char name1[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
93 char qual1[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
94 char seq2[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
95 char rseq2[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
96 char name2[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
97 char qual2[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
98
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
99 char dummy[SEQ_MAX_LENGTH];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
100 char ch;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
101 int err1, err2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
102 int nCnt;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
103 int discarded = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
104 int seqCnt = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
105 int maxCnt = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
106 int i;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
107 Read *list = NULL;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
108
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
109
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
110 if (!compressed)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
111 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
112 _r_fp1 = fileOpen( fileName1, "r");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
113
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
114 if (_r_fp1 == NULL)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
115 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
116 return 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
117 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
118
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
119 ch = fgetc(_r_fp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
120
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
121 if ( pairedEnd && fileName2 != NULL )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
122 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
123 _r_fp2 = fileOpen ( fileName2, "r" );
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
124 if (_r_fp2 == NULL)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
125 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
126 return 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
127 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
128 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
129 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
130 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
131 _r_fp2 = _r_fp1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
132 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
133
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
134 readFirstSeq = &readFirstSeqTXT;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
135 readSecondSeq = &readSecondSeqTXT;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
136 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
137 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
138 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
139
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
140 _r_gzfp1 = fileOpenGZ (fileName1, "r");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
141
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
142 if (_r_gzfp1 == NULL)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
143 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
144 return 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
145 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
146
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
147 ch = gzgetc(_r_gzfp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
148
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
149 if ( pairedEnd && fileName2 != NULL )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
150 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
151 _r_fp2 = fileOpenGZ ( fileName2, "r" );
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
152 if (_r_fp2 == NULL)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
153 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
154 return 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
155 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
156 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
157 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
158 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
159 _r_fp2 = _r_fp1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
160 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
161
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
162 readFirstSeq = &readFirstSeqGZ;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
163 readSecondSeq = &readSecondSeqGZ;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
164 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
165
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
166 if (ch == '>')
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
167 *fastq = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
168 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
169 *fastq = 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
170
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
171 // Counting the number of lines in the file
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
172 while (readFirstSeq(dummy)) maxCnt++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
173
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
174 if (!compressed)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
175 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
176 rewind(_r_fp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
177 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
178 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
179 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
180 gzrewind(_r_gzfp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
181 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
182
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
183 // Calculating the Maximum # of sequences
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
184 if (*fastq)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
185 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
186 maxCnt /= 4;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
187 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
188 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
189 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
190 maxCnt /= 2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
191 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
192
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
193
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
194
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
195 if (pairedEnd && fileName2 != NULL )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
196 maxCnt *= 2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
197
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
198 list = getMem(sizeof(Read)*maxCnt);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
199
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
200 while( readFirstSeq(name1) )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
201 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
202 err1 = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
203 err2 = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
204 readFirstSeq(seq1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
205 name1[strlen(name1)-1] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
206 for (i=0; i<strlen(name1);i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
207 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
208 if (name1[i] == ' ')
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
209 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
210 name1[i] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
211 break;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
212 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
213
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
214 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
215
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
216 if ( *fastq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
217 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
218 readFirstSeq(dummy);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
219 readFirstSeq(qual1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
220 qual1[strlen(qual1)-1] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
221 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
222 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
223 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
224 sprintf(qual1, "*");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
225 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
226
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
227
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
228 // Cropping
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
229 if (cropSize > 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
230 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
231 seq1[cropSize] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
232 if ( *fastq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
233 qual1[cropSize] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
234 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
235
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
236
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
237 nCnt = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
238 for (i=0; i<strlen(seq1); i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
239 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
240 seq1[i] = toupper (seq1[i]);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
241 if (seq1[i] == 'N')
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
242 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
243 nCnt++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
244 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
245 else if (isspace(seq1[i]))
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
246 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
247
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
248 seq1[i] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
249 break;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
250 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
251 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
252
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
253 if (nCnt > errThreshold)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
254 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
255 err1 = 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
256 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
257
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
258 // Reading the second seq of pair-ends
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
259 if (pairedEnd)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
260 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
261 readSecondSeq(name2);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
262 readSecondSeq(seq2);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
263 name2[strlen(name2)-1] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
264 for (i=0; i<strlen(name2);i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
265 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
266 if (name2[i] == ' ')
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
267 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
268 name2[i] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
269 break;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
270 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
271
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
272 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
273
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
274 if ( *fastq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
275 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
276 readSecondSeq(dummy);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
277 readSecondSeq(qual2);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
278
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
279 qual2[strlen(qual2)-1] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
280 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
281 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
282 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
283 sprintf(qual2, "*");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
284 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
285
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
286
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
287 // Cropping
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
288 if (cropSize > 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
289 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
290 seq2[cropSize] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
291 if ( *fastq )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
292 qual2[cropSize] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
293 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
294
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
295
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
296 nCnt = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
297 for (i=0; i<strlen(seq2); i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
298 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
299 seq2[i] = toupper (seq2[i]);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
300 if (seq2[i] == 'N')
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
301 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
302 nCnt++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
303
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
304 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
305 else if (isspace(seq2[i]))
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
306 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
307 seq2[i] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
308 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
309 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
310 if (nCnt > errThreshold)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
311 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
312 err2 = 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
313 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
314 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
315
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
316 if (!pairedEnd && !err1)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
317 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
318
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
319 int _mtmp = strlen(seq1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
320 list[seqCnt].hits = getMem (1+3*_mtmp+3+strlen(name1)+1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
321 list[seqCnt].seq = list[seqCnt].hits + 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
322 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
323 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
324 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
325
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
326
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
327 reverseComplete(seq1, rseq1, _mtmp);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
328 rseq1[_mtmp] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
329 int i;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
330
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
331 list[seqCnt].hits[0] = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
332
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
333 for (i=0; i<=_mtmp; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
334 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
335 list[seqCnt].seq[i] = seq1[i];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
336 list[seqCnt].rseq[i] = rseq1[i] ;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
337 list[seqCnt].qual[i] = qual1[i];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
338 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
339 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
340
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
341 seqCnt++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
342
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
343 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
344 else if (pairedEnd && !err1 && !err2)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
345 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
346 // Naming Conventions X/1, X/2 OR X
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
347 int tmplen = strlen(name1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
348 if (strcmp(name1, name2) != 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
349 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
350 tmplen = strlen(name1)-2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
351 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
352
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
353 if (strcmp(name1, "@IL11_266:2:1:922:509/1") == 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
354 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
355 fprintf(stdout, "%d\n", seqCnt);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
356 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
357 //first seq
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
358 int _mtmp = strlen(seq1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
359 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
360 list[seqCnt].seq = list[seqCnt].hits + 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
361 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
362 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
363 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
364
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
365 reverseComplete(seq1, rseq1, _mtmp);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
366 rseq1[_mtmp] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
367 int i;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
368
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
369 list[seqCnt].hits[0] = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
370
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
371 for (i=0; i<=_mtmp; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
372 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
373 list[seqCnt].seq[i] = seq1[i];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
374 list[seqCnt].rseq[i] = rseq1[i] ;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
375 list[seqCnt].qual[i] = qual1[i];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
376 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
377
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
378
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
379 name1[tmplen]='\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
380 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
381
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
382
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
383 seqCnt++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
384
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
385 //second seq
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
386 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
387 list[seqCnt].seq = list[seqCnt].hits + 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
388 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
389 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
390 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
391
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
392 reverseComplete(seq2, rseq2, _mtmp);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
393 rseq2[_mtmp] = '\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
394
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
395 list[seqCnt].hits[0] = 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
396
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
397 for (i=0; i<=_mtmp; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
398 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
399 list[seqCnt].seq[i] = seq2[i];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
400 list[seqCnt].rseq[i] = rseq2[i] ;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
401 list[seqCnt].qual[i] = qual2[i];
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
402 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
403
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
404
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
405 name2[tmplen]='\0';
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
406 sprintf(list[seqCnt].name,"%s%c", ((char*)name2)+1,'\0');
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
407
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
408
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
409 seqCnt++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
410
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
411 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
412 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
413 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
414 discarded++;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
415 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
416 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
417
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
418 if (seqCnt > 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
419 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
420 QUAL_LENGTH = SEQ_LENGTH = strlen(list[0].seq);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
421 if (! *fastq)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
422 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
423 QUAL_LENGTH = 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
424 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
425 //fprintf(stderr, "%d %d\n", SEQ_LENGTH, QUAL_LENGTH);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
426 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
427 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
428 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
429 fprintf(stdout, "ERR: No reads can be found for mapping\n");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
430 return 0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
431 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
432
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
433
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
434 if (pairedEnd)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
435 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
436 // seqCnt /= 2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
437 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
438
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
439
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
440 // Closing Files
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
441 if (!compressed)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
442 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
443 fclose(_r_fp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
444 if ( pairedEnd && fileName2 != NULL )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
445 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
446 fclose(_r_fp2);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
447 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
448 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
449 else
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
450 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
451 gzclose(_r_gzfp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
452 if ( pairedEnd && fileName2 != NULL)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
453 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
454 gzclose(_r_fp2);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
455 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
456 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
457
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
458 *seqList = list;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
459 *seqListSize = seqCnt;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
460
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
461 _r_seq = list;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
462 _r_seqCnt = seqCnt;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
463
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
464 fprintf(stdout, "%d sequences are read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
465 //totalLoadingTime+=getTime()-startTime;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
466
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
467 return 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
468 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
469 /**********************************************/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
470 void loadSamplingLocations(int **samplingLocs, int * samplingLocsSize)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
471 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
472 int i;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
473 int samLocsSize = errThreshold + 1;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
474 int *samLocs = getMem(sizeof(int)*samLocsSize);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
475
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
476 for (i=0; i<samLocsSize; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
477 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
478 samLocs[i] = (SEQ_LENGTH / samLocsSize) *i;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
479 if ( samLocs[i] + WINDOW_SIZE > SEQ_LENGTH)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
480 samLocs[i] = SEQ_LENGTH - WINDOW_SIZE;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
481 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
482
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
483 // Outputing the sampling locations
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
484
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
485 /* int j;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
486 for (i=0; i<SEQ_LENGTH; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
487 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
488 fprintf(stdout, "-");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
489 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
490 fprintf(stdout, "\n");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
491
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
492 for ( i=0; i<samLocsSize; i++ )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
493 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
494 for ( j=0; j<samLocs[i]; j++ )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
495 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
496 fprintf(stdout," ");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
497 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
498 for (j=0; j<WINDOW_SIZE; j++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
499 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
500 fprintf(stdout,"+");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
501 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
502 fprintf(stdout, "\n");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
503 fflush(stdout);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
504 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
505 for ( i=0; i<SEQ_LENGTH; i++ )
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
506 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
507 fprintf(stdout, "-");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
508 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
509 fprintf(stdout, "\n");*/
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
510 *samplingLocs = samLocs;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
511 *samplingLocsSize = samLocsSize;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
512 _r_samplingLocs = samLocs;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
513 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
514
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
515 void finalizeReads(char *fileName)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
516 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
517 FILE *fp1=NULL;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
518
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
519 if (fileName != NULL)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
520 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
521 fp1 = fileOpen(fileName, "w");
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
522 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
523 if (pairedEndMode)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
524 _r_seqCnt /=2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
525
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
526 int i=0;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
527 for (i = 0; i < _r_seqCnt; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
528 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
529 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && strcmp(_r_seq[2*i].qual,"*")!=0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
530 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
531 fprintf(fp1,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
532 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
533 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
534 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
535 fprintf(fp1, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
536 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
537 else if (_r_seq[i].hits[0] == 0 && strcmp(_r_seq[i].qual, "*")!=0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
538 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
539 fprintf(fp1,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
540 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
541 else if (_r_seq[i].hits[0] == 0)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
542 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
543 fprintf(fp1,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
544 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
545 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
546
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
547 fclose(fp1);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
548 if (pairedEndMode)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
549 _r_seqCnt *= 2;
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
550
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
551 for (i = 0; i < _r_seqCnt; i++)
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
552 {
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
553 freeMem(_r_seq[i].hits,0);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
554 }
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
555
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
556
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
557 freeMem(_r_seq,0);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
558 freeMem(_r_samplingLocs,0);
ec628ba33878 Uploaded source code for mrsFAST
calkan
parents:
diff changeset
559 }