Mercurial > repos > calkan > mrfast
comparison mrfast-2.1.0.4/Reads.c @ 0:7b3dc85dc7fd
Uploaded mrfast source tarball
author | calkan |
---|---|
date | Tue, 21 Feb 2012 10:29:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7b3dc85dc7fd |
---|---|
1 /* | |
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University | |
3 * All rights reserved. | |
4 * | |
5 * Redistribution and use in source and binary forms, with or without modification, | |
6 * are permitted provided that the following conditions are met: | |
7 * | |
8 * Redistributions of source code must retain the above copyright notice, this list | |
9 * of conditions and the following disclaimer. | |
10 * - Redistributions in binary form must reproduce the above copyright notice, this | |
11 * list of conditions and the following disclaimer in the documentation and/or other | |
12 * materials provided with the distribution. | |
13 * - Neither the names of the University of Washington, Simon Fraser University, | |
14 * nor the names of its contributors may be | |
15 * used to endorse or promote products derived from this software without specific | |
16 * prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | |
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 /* | |
32 Authors: | |
33 Farhad Hormozdiari | |
34 Faraz Hach | |
35 Can Alkan | |
36 Emails: | |
37 farhadh AT uw DOT edu | |
38 fhach AT cs DOT sfu DOT ca | |
39 calkan AT uw DOT edu | |
40 */ | |
41 | |
42 | |
43 | |
44 #include <stdio.h> | |
45 #include <stdlib.h> | |
46 #include <string.h> | |
47 #include <ctype.h> | |
48 #include <zlib.h> | |
49 #include "Common.h" | |
50 #include "Reads.h" | |
51 | |
52 #define CHARCODE(a) (a=='A' ? 0 : (a=='C' ? 1 : (a=='G' ? 2 : (a=='T' ? 3 : 4)))) | |
53 | |
54 FILE *_r_fp1; | |
55 FILE *_r_fp2; | |
56 gzFile _r_gzfp1; | |
57 gzFile _r_gzfp2; | |
58 Read *_r_seq; | |
59 int _r_seqCnt; | |
60 int *_r_samplingLocs; | |
61 | |
62 /**********************************************/ | |
63 char *(*readFirstSeq)(char *); | |
64 char *(*readSecondSeq)(char *); | |
65 /**********************************************/ | |
66 char *readFirstSeqTXT( char *seq ) | |
67 { | |
68 return fgets(seq, SEQ_MAX_LENGTH, _r_fp1); | |
69 } | |
70 | |
71 /**********************************************/ | |
72 char *readSecondSeqTXT( char *seq ) | |
73 { | |
74 return fgets(seq, SEQ_MAX_LENGTH, _r_fp2); | |
75 } | |
76 /**********************************************/ | |
77 char *readFirstSeqGZ( char *seq ) | |
78 { | |
79 return gzgets(_r_gzfp1, seq, SEQ_MAX_LENGTH); | |
80 } | |
81 | |
82 /**********************************************/ | |
83 char *readSecondSeqGZ( char *seq ) | |
84 { | |
85 return gzgets(_r_gzfp2, seq, SEQ_MAX_LENGTH); | |
86 } | |
87 /**********************************************/ | |
88 int toCompareRead(const void * elem1, const void * elem2) | |
89 { | |
90 return strcmp(((Read *)elem1)->seq, ((Read *)elem2)->seq); | |
91 } | |
92 /**********************************************/ | |
93 int readAllReads(char *fileName1, | |
94 char *fileName2, | |
95 int compressed, | |
96 unsigned char *fastq, | |
97 unsigned char pairedEnd, | |
98 Read **seqList, | |
99 unsigned int *seqListSize) | |
100 { | |
101 double startTime=getTime(); | |
102 | |
103 char seq1[SEQ_MAX_LENGTH]; | |
104 char rseq1[SEQ_MAX_LENGTH]; | |
105 char name1[SEQ_MAX_LENGTH]; | |
106 char qual1[SEQ_MAX_LENGTH]; | |
107 char seq2[SEQ_MAX_LENGTH]; | |
108 char rseq2[SEQ_MAX_LENGTH]; | |
109 char name2[SEQ_MAX_LENGTH]; | |
110 char qual2[SEQ_MAX_LENGTH]; | |
111 | |
112 char dummy[SEQ_MAX_LENGTH]; | |
113 char ch; | |
114 int err1, err2; | |
115 int nCnt; | |
116 int discarded = 0; | |
117 int seqCnt = 0; | |
118 int maxCnt = 0; | |
119 int i; | |
120 Read *list = NULL; | |
121 | |
122 int clipped = 0; | |
123 | |
124 if (!compressed) | |
125 { | |
126 _r_fp1 = fileOpen( fileName1, "r"); | |
127 | |
128 if (_r_fp1 == NULL) | |
129 { | |
130 return 0; | |
131 } | |
132 | |
133 ch = fgetc(_r_fp1); | |
134 | |
135 if ( pairedEnd && fileName2 != NULL ) | |
136 { | |
137 _r_fp2 = fileOpen ( fileName2, "r" ); | |
138 if (_r_fp2 == NULL) | |
139 { | |
140 return 0; | |
141 } | |
142 } | |
143 else | |
144 { | |
145 _r_fp2 = _r_fp1; | |
146 } | |
147 | |
148 readFirstSeq = &readFirstSeqTXT; | |
149 readSecondSeq = &readSecondSeqTXT; | |
150 } | |
151 else | |
152 { | |
153 | |
154 _r_gzfp1 = fileOpenGZ (fileName1, "r"); | |
155 | |
156 if (_r_gzfp1 == NULL) | |
157 { | |
158 return 0; | |
159 } | |
160 | |
161 ch = gzgetc(_r_gzfp1); | |
162 | |
163 if ( pairedEnd && fileName2 != NULL ) | |
164 { | |
165 _r_gzfp2 = fileOpenGZ ( fileName2, "r" ); | |
166 if (_r_gzfp2 == NULL) | |
167 { | |
168 return 0; | |
169 } | |
170 } | |
171 else | |
172 { | |
173 _r_gzfp2 = _r_gzfp1; | |
174 } | |
175 | |
176 readFirstSeq = &readFirstSeqGZ; | |
177 readSecondSeq = &readSecondSeqGZ; | |
178 } | |
179 | |
180 if (ch == '>') | |
181 *fastq = 0; | |
182 else | |
183 *fastq = 1; | |
184 | |
185 // Counting the number of lines in the file | |
186 while (readFirstSeq(dummy)) maxCnt++; | |
187 | |
188 if (!compressed) | |
189 { | |
190 rewind(_r_fp1); | |
191 } | |
192 else | |
193 { | |
194 gzrewind(_r_gzfp1); | |
195 } | |
196 | |
197 // Calculating the Maximum # of sequences | |
198 if (*fastq) | |
199 { | |
200 maxCnt /= 4; | |
201 } | |
202 else | |
203 { | |
204 maxCnt /= 2; | |
205 } | |
206 | |
207 if (pairedEnd && fileName2 != NULL ) | |
208 maxCnt *= 2; | |
209 | |
210 list = getMem(sizeof(Read)*maxCnt); | |
211 | |
212 while( readFirstSeq(name1) ) | |
213 { | |
214 err1 = 0; | |
215 err2 = 0; | |
216 readFirstSeq(seq1); | |
217 name1[strlen(name1)-1] = '\0'; | |
218 | |
219 if ( *fastq ) | |
220 { | |
221 readFirstSeq(dummy); | |
222 readFirstSeq(qual1); | |
223 qual1[strlen(qual1)-1] = '\0'; | |
224 } | |
225 else | |
226 { | |
227 sprintf(qual1, "*"); | |
228 } | |
229 | |
230 // Cropping | |
231 if (cropSize > 0) | |
232 { | |
233 seq1[cropSize] = '\0'; | |
234 if ( *fastq ) | |
235 qual1[cropSize] = '\0'; | |
236 } | |
237 | |
238 | |
239 nCnt = 0; | |
240 for (i=0; i<strlen(seq1); i++) | |
241 { | |
242 seq1[i] = toupper (seq1[i]); | |
243 if (seq1[i] == 'N') | |
244 { | |
245 nCnt++; | |
246 } | |
247 else if (isspace(seq1[i])) | |
248 { | |
249 | |
250 seq1[i] = '\0'; | |
251 break; | |
252 } | |
253 } | |
254 | |
255 if (nCnt > errThreshold) | |
256 { | |
257 err1 = 1; | |
258 } | |
259 | |
260 // Reading the second seq of pair-ends | |
261 if (pairedEnd) | |
262 { | |
263 readSecondSeq(name2); | |
264 readSecondSeq(seq2); | |
265 name2[strlen(name2)-1] = '\0'; | |
266 | |
267 | |
268 if ( *fastq ) | |
269 { | |
270 readSecondSeq(dummy); | |
271 readSecondSeq(qual2); | |
272 | |
273 qual2[strlen(qual2)-1] = '\0'; | |
274 } | |
275 else | |
276 { | |
277 sprintf(qual2, "*"); | |
278 } | |
279 | |
280 | |
281 // Cropping | |
282 if (cropSize > 0) | |
283 { | |
284 seq2[cropSize] = '\0'; | |
285 if ( *fastq ) | |
286 qual2[cropSize] = '\0'; | |
287 } | |
288 | |
289 | |
290 nCnt = 0; | |
291 for (i=0; i<strlen(seq2); i++) | |
292 { | |
293 seq2[i] = toupper (seq2[i]); | |
294 if (seq2[i] == 'N') | |
295 { | |
296 nCnt++; | |
297 | |
298 } | |
299 else if (isspace(seq2[i])) | |
300 { | |
301 seq2[i] = '\0'; | |
302 } | |
303 } | |
304 if (nCnt > errThreshold) | |
305 { | |
306 err2 = 1; | |
307 } | |
308 | |
309 | |
310 if (strlen(seq1) < strlen(seq2)) { | |
311 seq2[strlen(seq1)] = '\0'; | |
312 if ( *fastq ) | |
313 qual2[strlen(seq1)] = '\0'; | |
314 if (!clipped) clipped = 2; | |
315 } | |
316 else if (strlen(seq1) > strlen(seq2)){ | |
317 seq1[strlen(seq2)] = '\0'; | |
318 if ( *fastq ) | |
319 qual1[strlen(seq2)] = '\0'; | |
320 if (!clipped) clipped = 1; | |
321 } | |
322 | |
323 if (clipped == 1 || clipped == 2){ | |
324 fprintf(stdout, "[PE mode warning] Sequence lengths are different, read #%d is clipped to match.\n", clipped); | |
325 clipped = 3; | |
326 } | |
327 | |
328 | |
329 | |
330 } | |
331 | |
332 if (!pairedEnd && !err1) | |
333 { | |
334 int _mtmp = strlen(seq1); | |
335 list[seqCnt].hits = getMem (1+3*_mtmp+3+strlen(name1)+1); | |
336 list[seqCnt].seq = list[seqCnt].hits + 1; | |
337 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1; | |
338 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1; | |
339 list[seqCnt].name = list[seqCnt].qual + _mtmp+1; | |
340 | |
341 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp); | |
342 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp); | |
343 | |
344 list[seqCnt].readNumber = seqCnt; | |
345 | |
346 reverseComplete(seq1, rseq1, _mtmp); | |
347 rseq1[_mtmp] = '\0'; | |
348 int i; | |
349 | |
350 list[seqCnt].hits[0] = 0; | |
351 | |
352 for (i=0; i<=_mtmp; i++) | |
353 { | |
354 list[seqCnt].seq[i] = seq1[i]; | |
355 list[seqCnt].rseq[i] = rseq1[i] ; | |
356 list[seqCnt].qual[i] = qual1[i]; | |
357 } | |
358 | |
359 | |
360 //MAKE HASH VALUE | |
361 short code = 0; | |
362 | |
363 for(i=0; i < 4; i++) | |
364 code = code * 5 + CHARCODE(list[seqCnt].seq[i]); | |
365 list[seqCnt].hashValue[0] = code; | |
366 | |
367 | |
368 for(i = 1; i < _mtmp-3; i++) | |
369 { | |
370 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]); | |
371 } | |
372 | |
373 | |
374 code = 0; | |
375 for(i=0; i < 4; i++) | |
376 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]); | |
377 list[seqCnt].rhashValue[0] = code; | |
378 | |
379 | |
380 for(i = 1; i < _mtmp-3; i++) | |
381 { | |
382 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]); | |
383 } | |
384 | |
385 int j = 0; | |
386 int tmpSize = _mtmp / (errThreshold+1); | |
387 | |
388 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1)); | |
389 for(i=0; i < errThreshold+1; i++) | |
390 { | |
391 code = 0; | |
392 for(j = 0; j < tmpSize; j++) | |
393 { | |
394 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]); | |
395 } | |
396 list[seqCnt].hashValSampleSize[i] = code; | |
397 } | |
398 | |
399 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0'); | |
400 | |
401 seqCnt++; | |
402 | |
403 } | |
404 else if (pairedEnd && !err1 && !err2) | |
405 { | |
406 | |
407 // Naming Conventions X/1, X/2 OR X | |
408 int tmplen = strlen(name1); | |
409 if (strcmp(name1, name2) != 0) | |
410 { | |
411 tmplen = strlen(name1)-2; | |
412 } | |
413 | |
414 //first seq | |
415 int _mtmp = strlen(seq1); | |
416 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1); | |
417 list[seqCnt].seq = list[seqCnt].hits + 1; | |
418 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1; | |
419 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1; | |
420 list[seqCnt].name = list[seqCnt].qual + _mtmp+1; | |
421 | |
422 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp); | |
423 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp); | |
424 list[seqCnt].readNumber = seqCnt; | |
425 | |
426 reverseComplete(seq1, rseq1, _mtmp); | |
427 rseq1[_mtmp] = '\0'; | |
428 int i; | |
429 | |
430 list[seqCnt].hits[0] = 0; | |
431 | |
432 for (i=0; i<=_mtmp; i++) | |
433 { | |
434 list[seqCnt].seq[i] = seq1[i]; | |
435 list[seqCnt].rseq[i] = rseq1[i] ; | |
436 list[seqCnt].qual[i] = qual1[i]; | |
437 } | |
438 | |
439 | |
440 name1[tmplen]='\0'; | |
441 | |
442 //MAKE HASH VALUE | |
443 short code = 0; | |
444 | |
445 for(i=0; i < 4; i++) | |
446 code = code * 5 + CHARCODE(list[seqCnt].seq[i]); | |
447 list[seqCnt].hashValue[0] = code; | |
448 | |
449 | |
450 for(i = 1; i < _mtmp-3; i++) | |
451 { | |
452 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]); | |
453 } | |
454 | |
455 | |
456 code = 0; | |
457 for(i=0; i < 4; i++) | |
458 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]); | |
459 list[seqCnt].rhashValue[0] = code; | |
460 | |
461 | |
462 for(i = 1; i < _mtmp-3; i++) | |
463 { | |
464 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]); | |
465 } | |
466 | |
467 int j = 0; | |
468 int tmpSize = _mtmp / (errThreshold+1); | |
469 | |
470 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1)); | |
471 for(i=0; i < errThreshold+1; i++) | |
472 { | |
473 code = 0; | |
474 for(j = 0; j < tmpSize; j++) | |
475 { | |
476 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]); | |
477 } | |
478 list[seqCnt].hashValSampleSize[i] = code; | |
479 } | |
480 | |
481 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0'); | |
482 | |
483 | |
484 seqCnt++; | |
485 | |
486 //second seq | |
487 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1); | |
488 list[seqCnt].seq = list[seqCnt].hits + 1; | |
489 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1; | |
490 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1; | |
491 list[seqCnt].name = list[seqCnt].qual + _mtmp+1; | |
492 | |
493 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp); | |
494 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp); | |
495 list[seqCnt].readNumber = seqCnt; | |
496 | |
497 reverseComplete(seq2, rseq2, _mtmp); | |
498 rseq2[_mtmp] = '\0'; | |
499 | |
500 list[seqCnt].hits[0] = 0; | |
501 | |
502 for (i=0; i<=_mtmp; i++) | |
503 { | |
504 list[seqCnt].seq[i] = seq2[i]; | |
505 list[seqCnt].rseq[i] = rseq2[i] ; | |
506 list[seqCnt].qual[i] = qual2[i]; | |
507 } | |
508 | |
509 | |
510 name2[tmplen]='\0'; | |
511 | |
512 //MAKE HASH VALUE | |
513 code = 0; | |
514 | |
515 for(i=0; i < 4; i++) | |
516 code = code * 5 + CHARCODE(list[seqCnt].seq[i]); | |
517 list[seqCnt].hashValue[0] = code; | |
518 | |
519 | |
520 for(i = 1; i < _mtmp-3; i++) | |
521 { | |
522 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]); | |
523 } | |
524 | |
525 | |
526 code = 0; | |
527 for(i=0; i < 4; i++) | |
528 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]); | |
529 list[seqCnt].rhashValue[0] = code; | |
530 | |
531 | |
532 for(i = 1; i < _mtmp-3; i++) | |
533 { | |
534 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]); | |
535 } | |
536 | |
537 j = 0; | |
538 tmpSize = _mtmp / (errThreshold+1); | |
539 | |
540 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1)); | |
541 for(i=0; i < errThreshold+1; i++) | |
542 { | |
543 code = 0; | |
544 for(j = 0; j < tmpSize; j++) | |
545 { | |
546 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]); | |
547 } | |
548 list[seqCnt].hashValSampleSize[i] = code; | |
549 } | |
550 | |
551 sprintf(list[seqCnt].name,"%s%c", ((char*)name2)+1,'\0'); | |
552 | |
553 seqCnt++; | |
554 | |
555 } | |
556 else | |
557 { | |
558 discarded++; | |
559 } | |
560 } | |
561 | |
562 if (seqCnt > 0) | |
563 { | |
564 SEQ_LENGTH = strlen(list[0].seq); | |
565 } | |
566 else | |
567 { | |
568 fprintf(stdout, "ERROR: No reads can be found for mapping\n"); | |
569 return 0; | |
570 } | |
571 | |
572 | |
573 // Closing Files | |
574 if (!compressed) | |
575 { | |
576 fclose(_r_fp1); | |
577 if ( pairedEnd && fileName2 != NULL ) | |
578 { | |
579 fclose(_r_fp2); | |
580 } | |
581 } | |
582 else | |
583 { | |
584 gzclose(_r_gzfp1); | |
585 if ( pairedEnd && fileName2 != NULL) | |
586 { | |
587 gzclose(_r_fp2); | |
588 } | |
589 } | |
590 | |
591 //qsort(list, seqCnt, sizeof(Read), toCompareRead); | |
592 | |
593 adjustQual(list, seqCnt); | |
594 | |
595 *seqList = list; | |
596 *seqListSize = seqCnt; | |
597 | |
598 | |
599 _r_seq = list; | |
600 _r_seqCnt = seqCnt; | |
601 | |
602 if ( pairedEnd ) discarded *= 2; | |
603 | |
604 if (seqCnt>1) | |
605 fprintf(stdout, "%d sequences are read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage()); | |
606 else | |
607 fprintf(stdout, "%d sequence is read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage()); | |
608 | |
609 | |
610 | |
611 return 1; | |
612 } | |
613 /**********************************************/ | |
614 void loadSamplingLocations(int **samplingLocs, int * samplingLocsSize) | |
615 { | |
616 int i; | |
617 int samLocsSize = errThreshold + 1; | |
618 int *samLocs = getMem(sizeof(int)*samLocsSize); | |
619 | |
620 for (i=0; i<samLocsSize; i++) | |
621 { | |
622 samLocs[i] = (SEQ_LENGTH / samLocsSize) *i; | |
623 if ( samLocs[i] + WINDOW_SIZE > SEQ_LENGTH) | |
624 samLocs[i] = SEQ_LENGTH - WINDOW_SIZE; | |
625 } | |
626 | |
627 // Outputing the sampling locations | |
628 | |
629 /* | |
630 | |
631 int j; | |
632 for (i=0; i<SEQ_LENGTH; i++) | |
633 { | |
634 fprintf(stdout, "-"); | |
635 } | |
636 fprintf(stdout, "\n"); | |
637 | |
638 for ( i=0; i<samLocsSize; i++ ) | |
639 { | |
640 for ( j=0; j<samLocs[i]; j++ ) | |
641 { | |
642 fprintf(stdout," "); | |
643 } | |
644 for (j=0; j<WINDOW_SIZE; j++) | |
645 { | |
646 fprintf(stdout,"+"); | |
647 } | |
648 fprintf(stdout, "\n"); | |
649 fflush(stdout); | |
650 } | |
651 | |
652 | |
653 for ( i=0; i<SEQ_LENGTH; i++ ) | |
654 { | |
655 fprintf(stdout, "-"); | |
656 } | |
657 fprintf(stdout, "\n"); | |
658 | |
659 */ | |
660 | |
661 *samplingLocs = samLocs; | |
662 *samplingLocsSize = samLocsSize; | |
663 _r_samplingLocs = samLocs; | |
664 } | |
665 | |
666 void finalizeReads(char *fileName) | |
667 { | |
668 FILE *fp1=NULL; | |
669 | |
670 if (fileName != NULL) | |
671 { | |
672 fp1 = fileOpen(fileName, "w"); | |
673 } | |
674 if (pairedEndMode) | |
675 _r_seqCnt /=2; | |
676 | |
677 int i=0; | |
678 for (i = 0; i < _r_seqCnt; i++) | |
679 { | |
680 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual,"*")!=0) | |
681 { | |
682 fprintf(fp1,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual); | |
683 } | |
684 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0) | |
685 { | |
686 fprintf(fp1, ">%s/1\n%s\n>%s/2\n%shits=%d\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[2*i+1].hits[0]); | |
687 } | |
688 else if (!pairedEndMode && _r_seq[i].hits[0] == 0 && strcmp(_r_seq[i].qual, "*")!=0) | |
689 { | |
690 fprintf(fp1,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual); | |
691 } | |
692 else if (!pairedEndMode && _r_seq[i].hits[0] == 0) | |
693 { | |
694 fprintf(fp1,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq); | |
695 } | |
696 } | |
697 | |
698 fclose(fp1); | |
699 if (pairedEndMode) | |
700 _r_seqCnt *= 2; | |
701 | |
702 for (i = 0; i < _r_seqCnt; i++) | |
703 { | |
704 freeMem(_r_seq[i].hits,0); | |
705 } | |
706 | |
707 | |
708 freeMem(_r_seq,0); | |
709 freeMem(_r_samplingLocs,0); | |
710 } | |
711 | |
712 void adjustQual(Read *list, int seqCnt){ | |
713 /* This function will automatically determine the phred_offset and readjust quality values if needed */ | |
714 } | |
715 | |
716 | |
717 | |
718 /*void finalizeOEAReads(char *fileName) | |
719 { | |
720 FILE *fp1=NULL; | |
721 FILE *fp2=NULL; | |
722 | |
723 //printf("OEA%s\n", fileName); | |
724 char fileNameOEA1[200]; | |
725 char fileNameOEA2[200]; | |
726 sprintf(fileNameOEA1, "%s_OEA1", fileName); | |
727 sprintf(fileNameOEA2, "%s_OEA2", fileName); | |
728 | |
729 | |
730 if (fileName != NULL) | |
731 { | |
732 fp1 = fileOpen(fileNameOEA1, "w"); | |
733 fp2 = fileOpen(fileNameOEA2, "w"); | |
734 } | |
735 if (pairedEndMode) | |
736 _r_seqCnt /=2; | |
737 | |
738 int i=0; | |
739 printf("%d\n", _r_seqCnt); | |
740 for (i = 0; i < _r_seqCnt; i++) | |
741 { | |
742 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")==0) | |
743 { | |
744 fprintf(fp1,">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq); | |
745 } | |
746 else if (pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")==0) | |
747 { | |
748 fprintf(fp2, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq); | |
749 } | |
750 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")!=0) | |
751 { | |
752 fprintf(fp1,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name, | |
753 _r_seq[2*i].seq, | |
754 _r_seq[2*i].qual, | |
755 _r_seq[2*i+1].name, | |
756 _r_seq[2*i+1].seq, | |
757 _r_seq[2*i+1].qual); | |
758 } | |
759 else if ( pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")!=0 ) | |
760 { | |
761 fprintf(fp2,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name, | |
762 _r_seq[2*i].seq, | |
763 _r_seq[2*i].qual, | |
764 _r_seq[2*i+1].name, | |
765 _r_seq[2*i+1].seq, | |
766 _r_seq[2*i+1].qual); | |
767 } | |
768 } | |
769 | |
770 fclose(fp1); | |
771 fclose(fp2); | |
772 | |
773 }*/ |