comparison mrfast-2.1.0.5/Reads.c @ 1:d4054b05b015 default tip

Version update to 2.1.0.5
author calkan
date Fri, 09 Mar 2012 07:35:51 -0500
parents
children
comparison
equal deleted inserted replaced
0:7b3dc85dc7fd 1:d4054b05b015
1 /*
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * Redistributions of source code must retain the above copyright notice, this list
9 * of conditions and the following disclaimer.
10 * - Redistributions in binary form must reproduce the above copyright notice, this
11 * list of conditions and the following disclaimer in the documentation and/or other
12 * materials provided with the distribution.
13 * - Neither the names of the University of Washington, Simon Fraser University,
14 * nor the names of its contributors may be
15 * used to endorse or promote products derived from this software without specific
16 * prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 Authors:
33 Farhad Hormozdiari
34 Faraz Hach
35 Can Alkan
36 Emails:
37 farhadh AT uw DOT edu
38 fhach AT cs DOT sfu DOT ca
39 calkan AT uw DOT edu
40 */
41
42
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <zlib.h>
49 #include "Common.h"
50 #include "Reads.h"
51
52 #define CHARCODE(a) (a=='A' ? 0 : (a=='C' ? 1 : (a=='G' ? 2 : (a=='T' ? 3 : 4))))
53
54 FILE *_r_fp1;
55 FILE *_r_fp2;
56 gzFile _r_gzfp1;
57 gzFile _r_gzfp2;
58 Read *_r_seq;
59 int _r_seqCnt;
60 int *_r_samplingLocs;
61
62 /**********************************************/
63 char *(*readFirstSeq)(char *);
64 char *(*readSecondSeq)(char *);
65 /**********************************************/
66 char *readFirstSeqTXT( char *seq )
67 {
68 return fgets(seq, SEQ_MAX_LENGTH, _r_fp1);
69 }
70
71 /**********************************************/
72 char *readSecondSeqTXT( char *seq )
73 {
74 return fgets(seq, SEQ_MAX_LENGTH, _r_fp2);
75 }
76 /**********************************************/
77 char *readFirstSeqGZ( char *seq )
78 {
79 return gzgets(_r_gzfp1, seq, SEQ_MAX_LENGTH);
80 }
81
82 /**********************************************/
83 char *readSecondSeqGZ( char *seq )
84 {
85 return gzgets(_r_gzfp2, seq, SEQ_MAX_LENGTH);
86 }
87 /**********************************************/
88 int toCompareRead(const void * elem1, const void * elem2)
89 {
90 return strcmp(((Read *)elem1)->seq, ((Read *)elem2)->seq);
91 }
92 /**********************************************/
93 int readAllReads(char *fileName1,
94 char *fileName2,
95 int compressed,
96 unsigned char *fastq,
97 unsigned char pairedEnd,
98 Read **seqList,
99 unsigned int *seqListSize)
100 {
101 double startTime=getTime();
102
103 char seq1[SEQ_MAX_LENGTH];
104 char rseq1[SEQ_MAX_LENGTH];
105 char name1[SEQ_MAX_LENGTH];
106 char qual1[SEQ_MAX_LENGTH];
107 char seq2[SEQ_MAX_LENGTH];
108 char rseq2[SEQ_MAX_LENGTH];
109 char name2[SEQ_MAX_LENGTH];
110 char qual2[SEQ_MAX_LENGTH];
111
112 char dummy[SEQ_MAX_LENGTH];
113 char ch;
114 int err1, err2;
115 int nCnt;
116 int discarded = 0;
117 int seqCnt = 0;
118 int maxCnt = 0;
119 int i;
120 Read *list = NULL;
121
122 int clipped = 0;
123
124
125 if (!compressed)
126 {
127 _r_fp1 = fileOpen( fileName1, "r");
128
129 if (_r_fp1 == NULL)
130 {
131 return 0;
132 }
133
134 ch = fgetc(_r_fp1);
135
136 if ( pairedEnd && fileName2 != NULL )
137 {
138 _r_fp2 = fileOpen ( fileName2, "r" );
139 if (_r_fp2 == NULL)
140 {
141 return 0;
142 }
143 }
144 else
145 {
146 _r_fp2 = _r_fp1;
147 }
148
149 readFirstSeq = &readFirstSeqTXT;
150 readSecondSeq = &readSecondSeqTXT;
151 }
152 else
153 {
154
155 _r_gzfp1 = fileOpenGZ (fileName1, "r");
156
157 if (_r_gzfp1 == NULL)
158 {
159 return 0;
160 }
161
162 ch = gzgetc(_r_gzfp1);
163
164 if ( pairedEnd && fileName2 != NULL )
165 {
166 _r_gzfp2 = fileOpenGZ ( fileName2, "r" );
167 if (_r_gzfp2 == NULL)
168 {
169 return 0;
170 }
171 }
172 else
173 {
174 _r_gzfp2 = _r_gzfp1;
175 }
176
177 readFirstSeq = &readFirstSeqGZ;
178 readSecondSeq = &readSecondSeqGZ;
179 }
180
181 if (ch == '>')
182 *fastq = 0;
183 else
184 *fastq = 1;
185
186 // Counting the number of lines in the file
187 while (readFirstSeq(dummy)) maxCnt++;
188
189 if (!compressed)
190 {
191 rewind(_r_fp1);
192 }
193 else
194 {
195 gzrewind(_r_gzfp1);
196 }
197
198 // Calculating the Maximum # of sequences
199 if (*fastq)
200 {
201 maxCnt /= 4;
202 }
203 else
204 {
205 maxCnt /= 2;
206 }
207
208 if (pairedEnd && fileName2 != NULL )
209 maxCnt *= 2;
210
211 list = getMem(sizeof(Read)*maxCnt);
212
213 while( readFirstSeq(name1) )
214 {
215 err1 = 0;
216 err2 = 0;
217 readFirstSeq(seq1);
218 name1[strlen(name1)-1] = '\0';
219
220 if ( *fastq )
221 {
222 readFirstSeq(dummy);
223 readFirstSeq(qual1);
224 qual1[strlen(qual1)-1] = '\0';
225 }
226 else
227 {
228 sprintf(qual1, "*");
229 }
230
231 // Cropping
232 if (cropSize > 0)
233 {
234 seq1[cropSize] = '\0';
235 if ( *fastq )
236 qual1[cropSize] = '\0';
237 }
238
239
240 nCnt = 0;
241 for (i=0; i<strlen(seq1); i++)
242 {
243 seq1[i] = toupper (seq1[i]);
244 if (seq1[i] == 'N')
245 {
246 nCnt++;
247 }
248 else if (isspace(seq1[i]))
249 {
250
251 seq1[i] = '\0';
252 break;
253 }
254 }
255
256 if (nCnt > errThreshold)
257 {
258 err1 = 1;
259 }
260
261 // Reading the second seq of pair-ends
262 if (pairedEnd)
263 {
264 readSecondSeq(name2);
265 readSecondSeq(seq2);
266 name2[strlen(name2)-1] = '\0';
267
268
269 if ( *fastq )
270 {
271 readSecondSeq(dummy);
272 readSecondSeq(qual2);
273
274 qual2[strlen(qual2)-1] = '\0';
275 }
276 else
277 {
278 sprintf(qual2, "*");
279 }
280
281
282 // Cropping
283 if (cropSize > 0)
284 {
285 seq2[cropSize] = '\0';
286 if ( *fastq )
287 qual2[cropSize] = '\0';
288 }
289
290
291 nCnt = 0;
292 for (i=0; i<strlen(seq2); i++)
293 {
294 seq2[i] = toupper (seq2[i]);
295 if (seq2[i] == 'N')
296 {
297 nCnt++;
298
299 }
300 else if (isspace(seq2[i]))
301 {
302 seq2[i] = '\0';
303 }
304 }
305 if (nCnt > errThreshold)
306 {
307 err2 = 1;
308 }
309
310
311 if (strlen(seq1) < strlen(seq2)) {
312 seq2[strlen(seq1)] = '\0';
313 if ( *fastq )
314 qual2[strlen(seq1)] = '\0';
315 if (!clipped) clipped = 2;
316 }
317 else if (strlen(seq1) > strlen(seq2)){
318 seq1[strlen(seq2)] = '\0';
319 if ( *fastq )
320 qual1[strlen(seq2)] = '\0';
321 if (!clipped) clipped = 1;
322 }
323
324 if (clipped == 1 || clipped == 2){
325 fprintf(stdout, "[PE mode Warning] Sequence lengths are different, read #%d is clipped to match.\n", clipped);
326 clipped = 3;
327 }
328
329
330
331 }
332
333 if (!pairedEnd && !err1)
334 {
335 int _mtmp = strlen(seq1);
336 list[seqCnt].hits = getMem (1+3*_mtmp+3+strlen(name1)+1);
337 list[seqCnt].seq = list[seqCnt].hits + 1;
338 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
339 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
340 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
341
342 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
343 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
344
345 list[seqCnt].readNumber = seqCnt;
346
347 reverseComplete(seq1, rseq1, _mtmp);
348 rseq1[_mtmp] = '\0';
349 int i;
350
351 list[seqCnt].hits[0] = 0;
352
353 for (i=0; i<=_mtmp; i++)
354 {
355 list[seqCnt].seq[i] = seq1[i];
356 list[seqCnt].rseq[i] = rseq1[i] ;
357 list[seqCnt].qual[i] = qual1[i];
358 }
359
360
361 //MAKE HASH VALUE
362 short code = 0;
363
364 for(i=0; i < 4; i++)
365 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
366 list[seqCnt].hashValue[0] = code;
367
368
369 for(i = 1; i < _mtmp-3; i++)
370 {
371 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
372 }
373
374
375 code = 0;
376 for(i=0; i < 4; i++)
377 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
378 list[seqCnt].rhashValue[0] = code;
379
380
381 for(i = 1; i < _mtmp-3; i++)
382 {
383 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
384 }
385
386 int j = 0;
387 int tmpSize = _mtmp / (errThreshold+1);
388
389 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
390 for(i=0; i < errThreshold+1; i++)
391 {
392 code = 0;
393 for(j = 0; j < tmpSize; j++)
394 {
395 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
396 }
397 list[seqCnt].hashValSampleSize[i] = code;
398 }
399
400 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
401
402 seqCnt++;
403
404 }
405 else if (pairedEnd && !err1 && !err2)
406 {
407
408 // Naming Conventions X/1, X/2 OR X
409 int tmplen = strlen(name1);
410 if (strcmp(name1, name2) != 0)
411 {
412 tmplen = strlen(name1)-2;
413 }
414
415 //first seq
416 int _mtmp = strlen(seq1);
417 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
418 list[seqCnt].seq = list[seqCnt].hits + 1;
419 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
420 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
421 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
422
423 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
424 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
425 list[seqCnt].readNumber = seqCnt;
426
427 reverseComplete(seq1, rseq1, _mtmp);
428 rseq1[_mtmp] = '\0';
429 int i;
430
431 list[seqCnt].hits[0] = 0;
432
433 for (i=0; i<=_mtmp; i++)
434 {
435 list[seqCnt].seq[i] = seq1[i];
436 list[seqCnt].rseq[i] = rseq1[i] ;
437 list[seqCnt].qual[i] = qual1[i];
438 }
439
440
441 name1[tmplen]='\0';
442
443 //MAKE HASH VALUE
444 short code = 0;
445
446 for(i=0; i < 4; i++)
447 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
448 list[seqCnt].hashValue[0] = code;
449
450
451 for(i = 1; i < _mtmp-3; i++)
452 {
453 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
454 }
455
456
457 code = 0;
458 for(i=0; i < 4; i++)
459 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
460 list[seqCnt].rhashValue[0] = code;
461
462
463 for(i = 1; i < _mtmp-3; i++)
464 {
465 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
466 }
467
468 int j = 0;
469 int tmpSize = _mtmp / (errThreshold+1);
470
471 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
472 for(i=0; i < errThreshold+1; i++)
473 {
474 code = 0;
475 for(j = 0; j < tmpSize; j++)
476 {
477 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
478 }
479 list[seqCnt].hashValSampleSize[i] = code;
480 }
481
482 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
483
484
485 seqCnt++;
486
487 //second seq
488 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
489 list[seqCnt].seq = list[seqCnt].hits + 1;
490 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
491 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
492 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
493
494 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
495 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
496 list[seqCnt].readNumber = seqCnt;
497
498 reverseComplete(seq2, rseq2, _mtmp);
499 rseq2[_mtmp] = '\0';
500
501 list[seqCnt].hits[0] = 0;
502
503 for (i=0; i<=_mtmp; i++)
504 {
505 list[seqCnt].seq[i] = seq2[i];
506 list[seqCnt].rseq[i] = rseq2[i] ;
507 list[seqCnt].qual[i] = qual2[i];
508 }
509
510
511 name2[tmplen]='\0';
512
513 //MAKE HASH VALUE
514 code = 0;
515
516 for(i=0; i < 4; i++)
517 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
518 list[seqCnt].hashValue[0] = code;
519
520
521 for(i = 1; i < _mtmp-3; i++)
522 {
523 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
524 }
525
526
527 code = 0;
528 for(i=0; i < 4; i++)
529 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
530 list[seqCnt].rhashValue[0] = code;
531
532
533 for(i = 1; i < _mtmp-3; i++)
534 {
535 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
536 }
537
538 j = 0;
539 tmpSize = _mtmp / (errThreshold+1);
540
541 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
542 for(i=0; i < errThreshold+1; i++)
543 {
544 code = 0;
545 for(j = 0; j < tmpSize; j++)
546 {
547 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
548 }
549 list[seqCnt].hashValSampleSize[i] = code;
550 }
551
552 sprintf(list[seqCnt].name,"%s%c", ((char*)name2)+1,'\0');
553
554 seqCnt++;
555
556 }
557 else
558 {
559 discarded++;
560 }
561 }
562
563 if (seqCnt > 0)
564 {
565 SEQ_LENGTH = strlen(list[0].seq);
566 }
567 else
568 {
569 fprintf(stdout, "ERROR: No reads can be found for mapping\n");
570 return 0;
571 }
572
573
574 // Closing Files
575 if (!compressed)
576 {
577 fclose(_r_fp1);
578 if ( pairedEnd && fileName2 != NULL )
579 {
580 fclose(_r_fp2);
581 }
582 }
583 else
584 {
585 gzclose(_r_gzfp1);
586 if ( pairedEnd && fileName2 != NULL)
587 {
588 gzclose(_r_fp2);
589 }
590 }
591
592 //qsort(list, seqCnt, sizeof(Read), toCompareRead);
593
594 adjustQual(list, seqCnt);
595
596 *seqList = list;
597 *seqListSize = seqCnt;
598
599
600 _r_seq = list;
601 _r_seqCnt = seqCnt;
602
603 if ( pairedEnd ) discarded *= 2;
604
605 if (seqCnt>1)
606 fprintf(stdout, "%d sequences are read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
607 else
608 fprintf(stdout, "%d sequence is read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
609
610
611
612 return 1;
613 }
614 /**********************************************/
615 void loadSamplingLocations(int **samplingLocs, int * samplingLocsSize)
616 {
617 int i;
618 int samLocsSize = errThreshold + 1;
619 int *samLocs = getMem(sizeof(int)*samLocsSize);
620
621 for (i=0; i<samLocsSize; i++)
622 {
623 samLocs[i] = (SEQ_LENGTH / samLocsSize) *i;
624 if ( samLocs[i] + WINDOW_SIZE > SEQ_LENGTH)
625 samLocs[i] = SEQ_LENGTH - WINDOW_SIZE;
626 }
627
628 // Outputing the sampling locations
629
630 /*
631
632 int j;
633 for (i=0; i<SEQ_LENGTH; i++)
634 {
635 fprintf(stdout, "-");
636 }
637 fprintf(stdout, "\n");
638
639 for ( i=0; i<samLocsSize; i++ )
640 {
641 for ( j=0; j<samLocs[i]; j++ )
642 {
643 fprintf(stdout," ");
644 }
645 for (j=0; j<WINDOW_SIZE; j++)
646 {
647 fprintf(stdout,"+");
648 }
649 fprintf(stdout, "\n");
650 fflush(stdout);
651 }
652
653
654 for ( i=0; i<SEQ_LENGTH; i++ )
655 {
656 fprintf(stdout, "-");
657 }
658 fprintf(stdout, "\n");
659
660 */
661
662 *samplingLocs = samLocs;
663 *samplingLocsSize = samLocsSize;
664 _r_samplingLocs = samLocs;
665 }
666
667 void finalizeReads(char *fileName)
668 {
669 FILE *fp1=NULL;
670
671 if (fileName != NULL)
672 {
673 fp1 = fileOpen(fileName, "w");
674 }
675 if (pairedEndMode)
676 _r_seqCnt /=2;
677
678 int i=0;
679 for (i = 0; i < _r_seqCnt; i++)
680 {
681 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual,"*")!=0)
682 {
683 fprintf(fp1,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual);
684 }
685 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0)
686 {
687 fprintf(fp1, ">%s/1\n%s\n>%s/2\n%shits=%d\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[2*i+1].hits[0]);
688 }
689 else if (!pairedEndMode && _r_seq[i].hits[0] == 0 && strcmp(_r_seq[i].qual, "*")!=0)
690 {
691 fprintf(fp1,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual);
692 }
693 else if (!pairedEndMode && _r_seq[i].hits[0] == 0)
694 {
695 fprintf(fp1,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq);
696 }
697 }
698
699 fclose(fp1);
700 if (pairedEndMode)
701 _r_seqCnt *= 2;
702
703 for (i = 0; i < _r_seqCnt; i++)
704 {
705 freeMem(_r_seq[i].hits,0);
706 }
707
708
709 freeMem(_r_seq,0);
710 freeMem(_r_samplingLocs,0);
711 }
712
713 void adjustQual(Read *list, int seqCnt){
714 /* This function will automatically determine the phred_offset and readjust quality values if needed */
715 int i,j,q, offset=64;
716 int len = strlen(list[0].qual);
717
718 for (i=0; i<10000 && i<seqCnt; i++){
719 for (j=0;j<len;j++){
720 q = (int) list[i].qual[j] - offset;
721 if (q < 0){
722 offset = 33;
723 break;
724 }
725 }
726 if (offset == 33)
727 break;
728 }
729
730 if (offset == 64){
731 fprintf(stdout, "[Quality Warning] Phred offset is 64. Readjusting to 33.\n");
732 fflush(stdout);
733 for (i=0;i<seqCnt;i++){
734 for (j=0;j<len;j++){
735 list[i].qual[j] -= 31;
736 }
737 }
738 }
739 }
740
741
742
743 /*void finalizeOEAReads(char *fileName)
744 {
745 FILE *fp1=NULL;
746 FILE *fp2=NULL;
747
748 //printf("OEA%s\n", fileName);
749 char fileNameOEA1[200];
750 char fileNameOEA2[200];
751 sprintf(fileNameOEA1, "%s_OEA1", fileName);
752 sprintf(fileNameOEA2, "%s_OEA2", fileName);
753
754
755 if (fileName != NULL)
756 {
757 fp1 = fileOpen(fileNameOEA1, "w");
758 fp2 = fileOpen(fileNameOEA2, "w");
759 }
760 if (pairedEndMode)
761 _r_seqCnt /=2;
762
763 int i=0;
764 printf("%d\n", _r_seqCnt);
765 for (i = 0; i < _r_seqCnt; i++)
766 {
767 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")==0)
768 {
769 fprintf(fp1,">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
770 }
771 else if (pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")==0)
772 {
773 fprintf(fp2, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
774 }
775 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")!=0)
776 {
777 fprintf(fp1,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name,
778 _r_seq[2*i].seq,
779 _r_seq[2*i].qual,
780 _r_seq[2*i+1].name,
781 _r_seq[2*i+1].seq,
782 _r_seq[2*i+1].qual);
783 }
784 else if ( pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")!=0 )
785 {
786 fprintf(fp2,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name,
787 _r_seq[2*i].seq,
788 _r_seq[2*i].qual,
789 _r_seq[2*i+1].name,
790 _r_seq[2*i+1].seq,
791 _r_seq[2*i+1].qual);
792 }
793 }
794
795 fclose(fp1);
796 fclose(fp2);
797
798 }*/