comparison mrfast-2.1.0.4/Reads.c @ 0:7b3dc85dc7fd

Uploaded mrfast source tarball
author calkan
date Tue, 21 Feb 2012 10:29:47 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7b3dc85dc7fd
1 /*
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without modification,
6 * are permitted provided that the following conditions are met:
7 *
8 * Redistributions of source code must retain the above copyright notice, this list
9 * of conditions and the following disclaimer.
10 * - Redistributions in binary form must reproduce the above copyright notice, this
11 * list of conditions and the following disclaimer in the documentation and/or other
12 * materials provided with the distribution.
13 * - Neither the names of the University of Washington, Simon Fraser University,
14 * nor the names of its contributors may be
15 * used to endorse or promote products derived from this software without specific
16 * prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 /*
32 Authors:
33 Farhad Hormozdiari
34 Faraz Hach
35 Can Alkan
36 Emails:
37 farhadh AT uw DOT edu
38 fhach AT cs DOT sfu DOT ca
39 calkan AT uw DOT edu
40 */
41
42
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <ctype.h>
48 #include <zlib.h>
49 #include "Common.h"
50 #include "Reads.h"
51
52 #define CHARCODE(a) (a=='A' ? 0 : (a=='C' ? 1 : (a=='G' ? 2 : (a=='T' ? 3 : 4))))
53
54 FILE *_r_fp1;
55 FILE *_r_fp2;
56 gzFile _r_gzfp1;
57 gzFile _r_gzfp2;
58 Read *_r_seq;
59 int _r_seqCnt;
60 int *_r_samplingLocs;
61
62 /**********************************************/
63 char *(*readFirstSeq)(char *);
64 char *(*readSecondSeq)(char *);
65 /**********************************************/
66 char *readFirstSeqTXT( char *seq )
67 {
68 return fgets(seq, SEQ_MAX_LENGTH, _r_fp1);
69 }
70
71 /**********************************************/
72 char *readSecondSeqTXT( char *seq )
73 {
74 return fgets(seq, SEQ_MAX_LENGTH, _r_fp2);
75 }
76 /**********************************************/
77 char *readFirstSeqGZ( char *seq )
78 {
79 return gzgets(_r_gzfp1, seq, SEQ_MAX_LENGTH);
80 }
81
82 /**********************************************/
83 char *readSecondSeqGZ( char *seq )
84 {
85 return gzgets(_r_gzfp2, seq, SEQ_MAX_LENGTH);
86 }
87 /**********************************************/
88 int toCompareRead(const void * elem1, const void * elem2)
89 {
90 return strcmp(((Read *)elem1)->seq, ((Read *)elem2)->seq);
91 }
92 /**********************************************/
93 int readAllReads(char *fileName1,
94 char *fileName2,
95 int compressed,
96 unsigned char *fastq,
97 unsigned char pairedEnd,
98 Read **seqList,
99 unsigned int *seqListSize)
100 {
101 double startTime=getTime();
102
103 char seq1[SEQ_MAX_LENGTH];
104 char rseq1[SEQ_MAX_LENGTH];
105 char name1[SEQ_MAX_LENGTH];
106 char qual1[SEQ_MAX_LENGTH];
107 char seq2[SEQ_MAX_LENGTH];
108 char rseq2[SEQ_MAX_LENGTH];
109 char name2[SEQ_MAX_LENGTH];
110 char qual2[SEQ_MAX_LENGTH];
111
112 char dummy[SEQ_MAX_LENGTH];
113 char ch;
114 int err1, err2;
115 int nCnt;
116 int discarded = 0;
117 int seqCnt = 0;
118 int maxCnt = 0;
119 int i;
120 Read *list = NULL;
121
122 int clipped = 0;
123
124 if (!compressed)
125 {
126 _r_fp1 = fileOpen( fileName1, "r");
127
128 if (_r_fp1 == NULL)
129 {
130 return 0;
131 }
132
133 ch = fgetc(_r_fp1);
134
135 if ( pairedEnd && fileName2 != NULL )
136 {
137 _r_fp2 = fileOpen ( fileName2, "r" );
138 if (_r_fp2 == NULL)
139 {
140 return 0;
141 }
142 }
143 else
144 {
145 _r_fp2 = _r_fp1;
146 }
147
148 readFirstSeq = &readFirstSeqTXT;
149 readSecondSeq = &readSecondSeqTXT;
150 }
151 else
152 {
153
154 _r_gzfp1 = fileOpenGZ (fileName1, "r");
155
156 if (_r_gzfp1 == NULL)
157 {
158 return 0;
159 }
160
161 ch = gzgetc(_r_gzfp1);
162
163 if ( pairedEnd && fileName2 != NULL )
164 {
165 _r_gzfp2 = fileOpenGZ ( fileName2, "r" );
166 if (_r_gzfp2 == NULL)
167 {
168 return 0;
169 }
170 }
171 else
172 {
173 _r_gzfp2 = _r_gzfp1;
174 }
175
176 readFirstSeq = &readFirstSeqGZ;
177 readSecondSeq = &readSecondSeqGZ;
178 }
179
180 if (ch == '>')
181 *fastq = 0;
182 else
183 *fastq = 1;
184
185 // Counting the number of lines in the file
186 while (readFirstSeq(dummy)) maxCnt++;
187
188 if (!compressed)
189 {
190 rewind(_r_fp1);
191 }
192 else
193 {
194 gzrewind(_r_gzfp1);
195 }
196
197 // Calculating the Maximum # of sequences
198 if (*fastq)
199 {
200 maxCnt /= 4;
201 }
202 else
203 {
204 maxCnt /= 2;
205 }
206
207 if (pairedEnd && fileName2 != NULL )
208 maxCnt *= 2;
209
210 list = getMem(sizeof(Read)*maxCnt);
211
212 while( readFirstSeq(name1) )
213 {
214 err1 = 0;
215 err2 = 0;
216 readFirstSeq(seq1);
217 name1[strlen(name1)-1] = '\0';
218
219 if ( *fastq )
220 {
221 readFirstSeq(dummy);
222 readFirstSeq(qual1);
223 qual1[strlen(qual1)-1] = '\0';
224 }
225 else
226 {
227 sprintf(qual1, "*");
228 }
229
230 // Cropping
231 if (cropSize > 0)
232 {
233 seq1[cropSize] = '\0';
234 if ( *fastq )
235 qual1[cropSize] = '\0';
236 }
237
238
239 nCnt = 0;
240 for (i=0; i<strlen(seq1); i++)
241 {
242 seq1[i] = toupper (seq1[i]);
243 if (seq1[i] == 'N')
244 {
245 nCnt++;
246 }
247 else if (isspace(seq1[i]))
248 {
249
250 seq1[i] = '\0';
251 break;
252 }
253 }
254
255 if (nCnt > errThreshold)
256 {
257 err1 = 1;
258 }
259
260 // Reading the second seq of pair-ends
261 if (pairedEnd)
262 {
263 readSecondSeq(name2);
264 readSecondSeq(seq2);
265 name2[strlen(name2)-1] = '\0';
266
267
268 if ( *fastq )
269 {
270 readSecondSeq(dummy);
271 readSecondSeq(qual2);
272
273 qual2[strlen(qual2)-1] = '\0';
274 }
275 else
276 {
277 sprintf(qual2, "*");
278 }
279
280
281 // Cropping
282 if (cropSize > 0)
283 {
284 seq2[cropSize] = '\0';
285 if ( *fastq )
286 qual2[cropSize] = '\0';
287 }
288
289
290 nCnt = 0;
291 for (i=0; i<strlen(seq2); i++)
292 {
293 seq2[i] = toupper (seq2[i]);
294 if (seq2[i] == 'N')
295 {
296 nCnt++;
297
298 }
299 else if (isspace(seq2[i]))
300 {
301 seq2[i] = '\0';
302 }
303 }
304 if (nCnt > errThreshold)
305 {
306 err2 = 1;
307 }
308
309
310 if (strlen(seq1) < strlen(seq2)) {
311 seq2[strlen(seq1)] = '\0';
312 if ( *fastq )
313 qual2[strlen(seq1)] = '\0';
314 if (!clipped) clipped = 2;
315 }
316 else if (strlen(seq1) > strlen(seq2)){
317 seq1[strlen(seq2)] = '\0';
318 if ( *fastq )
319 qual1[strlen(seq2)] = '\0';
320 if (!clipped) clipped = 1;
321 }
322
323 if (clipped == 1 || clipped == 2){
324 fprintf(stdout, "[PE mode warning] Sequence lengths are different, read #%d is clipped to match.\n", clipped);
325 clipped = 3;
326 }
327
328
329
330 }
331
332 if (!pairedEnd && !err1)
333 {
334 int _mtmp = strlen(seq1);
335 list[seqCnt].hits = getMem (1+3*_mtmp+3+strlen(name1)+1);
336 list[seqCnt].seq = list[seqCnt].hits + 1;
337 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
338 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
339 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
340
341 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
342 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
343
344 list[seqCnt].readNumber = seqCnt;
345
346 reverseComplete(seq1, rseq1, _mtmp);
347 rseq1[_mtmp] = '\0';
348 int i;
349
350 list[seqCnt].hits[0] = 0;
351
352 for (i=0; i<=_mtmp; i++)
353 {
354 list[seqCnt].seq[i] = seq1[i];
355 list[seqCnt].rseq[i] = rseq1[i] ;
356 list[seqCnt].qual[i] = qual1[i];
357 }
358
359
360 //MAKE HASH VALUE
361 short code = 0;
362
363 for(i=0; i < 4; i++)
364 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
365 list[seqCnt].hashValue[0] = code;
366
367
368 for(i = 1; i < _mtmp-3; i++)
369 {
370 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
371 }
372
373
374 code = 0;
375 for(i=0; i < 4; i++)
376 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
377 list[seqCnt].rhashValue[0] = code;
378
379
380 for(i = 1; i < _mtmp-3; i++)
381 {
382 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
383 }
384
385 int j = 0;
386 int tmpSize = _mtmp / (errThreshold+1);
387
388 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
389 for(i=0; i < errThreshold+1; i++)
390 {
391 code = 0;
392 for(j = 0; j < tmpSize; j++)
393 {
394 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
395 }
396 list[seqCnt].hashValSampleSize[i] = code;
397 }
398
399 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
400
401 seqCnt++;
402
403 }
404 else if (pairedEnd && !err1 && !err2)
405 {
406
407 // Naming Conventions X/1, X/2 OR X
408 int tmplen = strlen(name1);
409 if (strcmp(name1, name2) != 0)
410 {
411 tmplen = strlen(name1)-2;
412 }
413
414 //first seq
415 int _mtmp = strlen(seq1);
416 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
417 list[seqCnt].seq = list[seqCnt].hits + 1;
418 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
419 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
420 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
421
422 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
423 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
424 list[seqCnt].readNumber = seqCnt;
425
426 reverseComplete(seq1, rseq1, _mtmp);
427 rseq1[_mtmp] = '\0';
428 int i;
429
430 list[seqCnt].hits[0] = 0;
431
432 for (i=0; i<=_mtmp; i++)
433 {
434 list[seqCnt].seq[i] = seq1[i];
435 list[seqCnt].rseq[i] = rseq1[i] ;
436 list[seqCnt].qual[i] = qual1[i];
437 }
438
439
440 name1[tmplen]='\0';
441
442 //MAKE HASH VALUE
443 short code = 0;
444
445 for(i=0; i < 4; i++)
446 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
447 list[seqCnt].hashValue[0] = code;
448
449
450 for(i = 1; i < _mtmp-3; i++)
451 {
452 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
453 }
454
455
456 code = 0;
457 for(i=0; i < 4; i++)
458 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
459 list[seqCnt].rhashValue[0] = code;
460
461
462 for(i = 1; i < _mtmp-3; i++)
463 {
464 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
465 }
466
467 int j = 0;
468 int tmpSize = _mtmp / (errThreshold+1);
469
470 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
471 for(i=0; i < errThreshold+1; i++)
472 {
473 code = 0;
474 for(j = 0; j < tmpSize; j++)
475 {
476 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
477 }
478 list[seqCnt].hashValSampleSize[i] = code;
479 }
480
481 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
482
483
484 seqCnt++;
485
486 //second seq
487 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
488 list[seqCnt].seq = list[seqCnt].hits + 1;
489 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
490 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
491 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
492
493 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
494 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
495 list[seqCnt].readNumber = seqCnt;
496
497 reverseComplete(seq2, rseq2, _mtmp);
498 rseq2[_mtmp] = '\0';
499
500 list[seqCnt].hits[0] = 0;
501
502 for (i=0; i<=_mtmp; i++)
503 {
504 list[seqCnt].seq[i] = seq2[i];
505 list[seqCnt].rseq[i] = rseq2[i] ;
506 list[seqCnt].qual[i] = qual2[i];
507 }
508
509
510 name2[tmplen]='\0';
511
512 //MAKE HASH VALUE
513 code = 0;
514
515 for(i=0; i < 4; i++)
516 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
517 list[seqCnt].hashValue[0] = code;
518
519
520 for(i = 1; i < _mtmp-3; i++)
521 {
522 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
523 }
524
525
526 code = 0;
527 for(i=0; i < 4; i++)
528 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
529 list[seqCnt].rhashValue[0] = code;
530
531
532 for(i = 1; i < _mtmp-3; i++)
533 {
534 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
535 }
536
537 j = 0;
538 tmpSize = _mtmp / (errThreshold+1);
539
540 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
541 for(i=0; i < errThreshold+1; i++)
542 {
543 code = 0;
544 for(j = 0; j < tmpSize; j++)
545 {
546 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
547 }
548 list[seqCnt].hashValSampleSize[i] = code;
549 }
550
551 sprintf(list[seqCnt].name,"%s%c", ((char*)name2)+1,'\0');
552
553 seqCnt++;
554
555 }
556 else
557 {
558 discarded++;
559 }
560 }
561
562 if (seqCnt > 0)
563 {
564 SEQ_LENGTH = strlen(list[0].seq);
565 }
566 else
567 {
568 fprintf(stdout, "ERROR: No reads can be found for mapping\n");
569 return 0;
570 }
571
572
573 // Closing Files
574 if (!compressed)
575 {
576 fclose(_r_fp1);
577 if ( pairedEnd && fileName2 != NULL )
578 {
579 fclose(_r_fp2);
580 }
581 }
582 else
583 {
584 gzclose(_r_gzfp1);
585 if ( pairedEnd && fileName2 != NULL)
586 {
587 gzclose(_r_fp2);
588 }
589 }
590
591 //qsort(list, seqCnt, sizeof(Read), toCompareRead);
592
593 adjustQual(list, seqCnt);
594
595 *seqList = list;
596 *seqListSize = seqCnt;
597
598
599 _r_seq = list;
600 _r_seqCnt = seqCnt;
601
602 if ( pairedEnd ) discarded *= 2;
603
604 if (seqCnt>1)
605 fprintf(stdout, "%d sequences are read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
606 else
607 fprintf(stdout, "%d sequence is read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
608
609
610
611 return 1;
612 }
613 /**********************************************/
614 void loadSamplingLocations(int **samplingLocs, int * samplingLocsSize)
615 {
616 int i;
617 int samLocsSize = errThreshold + 1;
618 int *samLocs = getMem(sizeof(int)*samLocsSize);
619
620 for (i=0; i<samLocsSize; i++)
621 {
622 samLocs[i] = (SEQ_LENGTH / samLocsSize) *i;
623 if ( samLocs[i] + WINDOW_SIZE > SEQ_LENGTH)
624 samLocs[i] = SEQ_LENGTH - WINDOW_SIZE;
625 }
626
627 // Outputing the sampling locations
628
629 /*
630
631 int j;
632 for (i=0; i<SEQ_LENGTH; i++)
633 {
634 fprintf(stdout, "-");
635 }
636 fprintf(stdout, "\n");
637
638 for ( i=0; i<samLocsSize; i++ )
639 {
640 for ( j=0; j<samLocs[i]; j++ )
641 {
642 fprintf(stdout," ");
643 }
644 for (j=0; j<WINDOW_SIZE; j++)
645 {
646 fprintf(stdout,"+");
647 }
648 fprintf(stdout, "\n");
649 fflush(stdout);
650 }
651
652
653 for ( i=0; i<SEQ_LENGTH; i++ )
654 {
655 fprintf(stdout, "-");
656 }
657 fprintf(stdout, "\n");
658
659 */
660
661 *samplingLocs = samLocs;
662 *samplingLocsSize = samLocsSize;
663 _r_samplingLocs = samLocs;
664 }
665
666 void finalizeReads(char *fileName)
667 {
668 FILE *fp1=NULL;
669
670 if (fileName != NULL)
671 {
672 fp1 = fileOpen(fileName, "w");
673 }
674 if (pairedEndMode)
675 _r_seqCnt /=2;
676
677 int i=0;
678 for (i = 0; i < _r_seqCnt; i++)
679 {
680 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual,"*")!=0)
681 {
682 fprintf(fp1,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual);
683 }
684 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0)
685 {
686 fprintf(fp1, ">%s/1\n%s\n>%s/2\n%shits=%d\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[2*i+1].hits[0]);
687 }
688 else if (!pairedEndMode && _r_seq[i].hits[0] == 0 && strcmp(_r_seq[i].qual, "*")!=0)
689 {
690 fprintf(fp1,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual);
691 }
692 else if (!pairedEndMode && _r_seq[i].hits[0] == 0)
693 {
694 fprintf(fp1,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq);
695 }
696 }
697
698 fclose(fp1);
699 if (pairedEndMode)
700 _r_seqCnt *= 2;
701
702 for (i = 0; i < _r_seqCnt; i++)
703 {
704 freeMem(_r_seq[i].hits,0);
705 }
706
707
708 freeMem(_r_seq,0);
709 freeMem(_r_samplingLocs,0);
710 }
711
712 void adjustQual(Read *list, int seqCnt){
713 /* This function will automatically determine the phred_offset and readjust quality values if needed */
714 }
715
716
717
718 /*void finalizeOEAReads(char *fileName)
719 {
720 FILE *fp1=NULL;
721 FILE *fp2=NULL;
722
723 //printf("OEA%s\n", fileName);
724 char fileNameOEA1[200];
725 char fileNameOEA2[200];
726 sprintf(fileNameOEA1, "%s_OEA1", fileName);
727 sprintf(fileNameOEA2, "%s_OEA2", fileName);
728
729
730 if (fileName != NULL)
731 {
732 fp1 = fileOpen(fileNameOEA1, "w");
733 fp2 = fileOpen(fileNameOEA2, "w");
734 }
735 if (pairedEndMode)
736 _r_seqCnt /=2;
737
738 int i=0;
739 printf("%d\n", _r_seqCnt);
740 for (i = 0; i < _r_seqCnt; i++)
741 {
742 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")==0)
743 {
744 fprintf(fp1,">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
745 }
746 else if (pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")==0)
747 {
748 fprintf(fp2, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
749 }
750 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")!=0)
751 {
752 fprintf(fp1,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name,
753 _r_seq[2*i].seq,
754 _r_seq[2*i].qual,
755 _r_seq[2*i+1].name,
756 _r_seq[2*i+1].seq,
757 _r_seq[2*i+1].qual);
758 }
759 else if ( pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")!=0 )
760 {
761 fprintf(fp2,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name,
762 _r_seq[2*i].seq,
763 _r_seq[2*i].qual,
764 _r_seq[2*i+1].name,
765 _r_seq[2*i+1].seq,
766 _r_seq[2*i+1].qual);
767 }
768 }
769
770 fclose(fp1);
771 fclose(fp2);
772
773 }*/