annotate mrfast-2.1.0.5/Reads.c @ 1:d4054b05b015 default tip

Version update to 2.1.0.5
author calkan
date Fri, 09 Mar 2012 07:35:51 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
1 /*
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
2 * Copyright (c) <2008 - 2012>, University of Washington, Simon Fraser University
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
3 * All rights reserved.
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
4 *
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
5 * Redistribution and use in source and binary forms, with or without modification,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
6 * are permitted provided that the following conditions are met:
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
7 *
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
8 * Redistributions of source code must retain the above copyright notice, this list
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
9 * of conditions and the following disclaimer.
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
10 * - Redistributions in binary form must reproduce the above copyright notice, this
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
11 * list of conditions and the following disclaimer in the documentation and/or other
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
12 * materials provided with the distribution.
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
13 * - Neither the names of the University of Washington, Simon Fraser University,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
14 * nor the names of its contributors may be
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
15 * used to endorse or promote products derived from this software without specific
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
16 * prior written permission.
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
17 *
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
22 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
23 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
24 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
25 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
26 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
28 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
29 */
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
30
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
31 /*
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
32 Authors:
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
33 Farhad Hormozdiari
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
34 Faraz Hach
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
35 Can Alkan
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
36 Emails:
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
37 farhadh AT uw DOT edu
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
38 fhach AT cs DOT sfu DOT ca
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
39 calkan AT uw DOT edu
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
40 */
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
41
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
42
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
43
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
44 #include <stdio.h>
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
45 #include <stdlib.h>
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
46 #include <string.h>
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
47 #include <ctype.h>
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
48 #include <zlib.h>
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
49 #include "Common.h"
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
50 #include "Reads.h"
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
51
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
52 #define CHARCODE(a) (a=='A' ? 0 : (a=='C' ? 1 : (a=='G' ? 2 : (a=='T' ? 3 : 4))))
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
53
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
54 FILE *_r_fp1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
55 FILE *_r_fp2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
56 gzFile _r_gzfp1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
57 gzFile _r_gzfp2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
58 Read *_r_seq;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
59 int _r_seqCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
60 int *_r_samplingLocs;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
61
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
62 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
63 char *(*readFirstSeq)(char *);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
64 char *(*readSecondSeq)(char *);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
65 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
66 char *readFirstSeqTXT( char *seq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
67 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
68 return fgets(seq, SEQ_MAX_LENGTH, _r_fp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
69 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
70
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
71 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
72 char *readSecondSeqTXT( char *seq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
73 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
74 return fgets(seq, SEQ_MAX_LENGTH, _r_fp2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
75 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
76 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
77 char *readFirstSeqGZ( char *seq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
78 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
79 return gzgets(_r_gzfp1, seq, SEQ_MAX_LENGTH);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
80 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
81
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
82 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
83 char *readSecondSeqGZ( char *seq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
84 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
85 return gzgets(_r_gzfp2, seq, SEQ_MAX_LENGTH);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
86 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
87 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
88 int toCompareRead(const void * elem1, const void * elem2)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
89 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
90 return strcmp(((Read *)elem1)->seq, ((Read *)elem2)->seq);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
91 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
92 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
93 int readAllReads(char *fileName1,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
94 char *fileName2,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
95 int compressed,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
96 unsigned char *fastq,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
97 unsigned char pairedEnd,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
98 Read **seqList,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
99 unsigned int *seqListSize)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
100 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
101 double startTime=getTime();
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
102
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
103 char seq1[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
104 char rseq1[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
105 char name1[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
106 char qual1[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
107 char seq2[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
108 char rseq2[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
109 char name2[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
110 char qual2[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
111
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
112 char dummy[SEQ_MAX_LENGTH];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
113 char ch;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
114 int err1, err2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
115 int nCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
116 int discarded = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
117 int seqCnt = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
118 int maxCnt = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
119 int i;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
120 Read *list = NULL;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
121
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
122 int clipped = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
123
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
124
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
125 if (!compressed)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
126 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
127 _r_fp1 = fileOpen( fileName1, "r");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
128
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
129 if (_r_fp1 == NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
130 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
131 return 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
132 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
133
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
134 ch = fgetc(_r_fp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
135
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
136 if ( pairedEnd && fileName2 != NULL )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
137 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
138 _r_fp2 = fileOpen ( fileName2, "r" );
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
139 if (_r_fp2 == NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
140 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
141 return 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
142 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
143 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
144 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
145 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
146 _r_fp2 = _r_fp1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
147 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
148
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
149 readFirstSeq = &readFirstSeqTXT;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
150 readSecondSeq = &readSecondSeqTXT;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
151 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
152 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
153 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
154
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
155 _r_gzfp1 = fileOpenGZ (fileName1, "r");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
156
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
157 if (_r_gzfp1 == NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
158 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
159 return 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
160 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
161
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
162 ch = gzgetc(_r_gzfp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
163
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
164 if ( pairedEnd && fileName2 != NULL )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
165 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
166 _r_gzfp2 = fileOpenGZ ( fileName2, "r" );
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
167 if (_r_gzfp2 == NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
168 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
169 return 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
170 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
171 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
172 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
173 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
174 _r_gzfp2 = _r_gzfp1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
175 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
176
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
177 readFirstSeq = &readFirstSeqGZ;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
178 readSecondSeq = &readSecondSeqGZ;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
179 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
180
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
181 if (ch == '>')
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
182 *fastq = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
183 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
184 *fastq = 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
185
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
186 // Counting the number of lines in the file
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
187 while (readFirstSeq(dummy)) maxCnt++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
188
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
189 if (!compressed)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
190 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
191 rewind(_r_fp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
192 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
193 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
194 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
195 gzrewind(_r_gzfp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
196 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
197
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
198 // Calculating the Maximum # of sequences
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
199 if (*fastq)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
200 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
201 maxCnt /= 4;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
202 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
203 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
204 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
205 maxCnt /= 2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
206 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
207
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
208 if (pairedEnd && fileName2 != NULL )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
209 maxCnt *= 2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
210
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
211 list = getMem(sizeof(Read)*maxCnt);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
212
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
213 while( readFirstSeq(name1) )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
214 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
215 err1 = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
216 err2 = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
217 readFirstSeq(seq1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
218 name1[strlen(name1)-1] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
219
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
220 if ( *fastq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
221 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
222 readFirstSeq(dummy);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
223 readFirstSeq(qual1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
224 qual1[strlen(qual1)-1] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
225 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
226 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
227 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
228 sprintf(qual1, "*");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
229 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
230
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
231 // Cropping
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
232 if (cropSize > 0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
233 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
234 seq1[cropSize] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
235 if ( *fastq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
236 qual1[cropSize] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
237 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
238
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
239
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
240 nCnt = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
241 for (i=0; i<strlen(seq1); i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
242 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
243 seq1[i] = toupper (seq1[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
244 if (seq1[i] == 'N')
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
245 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
246 nCnt++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
247 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
248 else if (isspace(seq1[i]))
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
249 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
250
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
251 seq1[i] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
252 break;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
253 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
254 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
255
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
256 if (nCnt > errThreshold)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
257 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
258 err1 = 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
259 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
260
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
261 // Reading the second seq of pair-ends
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
262 if (pairedEnd)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
263 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
264 readSecondSeq(name2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
265 readSecondSeq(seq2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
266 name2[strlen(name2)-1] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
267
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
268
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
269 if ( *fastq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
270 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
271 readSecondSeq(dummy);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
272 readSecondSeq(qual2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
273
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
274 qual2[strlen(qual2)-1] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
275 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
276 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
277 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
278 sprintf(qual2, "*");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
279 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
280
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
281
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
282 // Cropping
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
283 if (cropSize > 0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
284 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
285 seq2[cropSize] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
286 if ( *fastq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
287 qual2[cropSize] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
288 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
289
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
290
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
291 nCnt = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
292 for (i=0; i<strlen(seq2); i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
293 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
294 seq2[i] = toupper (seq2[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
295 if (seq2[i] == 'N')
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
296 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
297 nCnt++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
298
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
299 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
300 else if (isspace(seq2[i]))
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
301 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
302 seq2[i] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
303 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
304 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
305 if (nCnt > errThreshold)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
306 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
307 err2 = 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
308 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
309
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
310
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
311 if (strlen(seq1) < strlen(seq2)) {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
312 seq2[strlen(seq1)] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
313 if ( *fastq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
314 qual2[strlen(seq1)] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
315 if (!clipped) clipped = 2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
316 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
317 else if (strlen(seq1) > strlen(seq2)){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
318 seq1[strlen(seq2)] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
319 if ( *fastq )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
320 qual1[strlen(seq2)] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
321 if (!clipped) clipped = 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
322 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
323
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
324 if (clipped == 1 || clipped == 2){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
325 fprintf(stdout, "[PE mode Warning] Sequence lengths are different, read #%d is clipped to match.\n", clipped);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
326 clipped = 3;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
327 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
328
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
329
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
330
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
331 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
332
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
333 if (!pairedEnd && !err1)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
334 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
335 int _mtmp = strlen(seq1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
336 list[seqCnt].hits = getMem (1+3*_mtmp+3+strlen(name1)+1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
337 list[seqCnt].seq = list[seqCnt].hits + 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
338 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
339 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
340 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
341
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
342 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
343 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
344
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
345 list[seqCnt].readNumber = seqCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
346
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
347 reverseComplete(seq1, rseq1, _mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
348 rseq1[_mtmp] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
349 int i;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
350
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
351 list[seqCnt].hits[0] = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
352
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
353 for (i=0; i<=_mtmp; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
354 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
355 list[seqCnt].seq[i] = seq1[i];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
356 list[seqCnt].rseq[i] = rseq1[i] ;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
357 list[seqCnt].qual[i] = qual1[i];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
358 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
359
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
360
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
361 //MAKE HASH VALUE
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
362 short code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
363
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
364 for(i=0; i < 4; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
365 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
366 list[seqCnt].hashValue[0] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
367
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
368
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
369 for(i = 1; i < _mtmp-3; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
370 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
371 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
372 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
373
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
374
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
375 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
376 for(i=0; i < 4; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
377 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
378 list[seqCnt].rhashValue[0] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
379
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
380
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
381 for(i = 1; i < _mtmp-3; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
382 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
383 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
384 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
385
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
386 int j = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
387 int tmpSize = _mtmp / (errThreshold+1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
388
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
389 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
390 for(i=0; i < errThreshold+1; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
391 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
392 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
393 for(j = 0; j < tmpSize; j++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
394 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
395 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
396 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
397 list[seqCnt].hashValSampleSize[i] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
398 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
399
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
400 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
401
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
402 seqCnt++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
403
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
404 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
405 else if (pairedEnd && !err1 && !err2)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
406 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
407
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
408 // Naming Conventions X/1, X/2 OR X
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
409 int tmplen = strlen(name1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
410 if (strcmp(name1, name2) != 0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
411 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
412 tmplen = strlen(name1)-2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
413 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
414
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
415 //first seq
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
416 int _mtmp = strlen(seq1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
417 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
418 list[seqCnt].seq = list[seqCnt].hits + 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
419 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
420 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
421 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
422
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
423 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
424 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
425 list[seqCnt].readNumber = seqCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
426
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
427 reverseComplete(seq1, rseq1, _mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
428 rseq1[_mtmp] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
429 int i;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
430
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
431 list[seqCnt].hits[0] = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
432
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
433 for (i=0; i<=_mtmp; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
434 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
435 list[seqCnt].seq[i] = seq1[i];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
436 list[seqCnt].rseq[i] = rseq1[i] ;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
437 list[seqCnt].qual[i] = qual1[i];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
438 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
439
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
440
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
441 name1[tmplen]='\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
442
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
443 //MAKE HASH VALUE
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
444 short code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
445
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
446 for(i=0; i < 4; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
447 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
448 list[seqCnt].hashValue[0] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
449
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
450
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
451 for(i = 1; i < _mtmp-3; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
452 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
453 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
454 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
455
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
456
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
457 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
458 for(i=0; i < 4; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
459 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
460 list[seqCnt].rhashValue[0] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
461
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
462
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
463 for(i = 1; i < _mtmp-3; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
464 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
465 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
466 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
467
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
468 int j = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
469 int tmpSize = _mtmp / (errThreshold+1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
470
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
471 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
472 for(i=0; i < errThreshold+1; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
473 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
474 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
475 for(j = 0; j < tmpSize; j++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
476 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
477 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
478 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
479 list[seqCnt].hashValSampleSize[i] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
480 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
481
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
482 sprintf(list[seqCnt].name,"%s%c", ((char*)name1)+1,'\0');
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
483
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
484
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
485 seqCnt++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
486
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
487 //second seq
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
488 list[seqCnt].hits = getMem (1+3*_mtmp+3+tmplen+1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
489 list[seqCnt].seq = list[seqCnt].hits + 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
490 list[seqCnt].rseq = list[seqCnt].seq + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
491 list[seqCnt].qual = list[seqCnt].rseq + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
492 list[seqCnt].name = list[seqCnt].qual + _mtmp+1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
493
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
494 list[seqCnt].hashValue = getMem(sizeof(short)*_mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
495 list[seqCnt].rhashValue = getMem(sizeof(short)*_mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
496 list[seqCnt].readNumber = seqCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
497
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
498 reverseComplete(seq2, rseq2, _mtmp);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
499 rseq2[_mtmp] = '\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
500
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
501 list[seqCnt].hits[0] = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
502
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
503 for (i=0; i<=_mtmp; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
504 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
505 list[seqCnt].seq[i] = seq2[i];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
506 list[seqCnt].rseq[i] = rseq2[i] ;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
507 list[seqCnt].qual[i] = qual2[i];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
508 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
509
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
510
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
511 name2[tmplen]='\0';
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
512
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
513 //MAKE HASH VALUE
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
514 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
515
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
516 for(i=0; i < 4; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
517 code = code * 5 + CHARCODE(list[seqCnt].seq[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
518 list[seqCnt].hashValue[0] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
519
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
520
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
521 for(i = 1; i < _mtmp-3; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
522 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
523 list[seqCnt].hashValue[i] = (list[seqCnt].hashValue[i-1] - 125 * CHARCODE(seq1[i-1])) * 5 + CHARCODE(seq1[i+3]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
524 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
525
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
526
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
527 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
528 for(i=0; i < 4; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
529 code = code * 5 + CHARCODE(list[seqCnt].rseq[i]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
530 list[seqCnt].rhashValue[0] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
531
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
532
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
533 for(i = 1; i < _mtmp-3; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
534 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
535 list[seqCnt].rhashValue[i] = (list[seqCnt].rhashValue[i-1] - 125 * CHARCODE(rseq1[i-1])) * 5 + CHARCODE(rseq1[i+3]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
536 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
537
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
538 j = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
539 tmpSize = _mtmp / (errThreshold+1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
540
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
541 list[seqCnt].hashValSampleSize = getMem(sizeof(int)*(errThreshold+1));
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
542 for(i=0; i < errThreshold+1; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
543 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
544 code = 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
545 for(j = 0; j < tmpSize; j++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
546 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
547 code = code * 5 + CHARCODE(list[seqCnt].seq[i*tmpSize+j]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
548 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
549 list[seqCnt].hashValSampleSize[i] = code;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
550 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
551
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
552 sprintf(list[seqCnt].name,"%s%c", ((char*)name2)+1,'\0');
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
553
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
554 seqCnt++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
555
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
556 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
557 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
558 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
559 discarded++;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
560 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
561 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
562
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
563 if (seqCnt > 0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
564 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
565 SEQ_LENGTH = strlen(list[0].seq);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
566 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
567 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
568 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
569 fprintf(stdout, "ERROR: No reads can be found for mapping\n");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
570 return 0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
571 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
572
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
573
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
574 // Closing Files
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
575 if (!compressed)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
576 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
577 fclose(_r_fp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
578 if ( pairedEnd && fileName2 != NULL )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
579 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
580 fclose(_r_fp2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
581 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
582 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
583 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
584 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
585 gzclose(_r_gzfp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
586 if ( pairedEnd && fileName2 != NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
587 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
588 gzclose(_r_fp2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
589 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
590 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
591
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
592 //qsort(list, seqCnt, sizeof(Read), toCompareRead);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
593
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
594 adjustQual(list, seqCnt);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
595
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
596 *seqList = list;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
597 *seqListSize = seqCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
598
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
599
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
600 _r_seq = list;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
601 _r_seqCnt = seqCnt;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
602
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
603 if ( pairedEnd ) discarded *= 2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
604
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
605 if (seqCnt>1)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
606 fprintf(stdout, "%d sequences are read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
607 else
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
608 fprintf(stdout, "%d sequence is read in %0.2f. (%d discarded) [Mem:%0.2f M]\n", seqCnt, (getTime()-startTime), discarded, getMemUsage());
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
609
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
610
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
611
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
612 return 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
613 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
614 /**********************************************/
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
615 void loadSamplingLocations(int **samplingLocs, int * samplingLocsSize)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
616 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
617 int i;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
618 int samLocsSize = errThreshold + 1;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
619 int *samLocs = getMem(sizeof(int)*samLocsSize);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
620
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
621 for (i=0; i<samLocsSize; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
622 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
623 samLocs[i] = (SEQ_LENGTH / samLocsSize) *i;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
624 if ( samLocs[i] + WINDOW_SIZE > SEQ_LENGTH)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
625 samLocs[i] = SEQ_LENGTH - WINDOW_SIZE;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
626 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
627
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
628 // Outputing the sampling locations
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
629
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
630 /*
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
631
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
632 int j;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
633 for (i=0; i<SEQ_LENGTH; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
634 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
635 fprintf(stdout, "-");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
636 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
637 fprintf(stdout, "\n");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
638
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
639 for ( i=0; i<samLocsSize; i++ )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
640 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
641 for ( j=0; j<samLocs[i]; j++ )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
642 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
643 fprintf(stdout," ");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
644 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
645 for (j=0; j<WINDOW_SIZE; j++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
646 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
647 fprintf(stdout,"+");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
648 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
649 fprintf(stdout, "\n");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
650 fflush(stdout);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
651 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
652
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
653
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
654 for ( i=0; i<SEQ_LENGTH; i++ )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
655 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
656 fprintf(stdout, "-");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
657 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
658 fprintf(stdout, "\n");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
659
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
660 */
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
661
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
662 *samplingLocs = samLocs;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
663 *samplingLocsSize = samLocsSize;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
664 _r_samplingLocs = samLocs;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
665 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
666
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
667 void finalizeReads(char *fileName)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
668 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
669 FILE *fp1=NULL;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
670
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
671 if (fileName != NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
672 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
673 fp1 = fileOpen(fileName, "w");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
674 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
675 if (pairedEndMode)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
676 _r_seqCnt /=2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
677
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
678 int i=0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
679 for (i = 0; i < _r_seqCnt; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
680 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
681 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual,"*")!=0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
682 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
683 fprintf(fp1,"@%s/1\n%s\n+\n%s\n@%s/2\n%s\n+\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].qual, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[i*2+1].qual);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
684 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
685 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] == 0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
686 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
687 fprintf(fp1, ">%s/1\n%s\n>%s/2\n%shits=%d\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq, _r_seq[2*i+1].hits[0]);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
688 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
689 else if (!pairedEndMode && _r_seq[i].hits[0] == 0 && strcmp(_r_seq[i].qual, "*")!=0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
690 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
691 fprintf(fp1,"@%s\n%s\n+\n%s\n", _r_seq[i].name, _r_seq[i].seq, _r_seq[i].qual);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
692 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
693 else if (!pairedEndMode && _r_seq[i].hits[0] == 0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
694 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
695 fprintf(fp1,">%s\n%s\n", _r_seq[i].name, _r_seq[i].seq);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
696 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
697 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
698
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
699 fclose(fp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
700 if (pairedEndMode)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
701 _r_seqCnt *= 2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
702
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
703 for (i = 0; i < _r_seqCnt; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
704 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
705 freeMem(_r_seq[i].hits,0);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
706 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
707
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
708
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
709 freeMem(_r_seq,0);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
710 freeMem(_r_samplingLocs,0);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
711 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
712
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
713 void adjustQual(Read *list, int seqCnt){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
714 /* This function will automatically determine the phred_offset and readjust quality values if needed */
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
715 int i,j,q, offset=64;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
716 int len = strlen(list[0].qual);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
717
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
718 for (i=0; i<10000 && i<seqCnt; i++){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
719 for (j=0;j<len;j++){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
720 q = (int) list[i].qual[j] - offset;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
721 if (q < 0){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
722 offset = 33;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
723 break;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
724 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
725 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
726 if (offset == 33)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
727 break;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
728 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
729
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
730 if (offset == 64){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
731 fprintf(stdout, "[Quality Warning] Phred offset is 64. Readjusting to 33.\n");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
732 fflush(stdout);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
733 for (i=0;i<seqCnt;i++){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
734 for (j=0;j<len;j++){
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
735 list[i].qual[j] -= 31;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
736 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
737 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
738 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
739 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
740
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
741
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
742
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
743 /*void finalizeOEAReads(char *fileName)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
744 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
745 FILE *fp1=NULL;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
746 FILE *fp2=NULL;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
747
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
748 //printf("OEA%s\n", fileName);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
749 char fileNameOEA1[200];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
750 char fileNameOEA2[200];
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
751 sprintf(fileNameOEA1, "%s_OEA1", fileName);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
752 sprintf(fileNameOEA2, "%s_OEA2", fileName);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
753
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
754
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
755 if (fileName != NULL)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
756 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
757 fp1 = fileOpen(fileNameOEA1, "w");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
758 fp2 = fileOpen(fileNameOEA2, "w");
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
759 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
760 if (pairedEndMode)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
761 _r_seqCnt /=2;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
762
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
763 int i=0;
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
764 printf("%d\n", _r_seqCnt);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
765 for (i = 0; i < _r_seqCnt; i++)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
766 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
767 if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")==0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
768 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
769 fprintf(fp1,">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
770 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
771 else if (pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")==0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
772 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
773 fprintf(fp2, ">%s/1\n%s\n>%s/2\n%s\n", _r_seq[i*2].name, _r_seq[i*2].seq, _r_seq[i*2].name, _r_seq[i*2+1].seq);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
774 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
775 else if (pairedEndMode && _r_seq[2*i].hits[0] == 0 && _r_seq[2*i+1].hits[0] != 0 && strcmp(_r_seq[2*i].qual, "*")!=0)
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
776 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
777 fprintf(fp1,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
778 _r_seq[2*i].seq,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
779 _r_seq[2*i].qual,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
780 _r_seq[2*i+1].name,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
781 _r_seq[2*i+1].seq,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
782 _r_seq[2*i+1].qual);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
783 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
784 else if ( pairedEndMode && _r_seq[2*i].hits[0] != 0 && _r_seq[2*i+1].hits[0] == 0 && strcmp(_r_seq[2*i].qual, "*")!=0 )
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
785 {
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
786 fprintf(fp2,"@%s\n%s\n+\n%s\n@%s\n%s\n+\n%s\n", _r_seq[2*i].name,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
787 _r_seq[2*i].seq,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
788 _r_seq[2*i].qual,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
789 _r_seq[2*i+1].name,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
790 _r_seq[2*i+1].seq,
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
791 _r_seq[2*i+1].qual);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
792 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
793 }
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
794
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
795 fclose(fp1);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
796 fclose(fp2);
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
797
d4054b05b015 Version update to 2.1.0.5
calkan
parents:
diff changeset
798 }*/