annotate rapsodyn/PrepareFastqLight.pl @ 4:9074a5104cdd draft

Uploaded
author mcharles
date Wed, 17 Sep 2014 04:20:08 -0400
parents 442a7c88b886
children b0cbb9d21aa9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
2 use strict;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
3 use warnings;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
4
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
5 my $read1 = $ARGV[0];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
6 my $read2 = $ARGV[1];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
7
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
8 my $output1 = $ARGV[2];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
9 my $output2 = $ARGV[3];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
10
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
11 my $TYPE = $ARGV[4];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
12 my $MIN_LENGTH = $ARGV[5];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
13 my $MIN_QUALITY = $ARGV[6];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
14
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
15 my $VERBOSE = $ARGV[7];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
16
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
17 if (!$VERBOSE){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
18 $VERBOSE ="OFF";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
19 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
20
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
21 open(READ1, $read1) or die ("Can't open $read1\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
22 open(READ2, $read2) or die ("Can't open $read2\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
23 open(OUT1, ">$output1") or die ("Can't open $output1\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
24 open(OUT2, ">$output2") or die ("Can't open $output2\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
25
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
26
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
27 my $error1=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
28 my $error2=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
29 my $error3=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
30 my $error4=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
31 my $error5=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
32 my $error6=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
33 my $error7=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
34 my $error8=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
35 my $error9=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
36 my $error10=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
37
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
38 while (my $ligne1_r1 =<READ1>){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
39 my $ligne2_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
40 my $ligne3_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
41 my $ligne4_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
42 my $ligne1_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
43 my $ligne2_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
44 my $ligne3_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
45 my $ligne4_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
46
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
47 #@ 1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
48 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
49 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
50 print "Error in file format";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
51 if ($ligne1_r1){print $ligne1_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
52 if ($ligne2_r1){print $ligne2_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
53 if ($ligne3_r1){print $ligne3_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
54 if ($ligne4_r1){print $ligne4_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
55 if ($ligne1_r2){print $ligne1_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
56 if ($ligne2_r2){print $ligne2_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
57 if ($ligne3_r2){print $ligne3_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
58 if ($ligne4_r2){print $ligne4_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
59 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
60 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
61 $error1++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
62 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
63 elsif(($ligne1_r1 !~/^\@/)||($ligne1_r2 !~/^\@/)||($ligne3_r1 !~/^\+/)||($ligne3_r2 !~/^\+/)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
64 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
65 print "Error in header : format\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
66 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
67 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
68 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
69 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
70 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
71 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
72 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
73 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
74 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
75 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
76 $error2++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
77 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
78 #@ 1 - 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
79 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
80
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
81 my $length_seq1 = length($ligne2_r1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
82 my $length_qual1 =length($ligne4_r1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
83 my $seq1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
84 my $qual1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
85
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
86 my $length_seq2 = length($ligne2_r2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
87 my $length_qual2 =length($ligne4_r2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
88 my $seq2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
89 my $qual2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
90 my $header1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
91 my $header2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
92 my $repheader1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
93 my $repheader2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
94
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
95 if ($ligne1_r1 =~/^\@(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
96 $header1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
97 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
98
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
99 if ($ligne3_r1 =~/^\+(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
100 $repheader1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
101 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
102
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
103 if ($ligne1_r2 =~/^\@(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
104 $header2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
105 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
106
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
107 if ($ligne3_r2 =~/^\+(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
108 $repheader2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
109 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
110 #@ 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
111
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
112 ### Verification de la coherence sequence /qualité @ 1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
113 if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
114 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
115 print "Error in header : empty\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
116 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
117 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
118 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
119 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
120 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
121 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
122 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
123 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
124 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
125 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
126 $error3++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
127 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
128 elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
129 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
130 print "Error in header refgsd : empty\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
131 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
132 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
133 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
134 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
135 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
136 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
137 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
138 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
139 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
140 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
141 $error3++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
142 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
143 elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
144 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
145 print "Error in header : different\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
146 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
147 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
148 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
149 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
150 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
151 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
152 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
153 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
154 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
155 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
156 $error4++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
157 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
158 elsif (($TYPE eq "sanger")&&($header1 ne $header2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
159 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
160 print "Error in header : different\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
161 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
162 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
163 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
164 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
165 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
166 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
167 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
168 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
169 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
170 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
171 $error4++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
172 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
173 elsif (($length_seq1 != $length_qual1)||($length_seq2 != $length_qual2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
174 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
175 print "Error in seq/qual length\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
176 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
177 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
178 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
179 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
180 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
181 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
182 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
183 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
184 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
185 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
186 $error5++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
187 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
188 #@ 1 - 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
189 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
190 ### Parsing sequence & qualité
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
191 if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
192 $seq1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
193 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
194 if ($ligne2_r2 =~ /^([ATGCNX]+)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
195 $seq2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
196 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
197 if ($ligne4_r1 =~ /^(.*)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
198 $qual1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
199 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
200 if ($ligne4_r2 =~ /^(.*)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
201 $qual2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
202 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
203 #@ 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
204 ### Verification du parsing et de la coherence sequence /qualité (n°2)
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
205 if ((!$seq1)||(!$seq2)||(!$qual1)||(!$qual2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
206 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
207 print "Error parsing seq / quality \n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
208 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
209 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
210 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
211 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
212 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
213 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
214 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
215 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
216 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
217 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
218 $error6++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
219 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
220 elsif ((length($seq1) != length($qual1))||(length($seq2) != length($qual2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
221 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
222 print "Error in seq/qual length after parsing\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
223 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
224 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
225 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
226 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
227 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
228 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
229 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
230 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
231 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
232 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
233 $error7++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
234 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
235 #@ <1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
236 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
237 my $fastq_lines_r1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
238 my $fastq_lines_r2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
239 $fastq_lines_r1 = &grooming_and_trimming($ligne1_r1,$seq1,$qual1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
240 if ($fastq_lines_r1){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
241 $fastq_lines_r2 = &grooming_and_trimming($ligne1_r2,$seq2,$qual2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
242 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
243 if ($fastq_lines_r2){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
244 print OUT1 $fastq_lines_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
245 print OUT2 $fastq_lines_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
246 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
247 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
248 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
249
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
250 # print OUT1 $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
251 # print OUT1 $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
252 # print OUT1 $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
253 # print OUT1 $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
254 # print OUT2 $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
255 # print OUT2 $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
256 # print OUT2 $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
257 # print OUT2 $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
258
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
259 #@ 7 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
260 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
261 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
262
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
263
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
264
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
265 close (READ1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
266 close (READ2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
267 close (OUT1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
268 close (OUT2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
269
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
270
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
271
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
272
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
273 sub grooming_and_trimming{
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
274 my $header = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
275 my $seq = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
276 my $quality = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
277 my $quality_converted="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
278
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
279 my $startnoN = 0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
280 my $stopnoN = length($quality)-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
281
4
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
282 #print "HEAD:\t$header";
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
283 #print "SEQ:\t$seq\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
284
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
285 my $chercheN = $seq;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
286 my @bad_position;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
287 my $current_index = index($chercheN,"N");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
288 my $abs_index = $current_index;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
289 while ($current_index >=0){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
290 push (@bad_position,$abs_index);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
291
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
292 if ($current_index<length($seq)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
293 $chercheN = substr($chercheN,$current_index+1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
294 $current_index = index($chercheN,"N");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
295 $abs_index = $current_index + $bad_position[$#bad_position]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
296 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
297 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
298 last;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
299 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
300 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
301
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
302
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
303 if ($#bad_position>=0){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
304 my %coord=%{&extract_longer_string_coordinates_from_bad_position($startnoN,$stopnoN,\@bad_position)};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
305 $startnoN = $coord{"start"};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
306 $stopnoN = $coord{"stop"};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
307 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
308 my $lengthnoN = $stopnoN - $startnoN + 1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
309 my $seqnoN = substr($seq,$startnoN,$lengthnoN);
4
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
310 # print "SEQnoN\t:$seqnoN\n";
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
311 # for (my $i=0;$i<=$#bad_position;$i++){
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
312 # print $bad_position[$i]."\t";
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
313 # }
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
314 # print "\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
315
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
316 if ($lengthnoN >= $MIN_LENGTH){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
317 my $startTrim = $startnoN;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
318 my $stopTrim = $stopnoN;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
319
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
320 my $quality_converted="";
4
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
321 #my @bad_position;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
322
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
323 my @q = split(//,$quality);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
324 #print "QUALITY\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
325 #print "$quality\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
326 for (my $i=0;$i<=$stopnoN;$i++){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
327 my $chr = $q[$i];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
328 my $num = ord($q[$i]);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
329 if ($TYPE eq "illumina"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
330 $num = $num -64+33;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
331 $quality_converted .= chr($num);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
332 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
333
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
334 if ($num <$MIN_QUALITY + 64 - 33 ){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
335 push(@bad_position,$i+$startnoN);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
336 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
337 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
338 if ($quality_converted){$quality = $quality_converted;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
339 #print "$quality\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
340
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
341
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
342
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
343 if ($#bad_position>=0){
4
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
344 @bad_position = sort {$a <=> $b} @bad_position;
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
345 # for (my $i=0;$i<=$#bad_position;$i++){
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
346 # print $bad_position[$i]."\t";
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
347 # }
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
348 # print "\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
349 my %coord=%{&extract_longer_string_coordinates_from_bad_position($startnoN,$stopnoN,\@bad_position)};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
350 $startTrim = $coord{"start"};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
351 $stopTrim = $coord{"stop"};
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
352 #print "$startTrim .. $stopTrim\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
353
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
354 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
355 my $lengthTrim = $stopTrim - $startTrim +1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
356
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
357
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
358 my $fastq_lines="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
359
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
360 if ($lengthTrim >= $MIN_LENGTH){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
361 $fastq_lines .= $header;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
362 $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
363 $fastq_lines .= "+\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
364 $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n";
4
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
365 # print $fastq_lines;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
366 return $fastq_lines;
4
9074a5104cdd Uploaded
mcharles
parents: 0
diff changeset
367
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
368 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
369 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
370 return "";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
371 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
372
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
373
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
374
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
375 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
376 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
377 return "";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
378 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
379
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
380
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
381 # my @s = split(//,$seq);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
382 # my $sanger_quality="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
383
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
384
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
385
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
386
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
387 # return $sanger_quality;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
388 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
389
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
390 sub extract_longer_string_coordinates_from_bad_position{
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
391 my $start=shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
392 my $stop =shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
393 my $refbad = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
394 my @bad_position = @$refbad;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
395 my %coord;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
396
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
397 my $current_start = $start;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
398 my $current_stop = $bad_position[0]-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
399 if ($current_stop < $start){$current_stop = $start;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
400
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
401
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
402 #debut -> premier N
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
403 my $current_length = $current_stop - $current_start +1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
404 my $test_length;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
405
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
406 #entre les N
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
407 for (my $i=1;$i<=$#bad_position;$i++){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
408 $test_length = $bad_position[$i]+1-$bad_position[$i-1]-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
409 if ( $test_length > $current_length){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
410 $current_start = $bad_position[$i-1]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
411 $current_stop = $bad_position[$i]-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
412 $current_length = $current_stop - $current_start +1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
413 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
414 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
415
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
416 #dernier N -> fin
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
417 $test_length = $stop-$bad_position[$#bad_position]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
418 if ( $test_length > $current_length){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
419 $current_start = $bad_position[$#bad_position]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
420 if ($current_start > $stop){$current_start=$stop;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
421 $current_stop = $stop;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
422 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
423 $coord{"start"}=$current_start;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
424 $coord{"stop"}= $current_stop;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
425 $coord{"lenght"}=$current_stop-$current_start+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
426
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
427 return \%coord;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
428 }