annotate rapsodyn/PrepareFastqLight.pl @ 6:1776b8ddd87e draft

Uploaded
author mcharles
date Mon, 29 Sep 2014 03:02:16 -0400
parents b0cbb9d21aa9
children 3f7b0788a1c4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
2 use strict;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
3 use warnings;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
4
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
5 my $read1 = $ARGV[0];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
6 my $read2 = $ARGV[1];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
7
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
8 my $output1 = $ARGV[2];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
9 my $output2 = $ARGV[3];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
10
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
11 my $TYPE = $ARGV[4];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
12 my $MIN_LENGTH = $ARGV[5];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
13 my $MIN_QUALITY = $ARGV[6];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
14
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
15 my $VERBOSE = $ARGV[7];
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
16
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
17 if (!$VERBOSE){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
18 $VERBOSE ="OFF";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
19 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
20
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
21 open(READ1, $read1) or die ("Can't open $read1\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
22 open(READ2, $read2) or die ("Can't open $read2\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
23 open(OUT1, ">$output1") or die ("Can't open $output1\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
24 open(OUT2, ">$output2") or die ("Can't open $output2\n");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
25
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
26
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
27 my $error1=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
28 my $error2=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
29 my $error3=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
30 my $error4=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
31 my $error5=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
32 my $error6=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
33 my $error7=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
34 my $error8=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
35 my $error9=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
36 my $error10=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
37
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
38 while (my $ligne1_r1 =<READ1>){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
39 my $ligne2_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
40 my $ligne3_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
41 my $ligne4_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
42 my $ligne1_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
43 my $ligne2_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
44 my $ligne3_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
45 my $ligne4_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
46
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
47 #@ 1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
48 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
49 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
50 print "Error in file format";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
51 if ($ligne1_r1){print $ligne1_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
52 if ($ligne2_r1){print $ligne2_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
53 if ($ligne3_r1){print $ligne3_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
54 if ($ligne4_r1){print $ligne4_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
55 if ($ligne1_r2){print $ligne1_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
56 if ($ligne2_r2){print $ligne2_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
57 if ($ligne3_r2){print $ligne3_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
58 if ($ligne4_r2){print $ligne4_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
59 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
60 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
61 $error1++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
62 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
63 elsif(($ligne1_r1 !~/^\@/)||($ligne1_r2 !~/^\@/)||($ligne3_r1 !~/^\+/)||($ligne3_r2 !~/^\+/)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
64 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
65 print "Error in header : format\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
66 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
67 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
68 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
69 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
70 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
71 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
72 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
73 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
74 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
75 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
76 $error2++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
77 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
78 #@ 1 - 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
79 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
80
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
81 my $length_seq1 = length($ligne2_r1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
82 my $length_qual1 =length($ligne4_r1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
83 my $seq1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
84 my $qual1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
85
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
86 my $length_seq2 = length($ligne2_r2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
87 my $length_qual2 =length($ligne4_r2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
88 my $seq2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
89 my $qual2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
90 my $header1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
91 my $header2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
92 my $repheader1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
93 my $repheader2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
94
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
95 if ($ligne1_r1 =~/^\@(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
96 $header1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
97 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
98
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
99 if ($ligne3_r1 =~/^\+(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
100 $repheader1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
101 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
102
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
103 if ($ligne1_r2 =~/^\@(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
104 $header2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
105 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
106
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
107 if ($ligne3_r2 =~/^\+(.*?)\#/){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
108 $repheader2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
109 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
110 #@ 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
111
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
112 ### Verification de la coherence sequence /qualité @ 1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
113 if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
114 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
115 print "Error in header : empty\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
116 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
117 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
118 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
119 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
120 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
121 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
122 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
123 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
124 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
125 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
126 $error3++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
127 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
128 elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
129 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
130 print "Error in header refgsd : empty\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
131 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
132 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
133 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
134 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
135 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
136 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
137 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
138 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
139 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
140 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
141 $error3++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
142 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
143 elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
144 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
145 print "Error in header : different\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
146 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
147 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
148 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
149 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
150 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
151 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
152 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
153 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
154 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
155 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
156 $error4++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
157 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
158 elsif (($TYPE eq "sanger")&&($header1 ne $header2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
159 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
160 print "Error in header : different\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
161 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
162 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
163 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
164 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
165 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
166 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
167 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
168 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
169 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
170 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
171 $error4++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
172 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
173 elsif (($length_seq1 != $length_qual1)||($length_seq2 != $length_qual2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
174 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
175 print "Error in seq/qual length\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
176 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
177 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
178 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
179 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
180 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
181 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
182 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
183 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
184 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
185 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
186 $error5++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
187 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
188 #@ 1 - 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
189 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
190 ### Parsing sequence & qualité
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
191 if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
192 $seq1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
193 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
194 if ($ligne2_r2 =~ /^([ATGCNX]+)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
195 $seq2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
196 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
197 if ($ligne4_r1 =~ /^(.*)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
198 $qual1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
199 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
200 if ($ligne4_r2 =~ /^(.*)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
201 $qual2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
202 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
203 #@ 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
204 ### Verification du parsing et de la coherence sequence /qualité (n°2)
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
205 if ((!$seq1)||(!$seq2)||(!$qual1)||(!$qual2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
206 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
207 print "Error parsing seq / quality \n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
208 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
209 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
210 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
211 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
212 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
213 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
214 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
215 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
216 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
217 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
218 $error6++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
219 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
220 elsif ((length($seq1) != length($qual1))||(length($seq2) != length($qual2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
221 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
222 print "Error in seq/qual length after parsing\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
223 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
224 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
225 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
226 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
227 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
228 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
229 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
230 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
231 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
232 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
233 $error7++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
234 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
235 #@ <1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
236 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
237 my $fastq_lines_r1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
238 my $fastq_lines_r2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
239 $fastq_lines_r1 = &grooming_and_trimming($ligne1_r1,$seq1,$qual1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
240 if ($fastq_lines_r1){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
241 $fastq_lines_r2 = &grooming_and_trimming($ligne1_r2,$seq2,$qual2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
242 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
243 if ($fastq_lines_r2){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
244 print OUT1 $fastq_lines_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
245 print OUT2 $fastq_lines_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
246 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
247 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
248 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
249
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
250 # print OUT1 $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
251 # print OUT1 $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
252 # print OUT1 $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
253 # print OUT1 $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
254 # print OUT2 $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
255 # print OUT2 $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
256 # print OUT2 $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
257 # print OUT2 $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
258
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
259 #@ 7 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
260 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
261 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
262
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
263
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
264
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
265 close (READ1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
266 close (READ2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
267 close (OUT1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
268 close (OUT2);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
269
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
270
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
271 sub grooming_and_trimming{
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
272 my $header = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
273 my $seq = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
274 my $quality = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
275 my $quality_converted="";
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
276 my $quality_ori=$quality;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
277
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
278 my $lengthseq = length($seq);
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
279 my $startTrim = 0;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
280 my $stopTrim = length($quality)-1;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
281 my $startnoN = $startTrim;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
282 my $stopnoN = $stopTrim;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
283
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
284
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
285 my $chercheN = $seq;
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
286 my @bad_position_N;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
287 my @bad_position_Q;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
288 my $current_index = index($chercheN,"N");
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
289 my $abs_index = $current_index;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
290 while ($current_index >=0){
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
291 push (@bad_position_N,$abs_index);
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
292
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
293 if ($current_index<length($seq)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
294 $chercheN = substr($chercheN,$current_index+1);
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
295 $current_index = index($chercheN,"N");
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
296 $abs_index = $current_index + $bad_position_N[$#bad_position_N]+1;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
297 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
298 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
299 last;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
300 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
301 }
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
302
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
303 my @q = split(//,$quality);
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
304 for (my $i=0;$i<=$#q;$i++){
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
305 my $chr = $q[$i];
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
306 my $num = ord($q[$i]);
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
307 if ($TYPE eq "illumina"){
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
308 $num = $num - 31; # 31 comme la difference entre la plage sanger (33-> 93 / 0->60) et illumina (64->104 / 0->40)
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
309 $quality_converted .= chr($num);
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
310 }
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
311
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
312 if ($num < $MIN_QUALITY + 33){ #33 comme le départ de la plage sanger
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
313 push(@bad_position_Q,$i);
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
314 }
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
315 }
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
316 if ($quality_converted){$quality = $quality_converted;}
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
317
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
318 my @bad_position = (@bad_position_N, @bad_position_Q);
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
319
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
320 if ($#bad_position>=0){
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
321 @bad_position = sort {$a <=> $b} @bad_position;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
322 my %coord=%{&extract_longer_string_coordinates_from_bad_position(0,$stopTrim,\@bad_position)};
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
323 $startTrim = $coord{"start"};
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
324 $stopTrim = $coord{"stop"};
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
325 #print "$startTrim .. $stopTrim\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
326
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
327 }
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
328 my $lengthTrim = $stopTrim - $startTrim +1;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
329
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
330 my $fastq_lines="";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
331
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
332 # if ($header =~ /GA8\-EAS671_0005\:3\:1\:1043\:4432/){
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
333 # print "HEAD:\t$header";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
334 # print "SEQ:\n$seq\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
335 # print "$quality_ori\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
336 # print "$quality\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
337 # for (my $i=0;$i<=$#bad_position;$i++){
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
338 # print $bad_position[$i]."(".$q[$bad_position[$i]]." : ".ord($q[$bad_position[$i]]).")"."\t";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
339 # }
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
340 # print "\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
341 # print "$startTrim .. $stopTrim / $lengthTrim \n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
342 # print $fastq_lines;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
343 # print "\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
344 # }
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
345
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
346 if ($lengthTrim >= $MIN_LENGTH){
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
347 $fastq_lines .= $header;
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
348 $fastq_lines .= substr($seq,$startTrim,$lengthTrim)."\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
349 $fastq_lines .= "+\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
350 $fastq_lines .= substr($quality,$startTrim,$lengthTrim)."\n";
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
351 return $fastq_lines;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
352
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
353 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
354 else {
5
b0cbb9d21aa9 Uploaded
mcharles
parents: 4
diff changeset
355 #print "Insufficient length after trimming\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
356 return "";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
357 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
358 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
359
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
360 sub extract_longer_string_coordinates_from_bad_position{
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
361 my $start=shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
362 my $stop =shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
363 my $refbad = shift;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
364 my @bad_position = @$refbad;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
365 my %coord;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
366
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
367 my $current_start = $start;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
368 my $current_stop = $bad_position[0]-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
369 if ($current_stop < $start){$current_stop = $start;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
370
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
371
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
372 #debut -> premier N
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
373 my $current_length = $current_stop - $current_start +1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
374 my $test_length;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
375
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
376 #entre les N
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
377 for (my $i=1;$i<=$#bad_position;$i++){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
378 $test_length = $bad_position[$i]+1-$bad_position[$i-1]-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
379 if ( $test_length > $current_length){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
380 $current_start = $bad_position[$i-1]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
381 $current_stop = $bad_position[$i]-1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
382 $current_length = $current_stop - $current_start +1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
383 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
384 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
385
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
386 #dernier N -> fin
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
387 $test_length = $stop-$bad_position[$#bad_position]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
388 if ( $test_length > $current_length){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
389 $current_start = $bad_position[$#bad_position]+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
390 if ($current_start > $stop){$current_start=$stop;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
391 $current_stop = $stop;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
392 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
393 $coord{"start"}=$current_start;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
394 $coord{"stop"}= $current_stop;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
395 $coord{"lenght"}=$current_stop-$current_start+1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
396
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
397 return \%coord;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
398 }