annotate rapsodyn/PrepareFastqLight.pl @ 15:56d328bce3a7 draft default tip

Uploaded
author mcharles
date Thu, 29 Jan 2015 08:54:06 -0500
parents 0a6c1cfe4dc8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
2 #v1.1.1 new check on read synchro
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
3 #v1.1.0 manage empty files
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
4 #v1.0.4 bug correction, last read not considered
8
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
5 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...)
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
6 #V1.0.2 added auto type detection
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
7 #V1.0.1 added log, option parameters
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
8 use strict;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
9 use warnings;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
10 use Getopt::Long;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
11
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
12 my $read1_file;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
13 my $read2_file;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
14 my $log_file;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
15 my $output1_file;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
16 my $output2_file;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
17
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
18 my $TYPE="sanger";
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
19 my $MIN_LENGTH=30;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
20 my $MIN_QUALITY=30;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
21
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
22 my $VERBOSE = "OFF";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
23
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
24 GetOptions (
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
25 "read1_file=s" => \$read1_file,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
26 "read2_file=s" => \$read2_file,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
27 "log_file=s" => \$log_file,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
28 "output1_file=s" => \$output1_file,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
29 "output2_file=s" => \$output2_file,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
30 "type=s" => \$TYPE,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
31 "min_length=i" => \$MIN_LENGTH,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
32 "min_quality=i" => \$MIN_QUALITY,
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
33 "verbose=s" => \$VERBOSE
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
34 ) or die("Error in command line arguments\n");
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
35
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
36
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
37 my $nb_read1=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
38 my $nb_base_read1=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
39 my $nb_read2=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
40 my $nb_base_read2=0;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
41
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
42 my $nb_read1_t=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
43 my $nb_base_read1_t=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
44 my $nb_read2_t=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
45 my $nb_base_read2_t=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
46
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
47 my $nb_base_current_t=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
48
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
49 open(READ1, '<', $read1_file) or die ("Can't open $read1_file\n"); # 3-arg open: the file name is never parsed as a mode or pipe
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
50 open(READ2, '<', $read2_file) or die ("Can't open $read2_file\n");
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
51 open(OUT1, '>', $output1_file) or die ("Can't open $output1_file\n"); # trimmed read 1 output (truncates an existing file)
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
52 open(OUT2, '>', $output2_file) or die ("Can't open $output2_file\n"); # trimmed read 2 output (truncates an existing file)
8
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
53 open (LF, '>', $log_file) or die("Can't open $log_file\n"); # summary log written at the end of the run
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
54
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
55 if (( -z READ1)&&( -z READ2)){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
56 exit(0);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
57 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
58 elsif (( -z READ1)||( -z READ2)){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
59 print STDERR "One empty File\n";
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
60 exit(0);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
61 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
62
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
63
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
64 my $error1=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
65 my $error2=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
66 my $error3=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
67 my $error4=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
68 my $error5=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
69 my $error6=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
70 my $error7=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
71 my $error8=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
72 my $error9=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
73 my $error10=0;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
74
8
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
75 my $auto_type="";
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
76 my %qual;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
77 if ($TYPE eq "auto"){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
78 my $compt=0;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
79 open(DETECT, '<', $read1_file) or die ("Can't open $read1_file\n"); # second, read-only pass over read1 used only for quality-encoding detection; 3-arg open keeps the name from being parsed as a mode
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
80 while (my $ligne1_r1 =<DETECT>){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
81 my $ligne2_r1 =<DETECT>;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
82 my $ligne3_r1 =<DETECT>;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
83 my $ligne4_r1 =<DETECT>;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
84 $compt++;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
85 if ($ligne4_r1 =~ /^(.*)\s*$/i){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
86 my $qual = $1;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
87 my @q = split(//,$qual);
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
88 for (my $i=0;$i<=$#q;$i++){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
89 my $num = ord($q[$i]);
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
90 if ($qual{$num}){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
91 $qual{$num}++;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
92 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
93 else {
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
94 $qual{$num} = 1;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
95 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
96 #range sanger / illumina 1.8+ : 33->94
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
97 #range illumina 1.3->1.7 : 64->105
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
98 if ($num > 94){$auto_type = "illumina";last;}
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
99 if ($num < 64){$auto_type = "sanger";last;}
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
100 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
101 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
102 else {
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
103 print STDERR "Error in format detection : quality not recognized\n$ligne4_r1";
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
104 exit(0);
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
105 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
106
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
107 if ($auto_type ne ""){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
108 last;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
109 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
110
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
111 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
112 close (DETECT);
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
113 if ($auto_type eq ""){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
114 print STDERR "Error in format detection : type not recognized parsing read1\n";
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
115 foreach my $key (sort {$a <=> $b} keys %qual){
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
116 print "$key\t:\t",$qual{$key},"\n";
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
117 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
118 exit(0);
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
119 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
120 else {
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
121 $TYPE = $auto_type;
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
122 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
123 }
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
124
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
125
d857538d9fea Uploaded
mcharles
parents: 7
diff changeset
126
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
127 my $compt=0;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
128 while (my $ligne1_r1 =<READ1>){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
129 my $ligne2_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
130 my $ligne3_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
131 my $ligne4_r1 =<READ1>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
132 my $ligne1_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
133 my $ligne2_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
134 my $ligne3_r2 =<READ2>;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
135 my $ligne4_r2 =<READ2>;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
136
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
137 $compt++;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
138 $nb_read1++;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
139 $nb_read2++;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
140
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
141
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
142 if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
143 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
144 print "Error in file format";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
145 if ($ligne1_r1){print $ligne1_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
146 if ($ligne2_r1){print $ligne2_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
147 if ($ligne3_r1){print $ligne3_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
148 if ($ligne4_r1){print $ligne4_r1;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
149 if ($ligne1_r2){print $ligne1_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
150 if ($ligne2_r2){print $ligne2_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
151 if ($ligne3_r2){print $ligne3_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
152 if ($ligne4_r2){print $ligne4_r2;}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
153 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
154 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
155 $error1++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
156 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
157 elsif(($ligne1_r1 !~/^\@/)||($ligne1_r2 !~/^\@/)||($ligne3_r1 !~/^\+/)||($ligne3_r2 !~/^\+/)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
158 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
159 print "Error in header : format\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
160 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
161 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
162 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
163 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
164 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
165 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
166 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
167 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
168 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
169 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
170 $error2++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
171 }
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
172
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
173 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
174
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
175 chomp($ligne2_r1); my $length_seq1 = length($ligne2_r1); # chomp() returns the number of removed characters, so strip first and then measure the line itself
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
176 chomp($ligne4_r1); my $length_qual1 = length($ligne4_r1); # raw quality-string length for read 1, compared with $length_seq1 below
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
177 my $seq1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
178 my $qual1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
179
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
180 chomp($ligne2_r2); my $length_seq2 = length($ligne2_r2); # same measurement for read 2
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
181 chomp($ligne4_r2); my $length_qual2 = length($ligne4_r2); # raw quality-string length for read 2, compared with $length_seq2 below
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
182 my $seq2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
183 my $qual2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
184 my $header1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
185 my $header2="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
186 my $repheader1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
187 my $repheader2="";
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
188
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
189 my @tbl_header1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
190 my @tbl_header2;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
191 if ($ligne1_r1 =~/^\@(.*?)\s*$/){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
192 $header1 = $1;
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
193 @tbl_header1 = split(//,$header1);
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
194 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
195
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
196 if ($ligne3_r1 =~/^\+(.*?)\s*$/){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
197 $repheader1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
198 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
199
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
200 if ($ligne1_r2 =~/^\@(.*?)\s*$/){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
201 $header2 = $1;
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
202 @tbl_header2 = split(//,$header2);
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
203 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
204
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
205 if ($ligne3_r2 =~/^\+(.*?)\s*$/){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
206 $repheader2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
207 }
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
208 my $diffheader=0;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
209 if ($#tbl_header1 == $#tbl_header2){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
210 for (my $i=0;$i<=$#tbl_header1;$i++){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
211 if ($tbl_header1[$i] ne $tbl_header2[$i]){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
212 $diffheader++;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
213 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
214 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
215 }
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
216
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
217
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
218
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
219 ### Check header validity and sequence/quality consistency
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
220 if ((!$header1)||(!$header2)){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
221 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
222 print "Error in header : empty\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
223 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
224 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
225 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
226 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
227 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
228 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
229 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
230 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
231 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
232 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
233 $error3++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
234 }
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
235 elsif ((($repheader1)&&($header1 ne $repheader1))||(($repheader2)&&($header2 ne $repheader2))){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
236 if ($VERBOSE eq "ON"){
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
237 print "Error : difference in header and header repeat\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
238 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
239 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
240 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
241 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
242 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
243 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
244 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
245 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
246 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
247 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
248 $error4++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
249 }
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
250 elsif ($#tbl_header1 != $#tbl_header2){
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
251 if ($VERBOSE eq "ON"){
15
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
252 print "Error : difference in header size between reads\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
253 print $ligne1_r1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
254 print $ligne2_r1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
255 print $ligne3_r1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
256 print $ligne4_r1;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
257 print $ligne1_r2;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
258 print $ligne2_r2;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
259 print $ligne3_r2;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
260 print $ligne4_r2;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
261 print "\n";
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
262 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
263 $error4++;
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
264 }
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
265 elsif ($diffheader > 1 ){ # More than ...1 and ...2 difference in read1 and read2 header
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
266 if ($VERBOSE eq "ON"){
56d328bce3a7 Uploaded
mcharles
parents: 10
diff changeset
267 print "Error can't establish synchro between reads, more than 1 difference between headers\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
268 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
269 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
270 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
271 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
272 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
273 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
274 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
275 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
276 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
277 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
278 $error4++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
279 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
280 elsif (($length_seq1 != $length_qual1)||($length_seq2 != $length_qual2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
281 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
282 print "Error in seq/qual length\n";
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
283 print "$length_seq1 / $length_qual1 \t $length_seq2 / $length_qual2\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
284 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
285 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
286 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
287 print $ligne4_r1;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
288 print "\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
289 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
290 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
291 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
292 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
293 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
294 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
295 $error5++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
296 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
297 #@ 1 - 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
298 else {
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 8
diff changeset
299 #print "TEST : $compt\n";
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
300 ### Parse sequence & quality
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
301 if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
302 $seq1 = $1;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
303 $nb_base_read1 += length($seq1);
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
304 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
305 if ($ligne2_r2 =~ /^([ATGCNX]+)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
306 $seq2 = $1;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
307 $nb_base_read2 += length($seq2);
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
308 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
309 if ($ligne4_r1 =~ /^(.*)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
310 $qual1 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
311 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
312 if ($ligne4_r2 =~ /^(.*)\s*$/i){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
313 $qual2 = $1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
314 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
315 #@ 2 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
316 ### Check the parsing result and sequence/quality consistency (second pass)
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
317 if ((!$seq1)||(!$seq2)||(!$qual1)||(!$qual2)){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
318 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
319 print "Error parsing seq / quality \n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
320 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
321 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
322 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
323 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
324 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
325 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
326 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
327 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
328 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
329 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
330 $error6++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
331 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
332 elsif ((length($seq1) != length($qual1))||(length($seq2) != length($qual2))){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
333 if ($VERBOSE eq "ON"){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
334 print "Error in seq/qual length after parsing\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
335 print $ligne1_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
336 print $ligne2_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
337 print $ligne3_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
338 print $ligne4_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
339 print $ligne1_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
340 print $ligne2_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
341 print $ligne3_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
342 print $ligne4_r2;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
343 print "\n";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
344 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
345 $error7++;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
346 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
347 #@ <1 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
348 else {
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
349 my $fastq_lines_r1="";
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
350 my $fastq_lines_r2="";
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
351 my $nb_base_current_read1_t = 0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
352 my $nb_base_current_read2_t = 0;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
353
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
354 $fastq_lines_r1 = &grooming_and_trimming($ligne1_r1,$seq1,$qual1);
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
355 $nb_base_current_read1_t = $nb_base_current_t;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
356 if ($fastq_lines_r1){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
357 $fastq_lines_r2 = &grooming_and_trimming($ligne1_r2,$seq2,$qual2);
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
358 $nb_base_current_read2_t = $nb_base_current_t;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
359 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
360 if ($fastq_lines_r2){
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
361 print OUT1 $fastq_lines_r1;
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
362 print OUT2 $fastq_lines_r2;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
363
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
364 $nb_read1_t++;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
365 $nb_read2_t++;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
366 $nb_base_read1_t += $nb_base_current_read1_t;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
367 $nb_base_read2_t += $nb_base_current_read2_t;
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
368
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
369
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
370 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
371 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
372 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
373
7
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
374
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
375 #@ 7 sec
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
376 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
377 }
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
378
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
# End of processing: release the file handles and write the summary log.
# READ1/READ2 are presumably the input FASTQ handles (opened earlier in the
# script) - a failed close on them is not treated as an error here.
close(READ1);
close(READ2);

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the close of every handle that was written to is checked.
close(OUT1) or die "Error while closing output file 1: $!\n";
close(OUT2) or die "Error while closing output file 2: $!\n";

# Summary log: read and base counts before and after the preparation.
print LF "\n####\t Fastq preparation \n";
print LF "Fastq format : $TYPE\n";
print LF "## Before preparation\n";
print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n";
print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n";
print LF "## After preparation\n";
print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n";
print LF "#Read2 :\t$nb_read2_t\t#Base :\t$nb_base_read2_t\n";
close(LF) or die "Error while closing log file: $!\n";
3f7b0788a1c4 Uploaded
mcharles
parents: 5
diff changeset
393
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
394
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
# Convert (when needed) and trim one FASTQ record.
#
# The read is reduced to its longest stretch that contains neither an "N"
# base nor a base whose quality is below $MIN_QUALITY. When $TYPE is
# "illumina", every quality character is first shifted down by 31 and the
# shifted string is what gets emitted.
#
# Arguments:
#   $header  - header line of the record; it is emitted as-is, no newline is
#              appended (presumably the caller passes it with its trailing
#              newline - verify against the read loop)
#   $seq     - sequence line
#   $quality - quality line
#   NOTE(review): $seq and $quality look like they are expected without a
#   trailing newline and with the same length - confirm in the caller.
#
# Reads the file-level settings $TYPE, $MIN_QUALITY and $MIN_LENGTH.
# Side effect: when the read is kept, the length of the trimmed sequence is
# stored in the file-level variable $nb_base_current_t (it is left untouched
# when the read is rejected).
#
# Returns the FASTQ record of the trimmed read (header, sequence, "+",
# quality), or "" when fewer than $MIN_LENGTH bases remain.
sub grooming_and_trimming{
    my ($header, $seq, $quality) = @_;

    my $startTrim = 0;
    my $stopTrim  = length($quality) - 1;

    # Positions of every "N" in the sequence.
    my @bad_position_N;
    my $n_index = index($seq, "N");
    while ($n_index >= 0) {
        push(@bad_position_N, $n_index);
        $n_index = index($seq, "N", $n_index + 1);
    }

    # Positions of every base whose quality is too low; the quality string
    # is converted on the fly when the input is in illumina encoding.
    my $is_illumina       = ($TYPE eq "illumina");
    my $quality_converted = "";
    my @bad_position_Q;
    my @q = split(//, $quality);
    for my $i (0 .. $#q) {
        my $num = ord($q[$i]);
        if ($is_illumina) {
            # 31 is the difference between the sanger range
            # (33->93 / 0->60) and the illumina range (64->104 / 0->40)
            $num -= 31;
            $quality_converted .= chr($num);
        }

        # 33 is where the sanger range starts
        if ($num < $MIN_QUALITY + 33) {
            push(@bad_position_Q, $i);
        }
    }
    # Decided on $TYPE rather than on the truthiness of the converted
    # string, which would wrongly discard a converted quality of "0".
    if ($is_illumina) {
        $quality = $quality_converted;
    }

    my @bad_position = (@bad_position_N, @bad_position_Q);

    if (@bad_position) {
        @bad_position = sort { $a <=> $b } @bad_position;
        my $coord = extract_longer_string_coordinates_from_bad_position(0, $stopTrim, \@bad_position);
        $startTrim = $coord->{"start"};
        $stopTrim  = $coord->{"stop"};
    }
    my $lengthTrim = $stopTrim - $startTrim + 1;

    if ($lengthTrim < $MIN_LENGTH) {
        # Insufficient length after trimming
        return "";
    }

    my $new_seq = substr($seq, $startTrim, $lengthTrim);
    my $new_q   = substr($quality, $startTrim, $lengthTrim);
    $nb_base_current_t = length($new_seq);

    my $fastq_lines = "";
    $fastq_lines .= $header;
    $fastq_lines .= $new_seq."\n";
    $fastq_lines .= "+\n";
    $fastq_lines .= $new_q."\n";
    return $fastq_lines;
}
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
501
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
# Find the longest run of consecutive positions inside [$start, $stop] that
# contains none of the listed "bad" positions.
#
# Arguments:
#   $start  - first position of the range (inclusive)
#   $stop   - last position of the range (inclusive)
#   $refbad - reference to an array of bad positions, sorted in ascending
#             numeric order; duplicates are allowed and an empty array means
#             the whole range is clean
#
# Returns a reference to a hash with the keys "start", "stop" and "lenght"
# (sic - the misspelled key is kept so that existing readers keep working).
# When several runs share the maximal length, the last one is returned.
# When the range holds no clean position at all, the empty run is returned:
# start => $start, stop => $start - 1, lenght => 0.
sub extract_longer_string_coordinates_from_bad_position{
    my ($start, $stop, $refbad) = @_;

    my $best_start  = $start;
    my $best_length = 0;
    my $run_start   = $start;

    # A sentinel one past $stop closes the run that follows the last bad
    # position (or the whole range when there is no bad position).
    for my $bad (@{$refbad}, $stop + 1) {
        my $run_stop = $bad - 1;
        if ($run_stop > $stop) {
            $run_stop = $stop;
        }
        my $run_length = $run_stop - $run_start + 1;

        # ">=" lets a later run replace an earlier one of the same length;
        # empty runs (adjacent or duplicated bad positions) are ignored.
        if ($run_length > 0 && $run_length >= $best_length) {
            $best_start  = $run_start;
            $best_length = $run_length;
        }

        # The next run starts right after this bad position.
        if ($bad + 1 > $run_start) {
            $run_start = $bad + 1;
        }
    }

    my %coord;
    $coord{"start"}  = $best_start;
    $coord{"stop"}   = $best_start + $best_length - 1;
    $coord{"lenght"} = $best_length;

    return \%coord;
}