Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/PrepareFastqLight.pl @ 8:d857538d9fea draft
Uploaded
author | mcharles |
---|---|
date | Fri, 10 Oct 2014 07:05:36 -0400 |
parents | 3f7b0788a1c4 |
children | 0a6c1cfe4dc8 |
comparison
equal
deleted
inserted
replaced
7:3f7b0788a1c4 | 8:d857538d9fea |
---|---|
1 #!/usr/bin/perl | 1 #!/usr/bin/perl |
2 #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) | |
3 #V1.0.2 added auto type detection | |
2 #V1.0.1 added log, option parameters | 4 #V1.0.1 added log, option parameters |
3 use strict; | 5 use strict; |
4 use warnings; | 6 use warnings; |
5 use Getopt::Long; | 7 use Getopt::Long; |
6 | 8 |
39 my $nb_read2_t=0; | 41 my $nb_read2_t=0; |
40 my $nb_base_read2_t=0; | 42 my $nb_base_read2_t=0; |
41 | 43 |
42 my $nb_base_current_t=0; | 44 my $nb_base_current_t=0; |
43 | 45 |
44 | |
45 open(READ1, $read1_file) or die ("Can't open $read1_file\n"); | 46 open(READ1, $read1_file) or die ("Can't open $read1_file\n"); |
46 open(READ2, $read2_file) or die ("Can't open $read2_file\n"); | 47 open(READ2, $read2_file) or die ("Can't open $read2_file\n"); |
47 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); | 48 open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); |
48 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); | 49 open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); |
50 open (LF,">$log_file") or die("Can't open $log_file\n"); | |
51 | |
49 | 52 |
50 my $error1=0; | 53 my $error1=0; |
51 my $error2=0; | 54 my $error2=0; |
52 my $error3=0; | 55 my $error3=0; |
53 my $error4=0; | 56 my $error4=0; |
55 my $error6=0; | 58 my $error6=0; |
56 my $error7=0; | 59 my $error7=0; |
57 my $error8=0; | 60 my $error8=0; |
58 my $error9=0; | 61 my $error9=0; |
59 my $error10=0; | 62 my $error10=0; |
63 | |
64 my $auto_type=""; | |
65 my %qual; | |
66 if ($TYPE eq "auto"){ | |
67 my $compt=0; | |
68 open(DETECT, $read1_file) or die ("Can't open $read1_file\n"); | |
69 while (my $ligne1_r1 =<DETECT>){ | |
70 my $ligne2_r1 =<DETECT>; | |
71 my $ligne3_r1 =<DETECT>; | |
72 my $ligne4_r1 =<DETECT>; | |
73 $compt++; | |
74 if ($ligne4_r1 =~ /^(.*)\s*$/i){ | |
75 my $qual = $1; | |
76 my @q = split(//,$qual); | |
77 for (my $i=0;$i<=$#q;$i++){ | |
78 my $num = ord($q[$i]); | |
79 if ($qual{$num}){ | |
80 $qual{$num}++; | |
81 } | |
82 else { | |
83 $qual{$num} = 1; | |
84 } | |
85 #range sanger / illumina 1.8+ : 33->94 | |
86 #range illumina 1.3->1.7 : 64->105 | |
87 if ($num > 94){$auto_type = "illumina";last;} | |
88 if ($num < 64){$auto_type = "sanger";last;} | |
89 } | |
90 } | |
91 else { | |
92 print STDERR "Error in format detection : quality not recognized\n$ligne4_r1"; | |
93 exit(0); | |
94 } | |
95 | |
96 if ($auto_type ne ""){ | |
97 last; | |
98 } | |
99 | |
100 } | |
101 close (DETECT); | |
102 if ($auto_type eq ""){ | |
103 print STDERR "Error in format detection : type not recognized parsing read1\n"; | |
104 foreach my $key (sort {$a <=> $b} keys %qual){ | |
105 print "$key\t:\t",$qual{$key},"\n"; | |
106 } | |
107 exit(0); | |
108 } | |
109 else { | |
110 $TYPE = $auto_type; | |
111 } | |
112 } | |
113 | |
114 | |
115 | |
60 | 116 |
61 while (my $ligne1_r1 =<READ1>){ | 117 while (my $ligne1_r1 =<READ1>){ |
62 my $ligne2_r1 =<READ1>; | 118 my $ligne2_r1 =<READ1>; |
63 my $ligne3_r1 =<READ1>; | 119 my $ligne3_r1 =<READ1>; |
64 my $ligne4_r1 =<READ1>; | 120 my $ligne4_r1 =<READ1>; |
117 my $header2=""; | 173 my $header2=""; |
118 my $repheader1=""; | 174 my $repheader1=""; |
119 my $repheader2=""; | 175 my $repheader2=""; |
120 | 176 |
121 | 177 |
122 if ($ligne1_r1 =~/^\@(.*?)\#/){ | 178 if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){ |
123 $header1 = $1; | 179 $header1 = $1; |
124 } | 180 } |
125 | 181 |
126 if ($ligne3_r1 =~/^\+(.*?)\#/){ | 182 if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){ |
127 $repheader1 = $1; | 183 $repheader1 = $1; |
128 } | 184 } |
129 | 185 |
130 if ($ligne1_r2 =~/^\@(.*?)\#/){ | 186 if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){ |
131 $header2 = $1; | 187 $header2 = $1; |
132 } | 188 } |
133 | 189 |
134 if ($ligne3_r2 =~/^\+(.*?)\#/){ | 190 if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){ |
135 $repheader2 = $1; | 191 $repheader2 = $1; |
136 } | 192 } |
137 #@ 2 sec | 193 #@ 2 sec |
138 | 194 |
139 ### Verification de la coherence sequence /qualité @ 1 sec | 195 ### Verification de la coherence sequence /qualité @ 1 sec |
296 close (READ1); | 352 close (READ1); |
297 close (READ2); | 353 close (READ2); |
298 close (OUT1); | 354 close (OUT1); |
299 close (OUT2); | 355 close (OUT2); |
300 | 356 |
301 open (LF,">$log_file") or die("Can't open $log_file\n"); | |
302 print LF "\n####\t Fastq preparation \n"; | 357 print LF "\n####\t Fastq preparation \n"; |
358 print LF "Fastq format : $TYPE\n"; | |
303 print LF "## Before preparation\n"; | 359 print LF "## Before preparation\n"; |
304 print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n"; | 360 print LF "#Read1 :\t$nb_read1\t#Base :\t$nb_base_read1\n"; |
305 print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n"; | 361 print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n"; |
306 print LF "## After preparation\n"; | 362 print LF "## After preparation\n"; |
307 print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n"; | 363 print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n"; |