Mercurial > repos > mcharles > rapsosnp
comparison rapsodyn/PileupVariant.pl @ 10:0a6c1cfe4dc8 draft
Uploaded
author | mcharles |
---|---|
date | Mon, 19 Jan 2015 04:33:21 -0500 |
parents | 3f7b0788a1c4 |
children |
comparison
equal
deleted
inserted
replaced
9:0e7c6fe60646 | 10:0a6c1cfe4dc8 |
---|---|
1 #!/usr/bin/perl | 1 #!/usr/bin/perl |
2 #V1.0.1 added log, option parameters | 2 #V1.0.1 added log, option parameters |
3 use strict; | 3 use strict; |
4 use warnings; | 4 use warnings; |
5 use Getopt::Long; | 5 use Getopt::Long; |
6 #v1.1.0 choose exclusion file | |
7 #v1.0.2 manage empty files | |
8 #v1.0.1 bug correction | |
9 #V1.0.1 added log, option parameters | |
6 | 10 |
7 my $input_pileup_file; | 11 my $input_pileup_file; |
8 my $output_pileup_file; | 12 my $output_pileup_file; |
13 my $input_exclusion_file; | |
9 my $log_file; | 14 my $log_file; |
10 | 15 |
11 my $nb_base_covered=0; | 16 my $nb_base_covered=0; |
12 my $nb_variant=0; | 17 my $nb_variant=0; |
18 my $nb_variant_conserved=0; | |
19 my $do_exclusion=0; | |
20 my $nb_exclusion=0; | |
21 my $nb_excluded=0; | |
22 my %exclusion; | |
13 GetOptions ( | 23 GetOptions ( |
14 "input_pileup_file=s" => \$input_pileup_file, | 24 "input_pileup_file=s" => \$input_pileup_file, |
25 "input_exclusion_file=s" => \$input_exclusion_file, | |
15 "log_file=s" => \$log_file | 26 "log_file=s" => \$log_file |
16 ) or die("Error in command line arguments\n"); | 27 ) or die("Error in command line arguments\n"); |
17 | 28 |
18 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n"); | 29 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n"); |
30 if ( -z IN){ | |
31 exit(0); | |
32 } | |
33 | |
34 if ($input_exclusion_file){ | |
35 open (EX,$input_exclusion_file) or die ("Can't open $input_exclusion_file\n"); | |
36 if ( -z EX){ | |
37 | |
38 } | |
39 else { | |
40 $do_exclusion=1; | |
41 while (my $line=<EX>){ | |
42 $nb_exclusion++; | |
43 chomp($line); | |
44 my @fields = split(/\s+/,$line); | |
45 if (($fields[0])&&($fields[1])){ | |
46 $exclusion{"$fields[0]\t$fields[1]"}=1; | |
47 } | |
48 else { | |
49 print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n"; | |
50 } | |
51 } | |
52 | |
53 } | |
54 close (EX); | |
55 } | |
19 | 56 |
20 #Extraction des variants | 57 #Extraction des variants |
21 my $nb_line=0; | 58 my $nb_line=0; |
22 while (my $line=<IN>){ | 59 while (my $line=<IN>){ |
23 #print $line; | 60 #print $line; |
24 $nb_base_covered++; | 61 if ($line !~ /^\s*$/){ |
25 $line =~ s/\$//g; #the read start at this position | 62 my @fields = split(/\s+/,$line); |
26 $line =~ s/\^.//g; #the read end at this position followed by quality char | 63 if ($fields[4]){ |
64 $nb_base_covered++; | |
65 my $pile = $fields[4]; | |
66 $pile =~ s/\$//g; #the read start at this position | |
67 $pile =~ s/\^.//g; #the read end at this position followed by quality char | |
68 if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp | |
69 $nb_variant++; | |
70 if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){ | |
71 | |
72 } | |
73 else { | |
74 print $line; | |
75 $nb_variant_conserved++; | |
76 } | |
77 } | |
78 } | |
79 elsif ($fields[3]==0){ | |
80 } | |
81 else { | |
82 print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n"; | |
83 } | |
84 } | |
27 #print $line; | 85 #print $line; |
28 | 86 |
29 my @field = split(/\s+/,$line); | 87 |
30 if ($field[4]=~/[ATGCN]/i){ | |
31 print $line; | |
32 $nb_variant++; | |
33 } | |
34 } | 88 } |
35 close(IN); | 89 close(IN); |
36 | 90 |
37 open (LF,">$log_file") or die("Can't open $log_file\n"); | 91 open (LF,">$log_file") or die("Can't open $log_file\n"); |
38 print LF "\n####\t Variant extraction \n"; | 92 print LF "\n####\t Variant extraction \n"; |
39 print LF "Position covered :\t$nb_base_covered\n"; | 93 print LF "Position covered\t:\t$nb_base_covered\n"; |
40 print LF "Variant detected :\t$nb_variant\n"; | 94 print LF "Variant detected\t:\t$nb_variant\n"; |
95 print LF "Variant conserved\t:\t$nb_variant_conserved\n"; | |
41 close (LF); | 96 close (LF); |