comparison rapsodyn/PileupVariant.pl @ 10:0a6c1cfe4dc8 draft

Uploaded
author mcharles
date Mon, 19 Jan 2015 04:33:21 -0500
parents 3f7b0788a1c4
children
comparison
equal deleted inserted replaced
9:0e7c6fe60646 10:0a6c1cfe4dc8
1 #!/usr/bin/perl 1 #!/usr/bin/perl
2 #V1.0.1 added log, option parameters 2 #V1.0.1 added log, option parameters
3 use strict; 3 use strict;
4 use warnings; 4 use warnings;
5 use Getopt::Long; 5 use Getopt::Long;
6 #v1.1.0 choose exclusion file
7 #v1.0.2 manage empty files
8 #v1.0.1 bug correction
9 #V1.0.1 added log, option parameters
6 10
7 my $input_pileup_file; 11 my $input_pileup_file;
8 my $output_pileup_file; 12 my $output_pileup_file;
13 my $input_exclusion_file;
9 my $log_file; 14 my $log_file;
10 15
11 my $nb_base_covered=0; 16 my $nb_base_covered=0;
12 my $nb_variant=0; 17 my $nb_variant=0;
18 my $nb_variant_conserved=0;
19 my $do_exclusion=0;
20 my $nb_exclusion=0;
21 my $nb_excluded=0;
22 my %exclusion;
13 GetOptions ( 23 GetOptions (
14 "input_pileup_file=s" => \$input_pileup_file, 24 "input_pileup_file=s" => \$input_pileup_file,
25 "input_exclusion_file=s" => \$input_exclusion_file,
15 "log_file=s" => \$log_file 26 "log_file=s" => \$log_file
16 ) or die("Error in command line arguments\n"); 27 ) or die("Error in command line arguments\n");
17 28
18 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n"); 29 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n");
30 if ( -z IN){
31 exit(0);
32 }
33
34 if ($input_exclusion_file){
35 open (EX,$input_exclusion_file) or die ("Can't open $input_exclusion_file\n");
36 if ( -z EX){
37
38 }
39 else {
40 $do_exclusion=1;
41 while (my $line=<EX>){
42 $nb_exclusion++;
43 chomp($line);
44 my @fields = split(/\s+/,$line);
45 if (($fields[0])&&($fields[1])){
46 $exclusion{"$fields[0]\t$fields[1]"}=1;
47 }
48 else {
49 print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n";
50 }
51 }
52
53 }
54 close (EX);
55 }
19 56
20 #Extraction des variants 57 #Extraction des variants
21 my $nb_line=0; 58 my $nb_line=0;
22 while (my $line=<IN>){ 59 while (my $line=<IN>){
23 #print $line; 60 #print $line;
24 $nb_base_covered++; 61 if ($line !~ /^\s*$/){
25 $line =~ s/\$//g; #the read start at this position 62 my @fields = split(/\s+/,$line);
26 $line =~ s/\^.//g; #the read end at this position followed by quality char 63 if ($fields[4]){
64 $nb_base_covered++;
65 my $pile = $fields[4];
66 $pile =~ s/\$//g; #the read start at this position
67 $pile =~ s/\^.//g; #the read end at this position followed by quality char
68 if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp
69 $nb_variant++;
70 if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){
71
72 }
73 else {
74 print $line;
75 $nb_variant_conserved++;
76 }
77 }
78 }
79 elsif ($fields[3]==0){
80 }
81 else {
82 print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n";
83 }
84 }
27 #print $line; 85 #print $line;
28 86
29 my @field = split(/\s+/,$line); 87
30 if ($field[4]=~/[ATGCN]/i){
31 print $line;
32 $nb_variant++;
33 }
34 } 88 }
35 close(IN); 89 close(IN);
36 90
37 open (LF,">$log_file") or die("Can't open $log_file\n"); 91 open (LF,">$log_file") or die("Can't open $log_file\n");
38 print LF "\n####\t Variant extraction \n"; 92 print LF "\n####\t Variant extraction \n";
39 print LF "Position covered :\t$nb_base_covered\n"; 93 print LF "Position covered\t:\t$nb_base_covered\n";
40 print LF "Variant detected :\t$nb_variant\n"; 94 print LF "Variant detected\t:\t$nb_variant\n";
95 print LF "Variant conserved\t:\t$nb_variant_conserved\n";
41 close (LF); 96 close (LF);