annotate rapsodyn/PileupVariant.pl @ 14:93e6f2af1ce2 draft

Uploaded
author mcharles
date Mon, 26 Jan 2015 18:10:52 -0500
parents 0a6c1cfe4dc8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
1 #!/usr/bin/perl
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
2 #V1.0.1 added log, option parameters
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
3 use strict;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
4 use warnings;
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
5 use Getopt::Long;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
6 #v1.1.0 choose exclusion file
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
7 #v1.0.2 manage empty files
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
8 #v1.0.1 bug correction
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
9 #V1.0.1 added log, option parameters
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
10
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
11 my $input_pileup_file;
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
12 my $output_pileup_file;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
13 my $input_exclusion_file;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
14 my $log_file;
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
15
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
16 my $nb_base_covered=0;
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
17 my $nb_variant=0;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
18 my $nb_variant_conserved=0;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
19 my $do_exclusion=0;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
20 my $nb_exclusion=0;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
21 my $nb_excluded=0;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
22 my %exclusion;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
23 GetOptions (
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
24 "input_pileup_file=s" => \$input_pileup_file,
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
25 "input_exclusion_file=s" => \$input_exclusion_file,
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
26 "log_file=s" => \$log_file
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
27 ) or die("Error in command line arguments\n");
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
28
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
29 open(IN, $input_pileup_file) or die ("Can't open $input_pileup_file\n");
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
30 if ( -z IN){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
31 exit(0);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
32 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
33
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
34 if ($input_exclusion_file){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
35 open (EX,$input_exclusion_file) or die ("Can't open $input_exclusion_file\n");
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
36 if ( -z EX){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
37
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
38 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
39 else {
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
40 $do_exclusion=1;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
41 while (my $line=<EX>){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
42 $nb_exclusion++;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
43 chomp($line);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
44 my @fields = split(/\s+/,$line);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
45 if (($fields[0])&&($fields[1])){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
46 $exclusion{"$fields[0]\t$fields[1]"}=1;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
47 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
48 else {
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
49 print STDERR "Error formatting in Exclusion File\nExclusion file lines should be 'chromosome number' TAB 'position'\n$line\n",$fields[0],"\n",$fields[1],"\n";
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
50 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
51 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
52
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
53 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
54 close (EX);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
55 }
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
56
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
57 #Extraction des variants
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
58 my $nb_line=0;
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
59 while (my $line=<IN>){
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
60 #print $line;
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
61 if ($line !~ /^\s*$/){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
62 my @fields = split(/\s+/,$line);
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
63 if ($fields[4]){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
64 $nb_base_covered++;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
65 my $pile = $fields[4];
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
66 $pile =~ s/\$//g; #the read start at this position
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
67 $pile =~ s/\^.//g; #the read end at this position followed by quality char
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
68 if ($fields[4]=~/[ATGCN]/i){ #Indel are +/-\d[ATGCatgc]+ and SNP are [ATGCatgc]+ so [ATGCN]/i detection cover indel and snp
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
69 $nb_variant++;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
70 if (($do_exclusion==1)&&($exclusion{"$fields[0]\t$fields[1]"})){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
71
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
72 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
73 else {
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
74 print $line;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
75 $nb_variant_conserved++;
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
76 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
77 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
78 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
79 elsif ($fields[3]==0){
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
80 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
81 else {
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
82 print STDERR "Error in pileup format\nPileup result expected at column 5\n$line\n";
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
83 }
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
84 }
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
85 #print $line;
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
86
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
87
0
442a7c88b886 Uploaded
mcharles
parents:
diff changeset
88 }
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
89 close(IN);
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
90
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
91 open (LF,">$log_file") or die("Can't open $log_file\n");
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
92 print LF "\n####\t Variant extraction \n";
10
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
93 print LF "Position covered\t:\t$nb_base_covered\n";
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
94 print LF "Variant detected\t:\t$nb_variant\n";
0a6c1cfe4dc8 Uploaded
mcharles
parents: 7
diff changeset
95 print LF "Variant conserved\t:\t$nb_variant_conserved\n";
7
3f7b0788a1c4 Uploaded
mcharles
parents: 0
diff changeset
96 close (LF);