comparison AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl @ 6:ebb0ac9b6fa9 draft

planemo upload
author gandres
date Mon, 23 May 2016 17:49:17 -0400
parents
children
comparison
equal deleted inserted replaced
5:ec22fcacb66c 6:ebb0ac9b6fa9
1 #!/usr/bin/perl
2
3 use strict;
4 use Switch;
5 use Getopt::Long;
6 use Bio::SeqIO;
7
8 my $usage = qq~Usage:$0 <args> [<opts>]
9 where <args> are:
10 -v, --vcf <VCF input>
11 -o, --out <output>
12 -s, --step <step (in bp)>
13 ~;
14 $usage .= "\n";
15
16 my ($vcf,$out,$step,$min_depth);
17 $min_depth = 5;
18
19 GetOptions(
20 "vcf=s" => \$vcf,
21 "out=s" => \$out,
22 "min_depth=s"=> \$min_depth,
23 "step=s" => \$step,
24 );
25
26
27 die $usage
28 if ( !$vcf || !$step || !$out );
29
30 if ($step =~/^(\d+)\s*$/){
31 $step = $1;
32 }
33 else{
34 die "Error: step size must be an integer\n";
35 }
36
37
38 my $VCFTOOLS_EXE = "vcftools";
39
40 my %max;
41 my %counts_ns;
42 my %counts_s;
43 my $nb_gene = 0;
44 my $nb_intergenic = 0;
45 my $nb_exon = 0;
46 my $nb_intron = 0;
47 my $nb_UTR = 0;
48 my $nb_syn = 0;
49 my $nb_nsyn = 0;
50 if ($vcf =~/\.bcf/){
51 system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out");
52 $vcf = "$out.recode.vcf";
53 }
54 open(my $VCF,$vcf);
55 while(<$VCF>)
56 {
57 my @infos = split(/\t/,$_);
58 if (scalar @infos > 8 && !/#CHROM/)
59 {
60 my $chrom = $infos[0];
61 my $position = $infos[1];
62 my $id = $infos[2];
63
64
65 my $classe_position = int($position/$step);
66 if (/=NON_SYNONYMOUS_CODING/){
67 $counts_ns{$chrom}{$classe_position}++;
68 $nb_nsyn++;
69 }
70 if (/=SYNONYMOUS_CODING/){
71 $counts_s{$chrom}{$classe_position}++;
72 $nb_syn++;
73 }
74 if (/INTERGENIC/){
75 $nb_intergenic++;
76 }
77 elsif (/EFF=/){
78 $nb_gene++;
79 }
80 if (/CODING/){
81 }
82 elsif (/UTR/){
83 $nb_UTR++;
84 }
85 elsif (/INTRON/){
86 $nb_intron++;
87 }
88 $max{$chrom} = $classe_position;
89 }
90 }
91 my $nb_exon = $nb_gene - $nb_intron - $nb_UTR;
92
93 open(my $OUT,">$out");
94 #print $OUT "Chrom Bin Nb synonymous SNPs Nb non-synonymous SNPs dN/dS ratio\n";
95 print $OUT "Chrom Bin dN/dS ratio\n";
96 foreach my $chrom(sort keys(%counts_s))
97 {
98 my $maximum = $max{$chrom};
99 for(my $i=1;$i<=$maximum;$i++)
100 {
101 my $classe_position = $i;
102 my $nb_s = 0;
103 my $nb_ns = 0;
104 my $ratio = 0;
105 if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};}
106 if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};}
107 if ($nb_s){$ratio = $nb_ns/$nb_s;}
108 my $bin = $classe_position * $step;
109 #print $OUT "$chrom $classe_position $nb_s $nb_ns $ratio\n";
110 print $OUT "$chrom $bin $ratio\n";
111 }
112 }
113 close($OUT);
114
115
116
117 open(my $A2, ">$out.location");
118 print $A2 "Intergenic $nb_intergenic Intergenic:$nb_intergenic\n";
119 print $A2 "Genic $nb_gene Exon:$nb_exon Intron:$nb_intron UTR:$nb_UTR\n";
120 close($A2);
121
122 open(my $A2, ">$out.effect");
123 print $A2 "Intron $nb_intron Intron:$nb_intron\n";
124 print $A2 "UTR $nb_UTR UTR:$nb_UTR\n";
125 print $A2 "Exon $nb_exon Synonym:$nb_syn Non-syn:$nb_nsyn\n";
126 close($A2);
127