Mercurial > repos > dereeper > sniplay
comparison AnnotationStatsFromVCF/AnnotationStatsFromVCF.pl @ 6:ebb0ac9b6fa9 draft
planemo upload
| author | gandres |
|---|---|
| date | Mon, 23 May 2016 17:49:17 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:ec22fcacb66c | 6:ebb0ac9b6fa9 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 use Switch; | |
| 5 use Getopt::Long; | |
| 6 use Bio::SeqIO; | |
| 7 | |
| 8 my $usage = qq~Usage:$0 <args> [<opts>] | |
| 9 where <args> are: | |
| 10 -v, --vcf <VCF input> | |
| 11 -o, --out <output> | |
| 12 -s, --step <step (in bp)> | |
| 13 ~; | |
| 14 $usage .= "\n"; | |
| 15 | |
| 16 my ($vcf,$out,$step,$min_depth); | |
| 17 $min_depth = 5; | |
| 18 | |
| 19 GetOptions( | |
| 20 "vcf=s" => \$vcf, | |
| 21 "out=s" => \$out, | |
| 22 "min_depth=s"=> \$min_depth, | |
| 23 "step=s" => \$step, | |
| 24 ); | |
| 25 | |
| 26 | |
| 27 die $usage | |
| 28 if ( !$vcf || !$step || !$out ); | |
| 29 | |
| 30 if ($step =~/^(\d+)\s*$/){ | |
| 31 $step = $1; | |
| 32 } | |
| 33 else{ | |
| 34 die "Error: step size must be an integer\n"; | |
| 35 } | |
| 36 | |
| 37 | |
| 38 my $VCFTOOLS_EXE = "vcftools"; | |
| 39 | |
| 40 my %max; | |
| 41 my %counts_ns; | |
| 42 my %counts_s; | |
| 43 my $nb_gene = 0; | |
| 44 my $nb_intergenic = 0; | |
| 45 my $nb_exon = 0; | |
| 46 my $nb_intron = 0; | |
| 47 my $nb_UTR = 0; | |
| 48 my $nb_syn = 0; | |
| 49 my $nb_nsyn = 0; | |
| 50 if ($vcf =~/\.bcf/){ | |
| 51 system("$VCFTOOLS_EXE --bcf $vcf --recode --recode-INFO-all --out $out"); | |
| 52 $vcf = "$out.recode.vcf"; | |
| 53 } | |
| 54 open(my $VCF,$vcf); | |
| 55 while(<$VCF>) | |
| 56 { | |
| 57 my @infos = split(/\t/,$_); | |
| 58 if (scalar @infos > 8 && !/#CHROM/) | |
| 59 { | |
| 60 my $chrom = $infos[0]; | |
| 61 my $position = $infos[1]; | |
| 62 my $id = $infos[2]; | |
| 63 | |
| 64 | |
| 65 my $classe_position = int($position/$step); | |
| 66 if (/=NON_SYNONYMOUS_CODING/){ | |
| 67 $counts_ns{$chrom}{$classe_position}++; | |
| 68 $nb_nsyn++; | |
| 69 } | |
| 70 if (/=SYNONYMOUS_CODING/){ | |
| 71 $counts_s{$chrom}{$classe_position}++; | |
| 72 $nb_syn++; | |
| 73 } | |
| 74 if (/INTERGENIC/){ | |
| 75 $nb_intergenic++; | |
| 76 } | |
| 77 elsif (/EFF=/){ | |
| 78 $nb_gene++; | |
| 79 } | |
| 80 if (/CODING/){ | |
| 81 } | |
| 82 elsif (/UTR/){ | |
| 83 $nb_UTR++; | |
| 84 } | |
| 85 elsif (/INTRON/){ | |
| 86 $nb_intron++; | |
| 87 } | |
| 88 $max{$chrom} = $classe_position; | |
| 89 } | |
| 90 } | |
| 91 my $nb_exon = $nb_gene - $nb_intron - $nb_UTR; | |
| 92 | |
| 93 open(my $OUT,">$out"); | |
| 94 #print $OUT "Chrom Bin Nb synonymous SNPs Nb non-synonymous SNPs dN/dS ratio\n"; | |
| 95 print $OUT "Chrom Bin dN/dS ratio\n"; | |
| 96 foreach my $chrom(sort keys(%counts_s)) | |
| 97 { | |
| 98 my $maximum = $max{$chrom}; | |
| 99 for(my $i=1;$i<=$maximum;$i++) | |
| 100 { | |
| 101 my $classe_position = $i; | |
| 102 my $nb_s = 0; | |
| 103 my $nb_ns = 0; | |
| 104 my $ratio = 0; | |
| 105 if ($counts_s{$chrom}{$classe_position}){$nb_s = $counts_s{$chrom}{$classe_position};} | |
| 106 if ($counts_ns{$chrom}{$classe_position}){$nb_ns = $counts_ns{$chrom}{$classe_position};} | |
| 107 if ($nb_s){$ratio = $nb_ns/$nb_s;} | |
| 108 my $bin = $classe_position * $step; | |
| 109 #print $OUT "$chrom $classe_position $nb_s $nb_ns $ratio\n"; | |
| 110 print $OUT "$chrom $bin $ratio\n"; | |
| 111 } | |
| 112 } | |
| 113 close($OUT); | |
| 114 | |
| 115 | |
| 116 | |
| 117 open(my $A2, ">$out.location"); | |
| 118 print $A2 "Intergenic $nb_intergenic Intergenic:$nb_intergenic\n"; | |
| 119 print $A2 "Genic $nb_gene Exon:$nb_exon Intron:$nb_intron UTR:$nb_UTR\n"; | |
| 120 close($A2); | |
| 121 | |
| 122 open(my $A2, ">$out.effect"); | |
| 123 print $A2 "Intron $nb_intron Intron:$nb_intron\n"; | |
| 124 print $A2 "UTR $nb_UTR UTR:$nb_UTR\n"; | |
| 125 print $A2 "Exon $nb_exon Synonym:$nb_syn Non-syn:$nb_nsyn\n"; | |
| 126 close($A2); | |
| 127 |
