comparison SNP_density/CalculateSlidingWindowsSNPdensitiesFromVCF.pl @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents
children
comparison
equal deleted inserted replaced
8:6bf69b40365c 9:98c37a5d67f4
1 #!/usr/bin/perl
2
3 use strict;
4 use Getopt::Long;
5 use Bio::SeqIO;
6
7 my $usage = qq~Usage:$0 <args> [<opts>]
8 where <args> are:
9 -i, --input <VCF/Hapmap input>
10 -o, --out <output in tabular format>
11 -s, --step <step (in bp)>
12 ~;
13 $usage .= "\n";
14
15 my ($input,$out,$step);
16
17 GetOptions(
18 "input=s" => \$input,
19 "out=s" => \$out,
20 "step=s" => \$step,
21 );
22
23
24 die $usage
25 if ( !$input || !$step || !$out );
26
27 my $max_chr_num = 100;
28
29 my %counts;
30 my %counts_by_ind;
31
32 my $VCFTOOLS_EXE = "vcftools";
33
34 my $is_vcf = `head -4000 $input | grep -c CHROM`;
35 my $is_bcf = 0;
36 if ($input =~/\.bcf/){
37 $is_bcf = 1;
38 }
39
40
41 my $IN;
42 my $headers;
43 my $start_indiv_retrieval = 12;
44 my $chrom_retrieval = 2;
45 my $pos_retrieval = 3;
46 if ($is_vcf or $is_bcf){
47 $start_indiv_retrieval = 9;
48 $chrom_retrieval = 0;
49 $pos_retrieval = 1;
50 if ($is_vcf){
51 $headers = `grep '#CHROM' $input`;
52 open($IN,$input);
53 }
54 elsif ($is_bcf){
55 my $cmd = "$VCFTOOLS_EXE --bcf $input --stdout --recode | head -4000 | grep CHROM";
56 $headers = `$cmd`;
57 my $cmd2 = "$VCFTOOLS_EXE --bcf $input --stdout --recode";
58 open $IN, '-|' , "$cmd2" or die "Can not run Vcftools";
59 }
60 }
61 else{
62 $headers= <$IN>;
63 open($IN,$input);
64 }
65 $headers=~s/\n//g;
66 $headers=~s/\r//g;
67 my @ind_names = split(/\t/,$headers);
68 my @individual_names;
69 for (my $i = $start_indiv_retrieval; $i <= $#ind_names; $i++)
70 {
71 push(@individual_names,$ind_names[$i]);
72 }
73 my %maximums;
74 while(<$IN>)
75 {
76 my $line = $_;
77 $line=~s/\n//g;
78 $line=~s/\r//g;
79 my @infos = split(/\t/,$line);
80 if (scalar @infos > 8 && !/#CHROM/){
81 my $chrom = $infos[$chrom_retrieval];
82 my $position = $infos[$pos_retrieval];
83 if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;}
84 my $classe_position = int($position/$step);
85 $counts{$chrom}{$classe_position}++;
86
87 my $ref_allele = $infos[11];
88 if ($is_vcf or $is_bcf){
89 $ref_allele = "0/0";
90 }
91 for (my $i = $start_indiv_retrieval; $i <= $#infos; $i++){
92 if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;}
93 if ($infos[$i] ne $ref_allele){
94 $counts_by_ind{$chrom}{$classe_position}{$i}++;
95 }
96 }
97 }
98 }
99 close($IN);
100
101 #######################################################
102 # global
103 #######################################################
104 open(my $OUT,">$out");
105 print $OUT "Chromosome Position SNPs\n";
106 my $chr_num = 0;
107 foreach my $chrom(sort keys(%counts))
108 {
109 $chr_num++;
110 my $ref_counts = $counts{$chrom};
111 my %final_counts = %$ref_counts;
112 my $x = 0;
113 #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts))
114 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
115 {
116 my $nb = 0;
117 if ($counts{$chrom}{$classe_position})
118 {
119 $nb = $counts{$chrom}{$classe_position};
120 }
121 $x += $step;
122 print $OUT "$chrom $x $nb\n";
123 }
124 if ($chr_num >= $max_chr_num){last;}
125 }
126 close($OUT);
127
128 #######################################################
129 # For each individual
130 #######################################################
131 open(my $OUT2,">$out.by_sample");
132 $chr_num = 0;
133 print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n";
134 foreach my $chrom(sort keys(%counts_by_ind))
135 {
136 $chr_num++;
137 my $ref_counts = $counts_by_ind{$chrom};
138 my %final_counts = %$ref_counts;
139 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++)
140 {
141 print $OUT2 "$chrom";
142 my $num_ind = $start_indiv_retrieval;
143 foreach my $indiv(@individual_names)
144 {
145 my $val = 0;
146
147 if ($counts_by_ind{$chrom}{$classe_position}{$num_ind})
148 {
149 $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind};
150 }
151 print $OUT2 " $val";
152 $num_ind++;
153 }
154 print $OUT2 "\n";
155 }
156 if ($chr_num >= $max_chr_num){last;}
157 }
158 close($OUT2);