Mercurial > repos > dereeper > sniplay
comparison SNP_density/CalculateSlidingWindowsSNPdensitiesFromVCF.pl @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
8:6bf69b40365c | 9:98c37a5d67f4 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Getopt::Long; | |
5 use Bio::SeqIO; | |
6 | |
7 my $usage = qq~Usage:$0 <args> [<opts>] | |
8 where <args> are: | |
9 -i, --input <VCF/Hapmap input> | |
10 -o, --out <output in tabular format> | |
11 -s, --step <step (in bp)> | |
12 ~; | |
13 $usage .= "\n"; | |
14 | |
15 my ($input,$out,$step); | |
16 | |
17 GetOptions( | |
18 "input=s" => \$input, | |
19 "out=s" => \$out, | |
20 "step=s" => \$step, | |
21 ); | |
22 | |
23 | |
24 die $usage | |
25 if ( !$input || !$step || !$out ); | |
26 | |
27 my $max_chr_num = 100; | |
28 | |
29 my %counts; | |
30 my %counts_by_ind; | |
31 | |
32 my $VCFTOOLS_EXE = "vcftools"; | |
33 | |
34 my $is_vcf = `head -4000 $input | grep -c CHROM`; | |
35 my $is_bcf = 0; | |
36 if ($input =~/\.bcf/){ | |
37 $is_bcf = 1; | |
38 } | |
39 | |
40 | |
41 my $IN; | |
42 my $headers; | |
43 my $start_indiv_retrieval = 12; | |
44 my $chrom_retrieval = 2; | |
45 my $pos_retrieval = 3; | |
46 if ($is_vcf or $is_bcf){ | |
47 $start_indiv_retrieval = 9; | |
48 $chrom_retrieval = 0; | |
49 $pos_retrieval = 1; | |
50 if ($is_vcf){ | |
51 $headers = `grep '#CHROM' $input`; | |
52 open($IN,$input); | |
53 } | |
54 elsif ($is_bcf){ | |
55 my $cmd = "$VCFTOOLS_EXE --bcf $input --stdout --recode | head -4000 | grep CHROM"; | |
56 $headers = `$cmd`; | |
57 my $cmd2 = "$VCFTOOLS_EXE --bcf $input --stdout --recode"; | |
58 open $IN, '-|' , "$cmd2" or die "Can not run Vcftools"; | |
59 } | |
60 } | |
61 else{ | |
62 $headers= <$IN>; | |
63 open($IN,$input); | |
64 } | |
65 $headers=~s/\n//g; | |
66 $headers=~s/\r//g; | |
67 my @ind_names = split(/\t/,$headers); | |
68 my @individual_names; | |
69 for (my $i = $start_indiv_retrieval; $i <= $#ind_names; $i++) | |
70 { | |
71 push(@individual_names,$ind_names[$i]); | |
72 } | |
73 my %maximums; | |
74 while(<$IN>) | |
75 { | |
76 my $line = $_; | |
77 $line=~s/\n//g; | |
78 $line=~s/\r//g; | |
79 my @infos = split(/\t/,$line); | |
80 if (scalar @infos > 8 && !/#CHROM/){ | |
81 my $chrom = $infos[$chrom_retrieval]; | |
82 my $position = $infos[$pos_retrieval]; | |
83 if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;} | |
84 my $classe_position = int($position/$step); | |
85 $counts{$chrom}{$classe_position}++; | |
86 | |
87 my $ref_allele = $infos[11]; | |
88 if ($is_vcf or $is_bcf){ | |
89 $ref_allele = "0/0"; | |
90 } | |
91 for (my $i = $start_indiv_retrieval; $i <= $#infos; $i++){ | |
92 if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;} | |
93 if ($infos[$i] ne $ref_allele){ | |
94 $counts_by_ind{$chrom}{$classe_position}{$i}++; | |
95 } | |
96 } | |
97 } | |
98 } | |
99 close($IN); | |
100 | |
101 ####################################################### | |
102 # global | |
103 ####################################################### | |
104 open(my $OUT,">$out"); | |
105 print $OUT "Chromosome Position SNPs\n"; | |
106 my $chr_num = 0; | |
107 foreach my $chrom(sort keys(%counts)) | |
108 { | |
109 $chr_num++; | |
110 my $ref_counts = $counts{$chrom}; | |
111 my %final_counts = %$ref_counts; | |
112 my $x = 0; | |
113 #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts)) | |
114 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) | |
115 { | |
116 my $nb = 0; | |
117 if ($counts{$chrom}{$classe_position}) | |
118 { | |
119 $nb = $counts{$chrom}{$classe_position}; | |
120 } | |
121 $x += $step; | |
122 print $OUT "$chrom $x $nb\n"; | |
123 } | |
124 if ($chr_num >= $max_chr_num){last;} | |
125 } | |
126 close($OUT); | |
127 | |
128 ####################################################### | |
129 # For each individual | |
130 ####################################################### | |
131 open(my $OUT2,">$out.by_sample"); | |
132 $chr_num = 0; | |
133 print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n"; | |
134 foreach my $chrom(sort keys(%counts_by_ind)) | |
135 { | |
136 $chr_num++; | |
137 my $ref_counts = $counts_by_ind{$chrom}; | |
138 my %final_counts = %$ref_counts; | |
139 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) | |
140 { | |
141 print $OUT2 "$chrom"; | |
142 my $num_ind = $start_indiv_retrieval; | |
143 foreach my $indiv(@individual_names) | |
144 { | |
145 my $val = 0; | |
146 | |
147 if ($counts_by_ind{$chrom}{$classe_position}{$num_ind}) | |
148 { | |
149 $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind}; | |
150 } | |
151 print $OUT2 " $val"; | |
152 $num_ind++; | |
153 } | |
154 print $OUT2 "\n"; | |
155 } | |
156 if ($chr_num >= $max_chr_num){last;} | |
157 } | |
158 close($OUT2); |