Mercurial > repos > dereeper > sniplay
comparison SNP_density/CalculateSlidingWindowsSNPdensitiesFromHapmap.pl @ 1:420b57c3c185 draft
Uploaded
author | dereeper |
---|---|
date | Fri, 10 Jul 2015 04:39:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3e19d0dfcf3e | 1:420b57c3c185 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 use strict; | |
4 use Switch; | |
5 use Getopt::Long; | |
6 use Bio::SeqIO; | |
7 | |
8 my $usage = qq~Usage:$0 <args> [<opts>] | |
9 where <args> are: | |
10 -i, --input <Hapmap input> | |
11 -o, --out <output in tabular format> | |
12 -s, --step <step (in bp)> | |
13 ~; | |
14 $usage .= "\n"; | |
15 | |
16 my ($input,$out,$step); | |
17 | |
18 GetOptions( | |
19 "input=s" => \$input, | |
20 "out=s" => \$out, | |
21 "step=s" => \$step, | |
22 ); | |
23 | |
24 | |
25 die $usage | |
26 if ( !$input || !$step || !$out ); | |
27 | |
28 my $max_chr_num = 100; | |
29 | |
30 my %counts; | |
31 my %counts_by_ind; | |
32 open(my $HAPMAP,$input); | |
33 my $headers= <$HAPMAP>; | |
34 $headers=~s/\n//g; | |
35 $headers=~s/\r//g; | |
36 my @ind_names = split(/\t/,$headers); | |
37 my @individual_names; | |
38 for (my $i = 12; $i <= $#ind_names; $i++) | |
39 { | |
40 push(@individual_names,$ind_names[$i]); | |
41 } | |
42 my %maximums; | |
43 while(<$HAPMAP>) | |
44 { | |
45 my $line = $_; | |
46 $line=~s/\n//g; | |
47 $line=~s/\r//g; | |
48 my @infos = split(/\t/,$line); | |
49 my $chrom = $infos[2]; | |
50 my $position = $infos[3]; | |
51 if ($position > $maximums{$chrom}){$maximums{$chrom}=$position;} | |
52 my $classe_position = int($position/$step); | |
53 $counts{$chrom}{$classe_position}++; | |
54 | |
55 my $ref_allele = $infos[11]; | |
56 for (my $i = 12; $i <= $#infos; $i++) | |
57 { | |
58 if (!$counts_by_ind{$chrom}{$classe_position}{$i}){$counts_by_ind{$chrom}{$classe_position}{$i} = 0;} | |
59 if ($infos[$i] ne $ref_allele) | |
60 { | |
61 $counts_by_ind{$chrom}{$classe_position}{$i}++; | |
62 } | |
63 } | |
64 } | |
65 close($HAPMAP); | |
66 | |
67 ####################################################### | |
68 # global | |
69 ####################################################### | |
70 open(my $OUT,">$out"); | |
71 print $OUT "Chromosome Position SNPs\n"; | |
72 my $chr_num = 0; | |
73 foreach my $chrom(sort keys(%counts)) | |
74 { | |
75 $chr_num++; | |
76 my $ref_counts = $counts{$chrom}; | |
77 my %final_counts = %$ref_counts; | |
78 my $x = 0; | |
79 #foreach my $classe_position(sort {$a<=>$b} keys(%final_counts)) | |
80 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) | |
81 { | |
82 my $nb = 0; | |
83 if ($counts{$chrom}{$classe_position}) | |
84 { | |
85 $nb = $counts{$chrom}{$classe_position}; | |
86 } | |
87 $x += $step; | |
88 print $OUT "$chrom $x $nb\n"; | |
89 } | |
90 if ($chr_num >= $max_chr_num){last;} | |
91 } | |
92 close($OUT); | |
93 | |
94 ####################################################### | |
95 # For each individual | |
96 ####################################################### | |
97 open(my $OUT2,">$out.by_sample"); | |
98 $chr_num = 0; | |
99 print $OUT2 "Chromosome ".join("\t",@individual_names) . "\n"; | |
100 foreach my $chrom(sort keys(%counts_by_ind)) | |
101 { | |
102 $chr_num++; | |
103 my $ref_counts = $counts_by_ind{$chrom}; | |
104 my %final_counts = %$ref_counts; | |
105 for (my $classe_position = 0; $classe_position <= $maximums{$chrom}/$step;$classe_position++) | |
106 { | |
107 print $OUT2 "$chrom"; | |
108 my $num_ind = 12; | |
109 foreach my $indiv(@individual_names) | |
110 { | |
111 my $val = 0; | |
112 | |
113 if ($counts_by_ind{$chrom}{$classe_position}{$num_ind}) | |
114 { | |
115 $val = $counts_by_ind{$chrom}{$classe_position}{$num_ind}; | |
116 } | |
117 print $OUT2 " $val"; | |
118 $num_ind++; | |
119 } | |
120 print $OUT2 "\n"; | |
121 } | |
122 if ($chr_num >= $max_chr_num){last;} | |
123 } | |
124 close($OUT2); |