13
|
1 #!/usr/bin/perl
|
|
2 use strict;
|
|
3 use POSIX;
|
|
4
|
|
5 my $usage = "cluster.pair.pl maxdist\n";
|
|
6 my $maxdist = shift or die $usage;
|
|
7
|
|
8 my %count;
|
|
9
|
|
10 while (<STDIN>){
|
|
11 chomp;
|
|
12 my ($sample, $chrstart, $start, $chrend, $end) = split /\t/;
|
|
13 my $nstart = floor ($start/$maxdist);
|
|
14 my $nend = floor ($end/$maxdist);
|
|
15 my $coord = {start=>$start, end=>$end};
|
|
16
|
|
17 push @{$count{$chrstart}->{$nstart}->{$chrend}->{$nend}->{$sample}}, $coord;
|
|
18 }
|
|
19
|
|
20 print_groups (\%count);
|
|
21
|
|
22 sub print_groups {
|
|
23 my ($rcount) = @_;
|
|
24 my %count = %{$rcount};
|
|
25
|
|
26 foreach my $chrstart (sort {$a<=>$b} keys %count) {
|
|
27 foreach my $posstart (sort {$a<=>$b} keys %{$count{$chrstart}}) {
|
|
28 my %fcoord = %{$count{$chrstart}->{$posstart}};
|
|
29
|
|
30 foreach my $chrend (sort {$a<=>$b} keys %fcoord) {
|
|
31 foreach my $posend (sort {$a<=>$b} keys %{$fcoord{$chrend}}){
|
|
32 my @nsamples = sort {$a cmp $b} (keys %{$fcoord{$chrend}->{$posend}});
|
|
33
|
|
34 my $cpos = $fcoord{$chrend}->{$posend};
|
|
35
|
|
36 my @coords;
|
|
37 my $totnum=0;
|
|
38
|
|
39 foreach my $sample (@nsamples) {
|
|
40 my ($num, $avgx, $avgy) = calc_moments(@{$cpos->{$sample}});
|
|
41 push (@coords, {start=>$avgx, end=>$avgy});
|
|
42 $totnum+=$num;
|
|
43 }
|
|
44
|
|
45 my ($num, $avgx, $avgy) = calc_moments(@coords);
|
|
46
|
|
47 print $chrstart."\t".$avgx."\t".$chrend."\t".$avgy ."\t".$num."\t".$totnum."\t" ;
|
|
48
|
|
49 print $_."\t" foreach (@nsamples);
|
|
50 print "\n";
|
|
51 }
|
|
52 }
|
|
53 }
|
|
54 }
|
|
55 }
|
|
56
|
|
57 sub calc_moments {
|
|
58 my (@pos) = @_;
|
|
59
|
|
60 my ($num, $sumx, $sumy) = (0,0,0);
|
|
61 foreach my $cpos (@pos) {
|
|
62 $num++;
|
|
63 $sumx+=$cpos->{start};
|
|
64 $sumy+=$cpos->{end};
|
|
65 }
|
|
66 my $avgx = sprintf ("%d", $sumx/$num);
|
|
67 my $avgy = sprintf ("%d", $sumy/$num);
|
|
68
|
|
69 return ($num, $avgx, $avgy);
|
|
70 }
|