diff 2.4/script/cluster.pair.pl @ 13:e3609c8714fb draft

Uploaded
author plus91-technologies-pvt-ltd
date Fri, 30 May 2014 03:37:55 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/2.4/script/cluster.pair.pl	Fri May 30 03:37:55 2014 -0400
@@ -0,0 +1,70 @@
+#!/usr/bin/perl                                                                                                                                            
+use strict;
+use POSIX;
+
+my $usage = "cluster.pair.pl maxdist\n";
+my $maxdist = shift or die $usage;
+
+my %count;
+
+while (<STDIN>){
+    chomp;
+    my ($sample, $chrstart, $start, $chrend, $end) = split /\t/;
+    my $nstart = floor ($start/$maxdist);
+    my $nend   = floor ($end/$maxdist);
+    my $coord = {start=>$start, end=>$end};
+
+    push @{$count{$chrstart}->{$nstart}->{$chrend}->{$nend}->{$sample}}, $coord;
+}
+
+print_groups (\%count);
+
+sub print_groups {
+    my ($rcount) = @_;
+    my %count = %{$rcount};
+
+    foreach my $chrstart (sort {$a<=>$b} keys %count) {
+	foreach my $posstart (sort {$a<=>$b} keys %{$count{$chrstart}}) {
+	    my %fcoord = %{$count{$chrstart}->{$posstart}};
+
+	    foreach my $chrend (sort {$a<=>$b} keys %fcoord) {
+		foreach my $posend (sort {$a<=>$b} keys %{$fcoord{$chrend}}){
+		    my @nsamples = sort {$a cmp $b} (keys %{$fcoord{$chrend}->{$posend}});
+
+		    my $cpos = $fcoord{$chrend}->{$posend};
+
+		    my @coords;
+		    my $totnum=0;
+	    
+		    foreach my $sample (@nsamples) {
+			my ($num, $avgx, $avgy) = calc_moments(@{$cpos->{$sample}});
+			push (@coords, {start=>$avgx, end=>$avgy});
+			$totnum+=$num;
+		    }
+
+		    my ($num, $avgx, $avgy)  = calc_moments(@coords);
+	    
+		    print $chrstart."\t".$avgx."\t".$chrend."\t".$avgy ."\t".$num."\t".$totnum."\t" ;
+	    
+		    print $_."\t" foreach (@nsamples);
+		    print "\n";
+		}
+	    }
+	}
+    }
+}
+
+sub calc_moments {
+    my (@pos) = @_;
+
+    my ($num, $sumx, $sumy) = (0,0,0);
+    foreach my $cpos (@pos) {
+	$num++;
+	$sumx+=$cpos->{start};
+	$sumy+=$cpos->{end};
+    }
+    my $avgx = sprintf ("%d", $sumx/$num);
+    my $avgy = sprintf ("%d", $sumy/$num);
+
+    return ($num, $avgx, $avgy);
+}