annotate correlation.pl @ 0:5ebbb889236a draft

Imported from capsule None
author devteam
date Mon, 28 Jul 2014 11:55:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/perl
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
2
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
3 ###########################################################################
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
4 # Purpose: To calculate the correlation of two sets of scores in one file.
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
5 # Usage: correlation.pl infile.bed output.txt column1 column2
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
6 # (column start from 1)
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
7 # Written by: Yi Zhang (June, 2005)
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
8 ###########################################################################
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
9 if (!$ARGV[0] || !$ARGV[1] || !defined($ARGV[2]) || !defined($ARGV[3]) ) {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
10 print STDERR "Usage: correlation.pl infile.bed output.txt column1 column2\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
11 print STDERR " (column start from 1)\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
12 exit;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
13 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
14 my $file = $ARGV[0];
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
15 my $out = $ARGV[1];
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
16
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
17 die "<font color=\"yellow\">The input columns contain numerical values: $ARGV[2], $ARGV[3]</font>.\n" if ($ARGV[2] =~ /[a-zA-Z]+/ || $ARGV[3] =~ /[a-zA-Z]+/);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
18
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
19 my $col1 = $ARGV[2] - 1;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
20 my $col2 = $ARGV[3] - 1;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
21
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
22 my ($f, $o);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
23 my (@a, @b);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
24
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
25 my $n_t = 0;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
26 open($f, $file) or die "Could't open $file, $!\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
27 while(<$f>) {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
28 chomp;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
29 my @t = split(/\t/);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
30 if ($n_t == 0) {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
31 $n_t = scalar(@t) - 1;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
32 die "<font color=\"yellow\">The input column number exceeds the size of the file: $col1, $col2, $n_t</font>\n" if ( $col1 > $n_t || $col2 > $n_t );
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
33 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
34 die "<font color=\"yellow\">The columns you have selected contain non numeric characters:$t[$col1] and $t[$col2] \n</font>" if ($t[$col1] =~ /[a-zA-Z]+/ || $t[$col2] =~ /[a-zA-Z]+/);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
35 push(@a, $t[$col1]);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
36 push(@b, $t[$col2]);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
37 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
38 close($f);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
39
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
40 my $result = correlation(\@a, \@b);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
41
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
42 open($o, ">$out") or die "Couldn't open $out, $!\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
43 $col1 = $col1 + 1;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
44 $col2 = $col2 + 1;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
45 print $o "The correlation of column $col1 and $col2 is $result\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
46 close($o);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
47 print "The correlation of column $col1 and $col2 is $result\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
48
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
49 sub correlation {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
50 my ($array1ref, $array2ref) = @_;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
51 my ($sum1, $sum2);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
52 my ($sum1_squared, $sum2_squared);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
53 foreach (@$array1ref) { $sum1 += $_; $sum1_squared += $_**2; }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
54 foreach (@$array2ref) { $sum2 += $_; $sum2_squared += $_**2; }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
55 my $numerator = (@$array1ref**2) * covariance($array1ref, $array2ref);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
56 my $denominator = sqrt(((@$array1ref * $sum1_squared) - ($sum1**2)) *
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
57 ((@$array1ref * $sum2_squared) - ($sum2**2)));
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
58 my $r;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
59 if ($denominator == 0) {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
60 print STDERR "The denominator is 0.\n";
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
61 exit 0;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
62 } else {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
63 $r = $numerator / $denominator;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
64 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
65 return $r;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
66 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
67
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
68 sub covariance {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
69 my ($array1ref, $array2ref) = @_;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
70 my ($i, $result);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
71 for ($i = 0; $i < @$array1ref; $i++) {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
72 $result += $array1ref->[$i] * $array2ref->[$i];
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
73 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
74 $result /= @$array1ref;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
75 $result -= mean($array1ref) * mean($array2ref);
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
76 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
77
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
78 sub mean {
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
79 my ($arrayref) = @_;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
80 my $result;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
81 foreach (@$arrayref) { $result += $_; }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
82 return $result/@$arrayref;
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
83 }
5ebbb889236a Imported from capsule None
devteam
parents:
diff changeset
84