annotate lib/matrix2fisher.pl @ 2:37f15fe01f14

Uploaded
author jesse-erdmann
date Fri, 09 Dec 2011 12:02:18 -0500
parents 1437a2df99c0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
1 #!/usr/bin/perl
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
2 # This script takes a matrix as input and generates an R script to look for
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
3 # simularity between rows.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
4 #
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
5 # 'tab2list' is a complement to list2tab. It reads a 2-dimensional table and
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
6 # produces a list in the form: row label - column label - value.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
7 #
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
8 use Getopt::Long;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
9
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
10 $keycols = 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
11 $usage = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
12 @cis;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
13 %hash;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
14 open OUT, "> FISH/Fisher_pre.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
15 open OUT2, "> FISH/Fisher_pre_named.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
16 GetOptions("keycols=i" => \$keycols, "usage" => \$usage, "help"=>\$usage);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
17
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
18 if ($usage) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
19 print STDERR "usage: tab2list < infile > outfile
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
20 Options:
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
21 --keycols=X ... How many cols from left are to be kept (default: 1)\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
22 exit 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
23 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
24
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
25 $keycols--;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
26
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
27 $_ = <>;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
28 chomp;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
29 @headline = split /\t/, $_;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
30 @anim = @headline;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
31 shift(@anim);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
32 while (<>) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
33 chomp;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
34 @line = split /\t/, $_;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
35 $head = "";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
36 foreach my $i (0 .. $keycols) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
37 $head .= "$line[$i]\t";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
38 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
39 chop $head;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
40 push (@cis,$head);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
41
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
42 foreach my $i ($keycols+1 .. $#line) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
43 # print "$head\t$headline[$i]\t$line[$i]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
44 $value=$head."|".$headline[$i];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
45 $hash{$value}=$line[$i];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
46
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
47 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
48 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
49 foreach (@cis){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
50 #print "$_";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
51 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
52
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
53 print "\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
54 foreach (@anim){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
55 #print "$_";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
56 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
57
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
58 print "\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
59 foreach (keys %hash) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
60 # print "The key $_ $hash{$_}\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
61 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
62
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
63 %seen;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
64 $first=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
65 $second=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
66 $third=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
67 $fourth=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
68
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
69 foreach $x(@cis){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
70 foreach $y(@cis){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
71 $first=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
72 $second=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
73 $third=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
74 $fourth=0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
75 if ($x eq $y){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
76 next;}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
77 $z = $x."|".$y;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
78 if (exists $seen{$z}){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
79 next;}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
80 $seen{$z}= '1';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
81 $z2 = $y."|".$x;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
82 $seen{$z2}= '1';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
83 foreach $a(@anim) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
84 $b =$x."|".$a;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
85 $c =$y."|".$a;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
86 if (($hash{$b}==0) and ($hash{$c}==0)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
87 $fourth = $fourth + 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
88 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
89 elsif (($hash{$b}==0) and ($hash{$c}==1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
90 $third = $third + 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
91 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
92 elsif (($hash{$b}==1) and ($hash{$c}==0)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
93 $second = $second + 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
94 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
95 elsif (($hash{$b}==1) and ($hash{$c}==1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
96 $first = $first + 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
97 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
98 #print "$a|$x|$y\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
99 #print "$hash{$b}|$hash{$c}\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
100 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
101 print OUT "object__object\t$first\t$second\t$third\t$fourth\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
102 print OUT2 "$x"."__"."$y\t$first\t$second\t$third\t$fourth\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
103 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
104 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
105 close OUT;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
106 close OUT2;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
107 open SOURCE, "< FISH/Fisher_pre.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
108 open OUT, "> FISH/Fisher.R";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
109 while (defined($line = <SOURCE>)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
110 chomp $line;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
111 @field= split(/\t/, $line);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
112 print OUT "$field[0] <- matrix(c($field[1],$field[2],$field[3],$field[4]), nr=2, dimnames=list(c(\"cis\", \"nocis\"), c(\"pheno\", \"notpheno\")))\n$field[0]\nfisher.test($field[0]".",alternative = \"greater\")\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
113 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
114 #system "module load R";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
115 #system "R < FISH/Fisher.R --vanilla > FISH/fisher_dump.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
116 open (my $fisher_dump, ">", "FISH/fisher_dump.txt") || die "Unable to open FISH/fisher_dump.txt. $!\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
117 open (my $out, "Rscript --vanilla FISH/Fisher.R | ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
118 while (<$out>) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
119 print $fisher_dump $_;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
120 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
121 close($out);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
122 close($fisher_dump);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
123 close OUT;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
124
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
125 open SOURCE, "< FISH/fisher_dump.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
126 open OUT, "> FISH/R_result.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
127 while (defined($line = <SOURCE>)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
128 chomp ($line);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
129 @field= split(/\t/, $line);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
130 if ($field[0] =~ m/data/){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
131 print OUT "$field[0]\t";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
132 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
133 if ($field[0] =~ m/p-value/){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
134 print OUT "$field[0]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
135 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
136 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
137 close OUT;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
138 print "cooperation analyses complete";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
139