Mercurial > repos > jesse-erdmann > tapdance
diff lib/matrix2fisher.pl @ 0:1437a2df99c0
Uploaded
author | jesse-erdmann |
---|---|
date | Fri, 09 Dec 2011 11:56:56 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/matrix2fisher.pl Fri Dec 09 11:56:56 2011 -0500 @@ -0,0 +1,139 @@ +#!/usr/bin/perl +# This script takes a matrix as input and generates an R script to look for +# simularity between rows. +# +# 'tab2list' is a complement to list2tab. It reads a 2-dimensional table and +# produces a list in the form: row label - column label - value. +# +use Getopt::Long; + +$keycols = 1; +$usage = 0; +@cis; +%hash; +open OUT, "> FISH/Fisher_pre.txt"; +open OUT2, "> FISH/Fisher_pre_named.txt"; +GetOptions("keycols=i" => \$keycols, "usage" => \$usage, "help"=>\$usage); + +if ($usage) { + print STDERR "usage: tab2list < infile > outfile + Options: + --keycols=X ... How many cols from left are to be kept (default: 1)\n"; + exit 0; +} + +$keycols--; + +$_ = <>; +chomp; +@headline = split /\t/, $_; +@anim = @headline; +shift(@anim); +while (<>) { + chomp; + @line = split /\t/, $_; + $head = ""; + foreach my $i (0 .. $keycols) { + $head .= "$line[$i]\t"; + } + chop $head; +push (@cis,$head); + + foreach my $i ($keycols+1 .. $#line) { + # print "$head\t$headline[$i]\t$line[$i]\n"; + $value=$head."|".$headline[$i]; + $hash{$value}=$line[$i]; + +} +} +foreach (@cis){ +#print "$_"; +} + +print "\n"; +foreach (@anim){ +#print "$_"; +} + +print "\n"; +foreach (keys %hash) { +# print "The key $_ $hash{$_}\n"; +} + +%seen; +$first=0; +$second=0; +$third=0; +$fourth=0; + +foreach $x(@cis){ +foreach $y(@cis){ +$first=0; +$second=0; +$third=0; +$fourth=0; +if ($x eq $y){ +next;} +$z = $x."|".$y; +if (exists $seen{$z}){ +next;} +$seen{$z}= '1'; +$z2 = $y."|".$x; +$seen{$z2}= '1'; +foreach $a(@anim) { +$b =$x."|".$a; +$c =$y."|".$a; +if (($hash{$b}==0) and ($hash{$c}==0)){ +$fourth = $fourth + 1; +} +elsif (($hash{$b}==0) and ($hash{$c}==1)){ +$third = $third + 1; +} +elsif (($hash{$b}==1) and ($hash{$c}==0)){ +$second = $second + 1; +} +elsif (($hash{$b}==1) and ($hash{$c}==1)){ +$first = $first + 1; +} +#print "$a|$x|$y\n"; +#print "$hash{$b}|$hash{$c}\n"; +} +print OUT "object__object\t$first\t$second\t$third\t$fourth\n"; +print OUT2 "$x"."__"."$y\t$first\t$second\t$third\t$fourth\n"; +} +} +close OUT; +close OUT2; +open SOURCE, "< FISH/Fisher_pre.txt"; +open OUT, "> FISH/Fisher.R"; +while (defined($line = <SOURCE>)) { +chomp $line; + @field= split(/\t/, $line); +print OUT "$field[0] <- matrix(c($field[1],$field[2],$field[3],$field[4]), nr=2, dimnames=list(c(\"cis\", \"nocis\"), c(\"pheno\", \"notpheno\")))\n$field[0]\nfisher.test($field[0]".",alternative = \"greater\")\n"; +} +#system "module load R"; +#system "R < FISH/Fisher.R --vanilla > FISH/fisher_dump.txt"; +open (my $fisher_dump, ">", "FISH/fisher_dump.txt") || die "Unable to open FISH/fisher_dump.txt. $!\n"; +open (my $out, "Rscript --vanilla FISH/Fisher.R | "); +while (<$out>) { + print $fisher_dump $_; +} +close($out); +close($fisher_dump); +close OUT; + +open SOURCE, "< FISH/fisher_dump.txt"; +open OUT, "> FISH/R_result.txt"; +while (defined($line = <SOURCE>)) { + chomp ($line); + @field= split(/\t/, $line); + if ($field[0] =~ m/data/){ + print OUT "$field[0]\t"; + } + if ($field[0] =~ m/p-value/){ + print OUT "$field[0]\n"; + } + } +close OUT; +print "cooperation analyses complete"; +