annotate lib/Fisher_clean.pl @ 3:17ce4f3bffa2 default tip

Uploaded
author jesse-erdmann
date Tue, 24 Jan 2012 18:33:41 -0500
parents 1437a2df99c0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
# Post-processing of Fisher's exact test (FET) association results.
#
# Inputs (all paths are relative to the current working directory):
#   results/Assoc/R_result.txt
#       One test result per line.  The second tab-separated field holds text
#       of the form "<label> = <p>" or "<label> < <p>"; <p> is the p-value.
#   results/Assoc/Fisher_pre_named.txt
#       One tested pair per line, paired with R_result.txt by line position.
#       The first tab-separated field is "<event>__<event>".
#   FISH/multi_test.txt
#       Tab-separated; fields 3, 4 and 5 of the last non-blank line are used
#       as the multiple-testing divisors for the Ann/Cis, Cis/Cis and Ann/Ann
#       tables respectively.
#
# Outputs (all written to results/Assoc/):
#   Associations.xls        every input pair line with its p-value appended
#   *_list.txt              pair lists with a -log transformed p-value
#   *_table.xls             pairs with p < 0.05, sorted by p-value, together
#                           with the Bonferroni and Benjamini-Hochberg (20%
#                           FDR) thresholds each p-value is to be compared to
#   *_matrix.txt            produced from the lists by lib/list2tab.pl
#
# Events whose name contains "descriptor" form the "Ann" class; every other
# event is "Cis" (presumably annotations vs. common insertion sites --
# TODO confirm naming with the pipeline documentation).

use strict;
use warnings;

my $ASSOC_DIR = 'results/Assoc';
my $ALPHA     = 0.05;    # nominal cut-off for the tables and Bonferroni numerator
my $FDR_LEVEL = 0.20;    # Benjamini-Hochberg false discovery rate

# Shared header of the three *_table.xls files.
my $TABLE_HEADER =
    "Event1\tEvent2\tFET Pvalue\tBonferroni Significance\tBH 20%FDR Significance\n";

# One entry per association class.  'column' is the 0-based field of
# FISH/multi_test.txt that holds the class's multiple-testing divisor.
my @CLASSES = (
    {
        name   => 'AnnCis',
        list   => 'Ann_cis_list.txt',
        table  => 'Ann_cis_table.xls',
        matrix => 'Ann_cis_matrix.txt',
        column => 2,
    },
    {
        name   => 'CisCis',
        list   => 'Cis_cis_list.txt',
        table  => 'Cis_cis_table.xls',
        matrix => 'Cis_cis_matrix.txt',
        column => 3,
    },
    {
        name   => 'AnnAnn',
        list   => 'Ann_ann_list.txt',
        table  => 'Ann_Ann_table.xls',
        matrix => 'Ann_ann_matrix.txt',
        column => 4,
    },
);

# Opens $path with the given mode and returns the handle; dies on failure.
sub open_or_die {
    my ($mode, $path) = @_;
    open my $fh, $mode, $path
        or die "Cannot open '$path' (mode '$mode'): $!";
    return $fh;
}

# Closes a write handle; dies on failure because buffered write errors only
# surface at close time.
sub close_or_die {
    my ($fh, $path) = @_;
    close $fh or die "Error while closing '$path': $!";
    return;
}

# Numifies a field the way Perl's arithmetic operators would (undef, empty
# and non-numeric text become 0) without emitting warnings.
sub to_number {
    my ($text) = @_;
    no warnings qw(numeric uninitialized);
    return $text + 0;
}

# Returns a reference to the list of raw p-value strings found in $path, one
# element per input line (undef where a line carries no p-value).
sub read_pvalues {
    my ($path) = @_;
    my $in = open_or_die('<', $path);
    my @pvalues;
    while (my $line = <$in>) {
        chomp $line;
        my @fields = split /\t/, $line;
        my $result = defined $fields[1] ? $fields[1] : '';
        my @parts  = split /\s=\s|\s<\s/, $result;
        push @pvalues, $parts[1];
    }
    close $in;
    return \@pvalues;
}

# Returns a reference to the tab-separated fields of the last non-blank line
# of $path (an empty list if there is none).
sub read_test_counts {
    my ($path) = @_;
    my $in = open_or_die('<', $path);
    my @fields;
    while (my $line = <$in>) {
        chomp $line;
        next unless $line =~ /\S/;    # a trailing blank line must not wipe the counts
        @fields = split /\t/, $line;
    }
    close $in;
    return \@fields;
}

# Maps a pair of event names (in input order) to its class name, or returns
# nothing for the one combination that is not reported: first event Ann,
# second event Cis.
sub classify_pair {
    my ($first, $second) = @_;
    my $first_is_ann  = $first  =~ /descriptor/;
    my $second_is_ann = $second =~ /descriptor/;

    return 'AnnAnn' if $first_is_ann  && $second_is_ann;
    return 'CisCis' if !$first_is_ann && !$second_is_ann;
    return 'AnnCis' if $second_is_ann;
    return;
}

# Writes one *_table.xls file: the header, then the rows sorted by ascending
# p-value, each followed by the Bonferroni threshold (alpha / tests) and the
# Benjamini-Hochberg threshold for the row's rank (rank / tests * FDR).
# Each row is [event1, event2, raw p-value text, numeric p-value].
sub write_table {
    my ($path, $rows, $test_count) = @_;
    my $out = open_or_die('>', $path);
    print {$out} $TABLE_HEADER;

    my @ranked = sort { $a->[3] <=> $b->[3] } @{$rows};
    if (@ranked) {
        my $tests = to_number($test_count);
        die "No usable multiple-testing count for '$path' in FISH/multi_test.txt"
            if $tests == 0;

        my $bonferroni = $ALPHA / $tests;
        my $rank       = 0;
        for my $row (@ranked) {
            $rank++;
            my $bh_threshold = ($rank / $tests) * $FDR_LEVEL;
            print {$out}
                join("\t", @{$row}[ 0 .. 2 ], $bonferroni, $bh_threshold), "\n";
        }
    }

    close_or_die($out, $path);
    return;
}

# Entry point: reads the inputs, writes every list and table, then converts
# the lists to matrices with lib/list2tab.pl.
sub main {
    unless (-d $ASSOC_DIR) {
        mkdir $ASSOC_DIR or die "Cannot create directory '$ASSOC_DIR': $!";
    }

    # Read the small inputs up front so a missing file is reported before
    # any output file is truncated.
    my $pvalues     = read_pvalues("$ASSOC_DIR/R_result.txt");
    my $test_counts = read_test_counts('FISH/multi_test.txt');

    my $names_path = "$ASSOC_DIR/Fisher_pre_named.txt";
    my $names_in   = open_or_die('<', $names_path);

    my $assoc_path = "$ASSOC_DIR/Associations.xls";
    my $assoc_out  = open_or_die('>', $assoc_path);

    my (%list_fh, %rows);
    for my $class (@CLASSES) {
        $list_fh{ $class->{name} } = open_or_die('>', "$ASSOC_DIR/$class->{list}");
        $rows{ $class->{name} }    = [];
    }

    my $index = 0;
    while (my $line = <$names_in>) {
        chomp $line;
        my $raw_p = $pvalues->[ $index++ ];
        $raw_p = '' unless defined $raw_p;
        print {$assoc_out} "$line\t$raw_p\n";

        my ($pair) = split /\t/, $line;
        my ($first, $second) = split /__/, defined $pair ? $pair : '';
        $first  = '' unless defined $first;
        $second = '' unless defined $second;

        my $class = classify_pair($first, $second) or next;

        my $p = to_number($raw_p);
        die "Line $index of '$names_path' has no positive p-value (got '$raw_p')"
            if $p <= 0;

        # NOTE(review): Ann/Cis lists use -log10(p) while the other two use
        # the natural logarithm.  Kept as is because the matrix files built
        # from these lists would change; confirm whether this is intended.
        my $view_p = $class eq 'AnnCis' ? -(log($p) / log(10)) : -log($p);

        my $list_out = $list_fh{$class};
        print {$list_out} "$second\t$first\t$view_p\n";
        # Same-class pairs are listed in both orientations.
        print {$list_out} "$first\t$second\t$view_p\n" unless $class eq 'AnnCis';

        push @{ $rows{$class} }, [ $second, $first, $raw_p, $p ] if $p < $ALPHA;
    }
    close $names_in;
    close_or_die($assoc_out, $assoc_path);

    for my $class (@CLASSES) {
        # The lists must be flushed to disk before list2tab.pl reads them.
        close_or_die($list_fh{ $class->{name} }, "$ASSOC_DIR/$class->{list}");
        write_table(
            "$ASSOC_DIR/$class->{table}",
            $rows{ $class->{name} },
            $test_counts->[ $class->{column} ],
        );
    }

    for my $class (@CLASSES) {
        my $command = "perl lib/list2tab.pl 1 2 3 0 $ASSOC_DIR/$class->{list}"
                    . " > $ASSOC_DIR/$class->{matrix}";
        # Warn rather than die so the remaining matrices are still attempted.
        system($command) == 0
            or warn "Command failed (status $?): $command\n";
    }

    return;
}

main() unless caller;