annotate lib/cis_finder.pl @ 3:17ce4f3bffa2 default tip

Uploaded
author jesse-erdmann
date Tue, 24 Jan 2012 18:33:41 -0500
parents 1437a2df99c0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
1 #This script takes a set of chromosomes and positions from a text file and generates defined CIS'es
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
2 #version 5.0 ALS
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
3 #Nov 15 2010
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
4
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
5 # Open the presorted NR chromosome file and load it into a hash.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
6 require 'config.pl';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
7 $boxSize = $ARGV[0];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
8
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
9 open OUT, "> CIS/cis$ARGV[0].txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
10 open SOURCE, "< CIS/nr.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
11 while (defined($line = <SOURCE>)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
12 $count++;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
13 chomp $line;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
14 @field= split(/\t/, $line);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
15 $hash[$count]= $field[1];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
16 $chrom[$count]= $field[0];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
17 $lib[$count]= $field[2];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
18 $orient[$count]=$field[5];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
19 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
20
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
21 #Count the number of nr positions within a box the size of which is defined below and add to a parallel hash.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
22 foreach $n (1..$count) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
23 $cis_count = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
24 @lib_list = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
25 @pos_list = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
26 $lib_count = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
27 $orient_count = '0';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
28 $i =$n;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
29 #print "$n\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
30 $j = $hash["$n"]+$boxSize;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
31 #print "pos=>".$hash[$n]."\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
32 while (($hash["$n"] <= $hash["$i"]) and ($j > ($hash["$i"]+0)) and ($i <= $count)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
33 push(@lib_list,$lib["$i"]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
34 push(@pos_list,$hash["$i"]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
35 if ($orient[$i] eq "+") {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
36 $orient_count = $orient_count +1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
37 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
38
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
39 $i=$i+1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
40 #print "$i\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
41 $cis_count=$cis_count+1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
42 #print "$cis_count\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
43
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
44 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
45 $cis[$n]=$cis_count;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
46 $strand[$n]=$orient_count;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
47 #print "Cis_count=$cis_count\t";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
48 #print @lib_list;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
49 #This section rather hackily counts the number of libraries represented in the CIS window.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
50 %seen=();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
51 @uniq = ();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
52 $libstring = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
53 foreach $item (@lib_list) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
54 unless ($seen{$item}) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
55 $seen{$item}= 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
56 push(@uniq,$item)
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
57 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
58 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
59 #print "\n@uniq\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
60
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
61 $lib_count = -1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
62 foreach $i (@uniq) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
63 $lib_count++;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
64 $libstring = $libstring.$i.'::';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
65 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
66
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
67 #print "Library_count=$lib_count";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
68
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
69 $lib[$n]=$lib_count;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
70 $lib_name[$n]=$libstring;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
71 #print "$cis[$n]\t$lib[$n]";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
72
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
73 #This section rather hackily counts the the number of regions represented in the CIS window.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
74 %seen=();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
75 @uniq = ();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
76 $posstring = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
77 foreach $item (@pos_list) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
78 unless ($seen{$item}) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
79 $seen{$item}= 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
80 push(@uniq,$item)
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
81 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
82 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
83 #print "\n@uniq\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
84
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
85 $pos_count = -1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
86 foreach $i (@uniq) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
87 $pos_count++;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
88 $posstring = $posstring.$i.'::';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
89 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
90
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
91 #print "Library_count=$lib_count";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
92
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
93 $pos[$n]=$pos_count;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
94 $pos_name[$n]=$posstring;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
95 #print "\t$pos[$n]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
96
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
97
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
98
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
99 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
100
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
101
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
102 $cis[0] = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
103
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
104 # select peaks and export them with complex logic scheme.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
105 # update Sept22 seperate this into 4 cases.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
106
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
107 foreach $n (1..$count) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
108 $prev = $n-1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
109 $this = $n;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
110 $next = $n+1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
111 #print "\t$ARGV[1]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
112
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
113
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
114 if ($ARGV[1] eq library) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
115 #print "$ARGV[1]\t$cis[$this]\t$lib[$n]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
116 $cis[$this]=$lib[$this];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
117 $cis[$next]=$lib[$next];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
118 $cis[$prev]=$lib[$prev];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
119 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
120 # outcome 1 no other cises are relevant i.e. nothing within + or - cis size.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
121 if ((($hash[$this] - $hash[$prev] > $boxSize) or ($chrom[$this] ne $chrom[$prev])) and (($hash[$next]-$hash[$this] > $boxSize) or ($chrom[$next] ne $chrom[$this])) and ($cis[$this] > 1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
122 outputpart1();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
123 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
124
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
125 # outcome 2 no other cis + cis size cis - cis exists and is smaller.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
126 if (($hash[$this] - $hash[$prev] >0) and ($hash[$this] - $hash[$prev] <= $boxSize) and ($cis[$prev] < $cis[$this]) and ($hash[$next]-$hash[$this] > $boxSize) and ($cis[$this] > 1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
127 outputpart1();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
128 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
129
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
130 # outcome 3 no other cis - cis size + cis exists and is smaller are relevant nothing within + or - cis size.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
131 if ((($hash[$this] - $hash[$prev] > $boxSize) or ($chrom[$this] ne $chrom[$prev])) and ($hash[$next]-$hash[$this] >= 0) and ($hash[$next]-$hash[$this] <= $boxSize) and ($cis[$this] >= $cis[$next]) and ($cis[$this] > 1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
132 outputpart1();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
133 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
134
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
135 # outcome 4 other cises on both sides.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
136 if (($hash[$this] - $hash[$prev] >0) and ($hash[$this] - $hash[$prev] <= $boxSize) and ($hash[$next]-$hash[$this] > 0) and ($hash[$next]-$hash[$this] <= $boxSize) and ($cis[$prev] < $cis[$this]) and ($cis[$this] >= $cis[$next]) and $cis[$this] > 1 ) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
137 outputpart1();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
138 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
139 #print "$chrom[$n]\t$hash[$n]\t$cis[$n]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
140 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
141 #hack to prevent permanent looping...observedon one occasion unsure why...
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
142 $iter = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
143 $count3 = 1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
144 $count2 = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
145 while (($count3 ne $count2) and ($iter < 100)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
146 print "$iter";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
147 $iter++;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
148 close OUT;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
149 #print "$count3\t\t\t\t$count2\n\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
150 $count3 = $count2;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
151 $count2 = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
152 %hash= ();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
153 %chrom= ();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
154 %cis=();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
155 %libraries=();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
156 %full_line=();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
157 #open OUT, "> cis2.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
158 open SOURCE, "< CIS/cis$ARGV[0].txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
159 while (defined($line = <SOURCE>)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
160 $count2++;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
161 chomp $line;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
162 $full_line[$count2]=$line;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
163 @field= split(/\t/, $line);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
164 $hash[$count2]= $field[1];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
165 $chrom[$count2]= $field[0];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
166 $cis[$count2]= $field[4];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
167 #$libraries[$count2]=$field[11];
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
168 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
169 close SOURCE;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
170 open OUT, "> CIS/cis$ARGV[0].txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
171 # select peaks and export them with complex logic scheme.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
172 # update Sept22 seperate this into 4 cases.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
173
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
174 foreach $n (1..$count2) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
175 $prev = $n-1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
176 $this = $n;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
177 $next = $n+1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
178
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
179 # outcome 1 no other cises are relevant i.e. nothing within + or - cis size.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
180 if ((($hash[$this] - $hash[$prev] > $boxSize) or ($chrom[$this] ne $chrom[$prev])) and (($hash[$this]-$hash[$next] < -$boxSize) or ($chrom[$next] ne $chrom[$this])) and ($cis[$this] > 1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
181 output();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
182 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
183
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
184 # outcome 2 no other cis + cis size cis - cis exists and is smaller.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
185 if (($hash[$this] - $hash[$prev] >0) and ($hash[$this] - $hash[$prev] <= $boxSize) and ($cis[$prev] < $cis[$this]) and ($hash[$next]-$hash[$this] > $boxSize) and ($cis[$this] > 1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
186 output();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
187 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
188
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
189 # outcome 3 no other cis - cis size + cis exists and is smaller are relevant nothing within + or - cis size.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
190 if ((($hash[$this] - $hash[$prev] > $boxSize) or ($chrom[$this] ne $chrom[$prev])) and ($hash[$next]-$hash[$this] >= 0) and ($hash[$next]-$hash[$this] <= $boxSize) and ($cis[$this] >= $cis[$next]) and ($cis[$this] > 1)){
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
191 output();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
192 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
193
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
194 # outcome 4 other cises on both sides.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
195 if (($hash[$this] - $hash[$prev] >0) and ($hash[$this] - $hash[$prev] <= $boxSize) and ($hash[$next]-$hash[$this] >= 0) and ($hash[$next]-$hash[$this] <= $boxSize) and ($cis[$prev] < $cis[$this]) and ($cis[$this] >= $cis[$next]) and $cis[$this] > 1 ) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
196 output();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
197 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
198 #print "$chrom[$n]\t$hash[$n]\t$cis[$n]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
199 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
200
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
201 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
202 close OUT;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
203
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
204 #Subroutines
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
205 # provide data in the form of poisson('BoxSize','Total','count')
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
206 sub outputpart1 {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
207 $pval = poissonBonf($boxSize,$count,$lib[$this]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
208 $pvalTotal = poissonBonf($boxSize,$count,$cis[$this]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
209 $pvalRegion = poissonBonf($boxSize,$count,$pos[$this]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
210 if ($pval < $CIS_total_pvalue) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
211 $end = $hash[$n]+$boxSize;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
212 print OUT "$chrom[$n]\t$hash[$n]\t$end\t$chrom[$n]_$hash[$n]\t$lib[$this]\t$boxSize\t$pval\t$cis[$this]\t$pvalTotal\t$pos[$this]\t$pvalRegion\t$lib_name[$n]\t$strand[$this]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
213 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
214 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
215
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
216 sub output {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
217 print OUT "$full_line[$n]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
218 #if ($pval < 0.05) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
219 #$end = $hash[$n]+$boxSize;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
220 #print OUT "$chrom[$n]\t$hash[$n]\t$end\t$chrom[$n]_$hash[$n]\t$cis[$this]\t$boxSize\t$pval\t$bonf\t$libraries[$n]\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
221 #}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
222 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
223 sub poisson {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
224 return ((($_[1]/(2393726033/$_[0]))**($_[2]))*(2.71828**-($_[1]/(2393726033/$_[0])))/(fac($_[2])));
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
225 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
226
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
227 sub poissonBonf {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
228 return (((($_[1]/(2393726033/$_[0]))**($_[2]))*(2.71828**-($_[1]/(2393726033/$_[0])))/(fac($_[2])))*$_[1]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
229 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
230
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
231 sub fac {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
232 my ($n) = @_;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
233
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
234 if ($n < 2) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
235 return $n;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
236 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
237 else {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
238 return $n * fac($n-1);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
239 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
240 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
241
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
242