annotate lib/TAP2.pl @ 0:1437a2df99c0

Uploaded
author jesse-erdmann
date Fri, 09 Dec 2011 11:56:56 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
1 #This is the TAp2.pl script
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
2 #The goal of this is to count the number of CIS present in a NR set
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
3 # one file is required nr.txt sorted in chromosome order!
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
4 # Aaron Lyman Sarver Oct 21, 2010
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
5 # This script takes over following the bowtie mapping and generates CIS.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
6 # Update NOV15,2010 to pass library information through
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
7 #$Fully modified december 1 2010i
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
8 #Fully modifed May 1 2010
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
9 use File::Copy;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
10 require 'config.pl';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
11 require 'lib/feature_finder_methods.pl';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
12 my $path = $0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
13 $path =~ s/\/\w*\.pl$//g;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
14 require "$path/project_man.pl";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
15 unless(-d "CIS") {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
16 mkdir("CIS");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
17 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
18
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
19 open CIS, "> results/summary_CIS_$proj.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
20 close CIS;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
21
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
22 #generate required tables
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
23 $sth = $dbh->prepare("drop table if exists cis_result_final_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
24 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
25 $sth = $dbh->prepare("create table cis_result_final_$proj (chromo varchar(20), start int, stop int, name varchar(20), number varchar(20), size varchar(20), pvalue double, total varchar(20), pvaluetotal double,region varchar(20), pvalueregion double, library_name varchar(5000), strand int, gene_name varchar(1000), type varchar(20))");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
26 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
27
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
28 $sth = $dbh->prepare("drop table if exists sort_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
29 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
30 $sth = $dbh->prepare("create table sort_$proj (chromo varchar(20), start int, stop int, name varchar(20), number varchar(20), size varchar(20), pvalue double, total varchar(20), pvaluetotal double,region varchar(20), pvalueregion double, library_name varchar(5000), strand int, gene_name varchar(1000), type varchar(20))");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
31 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
32 $sth = $dbh->prepare("drop table if exists sort2_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
33 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
34 $sth = $dbh->prepare("create table sort2_$proj (chromo varchar(20), start int, stop int, name varchar(20), number varchar(20), size varchar(20), pvalue double, total varchar(20), pvaluetotal double,region varchar(20), pvalueregion double, library_name varchar(5000), strand int, gene_name varchar(1000), type varchar(20))");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
35 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
36
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
37 #load concatamer information from file
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
38 $sth = $dbh->prepare("drop table if exists chromo_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
39 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
40 $sth = $dbh->prepare("create table chromo_$proj (chromo varchar(20) ) ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
41 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
42 $sth = $dbh->prepare("load DATA local INFILE 'data/chromo.tab' INTO TABLE chromo_$proj ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
43 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
44 $sth = $dbh->prepare("select distinct chromo from chromo_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
45 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
46 print "10.3 - Loaded concatamer chromo\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
47 #generate donor exclusion portion of the SQL query
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
48 @exclude;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
49 $query_chromo= '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
50 while ((@row) = $sth->fetchrow_array) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
51 push (@exclude,$row[0]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
52 print "\n$row[0]";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
53 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
54 print @exclude;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
55 foreach $desc(@exclude) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
56 $query_chromo = $query_chromo."and chromo != '".$desc."' ";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
57 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
58 print $query_chromo;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
59
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
60 #10.4 load library metadata
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
61 $sth = $dbh->prepare("drop table if exists metadata_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
62 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
63 $sth = $dbh->prepare("create table metadata_$proj (library varchar(20), descriptor varchar(20), type varchar (20) ) ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
64 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
65 $sth = $dbh->prepare("load DATA local INFILE 'data/metadata.tab' INTO TABLE metadata_$proj ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
66 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
67 $sth = $dbh->prepare("select distinct descriptor from metadata_$proj where type = 'cis'");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
68 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
69 print "10.4 - Loaded metadata\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
70
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
71 #10.2 load the feature hash
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
72 print "10.2 - Loading features...\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
73 my ($feature_file, $window, $fchrom, $fstart, $fend, $debug) = ($annotation_file, 20000, 0, 1, 2, 1);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
74 my $feature_hash_ref = &feature_index(\$feature_file, \$window, \$fchrom, \$fstart, \$fend, \$debug);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
75 print "10.2 - Features loaded.\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
76
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
77 #generate metadata based library portion of the SQL query
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
78 @descriptors;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
79 $query_definition = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
80 while ((@row) = $sth->fetchrow_array) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
81 push (@descriptors,$row[0]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
82 print "\n$row[0]";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
83 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
84 print @descriptors;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
85 foreach $desc(@descriptors) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
86 $query_definition = '';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
87 $query = "select distinct library from metadata_$proj where descriptor = '$desc'";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
88 #print "\n$query\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
89 $sth = $dbh->prepare("$query");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
90 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
91
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
92 while ((@row) = $sth->fetchrow_array) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
93 $query_definition = $query_definition."or A.library = '$row[0]' ";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
94 #print "$row[0]";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
95 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
96 print "\n$desc\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
97 #print "$query_definition\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
98 #setup the looping featured
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
99 @options = ('');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
100 foreach $library(@options) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
101 @percent =("$library_percent");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
102 foreach $percent(@percent) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
103
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
104 #11.1 query to select regions of interest
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
105 $sth = $dbh->prepare("select chromo, start, A.library,count, count/total,orient from illumina_hist_$proj A, lib_mappable_$proj B where A.library = B.library and count/total > $percent $query_chromo and (1=2 $query_definition)
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
106 order by chromo,start+0 ;");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
107 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
108
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
109 unless(-d "results") {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
110 mkdir("results");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
111 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
112 unless(-d "results/$desc") {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
113 mkdir("results/$desc");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
114 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
115
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
116 open OUT, "> CIS/nr.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
117 $name = "$desc-nr-$proj-$percent";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
118 open OUT2, "> results/$desc/$name.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
119 open OUT3, "> results/$desc/$name.BED";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
120 print OUT3 "track type ='BED' name='$name' description='$name Insertions' visibility=2 itemRgb='On'";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
121 while ((@row) = $sth->fetchrow_array) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
122 print OUT "\n$row[0]\t$row[1]\t$row[2]\t$row[3]\t$row[4]\t$row[5]";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
123 $start = $row[1]-50;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
124 $stop = $row[1]+50;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
125 print OUT2 "\n$row[0]\t$start\t$stop\t$row[2]\t$row[3]\t$row[4]\t$row[5]";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
126 print OUT3 "\n$row[0]\t$start\t$stop\t$row[2]\t$row[3]\t$row[5]\t$start\t$stop\t159,0,197";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
127 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
128 close OUT;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
129
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
130 #11.2 Anotate the inserts from the .txt file
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
131 my ($interval_file, $out_file, $req_res_type, $direction, $interval_cols, $feature_cols, $display_distance) = ("results/$desc/$name.txt", "results/$desc/$name.ann.txt", "ALL", "BOTH", "1:2:3:4:10", "1:2:3:4:6", 0);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
132 &process_intervals(\$interval_file, \$out_file, $feature_hash_ref, \$req_res_type, \$direction, \$window, \$html, \$cturl, \$db, \$interval_cols, \$feature_cols, \$debug, \$display_distance);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
133 unlink("results/$desc/$name.txt");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
134 #generate a list of gense with high density inserts
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
135 #if (($percent == '0.01') and ($library eq 'library')) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
136 #system("perl lib/convert_nr_2_annot.pl results/$desc/$name.txt results/$desc/$name.ingenuity.txt");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
137 #}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
138
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
139 $count = 0;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
140 open SOURCE, "< CIS/nr.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
141 while (defined($line = <SOURCE>)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
142 $count++;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
143 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
144 @libSizes =();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
145 open CIS, ">> results/summary_CIS_$proj.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
146 print "\n\n11.3 - $name Total number of NR regions $count\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
147 print CIS "\n\n$name\nExcluded chromosomes $query_chromo\nTotal number of NR regions $count\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
148 print CIS "CIS analyses calculate window sizes based on NR count\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
149
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
150 #11.4 Ideal window size calculation
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
151 $n = 10000;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
152 $f = 3;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
153 while ($n < 300000) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
154 $stat =poissonBonf($n,$count,$f);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
155 if ($stat > 0.05) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
156 $nf=$n-1000;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
157 $stat2=poissonBonf($nf,$count,$f);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
158 if ($stat2 < 0.05) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
159 print "$stat2\t$nf\t$f\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
160 print CIS "$stat2\t$nf\t$f\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
161 unshift(@libSizes, $nf);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
162 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
163 $f=$f+1;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
164 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
165 $n = $n+1000;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
166 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
167 $nf='301000';
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
168 unshift(@libSizes, $nf);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
169 print @libSizes;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
170 print CIS @libsizes;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
171 close CIS;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
172 if ($count < 2000) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
173 @libSizes =('301000','200000','100000','50000','25000','12500');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
174 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
175 if ($count > 200000) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
176 @libSizes =('301000','200000','100000','50000','25000','12500');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
177 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
178
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
179 #11.5
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
180 foreach $lib(@libSizes) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
181
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
182 #11.5 call the cis_finder.pl script to calculate the CIS.
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
183 system("perl lib/cis_finder.pl $lib");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
184 print "\n$lib\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
185
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
186
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
187 #11.6 call the feature_finder.pl script to annotat the CIS regions
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
188 my ($interval_file, $out_file, $req_res_type, $direction, $interval_cols, $feature_cols, $display_distance) = ("CIS/cis$lib.txt", "CIS/final_cis_named_$lib.txt", "ALL", "BOTH", "1:2:3:4:10", "1:2:3:4:6", 0);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
189 &process_intervals(\$interval_file, \$out_file, $feature_hash_ref, \$req_res_type, \$direction, \$window, \$html, \$cturl, \$db, \$interval_cols, \$feature_cols, \$debug, \$display_distance);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
190 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
191
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
192 #load the results into the database.and resolve overlap
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
193 $sth = $dbh->prepare("delete from sort_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
194 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
195 $sth = $dbh->prepare("delete from sort2_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
196 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
197
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
198 #11.7
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
199 foreach $lib(@libSizes) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
200 $sth = $dbh->prepare("load DATA local INFILE 'CIS/final_cis_named_$lib.txt' INTO TABLE sort2_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
201 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
202 resolve_cis();
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
203 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
204
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
205 #MOve and rename CIS to cis_result_final table
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
206 $filename = $desc."_".$percent.$library;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
207 $query2 = "delete from cis_result_final_$proj where type= '$filename'";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
208 print "\n$query2\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
209 $sth = $dbh->prepare("$query2");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
210 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
211 $sth = $dbh->prepare("insert into cis_result_final_$proj select * from sort_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
212 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
213 $query3 = "update cis_result_final_$proj set type = '$filename' where type is NULL";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
214 print "$query3";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
215 $sth = $dbh->prepare("$query3");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
216 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
217
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
218 #Export CIS list to the results subdirectory
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
219 $query4 = "select number,CONCAT(chromo,':',start,'-',stop)as pos, total, pvaluetotal,number, pvalue,region,pvalueregion,gene_name,library_name,strand from cis_result_final_$proj where type = '$filename' and pvalueregion < '$CIS_region_pvalue' and pvaluetotal < '$CIS_total_pvalue' and pvalue < '$CIS_library_pvalue' and gene_name not like '%BAD%' order by pvalueregion;";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
220 $sth = $dbh->prepare("$query4");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
221 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
222 open CIS, "> results/$desc/cis_$name.xls";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
223 print CIS "$filename\nnumber\tpos\t#inserts\tpvalueinsert#\t#library\tpvaluelibrary#\t#regions\tpvalueregion#\tgene_name\tlibrary_name\tnumber of inserts drive transcription on positive strand";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
224 while ((@row) = $sth->fetchrow_array) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
225 print CIS "\n$row[0]\t$row[1]\t$row[2]\t$row[3]\t$row[4]\t$row[5]\t$row[6]\t$row[7]\t$row[8]\t$row[9]\t$row[10]\t$row[11]\t";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
226 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
227 close CIS;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
228
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
229
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
230 open SOURCE, "< results/$desc/cis_$name.xls";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
231 open CIS, "> results/$desc/plot_$name.wig";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
232 print CIS "track type=wiggle_0 name=\"$proj.$filename\" description=\"$proj.$filename\" visibility=full autoScale=off";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
233 while (defined($line = <SOURCE>)) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
234 chomp $line;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
235 @field= split(/\t/, $line);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
236 @chromo=split(/:|-/, $field[1]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
237
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
238 if ($field[1] =~ m/chr/) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
239 if ($field[7] < 1.0e-100) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
240 $field[7] = 1.0e-100;}
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
241
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
242 $log= -(log($field[7])/log(10));
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
243 print CIS "\n$chromo[0]\t$chromo[1]\t$chromo[2]\t$log";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
244 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
245 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
246 close CIS;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
247
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
248 #11.11 Generate the cis summary file of top 25 library CIS
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
249 $query4 = "select number,CONCAT(chromo,':',start,'-',stop)as pos, total, pvaluetotal,number, pvalue,region,pvalueregion,gene_name,library_name,strand from cis_result_final_$proj where type = '$filename' and pvalueregion < '$CIS_region_pvalue' and pvaluetotal < '$CIS_total_pvalue' and pvalue < '$CIS_library_pvalue' and gene_name not like '%BAD%' order by pvalueregion;";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
250 print "$query4";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
251 $sth = $dbh->prepare("$query4");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
252 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
253 open CIS, ">> results/summary_CIS_$proj.txt";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
254 print CIS "\n\n$filename\nnumber\tpos\t#inserts\tpvalueinsert#\t#library\tpvaluelibrary#\t#regions\tpvalueregion#\tgene_name\tlibrary_name\t+strand\t";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
255 while ((@row) = $sth->fetchrow_array) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
256 print CIS "\n$row[0]\t$row[1]\t$row[2]\t$row[3]\t$row[4]\t$row[5]\t$row[6]\t$row[7]\t$row[8]\t$row[9]\t$row[10]\t$row[11]\t";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
257 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
258 close CIS;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
259
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
260 #generate matrix and carry out association analyses
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
261 if (($desc eq 'all') and ($percent == 0.0001) ) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
262 print "\n\n Starting Association analyses for $name";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
263 mkdir "FISH";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
264 mkdir "results/Assoc";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
265 system ('perl lib/visualize.pl');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
266 copy("FISH/cis4cluster.txt", "results/Assoc/cis4cluster.txt") or die "Unable to copy file: $!\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
267 #system ('module load R');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
268 system ('perl lib/matrix2fisher.pl FISH/cis4cluster.txt');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
269 copy("FISH/R_result.txt", "results/Assoc/R_result.txt") or die "Unable to copy file: $!\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
270 copy("FISH/Fisher_pre_named.txt", "results/Assoc/Fisher_pre_named.txt") or die "Unable to copy file: $!\n";
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
271 system ('perl lib/Fisher_clean.pl');
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
272 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
273
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
274 }#close the threshhold loop
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
275 }#close the libraries loop
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
276 }#close the metadata loop
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
277
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
278 &set_project_status($proj, 0, 1);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
279
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
280 exit(0);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
281
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
282 sub resolve_cis {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
283 $sth = $dbh->prepare("delete from A using sort_$proj A, sort2_$proj B where A.chromo = B.chromo and A.start <= B.start and A.stop >= B.start and A.pvalue > B.pvalue");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
284 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
285
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
286 $sth = $dbh->prepare("delete from A using sort_$proj A, sort2_$proj B where A.chromo = B.chromo and A.start <= B.stop and A.stop >= B.stop and A.pvalue > B.pvalue");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
287 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
288 $sth = $dbh->prepare("delete from B using sort_$proj A, sort2_$proj B where A.chromo = B.chromo and A.start <= B.start and A.stop >= B.start and A.pvalue < B.pvalue; ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
289 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
290 $sth = $dbh->prepare("delete from B using sort_$proj A, sort2_$proj B where A.chromo = B.chromo and A.start <= B.stop and A.stop >= B.stop and A.pvalue <=
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
291 B.pvalue; ");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
292 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
293
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
294 $sth = $dbh->prepare("insert into sort_$proj select * from sort2_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
295 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
296
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
297 $sth = $dbh->prepare("delete from sort2_$proj");
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
298 $sth->execute;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
299 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
300
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
301 sub poisson {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
302 return ((($_[1]/(2393726033/$_[0]))**($_[2]))*(2.71828**-($_[1]/(2393726033/$_[0
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
303 ])))/(fac($_[2])));
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
304 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
305
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
306 sub poissonBonf {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
307 return (((($_[1]/(2393726033/$_[0]))**($_[2]))*(2.71828**-($_[1]/(2393726033/$_[
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
308 0])))/(fac($_[2])))*$_[1]);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
309 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
310
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
311 sub fac {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
312 my ($n) = @_;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
313
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
314 if ($n < 2) {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
315 return $n;
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
316 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
317 else {
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
318 return $n * fac($n-1);
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
319 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
320 }
1437a2df99c0 Uploaded
jesse-erdmann
parents:
diff changeset
321