annotate frequency_sliding/frequency_sliding.py @ 0:9b835eab8a1d draft

Uploaded
author nanettec
date Fri, 18 Mar 2016 05:22:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
1 """
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
2 @summary: Calculate the frequency of eQTLs and genes per sliding window bin
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
3 @author: nanette.coetzer@gmail.com
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
4 @version 5
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
5
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
6 """
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
7 import optparse, sys
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
8 import subprocess
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
9 import tempfile
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
10 import os, re
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
11
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
12 def stop_err( msg ):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
13 sys.stderr.write( "%s\n" % msg )
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
14 sys.exit()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
15
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
16 def __main__():
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
17 #Parse Command Line
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
18 parser = optparse.OptionParser()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
19 parser.add_option("-m", "--rscript", default=None, dest="rscript",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
20 help="R script")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
21 parser.add_option("-i", "--input1", default=None, dest="input1",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
22 help="Frequency per bin file")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
23 parser.add_option("-j", "--input2", default=None, dest="input2",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
24 help="Lookup summary file")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
25 parser.add_option("-k", "--input3", default=None, dest="input3",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
26 help="Frequency summary file")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
27 parser.add_option("-o", "--output1", default=None, dest="output1",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
28 help="Sliding frequency file")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
29 parser.add_option("-p", "--output2", default=None, dest="output2",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
30 help="Sliding lookup file")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
31 parser.add_option("-q", "--output3", default=None, dest="output3",
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
32 help="eQTL and genes polygon plot")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
33 (options, args) = parser.parse_args()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
34
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
35 try:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
36 open(options.input1, "r").close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
37 except TypeError, e:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
38 stop_err("You need to supply the Lookup frequency file:\n" + str(e))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
39 except IOError, e:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
40 stop_err("Can not open the Lookup frequency file:\n" + str(e))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
41
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
42 try:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
43 open(options.input2, "r").close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
44 except TypeError, e:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
45 stop_err("You need to supply the Lookup summary file:\n" + str(e))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
46 except IOError, e:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
47 stop_err("Can not open the Lookup summary file:\n" + str(e))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
48
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
49 try:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
50 open(options.input3, "r").close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
51 except TypeError, e:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
52 stop_err("You need to supply the Frequency summary file:\n" + str(e))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
53 except IOError, e:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
54 stop_err("Can not open the Frequency summary file:\n" + str(e))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
55
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
56 ##############################################
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
57 ##############################################
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
58
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
59 infile3 = open(options.input3,'r') # Frequency summary file
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
60 for line in infile3:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
61 if line.startswith("Number"):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
62 num_intervals = int(line.strip().split("\t")[1])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
63 #print num_intervals
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
64 infile3.close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
65
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
66 chr_second_last = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
67 chr_end = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
68 tot = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
69 infile2 = open(options.input2,'r') # Lookup summary file
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
70 for line in infile2:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
71 l = line.strip().split("\t")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
72 if not l[4].startswith("i"):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
73 tot += int(l[4])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
74 chr_second_last.append(tot-2)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
75 chr_end.append(tot)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
76
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
77 freq_dict = {}
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
78 sliding_id = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
79 prev_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
80 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
81 slidingdict = {}
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
82 cur_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
83 add_last = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
84 i = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
85 sizelist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
86
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
87 infile1 = open(options.input1,'r') # Frequency.txt
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
88
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
89 # 1b is where an interval always starts with a 2cM bin (plus a small bin if available) --> never split 2 bins on either sides of a marker
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
90 if num_intervals == 1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
91 for line in infile1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
92 l2 = line.strip().split("\t")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
93 if not l2[0].startswith("int"):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
94 i += 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
95 freq_dict[l2[0]] = l2
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
96 if prev_size == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
97 # end of chromosome
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
98 if int(l2[0]) in chr_end:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
99 pass
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
100 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
101 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
102 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
103 sizelist.append(float(l2[6]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
104 cur_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
105 add_last = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
106 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
107 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
108
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
109 if prev_size < 2 and add_last == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
110 cur_size = float(cur_size) + float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
111 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
112 sizelist.append(float(l2[6]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
113 elif prev_size == 2:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
114 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
115 slidingdict[int(sliding_id)] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
116 idlist = [int(l2[0])]
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
117 sizelist = [float(l2[6])]
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
118 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
119 cur_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
120
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
121 if add_last == 1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
122 cur_size = float(cur_size) + float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
123 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
124 sizelist.append(float(l2[6]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
125 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
126 slidingdict[sliding_id] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
127 prev_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
128 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
129 sizelist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
130 cur_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
131
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
132 if int(l2[0]) in chr_second_last:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
133 add_last = 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
134
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
135 # 1 is where an interval starts with a small bin if available (plus a 2cM bin)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
136 if num_intervals == 9:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
137 for line in infile1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
138 l2 = line.strip().split("\t")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
139 if not l2[0].startswith("int"):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
140 i += 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
141 freq_dict[l2[0]] = l2
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
142 print slidingdict
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
143 if prev_size == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
144 # end of chromosome
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
145 if int(l2[0]) in chr_end:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
146 pass
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
147 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
148 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
149 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
150 cur_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
151 add_last = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
152 if float(cur_size) >= 2 and add_last == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
153 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
154 slidingdict[int(sliding_id)] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
155
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
156 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
157 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
158 cur_size = float(0)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
159 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
160 cur_size = float(cur_size) + float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
161 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
162 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
163 if add_last == 1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
164 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
165 slidingdict[sliding_id] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
166 prev_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
167 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
168 cur_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
169
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
170 if int(l2[0]) in chr_second_last:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
171 add_last = 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
172 if float(cur_size) >= 2 and add_last == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
173 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
174 slidingdict[int(sliding_id)] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
175 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
176 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
177 cur_size = float(0)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
178
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
179
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
180 if num_intervals == 2:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
181 for line in infile1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
182 l2 = line.strip().split("\t")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
183 if not l2[0].startswith("int"):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
184 i += 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
185 freq_dict[l2[0]] = l2
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
186 if prev_size == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
187 # end of chromosome
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
188 if int(l2[0]) in chr_end:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
189 pass
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
190 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
191 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
192 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
193 cur_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
194 add_last = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
195 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
196 cur_size = float(cur_size) + float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
197 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
198 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
199 if add_last == 1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
200 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
201 slidingdict[sliding_id] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
202 prev_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
203 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
204 cur_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
205
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
206 if int(l2[0]) in chr_second_last:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
207 add_last = 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
208 if float(cur_size) >= 4 and add_last == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
209 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
210 slidingdict[int(sliding_id)] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
211
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
212 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
213 idlist = [int(l2[0])]
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
214 cur_size = float(prev_size)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
215
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
216 if num_intervals == 3:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
217 for line in infile1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
218 l2 = line.strip().split("\t")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
219 if not l2[0].startswith("int"):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
220 i += 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
221 freq_dict[l2[0]] = l2
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
222 if prev_size == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
223 # end of chromosome
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
224 if int(l2[0]) in chr_end:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
225 pass
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
226 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
227 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
228 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
229 sizelist.append(float(l2[6]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
230 cur_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
231 add_last = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
232 else:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
233 cur_size = float(cur_size) + float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
234 idlist.append(int(l2[0]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
235 sizelist.append(float(l2[6]))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
236 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
237 if add_last == 1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
238 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
239 slidingdict[sliding_id] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
240 prev_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
241 idlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
242 sizelist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
243 cur_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
244
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
245 if int(l2[0]) in chr_second_last:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
246 add_last = 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
247 if float(cur_size) >= 6 and add_last == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
248 sliding_id = sliding_id + 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
249 slidingdict[int(sliding_id)] = idlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
250 prev_size = float(l2[6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
251 newsizelist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
252 newidlist = []
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
253 newstart = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
254 c = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
255 cur_size = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
256 for s in range(len(sizelist)):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
257 if newstart == 1:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
258 newsizelist.append(sizelist[s])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
259 newidlist.append(idlist[s])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
260 cur_size += float(sizelist[s])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
261 if sizelist[s] == 2.0 and newstart == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
262 c += 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
263 if c == 2:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
264 newsizelist.append(sizelist[s])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
265 newidlist.append(idlist[s])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
266 cur_size += float(sizelist[s])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
267 newstart = 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
268 idlist = newidlist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
269 sizelist = newsizelist
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
270
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
271 out1 = open(options.output1,'w') # Frequency.txt
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
272 out2 = open(options.output2,'w')
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
273 #out1.write("sliding.id\tchr\tsliding.cM\tsliding.eQTL\tsliding.cis.eQTL\tsliding.trans.eQTL\tsliding.genes\tsliding.eQTL/cM\tsliding.eQTL/cM/10genes\n")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
274 out1.write("sliding.id\tchr\tsliding.cM\tsliding.all.eQTL\tsliding.cis.eQTL\tsliding.trans.eQTL\tsliding.genes\tsliding.all.eQTL/cM\tsliding.cis.eQTL/cM\tsliding.trans.eQTL/cM\tsliding.genes/cM\n")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
275 tcM = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
276 tEQTL = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
277 tGENES = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
278
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
279 for j in range(len(slidingdict.keys())):
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
280 totcM = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
281 toteqtl = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
282 totcis = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
283 tottrans = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
284 totgenes = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
285 totEcM_all = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
286 totEcM_cis = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
287 totEcM_trans = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
288 totGcM = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
289 #totEcM10G = 0
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
290 out2.write(str(j+1)+"\t"+str(slidingdict[j+1])+"\n")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
291 for k in slidingdict[j+1]:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
292 totcM += float(freq_dict[str(k)][6])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
293 toteqtl += float(freq_dict[str(k)][7])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
294 totcis += float(freq_dict[str(k)][8])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
295 tottrans += float(freq_dict[str(k)][9])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
296 totgenes += float(freq_dict[str(k)][10])
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
297
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
298 chr = freq_dict[str(k)][1]
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
299 if totgenes == 0:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
300 totgenes = 1
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
301
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
302 totEcM_all += round((toteqtl/totcM),2)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
303 totEcM_cis += round((totcis/totcM),2)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
304 totEcM_trans += round((tottrans/totcM),2)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
305 totGcM += round((totgenes/totcM),2)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
306 out1.write(str(j+1)+"\t"+str(chr)+"\t"+str(totcM)+"\t"+str(toteqtl)+"\t"+str(totcis)+"\t"+str(tottrans)+"\t"+str(totgenes)+"\t"+str(totEcM_all)+"\t"+str(totEcM_cis)+"\t"+str(totEcM_trans)+"\t"+str(totGcM)+"\n")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
307
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
308 out1.close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
309 infile1.close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
310 infile2.close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
311 out2.close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
312
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
313
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
314 # Create temp direcotry
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
315 tempdir = tempfile.mkdtemp()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
316
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
317 # copy INPUT file to the temp directory
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
318 s = "cp %s %s/sliding_frequency.txt" %(options.output1, tempdir)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
319 subprocess.call(s, shell=True)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
320
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
321 # create R script => save in temp directory
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
322 # generate new header
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
323 new_script = open(tempdir+"/new_script.r","w")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
324 header = "setwd(\"%s\")" %tempdir
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
325 new_script.write(header+"\n")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
326 # add script body
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
327 script = open(options.rscript,"r")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
328 for line in script:
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
329 new_script.write(line.strip()+"\n")
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
330 new_script.close()
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
331
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
332 # run R script from temp directory
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
333 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
334 subprocess.call(s, shell=True)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
335
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
336 # run R script from temp directory
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
337 s = "R CMD BATCH %s/new_script.r out.txt" %tempdir
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
338 subprocess.call(s, shell=True)
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
339
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
340 os.system("mv %s/Rplot_eQTL_genes_polygon.pdf %s" %(tempdir,options.output3))
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
341
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
342 ##############################################
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
343
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
344 if __name__=="__main__":
9b835eab8a1d Uploaded
nanettec
parents:
diff changeset
345 __main__()