Mercurial > repos > iuc > bigwig_outlier_bed
comparison bigwig_outlier_bed.py @ 4:2488bcddaf14 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit d27456ca56231eb9a4eb360c99f44af6c18a0afc
author | iuc |
---|---|
date | Mon, 30 Sep 2024 01:42:34 +0000 |
parents | 00b3da7776a0 |
children |
comparison
equal
deleted
inserted
replaced
3:00b3da7776a0 | 4:2488bcddaf14 |
---|---|
115 self.bedouthi = args.bedouthi | 115 self.bedouthi = args.bedouthi |
116 self.bedoutlo = args.bedoutlo | 116 self.bedoutlo = args.bedoutlo |
117 self.bedouthilo = args.bedouthilo | 117 self.bedouthilo = args.bedouthilo |
118 self.tableoutfile = args.tableoutfile | 118 self.tableoutfile = args.tableoutfile |
119 self.bedwin = args.minwin | 119 self.bedwin = args.minwin |
120 self.bedoutzero = args.bedoutzero | |
120 self.qlo = None | 121 self.qlo = None |
121 self.qhi = None | 122 self.qhi = None |
122 if args.outbeds != "outtab": | 123 if args.outbeds != "outtab": |
123 self.qhi = args.qhi | 124 self.qhi = args.qhi |
124 if args.qlo: | 125 if args.qlo: |
131 nlab = len(args.bigwiglabels) | 132 nlab = len(args.bigwiglabels) |
132 if nlab < nbw: | 133 if nlab < nbw: |
133 self.bwlabels += ["Nolabel"] * (nbw - nlab) | 134 self.bwlabels += ["Nolabel"] * (nbw - nlab) |
134 self.makeBed() | 135 self.makeBed() |
135 | 136 |
136 def processVals(self, bw, isTop): | 137 def processVals(self, bw, isTop, isZero): |
137 """ | 138 """ |
138 idea from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | 139 idea from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html |
139 Fast segmentation into regions by taking np.diff on the boolean array of over (under) cutpoint indicators in bwex. | 140 Fast segmentation into regions by taking np.diff on the boolean array of over (under) cutpoint indicators in bwex. |
140 This only gives non-zero values at the segment boundaries where there's a change, so those zeros are all removed in bwexdnz | 141 This only gives non-zero values at the segment boundaries where there's a change, so those zeros are all removed in bwexdnz |
141 leaving an array of segment start/end positions. That's twisted around into an array of start/end coordinates. | 142 leaving an array of segment start/end positions. That's twisted around into an array of start/end coordinates. |
142 Magical. Fast. Could do the same for means or medians over windows for sparse bigwigs like repeat regions. | 143 Magical. Fast. Could do the same for means or medians over windows for sparse bigwigs like repeat regions. |
143 """ | 144 """ |
144 if isTop: | 145 if isTop: |
145 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s | 146 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s |
147 elif isZero: | |
148 bwex = np.r_[False, bw == 0, False] # extend with 0s | |
146 else: | 149 else: |
147 bwex = np.r_[False, bw <= self.bwbot, False] | 150 bwex = np.r_[False, bw <= self.bwbot, False] |
148 bwexd = np.diff(bwex) | 151 bwexd = np.diff(bwex) |
149 bwexdnz = bwexd.nonzero()[0] # start and end transition of each segment - nice! | 152 bwexdnz = bwexd.nonzero()[0] # start and end transition of each segment - nice! |
150 bwregions = np.reshape(bwexdnz, (-1, 2)) | 153 bwregions = np.reshape(bwexdnz, (-1, 2)) |
188 return row | 191 return row |
189 | 192 |
190 def makeBed(self): | 193 def makeBed(self): |
191 bedhi = [] | 194 bedhi = [] |
192 bedlo = [] | 195 bedlo = [] |
196 bedzero = [] | |
193 restab = [] | 197 restab = [] |
194 bwlabels = self.bwlabels | 198 bwlabels = self.bwlabels |
195 bwnames = self.bwnames | 199 bwnames = self.bwnames |
196 reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot" | 200 reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot" |
197 for i, bwname in enumerate(bwnames): | 201 for i, bwname in enumerate(bwnames): |
227 "\n".join(first_few) | 231 "\n".join(first_few) |
228 + "\nHistogram of %s bigwig values\n" % bwlabel | 232 + "\nHistogram of %s bigwig values\n" % bwlabel |
229 + histo | 233 + histo |
230 ) | 234 ) |
231 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered | 235 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered |
236 if self.bedoutzero is not None: | |
237 bwzero = self.processVals(bw, isTop=False, isZero=True) | |
238 for j, seg in enumerate(bwzero): | |
239 seglen = seg[1] - seg[0] | |
240 if seglen >= self.bedwin: | |
241 score = seglen | |
242 bedzero.append( | |
243 ( | |
244 chr, | |
245 seg[0], | |
246 seg[1], | |
247 "%s_%d" % (bwlabel, score), | |
248 score, | |
249 ) | |
250 ) | |
232 if self.qhi is not None: | 251 if self.qhi is not None: |
233 self.bwtop = np.quantile(bw, self.qhi) | 252 self.bwtop = np.quantile(bw, self.qhi) |
234 bwhi = self.processVals(bw, isTop=True) | 253 bwhi = self.processVals(bw, isTop=True, isZero=False) |
235 for j, seg in enumerate(bwhi): | 254 for j, seg in enumerate(bwhi): |
236 seglen = seg[1] - seg[0] | 255 seglen = seg[1] - seg[0] |
237 if seglen >= self.bedwin: | 256 if seglen >= self.bedwin: |
238 score = np.sum(bw[seg[0]:seg[1]]) / float(seglen) | 257 score = np.sum(bw[seg[0]:seg[1]]) / float(seglen) |
239 bedhi.append( | 258 bedhi.append( |
245 score, | 264 score, |
246 ) | 265 ) |
247 ) | 266 ) |
248 if self.qlo is not None: | 267 if self.qlo is not None: |
249 self.bwbot = np.quantile(bw, self.qlo) | 268 self.bwbot = np.quantile(bw, self.qlo) |
250 bwlo = self.processVals(bw, isTop=False) | 269 bwlo = self.processVals(bw, isTop=False, isZero=False) |
251 for j, seg in enumerate(bwlo): | 270 for j, seg in enumerate(bwlo): |
252 seglen = seg[1] - seg[0] | 271 seglen = seg[1] - seg[0] |
253 if seg[1] - seg[0] >= self.bedwin: | 272 if seg[1] - seg[0] >= self.bedwin: |
254 score = ( | 273 score = ( |
255 -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen) | 274 -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen) |
278 stable = "\n".join(restab) | 297 stable = "\n".join(restab) |
279 with open(self.tableoutfile, "w") as t: | 298 with open(self.tableoutfile, "w") as t: |
280 t.write(stable) | 299 t.write(stable) |
281 t.write("\n") | 300 t.write("\n") |
282 some = False | 301 some = False |
302 if self.outbeds in ["outzero"]: | |
303 self.writeBed(bedzero, self.bedoutzero) | |
304 some = True | |
283 if self.qlo: | 305 if self.qlo: |
284 if self.outbeds in ["outall", "outlo", "outlohi"]: | 306 if self.outbeds in ["outall", "outlo", "outlohi"]: |
285 self.writeBed(bedlo, self.bedoutlo) | 307 self.writeBed(bedlo, self.bedoutlo) |
286 some = True | 308 some = True |
287 if self.qhi: | 309 if self.qhi: |
306 a("-l", "--qlo", default=None) | 328 a("-l", "--qlo", default=None) |
307 a("-i", "--qhi", default=None, type=float) | 329 a("-i", "--qhi", default=None, type=float) |
308 a("--bedouthi", default=None) | 330 a("--bedouthi", default=None) |
309 a("--bedoutlo", default=None) | 331 a("--bedoutlo", default=None) |
310 a("--bedouthilo", default=None) | 332 a("--bedouthilo", default=None) |
333 a("--bedoutzero", default=None) | |
311 a("-w", "--bigwig", nargs="+") | 334 a("-w", "--bigwig", nargs="+") |
312 a("-n", "--bigwiglabels", nargs="+") | 335 a("-n", "--bigwiglabels", nargs="+") |
313 a("-o", "--outbeds", default="outhilo", help="optional high and low combined bed") | 336 a("-o", "--outbeds", default="outhilo", help="optional high and low combined bed") |
314 a("-t", "--tableoutfile", default=None) | 337 a("-t", "--tableoutfile", default=None) |
315 args = parser.parse_args() | 338 args = parser.parse_args() |