comparison bigwig_outlier_bed.py @ 4:2488bcddaf14 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit d27456ca56231eb9a4eb360c99f44af6c18a0afc
author iuc
date Mon, 30 Sep 2024 01:42:34 +0000
parents 00b3da7776a0
children
comparison
equal deleted inserted replaced
3:00b3da7776a0 4:2488bcddaf14
115 self.bedouthi = args.bedouthi 115 self.bedouthi = args.bedouthi
116 self.bedoutlo = args.bedoutlo 116 self.bedoutlo = args.bedoutlo
117 self.bedouthilo = args.bedouthilo 117 self.bedouthilo = args.bedouthilo
118 self.tableoutfile = args.tableoutfile 118 self.tableoutfile = args.tableoutfile
119 self.bedwin = args.minwin 119 self.bedwin = args.minwin
120 self.bedoutzero = args.bedoutzero
120 self.qlo = None 121 self.qlo = None
121 self.qhi = None 122 self.qhi = None
122 if args.outbeds != "outtab": 123 if args.outbeds != "outtab":
123 self.qhi = args.qhi 124 self.qhi = args.qhi
124 if args.qlo: 125 if args.qlo:
131 nlab = len(args.bigwiglabels) 132 nlab = len(args.bigwiglabels)
132 if nlab < nbw: 133 if nlab < nbw:
133 self.bwlabels += ["Nolabel"] * (nbw - nlab) 134 self.bwlabels += ["Nolabel"] * (nbw - nlab)
134 self.makeBed() 135 self.makeBed()
135 136
136 def processVals(self, bw, isTop): 137 def processVals(self, bw, isTop, isZero):
137 """ 138 """
138 idea from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html 139 idea from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html
139 Fast segmentation into regions by taking np.diff on the boolean array of over (under) cutpoint indicators in bwex. 140 Fast segmentation into regions by taking np.diff on the boolean array of over (under) cutpoint indicators in bwex.
140 This only gives non-zero values at the segment boundaries where there's a change, so those zeros are all removed in bwexdnz 141 This only gives non-zero values at the segment boundaries where there's a change, so those zeros are all removed in bwexdnz
141 leaving an array of segment start/end positions. That's twisted around into an array of start/end coordinates. 142 leaving an array of segment start/end positions. That's twisted around into an array of start/end coordinates.
142 Magical. Fast. Could do the same for means or medians over windows for sparse bigwigs like repeat regions. 143 Magical. Fast. Could do the same for means or medians over windows for sparse bigwigs like repeat regions.
143 """ 144 """
144 if isTop: 145 if isTop:
145 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s 146 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s
147 elif isZero:
148 bwex = np.r_[False, bw == 0, False] # extend with 0s
146 else: 149 else:
147 bwex = np.r_[False, bw <= self.bwbot, False] 150 bwex = np.r_[False, bw <= self.bwbot, False]
148 bwexd = np.diff(bwex) 151 bwexd = np.diff(bwex)
149 bwexdnz = bwexd.nonzero()[0] # start and end transition of each segment - nice! 152 bwexdnz = bwexd.nonzero()[0] # start and end transition of each segment - nice!
150 bwregions = np.reshape(bwexdnz, (-1, 2)) 153 bwregions = np.reshape(bwexdnz, (-1, 2))
188 return row 191 return row
189 192
190 def makeBed(self): 193 def makeBed(self):
191 bedhi = [] 194 bedhi = []
192 bedlo = [] 195 bedlo = []
196 bedzero = []
193 restab = [] 197 restab = []
194 bwlabels = self.bwlabels 198 bwlabels = self.bwlabels
195 bwnames = self.bwnames 199 bwnames = self.bwnames
196 reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot" 200 reshead = "bigwig\tcontig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"
197 for i, bwname in enumerate(bwnames): 201 for i, bwname in enumerate(bwnames):
227 "\n".join(first_few) 231 "\n".join(first_few)
228 + "\nHistogram of %s bigwig values\n" % bwlabel 232 + "\nHistogram of %s bigwig values\n" % bwlabel
229 + histo 233 + histo
230 ) 234 )
231 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered 235 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered
236 if self.bedoutzero is not None:
237 bwzero = self.processVals(bw, isTop=False, isZero=True)
238 for j, seg in enumerate(bwzero):
239 seglen = seg[1] - seg[0]
240 if seglen >= self.bedwin:
241 score = seglen
242 bedzero.append(
243 (
244 chr,
245 seg[0],
246 seg[1],
247 "%s_%d" % (bwlabel, score),
248 score,
249 )
250 )
232 if self.qhi is not None: 251 if self.qhi is not None:
233 self.bwtop = np.quantile(bw, self.qhi) 252 self.bwtop = np.quantile(bw, self.qhi)
234 bwhi = self.processVals(bw, isTop=True) 253 bwhi = self.processVals(bw, isTop=True, isZero=False)
235 for j, seg in enumerate(bwhi): 254 for j, seg in enumerate(bwhi):
236 seglen = seg[1] - seg[0] 255 seglen = seg[1] - seg[0]
237 if seglen >= self.bedwin: 256 if seglen >= self.bedwin:
238 score = np.sum(bw[seg[0]:seg[1]]) / float(seglen) 257 score = np.sum(bw[seg[0]:seg[1]]) / float(seglen)
239 bedhi.append( 258 bedhi.append(
245 score, 264 score,
246 ) 265 )
247 ) 266 )
248 if self.qlo is not None: 267 if self.qlo is not None:
249 self.bwbot = np.quantile(bw, self.qlo) 268 self.bwbot = np.quantile(bw, self.qlo)
250 bwlo = self.processVals(bw, isTop=False) 269 bwlo = self.processVals(bw, isTop=False, isZero=False)
251 for j, seg in enumerate(bwlo): 270 for j, seg in enumerate(bwlo):
252 seglen = seg[1] - seg[0] 271 seglen = seg[1] - seg[0]
253 if seg[1] - seg[0] >= self.bedwin: 272 if seg[1] - seg[0] >= self.bedwin:
254 score = ( 273 score = (
255 -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen) 274 -1 * np.sum(bw[seg[0]:seg[1]]) / float(seglen)
278 stable = "\n".join(restab) 297 stable = "\n".join(restab)
279 with open(self.tableoutfile, "w") as t: 298 with open(self.tableoutfile, "w") as t:
280 t.write(stable) 299 t.write(stable)
281 t.write("\n") 300 t.write("\n")
282 some = False 301 some = False
302 if self.outbeds in ["outzero"]:
303 self.writeBed(bedzero, self.bedoutzero)
304 some = True
283 if self.qlo: 305 if self.qlo:
284 if self.outbeds in ["outall", "outlo", "outlohi"]: 306 if self.outbeds in ["outall", "outlo", "outlohi"]:
285 self.writeBed(bedlo, self.bedoutlo) 307 self.writeBed(bedlo, self.bedoutlo)
286 some = True 308 some = True
287 if self.qhi: 309 if self.qhi:
306 a("-l", "--qlo", default=None) 328 a("-l", "--qlo", default=None)
307 a("-i", "--qhi", default=None, type=float) 329 a("-i", "--qhi", default=None, type=float)
308 a("--bedouthi", default=None) 330 a("--bedouthi", default=None)
309 a("--bedoutlo", default=None) 331 a("--bedoutlo", default=None)
310 a("--bedouthilo", default=None) 332 a("--bedouthilo", default=None)
333 a("--bedoutzero", default=None)
311 a("-w", "--bigwig", nargs="+") 334 a("-w", "--bigwig", nargs="+")
312 a("-n", "--bigwiglabels", nargs="+") 335 a("-n", "--bigwiglabels", nargs="+")
313 a("-o", "--outbeds", default="outhilo", help="optional high and low combined bed") 336 a("-o", "--outbeds", default="outhilo", help="optional high and low combined bed")
314 a("-t", "--tableoutfile", default=None) 337 a("-t", "--tableoutfile", default=None)
315 args = parser.parse_args() 338 args = parser.parse_args()