comparison find_str.py @ 4:5f8efb080f49 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 891fc6321cd94c9a63c880d75989d79521f1a9b6
author iuc
date Sat, 14 Sep 2024 12:17:02 +0000
parents 2b970db61912
children
comparison
equal deleted inserted replaced
3:8c8299e553ec 4:5f8efb080f49
5 from pyfastx import Fastx # 0.5.2 5 from pyfastx import Fastx # 0.5.2
6 6
7 """ 7 """
8 Allows all STR or those for a subset of motifs to be written to a bed file 8 Allows all STR or those for a subset of motifs to be written to a bed file
9 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. 9 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP.
10 Note that there are only four possible types of dinucleotide repeat, because CA = AC = GT = TG, GA = AG = CT = TC, AT = TA, and GC = CG.
10 """ 11 """
11 12
12 13
13 def getDensity(name, bed, chrlen, winwidth): 14 def getDensity(name, bed, chrlen, winwidth):
14 """ 15 """
20 for b in bed: 21 for b in bed:
21 nt = b[5] 22 nt = b[5]
22 bin = int(b[1] / winwidth) 23 bin = int(b[1] / winwidth)
23 d[bin] += nt 24 d[bin] += nt
24 bedg = [ 25 bedg = [
25 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) 26 (name, (x * winwidth), ((x + 1) * winwidth), float(d[x]))
26 for x in range(nwin + 1) 27 for x in range(nwin + 1)
27 if (x + 1) * winwidth <= chrlen 28 if (x + 1) * winwidth <= chrlen
28 ] 29 ]
29 return bedg 30 return bedg
30 31
80 cbed.append(row) 81 cbed.append(row)
81 elif args.hexa and len(ssr.motif) == 6: 82 elif args.hexa and len(ssr.motif) == 6:
82 cbed.append(row) 83 cbed.append(row)
83 if args.bigwig: 84 if args.bigwig:
84 w = getDensity(name, cbed, chrlen, args.winwidth) 85 w = getDensity(name, cbed, chrlen, args.winwidth)
85 wig += w 86 wig.extend(w)
86 bed += cbed 87 bed.extend(cbed)
87 if args.bigwig: 88 if args.bigwig:
88 wig.sort() 89 wig.sort()
89 bedg = ["%s %d %d %.2f" % x for x in wig]
90 with open("temp.bedg", "w") as bw: 90 with open("temp.bedg", "w") as bw:
91 bw.write("\n".join(bedg)) 91 for row in wig:
92 bw.write("%s %d %d %.2f\n" % row)
92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] 93 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()]
93 with open("temp.chromlen", "w") as cl: 94 with open("temp.chromlen", "w") as cl:
94 cl.write("\n".join(chroms)) 95 cl.write("\n".join(chroms))
95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", args.bed] 96 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", args.bed]
96 subprocess.run(cmd) 97 subprocess.run(cmd)
97 else: 98 else:
98 bed.sort() 99 bed.sort()
99 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed]
100 with open(args.bed, "w") as outbed: 100 with open(args.bed, "w") as outbed:
101 outbed.write("\n".join(obed)) 101 for row in bed:
102 outbed.write("\n") 102 outbed.write("%s\t%d\t%d\t%s_%d\t%d\n" % row)
103 103
104 104
105 if __name__ == "__main__": 105 if __name__ == "__main__":
106 parser = argparse.ArgumentParser() 106 parser = argparse.ArgumentParser()
107 a = parser.add_argument 107 a = parser.add_argument