Mercurial > repos > iuc > microsatbed
comparison find_str.py @ 4:5f8efb080f49 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 891fc6321cd94c9a63c880d75989d79521f1a9b6
author | iuc |
---|---|
date | Sat, 14 Sep 2024 12:17:02 +0000 |
parents | 2b970db61912 |
children |
comparison
equal
deleted
inserted
replaced
3:8c8299e553ec | 4:5f8efb080f49 |
---|---|
5 from pyfastx import Fastx # 0.5.2 | 5 from pyfastx import Fastx # 0.5.2 |
6 | 6 |
7 """ | 7 """ |
8 Allows all STR or those for a subset of motifs to be written to a bed file | 8 Allows all STR or those for a subset of motifs to be written to a bed file |
9 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 9 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. |
10 Note that there are only four possible types of dinucleotide repeat, because CA = AC = GT = TG, GA = AG = CT = TC, AT = TA, and GC = CG. | |
10 """ | 11 """ |
11 | 12 |
12 | 13 |
13 def getDensity(name, bed, chrlen, winwidth): | 14 def getDensity(name, bed, chrlen, winwidth): |
14 """ | 15 """ |
20 for b in bed: | 21 for b in bed: |
21 nt = b[5] | 22 nt = b[5] |
22 bin = int(b[1] / winwidth) | 23 bin = int(b[1] / winwidth) |
23 d[bin] += nt | 24 d[bin] += nt |
24 bedg = [ | 25 bedg = [ |
25 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) | 26 (name, (x * winwidth), ((x + 1) * winwidth), float(d[x])) |
26 for x in range(nwin + 1) | 27 for x in range(nwin + 1) |
27 if (x + 1) * winwidth <= chrlen | 28 if (x + 1) * winwidth <= chrlen |
28 ] | 29 ] |
29 return bedg | 30 return bedg |
30 | 31 |
80 cbed.append(row) | 81 cbed.append(row) |
81 elif args.hexa and len(ssr.motif) == 6: | 82 elif args.hexa and len(ssr.motif) == 6: |
82 cbed.append(row) | 83 cbed.append(row) |
83 if args.bigwig: | 84 if args.bigwig: |
84 w = getDensity(name, cbed, chrlen, args.winwidth) | 85 w = getDensity(name, cbed, chrlen, args.winwidth) |
85 wig += w | 86 wig.extend(w) |
86 bed += cbed | 87 bed.extend(cbed) |
87 if args.bigwig: | 88 if args.bigwig: |
88 wig.sort() | 89 wig.sort() |
89 bedg = ["%s %d %d %.2f" % x for x in wig] | |
90 with open("temp.bedg", "w") as bw: | 90 with open("temp.bedg", "w") as bw: |
91 bw.write("\n".join(bedg)) | 91 for row in wig: |
92 bw.write("%s %d %d %.2f\n" % row) | |
92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] | 93 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] |
93 with open("temp.chromlen", "w") as cl: | 94 with open("temp.chromlen", "w") as cl: |
94 cl.write("\n".join(chroms)) | 95 cl.write("\n".join(chroms)) |
95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", args.bed] | 96 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", args.bed] |
96 subprocess.run(cmd) | 97 subprocess.run(cmd) |
97 else: | 98 else: |
98 bed.sort() | 99 bed.sort() |
99 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | |
100 with open(args.bed, "w") as outbed: | 100 with open(args.bed, "w") as outbed: |
101 outbed.write("\n".join(obed)) | 101 for row in bed: |
102 outbed.write("\n") | 102 outbed.write("%s\t%d\t%d\t%s_%d\t%d\n" % row) |
103 | 103 |
104 | 104 |
105 if __name__ == "__main__": | 105 if __name__ == "__main__": |
106 parser = argparse.ArgumentParser() | 106 parser = argparse.ArgumentParser() |
107 a = parser.add_argument | 107 a = parser.add_argument |