Mercurial > repos > fubar > microsatbed
comparison find_str.py @ 29:efc775ab30fe draft
planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
| author | fubar |
|---|---|
| date | Fri, 19 Jul 2024 23:32:59 +0000 |
| parents | 26e9575c2c83 |
| children | 53c4f91c6031 |
comparison
equal
deleted
inserted
replaced
| 28:4cb6cc083620 | 29:efc775ab30fe |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import shutil | 2 import shutil |
| 3 import subprocess | 3 import subprocess |
| 4 | 4 |
| 5 import pybigtools | |
| 6 import pytrf # 1.3.0 | 5 import pytrf # 1.3.0 |
| 7 from pyfastx import Fastx # 0.5.2 | 6 from pyfastx import Fastx # 0.5.2 |
| 8 | 7 |
| 9 """ | 8 """ |
| 10 Allows all STR or those for a subset of motifs to be written to a bed file | 9 Allows all STR or those for a subset of motifs to be written to a bed file |
| 11 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 10 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. |
| 12 """ | 11 """ |
| 13 | 12 |
| 14 | 13 |
| 15 def getDensity(name, bed, chrlen, winwidth): | 14 def getDensity(name, bed, chrlen, winwidth): |
| 15 """ | |
| 16 pybigtools can write bigwigs and they are processed by other ucsc tools - but jb2 will not read them. | |
| 17 Swapped the conversion to use a bedgraph file processed by bedGraphToBigWig | |
| 18 """ | |
| 16 nwin = int(chrlen / winwidth) | 19 nwin = int(chrlen / winwidth) |
| 17 d = [0.0 for x in range(nwin + 1)] | 20 d = [0.0 for x in range(nwin + 1)] |
| 18 for b in bed: | 21 for b in bed: |
| 19 nt = b[5] | 22 nt = b[5] |
| 20 bin = int(b[1] / winwidth) | 23 bin = int(b[1] / winwidth) |
| 21 d[bin] += nt | 24 d[bin] += nt |
| 22 dw = [ | 25 bedg = [ |
| 23 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) | 26 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) |
| 24 for x in range(nwin + 1) | 27 for x in range(nwin + 1) |
| 25 if (x + 1) * winwidth <= chrlen | 28 if (x + 1) * winwidth <= chrlen |
| 26 ] | 29 ] |
| 27 return dw | 30 return bedg |
| 28 | 31 |
| 29 | 32 |
| 30 def write_ssrs(args): | 33 def write_ssrs(args): |
| 31 """ | 34 """ |
| 32 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats | 35 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats |
| 84 wig += w | 87 wig += w |
| 85 bed += cbed | 88 bed += cbed |
| 86 if args.bigwig: | 89 if args.bigwig: |
| 87 wig.sort() | 90 wig.sort() |
| 88 bedg = ['%s %d %d %.3f' % x for x in wig] | 91 bedg = ['%s %d %d %.3f' % x for x in wig] |
| 89 # bedg.insert(0,'track type=bedGraph') https://genomebrowser.wustl.edu/goldenPath/help/bigWig.html | |
| 90 with open("temp.bedg", "w") as bw: | 92 with open("temp.bedg", "w") as bw: |
| 91 bw.write('\n'.join(bedg)) | 93 bw.write('\n'.join(bedg)) |
| 92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] | 94 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] |
| 93 with open("temp.chromlen", "w") as cl: | 95 with open("temp.chromlen", "w") as cl: |
| 94 cl.write('\n'.join(chroms)) | 96 cl.write('\n'.join(chroms)) |
| 95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] | 97 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] |
| 96 subprocess.run(cmd) | 98 subprocess.run(cmd) |
| 97 #bw = pybigtools.open("temp.bw", "w") | |
| 98 #bw.write(chrlens, wig) | |
| 99 shutil.move("temp.bw", args.bed) | 99 shutil.move("temp.bw", args.bed) |
| 100 else: | 100 else: |
| 101 bed.sort() | 101 bed.sort() |
| 102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | 102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] |
| 103 with open(args.bed, "w") as outbed: | 103 with open(args.bed, "w") as outbed: |
