Mercurial > repos > fubar > microsatbed
comparison find_str.py @ 29:efc775ab30fe draft
planemo upload for repository https://github.com/fubar2/microsatbed commit d952bc313f408735456747c3d33e09a3170c8f59-dirty
| author | fubar | 
|---|---|
| date | Fri, 19 Jul 2024 23:32:59 +0000 | 
| parents | 26e9575c2c83 | 
| children | 53c4f91c6031 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 28:4cb6cc083620 | 29:efc775ab30fe | 
|---|---|
| 1 import argparse | 1 import argparse | 
| 2 import shutil | 2 import shutil | 
| 3 import subprocess | 3 import subprocess | 
| 4 | 4 | 
| 5 import pybigtools | |
| 6 import pytrf # 1.3.0 | 5 import pytrf # 1.3.0 | 
| 7 from pyfastx import Fastx # 0.5.2 | 6 from pyfastx import Fastx # 0.5.2 | 
| 8 | 7 | 
| 9 """ | 8 """ | 
| 10 Allows all STR or those for a subset of motifs to be written to a bed file | 9 Allows all STR or those for a subset of motifs to be written to a bed file | 
| 11 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 10 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 
| 12 """ | 11 """ | 
| 13 | 12 | 
| 14 | 13 | 
| 15 def getDensity(name, bed, chrlen, winwidth): | 14 def getDensity(name, bed, chrlen, winwidth): | 
| | 15 """ | 
| | 16 pybigtools can write bigwigs and they are processed by other ucsc tools - but jb2 will not read them. | 
| | 17 Swapped the conversion to use a bedgraph file processed by bedGraphToBigWig | 
| | 18 """ | 
| 16 nwin = int(chrlen / winwidth) | 19 nwin = int(chrlen / winwidth) | 
| 17 d = [0.0 for x in range(nwin + 1)] | 20 d = [0.0 for x in range(nwin + 1)] | 
| 18 for b in bed: | 21 for b in bed: | 
| 19 nt = b[5] | 22 nt = b[5] | 
| 20 bin = int(b[1] / winwidth) | 23 bin = int(b[1] / winwidth) | 
| 21 d[bin] += nt | 24 d[bin] += nt | 
| 22 dw = [ | 25 bedg = [ | 
| 23 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) | 26 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) | 
| 24 for x in range(nwin + 1) | 27 for x in range(nwin + 1) | 
| 25 if (x + 1) * winwidth <= chrlen | 28 if (x + 1) * winwidth <= chrlen | 
| 26 ] | 29 ] | 
| 27 return dw | 30 return bedg | 
| 28 | 31 | 
| 29 | 32 | 
| 30 def write_ssrs(args): | 33 def write_ssrs(args): | 
| 31 """ | 34 """ | 
| 32 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats | 35 The integers in the call change the minimum repeats for mono-, di-, tri-, tetra-, penta-, hexa-nucleotide repeats | 
| 84 wig += w | 87 wig += w | 
| 85 bed += cbed | 88 bed += cbed | 
| 86 if args.bigwig: | 89 if args.bigwig: | 
| 87 wig.sort() | 90 wig.sort() | 
| 88 bedg = ['%s %d %d %.3f' % x for x in wig] | 91 bedg = ['%s %d %d %.3f' % x for x in wig] | 
| 89 # bedg.insert(0,'track type=bedGraph') https://genomebrowser.wustl.edu/goldenPath/help/bigWig.html | |
| 90 with open("temp.bedg", "w") as bw: | 92 with open("temp.bedg", "w") as bw: | 
| 91 bw.write('\n'.join(bedg)) | 93 bw.write('\n'.join(bedg)) | 
| 92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] | 94 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] | 
| 93 with open("temp.chromlen", "w") as cl: | 95 with open("temp.chromlen", "w") as cl: | 
| 94 cl.write('\n'.join(chroms)) | 96 cl.write('\n'.join(chroms)) | 
| 95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] | 97 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", "temp.bw" ] | 
| 96 subprocess.run(cmd) | 98 subprocess.run(cmd) | 
| 97 #bw = pybigtools.open("temp.bw", "w") | |
| 98 #bw.write(chrlens, wig) | |
| 99 shutil.move("temp.bw", args.bed) | 99 shutil.move("temp.bw", args.bed) | 
| 100 else: | 100 else: | 
| 101 bed.sort() | 101 bed.sort() | 
| 102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | 102 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | 
| 103 with open(args.bed, "w") as outbed: | 103 with open(args.bed, "w") as outbed: | 
