diff find_str.py @ 4:5f8efb080f49 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 891fc6321cd94c9a63c880d75989d79521f1a9b6
author iuc
date Sat, 14 Sep 2024 12:17:02 +0000
parents 2b970db61912
children
line wrap: on
line diff
--- a/find_str.py	Thu Sep 12 15:41:01 2024 +0000
+++ b/find_str.py	Sat Sep 14 12:17:02 2024 +0000
@@ -7,6 +7,7 @@
 """
 Allows all STR or those for a subset of motifs to be written to a bed file
 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP.
+Note that there are only four possible types of dinucleotide repeat, because a repeated motif is equivalent to its phase-shifted form and to its reverse complement: CA = AC = GT = TG, GA = AG = CT = TC, AT = TA, and GC = CG.
 """
 
 
@@ -22,7 +23,7 @@
         bin = int(b[1] / winwidth)
         d[bin] += nt
     bedg = [
-        (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x]))
+        (name, (x * winwidth), ((x + 1) * winwidth), float(d[x]))
         for x in range(nwin + 1)
         if (x + 1) * winwidth <= chrlen
     ]
@@ -82,13 +83,13 @@
                 cbed.append(row)
         if args.bigwig:
             w = getDensity(name, cbed, chrlen, args.winwidth)
-            wig += w
-        bed += cbed
+            wig.extend(w)
+        bed.extend(cbed)
     if args.bigwig:
         wig.sort()
-        bedg = ["%s %d %d %.2f" % x for x in wig]
         with open("temp.bedg", "w") as bw:
-            bw.write("\n".join(bedg))
+            for row in wig:
+                bw.write("%s %d %d %.2f\n" % row)
         chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()]
         with open("temp.chromlen", "w") as cl:
             cl.write("\n".join(chroms))
@@ -96,10 +97,9 @@
         subprocess.run(cmd)
     else:
         bed.sort()
-        obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed]
         with open(args.bed, "w") as outbed:
-            outbed.write("\n".join(obed))
-            outbed.write("\n")
+            for row in bed:
+                outbed.write("%s\t%d\t%d\t%s_%d\t%d\n" % row)
 
 
 if __name__ == "__main__":