diff tn93_cluster.py @ 2:eb6f0ec5b95e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit e9f254ea1c6712a96512cae4df91bfec8207a492
author iuc
date Sat, 28 Sep 2024 16:34:39 +0000
parents 112d80c9ccca
children
line wrap: on
line diff
--- a/tn93_cluster.py	Wed Apr 20 17:00:11 2022 +0000
+++ b/tn93_cluster.py	Sat Sep 28 16:34:39 2024 +0000
@@ -12,29 +12,34 @@
         with open(fasta_file, "w") as fh2:
             for c in cluster_json:
                 if reference_name is not None:
-                    if reference_name in c['members']:
-                        cc = c['centroid'].split('\n')
+                    if reference_name in c["members"]:
+                        cc = c["centroid"].split("\n")
                         cc[0] = ">" + reference_name
                         print("\n".join(cc), file=fh2)
                         continue
-                print(c['centroid'], file=fh2)
+                print(c["centroid"], file=fh2)
 
-    return(os.path.getmtime(fasta_file), len(cluster_json))
+    return (os.path.getmtime(fasta_file), len(cluster_json))
 
 
 def run_command(command):
-    proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    proc = subprocess.Popen(
+        shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
     stdout, stderr = proc.communicate()
     result = proc.returncode
     if result != 0:
-        print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr)
-        print('--------------------- STDOUT ---------------------')
-        print(stdout.decode().replace('\\n', '\n'))
-        print('------------------- END STDOUT -------------------')
-        print('--------------------- STDERR ---------------------', file=sys.stderr)
-        print(stderr.decode().replace('\\n', '\n'), file=sys.stderr)
-        print('------------------- END STDERR -------------------', file=sys.stderr)
-    return(int(result))
+        print(
+            "Command `%s` failed with exit code %s\n" % (command, result),
+            file=sys.stderr,
+        )
+        print("--------------------- STDOUT ---------------------")
+        print(stdout.decode().replace("\\n", "\n"))
+        print("------------------- END STDOUT -------------------")
+        print("--------------------- STDERR ---------------------", file=sys.stderr)
+        print(stderr.decode().replace("\\n", "\n"), file=sys.stderr)
+        print("------------------- END STDERR -------------------", file=sys.stderr)
+    return int(result)
 
 
 def main(arguments):
@@ -42,34 +47,55 @@
     step = threshold * 0.25
     with open(arguments.reference) as fh:
         for line in fh:
-            if line[0] == '>':
-                _ref_seq_name = line[1:].split(' ')[0].strip()
+            if line[0] == ">":
+                _ref_seq_name = line[1:].split(" ")[0].strip()
                 break
     while threshold <= 1:
-        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input)
+        command = (
+            "tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s"
+            % (
+                threshold,
+                arguments.ambigs,
+                arguments.cluster_type,
+                arguments.overlap,
+                arguments.fraction,
+                arguments.input,
+            )
+        )
         return_code = run_command(command)
         if return_code != 0:
             return return_code
-        input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name)
+        input_stamp, cluster_count = cluster_to_fasta(
+            "clusters.json", "clusters.fa", _ref_seq_name
+        )
         if cluster_count <= arguments.cluster_count:
             break
         else:
             threshold += step
-        print('Found %d clusters at threshold %f' % (cluster_count, threshold))
+        print("Found %d clusters at threshold %f" % (cluster_count, threshold))
     return 0
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well')
-    parser.add_argument('--input', help='Input MSA', required=True, type=str)
-    parser.add_argument('--reference', help='Reference sequence', required=True, type=str)
-    parser.add_argument('--output', help='Input MSA', required=True, type=str)
-    parser.add_argument('--threshold', help='Threshold', required=True, type=float)
-    parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str)
-    parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str)
-    parser.add_argument('--overlap', help='Overlap', required=True, type=int)
-    parser.add_argument('--fraction', help='Fraction', required=True, type=float)
-    parser.add_argument('--cluster-count', help='Max query', required=True, type=int)
-    parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Combine alignments into a single file, adding a reference sequence as well"
+    )
+    parser.add_argument("--input", help="Input MSA", required=True, type=str)
+    parser.add_argument(
+        "--reference", help="Reference sequence", required=True, type=str
+    )
+    parser.add_argument("--output", help="Input MSA", required=True, type=str)
+    parser.add_argument("--threshold", help="Threshold", required=True, type=float)
+    parser.add_argument("--ambigs", help="Handle ambigs", required=True, type=str)
+    parser.add_argument("--cluster-type", help="Cluster type", required=True, type=str)
+    parser.add_argument("--overlap", help="Overlap", required=True, type=int)
+    parser.add_argument("--fraction", help="Fraction", required=True, type=float)
+    parser.add_argument("--cluster-count", help="Max query", required=True, type=int)
+    parser.add_argument(
+        "--compressed",
+        help="File to write compressed clusters to",
+        required=True,
+        type=str,
+    )
     arguments = parser.parse_args()
     exit(main(arguments))