# HG changeset patch
# User iuc
# Date 1582754781 18000
# Node ID 2af9137ba06788858bf2ec88491ec9e3cca6202b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit c6efcbece52dec310253537b35419839746fff7f"
diff -r 000000000000 -r 2af9137ba067 data_manager/mash_sketch_builder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/mash_sketch_builder.py Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+
+import argparse
+import errno
+import json
+import os
+import subprocess
+import uuid
+
+
+DATA_TABLE_NAME = "mash_sketches"  # default data table key used in mash_sketch()'s returned entry
+
+
+def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
+    """Run ``mash sketch`` and describe the result as a data table entry.
+
+    A directory named after a fresh UUID4 is created under target_directory
+    and passed to mash (``-o``) as ``<uuid>/sketch``. mash_sketch_args must
+    provide kmer_size, sketch_size, probability_threshold, threads, fasta
+    and individual_sequences. Raises OSError if the directory cannot be
+    created and subprocess.CalledProcessError if mash exits non-zero.
+
+    Returns ``{'data_tables': {data_table_name: [entry]}}`` where the entry
+    uses the UUID as both "value" and "path" and sketch_name as "name".
+    """
+    sketch_id = str(uuid.uuid4())
+    sketch_dir = os.path.join(target_directory, sketch_id)
+    os.mkdir(sketch_dir)
+
+    command = ['mash', 'sketch']
+    for flag, key in (('-k', "kmer_size"), ('-s', "sketch_size"), ('-w', "probability_threshold")):
+        command += [flag, str(mash_sketch_args[key])]
+    command += ['-o', str(os.path.join(sketch_dir, "sketch"))]
+    command += ['-p', str(mash_sketch_args["threads"])]
+    command.append(str(mash_sketch_args["fasta"]))
+    # The optional -i flag goes last, after the input file.
+    if mash_sketch_args["individual_sequences"]:
+        command.append("-i")
+
+    subprocess.check_call(command, cwd=target_directory)
+
+    entry = {
+        "value": sketch_id,
+        "name": sketch_name,
+        "path": sketch_id,
+    }
+    return {'data_tables': {data_table_name: [entry]}}
+
+
+def main():
+    """Parse the command line, run mash_sketch() and write the data manager JSON.
+
+    The data_manager_json file supplies the target directory (the output
+    dataset's extra_files_path) and is overwritten with the resulting entry.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('data_manager_json')
+    parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
+    parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='sketch size (maximum number of min-hashes per sketch)')
+    parser.add_argument('--probability-threshold', dest='probability_threshold', type=float, default=0.01, help='Probability threshold for warning about low k-mer size')
+    parser.add_argument('--individual-sequences', dest='individual_sequences', action='store_true', default=False, help='Sketch individual sequences (for multi-fasta files)')
+    parser.add_argument('--fasta', dest='fasta', help='Fasta file to sketch')
+    parser.add_argument('--threads', dest='threads', type=int, default=1, help='threads')
+    parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
+    args = parser.parse_args()
+
+    # Context managers close the JSON file promptly instead of leaking the handle.
+    with open(args.data_manager_json) as fh:
+        data_manager_input = json.load(fh)
+
+    target_directory = data_manager_input['output_data'][0]['extra_files_path']
+
+    try:
+        os.mkdir(target_directory)
+    except OSError as exc:
+        # An already-existing directory is fine; anything else is a real error.
+        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
+            raise
+
+    mash_sketch_args = {
+        "kmer_size": args.kmer_size,
+        "sketch_size": args.sketch_size,
+        "probability_threshold": args.probability_threshold,
+        "fasta": args.fasta,
+        "individual_sequences": args.individual_sequences,
+        "threads": args.threads,
+    }
+    data_manager_output = mash_sketch(mash_sketch_args, args.sketch_name, target_directory)
+
+    with open(args.data_manager_json, 'w') as fh:
+        fh.write(json.dumps(data_manager_output, sort_keys=True))
+
+
+if __name__ == "__main__":  # call main() only when run as a script, not when imported
+ main()
diff -r 000000000000 -r 2af9137ba067 data_manager/mash_sketch_builder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/mash_sketch_builder.xml Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,74 @@
+
+
+ builder
+
+ mash
+ python
+
+
+ 2.1
+
+ mash --version
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10.1186/s13059-016-0997-x
+
+
diff -r 000000000000 -r 2af9137ba067 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
diff -r 000000000000 -r 2af9137ba067 test-data/mash_sketch_data_manager.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mash_sketch_data_manager.json Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"mash_sketches": [{"name": "sketch", "path": "sketch", "value": "sketch"}]}}
diff -r 000000000000 -r 2af9137ba067 test-data/test_assembly.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_assembly.fasta Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,3 @@
+>test
+GCATGTCGATCTGTGTGCTAGTCGTAGTCGATCGATCTGATCGATCTGTCAGTCAGTAGT
+CTCAGCGATGCATTATTATATTATATTATCGATCGATGCTGATCGATTATATTCGATCTG
diff -r 000000000000 -r 2af9137ba067 tool-data/mash_sketches.loc.sample
diff -r 000000000000 -r 2af9137ba067 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Feb 26 17:06:21 2020 -0500
@@ -0,0 +1,8 @@
+
+
+
+
+