Mercurial > repos > iuc > data_manager_mash_sketch_builder
comparison data_manager/mash_sketch_builder.py @ 0:2af9137ba067 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit c6efcbece52dec310253537b35419839746fff7f"
| author | iuc |
|---|---|
| date | Wed, 26 Feb 2020 17:06:21 -0500 |
| parents | |
| children | b6016642539d |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2af9137ba067 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import errno | |
| 5 import json | |
| 6 import os | |
| 7 import subprocess | |
| 8 import uuid | |
| 9 | |
| 10 | |
DATA_TABLE_NAME = "mash_sketches"


def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
    """Run ``mash sketch`` and return the matching data table entry.

    A fresh UUID-named sub-directory is created inside ``target_directory``
    and ``mash sketch`` is asked to write its output using the prefix
    ``<target_directory>/<uuid>/sketch``.

    Args:
        mash_sketch_args: Mapping with the keys ``kmer_size``,
            ``sketch_size``, ``probability_threshold``, ``threads``,
            ``fasta`` and ``individual_sequences``.
        sketch_name: Display name stored in the ``name`` column.
        target_directory: Existing directory that receives the sketch
            sub-directory. May be absolute or relative to the current
            working directory.
        data_table_name: Name of the data table the entry is filed under.

    Returns:
        A dict of the form ``{'data_tables': {data_table_name: [entry]}}``
        where ``entry`` has the keys ``value`` and ``path`` (both the
        generated UUID) and ``name`` (``sketch_name``).

    Raises:
        KeyError: If a required key is missing from ``mash_sketch_args``.
        OSError: If the sketch directory cannot be created or ``mash``
            cannot be executed.
        subprocess.CalledProcessError: If ``mash sketch`` exits non-zero.
    """
    sketch_id = str(uuid.uuid4())

    # mash is launched with cwd=target_directory, so any relative path handed
    # to it would be resolved against that directory instead of the caller's
    # working directory. Make every path absolute before spawning.
    sketch_dir = os.path.abspath(os.path.join(target_directory, sketch_id))
    os.mkdir(sketch_dir)
    sketch_path = os.path.join(sketch_dir, "sketch")
    fasta_path = os.path.abspath(str(mash_sketch_args["fasta"]))

    command = [
        'mash', 'sketch',
        '-k', str(mash_sketch_args["kmer_size"]),
        '-s', str(mash_sketch_args["sketch_size"]),
        '-w', str(mash_sketch_args["probability_threshold"]),
        '-o', sketch_path,
        '-p', str(mash_sketch_args["threads"]),
    ]
    if mash_sketch_args["individual_sequences"]:
        command.append('-i')
    # Keep the positional input last, after all options.
    command.append(fasta_path)

    subprocess.check_call(command, cwd=target_directory)

    # NOTE(review): "path" holds only the UUID directory name, not the sketch
    # file itself - confirm this is what consumers of the data table expect.
    return {
        'data_tables': {
            data_table_name: [
                {
                    "value": sketch_id,
                    "name": sketch_name,
                    "path": sketch_id,
                }
            ]
        }
    }
| 48 | |
| 49 | |
def main():
    """Command-line entry point: build a mash sketch and report it as JSON.

    Reads the JSON file named by the positional ``data_manager_json``
    argument, takes ``output_data[0].extra_files_path`` from it as the target
    directory, creates that directory if needed, runs ``mash_sketch`` and then
    overwrites the same JSON file with the returned data table entry.

    Raises:
        OSError: If the target directory cannot be created for a reason other
            than it already existing as a directory.
        KeyError / IndexError: If the input JSON lacks
            ``output_data[0]['extra_files_path']``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    # NOTE(review): the mash documentation lists -k as limited to 1-32, so the
    # default of 35 (and the sketch size default of 31) look like leftovers
    # from another tool - confirm before relying on the defaults.
    parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
    parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='sketch size (maximum number of min-hashes per sketch)')
    parser.add_argument('--probability-threshold', dest='probability_threshold', type=float, default=0.01, help='Probability threshold for warning about low k-mer size')
    parser.add_argument('--individual-sequences', dest='individual_sequences', action='store_true', default=False, help='Sketch individual sequences (for multi-fasta files)')
    # Without an input file the literal string "None" would be handed to mash.
    parser.add_argument('--fasta', dest='fasta', required=True, help='Fasta file to sketch')
    parser.add_argument('--threads', dest='threads', type=int, default=1, help='threads')
    parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
    args = parser.parse_args()

    # Close the handle deterministically; the same file is rewritten below.
    with open(args.data_manager_json) as json_in:
        data_manager_input = json.load(json_in)

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already existing directory is fine; anything else is an error.
        if not (exc.errno == errno.EEXIST and os.path.isdir(target_directory)):
            raise

    mash_sketch_args = {
        "kmer_size": args.kmer_size,
        "sketch_size": args.sketch_size,
        "probability_threshold": args.probability_threshold,
        "fasta": args.fasta,
        "individual_sequences": args.individual_sequences,
        "threads": args.threads,
    }

    data_manager_output = mash_sketch(
        mash_sketch_args,
        args.sketch_name,
        target_directory,
    )

    # Use a context manager so the result is flushed and closed before exit.
    with open(args.data_manager_json, 'w') as json_out:
        json_out.write(json.dumps(data_manager_output, sort_keys=True))


if __name__ == "__main__":
    main()
