comparison data_manager/mash_sketch_builder.py @ 0:2af9137ba067 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mash_sketch_builder/ commit c6efcbece52dec310253537b35419839746fff7f"
author iuc
date Wed, 26 Feb 2020 17:06:21 -0500
parents
children b6016642539d
comparison
equal deleted inserted replaced
-1:000000000000 0:2af9137ba067
1 #!/usr/bin/env python
2
3 import argparse
4 import errno
5 import json
6 import os
7 import subprocess
8 import uuid
9
10
# Name of the data table used as the key for the generated entry.
DATA_TABLE_NAME = "mash_sketches"


def mash_sketch(mash_sketch_args, sketch_name, target_directory, data_table_name=DATA_TABLE_NAME):
    """Run ``mash sketch`` into a fresh UUID-named directory and describe the result.

    A new sub-directory named after a random UUID is created inside
    ``target_directory``; its ``sketch`` path is handed to mash as the ``-o``
    output prefix.

    Args:
        mash_sketch_args: Mapping with the keys ``kmer_size``, ``sketch_size``,
            ``probability_threshold``, ``threads``, ``fasta`` and
            ``individual_sequences``.
        sketch_name: Display name stored in the data table entry.
        target_directory: Existing directory that receives the UUID
            sub-directory. May be relative to the current working directory.
        data_table_name: Key under ``data_tables`` for the new entry.

    Returns:
        A dict of the form ``{'data_tables': {data_table_name: [entry]}}``
        where ``entry`` has ``value`` and ``path`` set to the UUID and
        ``name`` set to ``sketch_name``.

    Raises:
        KeyError: If ``mash_sketch_args`` lacks one of the required keys.
        OSError: If the UUID sub-directory cannot be created.
        subprocess.CalledProcessError: If mash exits with a non-zero status.
    """
    sketch_id = str(uuid.uuid4())

    # mash is started with cwd=target_directory, so a relative path would be
    # resolved against that directory rather than the caller's working
    # directory. Resolve both locations before the working directory changes.
    target_directory = os.path.abspath(target_directory)
    fasta_path = os.path.abspath(str(mash_sketch_args["fasta"]))

    sketch_directory = os.path.join(target_directory, sketch_id)
    os.mkdir(sketch_directory)
    sketch_path = os.path.join(sketch_directory, "sketch")

    command = [
        'mash', 'sketch',
        '-k', str(mash_sketch_args["kmer_size"]),
        '-s', str(mash_sketch_args["sketch_size"]),
        '-w', str(mash_sketch_args["probability_threshold"]),
        '-o', sketch_path,
        '-p', str(mash_sketch_args["threads"]),
    ]

    if mash_sketch_args["individual_sequences"]:
        command.append('-i')

    # Keep the positional input last, after every option flag.
    command.append(fasta_path)

    subprocess.check_call(command, cwd=target_directory)

    # "path" stays relative (just the UUID), exactly like "value".
    return {
        'data_tables': {
            data_table_name: [
                {
                    "value": sketch_id,
                    "name": sketch_name,
                    "path": sketch_id,
                }
            ]
        }
    }
48
49
def main():
    """Command-line entry point: build a sketch and write the data table JSON.

    Reads the JSON file named by the positional ``data_manager_json``
    argument, takes ``output_data[0].extra_files_path`` from it as the target
    directory (creating it if needed), runs ``mash_sketch`` with the parsed
    options, and then overwrites the same JSON file with the returned data
    table entry.

    Raises:
        OSError: If the target directory cannot be created for a reason other
            than already existing as a directory.
        subprocess.CalledProcessError: Propagated from ``mash_sketch`` when
            mash fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('data_manager_json')
    # NOTE(review): the Mash documentation lists -k as 1-32 (default 21) and
    # -s with a default of 1000; the defaults below are kept unchanged for
    # compatibility but look inherited from another tool -- confirm.
    parser.add_argument('--kmer-size', dest='kmer_size', type=int, default=35, help='kmer length')
    parser.add_argument('--sketch-size', dest='sketch_size', type=int, default=31, help='sketch size')
    parser.add_argument('--probability-threshold', dest='probability_threshold', type=float, default=0.01, help='Probability threshold for warning about low k-mer size')
    parser.add_argument('--individual-sequences', dest='individual_sequences', action='store_true', default=False, help='Sketch individual sequences (for multi-fasta files)')
    # Without an input file the literal string "None" would reach mash.
    parser.add_argument('--fasta', dest='fasta', required=True, help='Fasta file to sketch')
    parser.add_argument('--threads', dest='threads', type=int, default=1, help='threads')
    parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch')
    args = parser.parse_args()

    with open(args.data_manager_json) as handle:
        data_manager_input = json.load(handle)

    target_directory = data_manager_input['output_data'][0]['extra_files_path']

    try:
        os.mkdir(target_directory)
    except OSError as exc:
        # An already-existing directory is fine; anything else is an error.
        if exc.errno != errno.EEXIST or not os.path.isdir(target_directory):
            raise

    mash_sketch_args = {
        "kmer_size": args.kmer_size,
        "sketch_size": args.sketch_size,
        "probability_threshold": args.probability_threshold,
        "fasta": args.fasta,
        "individual_sequences": args.individual_sequences,
        "threads": args.threads,
    }

    data_manager_output = mash_sketch(
        mash_sketch_args,
        args.sketch_name,
        target_directory,
    )

    # Serialise before opening the file so that a serialisation failure does
    # not leave the input JSON truncated.
    serialized = json.dumps(data_manager_output, sort_keys=True)
    with open(args.data_manager_json, 'w') as handle:
        handle.write(serialized)


# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()