comparison data_manager/data_manager_semibin.py @ 0:676915c44e1e draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author iuc
date Fri, 14 Oct 2022 21:32:55 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:676915c44e1e
1 #!/usr/bin/env python
2 #
3 # Data manager for reference data (GTDB) for the SemiBin Galaxy tools
4 import argparse
5 import json
6 import subprocess
7 from datetime import date
8 from pathlib import Path
9
10
11 # Utility functions for interacting with Galaxy JSON
def read_input_json(json_fp):
    """Load the JSON file handed over by the data manager tool

    Returns a 2-tuple (param_dict, extra_files_path):

    - 'param_dict' is the value stored under the 'param_dict'
      key, i.e. the parameters that were input into the tool;
    - 'extra_files_path' is a 'Path' built from the
      'extra_files_path' entry of the first item in
      'output_data'. Output files must be written there for
      the receiving data manager to pick them up.

    NB the directory that 'extra_files_path' points at is not
    created here and doesn't exist initially; the calling
    script has to create it if necessary.

    """
    with open(json_fp) as handle:
        payload = json.load(handle)
    # Look up 'param_dict' first so that a malformed file fails on
    # the same key as it always has
    param_dict = payload['param_dict']
    first_output = payload['output_data'][0]
    extra_files_path = Path(first_output['extra_files_path'])
    return (param_dict, extra_files_path)
31
32
33 # Utility functions for creating data table dictionaries
34 #
35 # Example usage:
36 # >>> d = create_data_tables_dict()
37 # >>> add_data_table(d,'my_data')
38 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
39 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
40 # >>> print(json.dumps(d))
def create_data_tables_dict():
    """Build an empty container for data table information

    The returned dictionary has a single 'data_tables' key
    mapping to an empty dictionary. Fill it in with
    'add_data_table' and 'add_data_table_entry', then dump it
    as JSON to send it back to the data manager.

    """
    return {'data_tables': {}}
54
55
def add_data_table(d, table):
    """Register a data table in the data tables dictionary

    Stores an empty list under the name 'table' as a
    placeholder for its entries. If 'table' is already
    present its entries are replaced by the empty list.

    """
    tables = d['data_tables']
    tables[table] = []
63
64
def add_data_table_entry(d, table, entry):
    """Append one entry to a named data table

    'entry' should be a dictionary whose keys are the column
    names of the data table 'table'; it is added to the end
    of that table's list of entries.

    Raises an exception if the named data table doesn't
    exist.

    """
    try:
        rows = d['data_tables'][table]
    except KeyError:
        raise Exception("add_data_table_entry: no table '%s'" % table)
    rows.append(entry)
80
81
def download_gtdb(data_tables, table_name, target_dp, test=False):
    """Download GTDB and register it in a data table

    Runs 'SemiBin download_GTDB' with 'target_dp' as the
    reference database directory and then appends an entry
    describing that directory to the data table 'table_name'.

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    The entry has the columns 'dbkey', 'value' (today's date
    formatted as DDMMYYYY), 'name' and 'path' (the target
    directory as a string).

    Arguments:
      data_tables: a dictionary containing the data table info
      table_name: name of the table
      target_dp: directory (a 'Path') that receives the database
      test: if true, skip the download, create an empty file
        called 'empty' in 'target_dp' and register a 'test' entry

    Raises 'subprocess.CalledProcessError' if the SemiBin command
    exits with a non-zero status, and an exception if the data
    table 'table_name' doesn't exist.

    """
    db_dp = target_dp
    if not test:
        # Pass the arguments as a list (no shell) so that a directory
        # containing spaces or shell metacharacters reaches SemiBin
        # as a single, literal argument
        cmd = [
            "SemiBin",
            "download_GTDB",
            "--reference-db-data-dir",
            str(db_dp),
        ]
        subprocess.check_call(cmd)
        dbkey = 'gtdb'
        name = "GTDB reference genome generated by MMseqs2 used in SemiBin"
    else:
        dbkey = 'test'
        name = "Test"
        # Placeholder file written instead of running the download
        empty_fp = db_dp / "empty"
        empty_fp.touch()
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=dbkey,
            value=date.today().strftime("%d%m%Y"),
            name=name,
            path=str(db_dp)))
117
118
# Script entry point: parse the command line, read the JSON supplied
# by Galaxy, fetch the database and write the data table JSON back
if __name__ == "__main__":
    print("Starting...")

    # Read command line
    parser = argparse.ArgumentParser(description='Download reference genomes (GTDB)')
    # --json is mandatory: without it there is nothing to read the
    # target directory from, so let argparse report the usage error
    # instead of failing later on Path(None)
    parser.add_argument('--json', required=True, help="Path to JSON file")
    parser.add_argument('--test', action='store_true', help="Test")
    args = parser.parse_args()
    print("args : %s" % args)

    # Read the input JSON
    json_fp = Path(args.json)
    params, target_dp = read_input_json(json_fp)

    # Make the target directory
    print("Making %s" % target_dp)
    target_dp.mkdir(parents=True, exist_ok=True)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()
    add_data_table(data_tables, "gtdb")

    # Fetch data from specified data sources
    print("Download and build database")
    download_gtdb(
        data_tables,
        "gtdb",
        target_dp,
        args.test)

    # Write output JSON
    # NB this overwrites the same file the input parameters were
    # read from
    print("Outputting JSON")
    with open(json_fp, 'w') as fh:
        json.dump(data_tables, fh, sort_keys=True)
    print("Done.")