Mercurial > repos > iuc > data_manager_metaphlan_database_downloader
comparison data_manager/data_manager_metaphlan_download.py @ 0:169b08c9713c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_metaphlan_database_downloader commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author | iuc |
---|---|
date | Mon, 19 Apr 2021 20:54:06 +0000 |
parents | |
children | 5fe20c915fa6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:169b08c9713c |
---|---|
1 #!/usr/bin/env python | |
2 # | |
3 # Data manager for reference data for the MetaPhlAn Galaxy tools | |
4 import argparse | |
5 import json | |
6 import subprocess | |
7 from datetime import date | |
8 from pathlib import Path | |
9 | |
10 | |
11 # Utility functions for interacting with Galaxy JSON | |
def read_input_json(json_fp):
    """Load the JSON file handed over by the data manager tool

    Returns a tuple (param_dict,extra_files_path)

    'param_dict' is the value stored under the 'param_dict'
    key of the JSON document; 'extra_files_path' is a
    pathlib.Path built from the 'extra_files_path' value of
    the first item under 'output_data'.

    NB the directory that 'extra_files_path' points to is
    not created here; the caller has to make it if needed.

    """
    with open(json_fp) as handle:
        payload = json.load(handle)
    # Look up 'param_dict' first so that a malformed document
    # fails on the same key as it always has
    param_dict = payload['param_dict']
    first_output = payload['output_data'][0]
    extra_files_path = Path(first_output['extra_files_path'])
    return (param_dict, extra_files_path)
31 | |
32 | |
33 # Utility functions for creating data table dictionaries | |
34 # | |
35 # Example usage: | |
36 # >>> d = create_data_tables_dict() | |
37 # >>> add_data_table(d,'my_data') | |
38 # >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human')) | |
39 # >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse')) | |
40 # >>> print(json.dumps(d)) | |
def create_data_tables_dict():
    """Build an empty container for data table information

    The returned dictionary has a single 'data_tables' key
    mapped to an empty dictionary. It is the structure that
    'add_data_table' and 'add_data_table_entry' operate on,
    and it can be serialised to JSON for the data manager.

    """
    return dict(data_tables=dict())
54 | |
55 | |
def add_data_table(d, table):
    """Register an empty data table in the data tables dictionary

    Stores a fresh empty list under the name 'table'; any
    list already stored under that name is replaced.

    """
    tables = d['data_tables']
    tables[table] = list()
63 | |
64 | |
def add_data_table_entry(d, table, entry):
    """Append one entry to a named data table

    'entry' is added to the end of the list stored for the
    data table 'table'. It should be a dictionary keyed by
    the column names of that data table.

    Raises an exception if the lookup of the named data
    table fails with a KeyError.

    """
    try:
        rows = d['data_tables'][table]
    except KeyError:
        raise Exception("add_data_table_entry: no table '%s'" % table)
    rows.append(entry)
80 | |
81 | |
def download_metaphlan_db(data_tables, index, table_name, target_dp):
    """Download a MetaPhlAn database and register it in a data table

    Runs 'metaphlan --install' for the database version 'index',
    with the subdirectory 'target_dp/index' passed as the
    '--bowtie2db' location, then appends an entry describing
    that database to the data table 'table_name'.

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    Arguments:
      data_tables: a dictionary containing the data table info
      index: version of the database to install
      table_name: name of the data table to add the entry to
      target_dp: pathlib.Path of the directory under which the
        database directory is placed

    Raises subprocess.CalledProcessError if the metaphlan
    command exits with a non-zero status, and an exception
    (from 'add_data_table_entry') if 'table_name' has not been
    added to 'data_tables'.

    """
    db_dp = target_dp / Path(index)
    # Hand the arguments over as a list and without a shell, so
    # that an index or path containing spaces or shell
    # metacharacters reaches metaphlan unchanged
    cmd = [
        "metaphlan",
        "--install",
        "--index", str(index),
        "--bowtie2db", str(db_dp),
    ]
    subprocess.check_call(cmd)
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=index,
            # The download date (DDMMYYYY) is part of the value
            value='%s-%s' % (index, date.today().strftime("%d%m%Y")),
            name="MetaPhlAn clade-specific marker genes (%s)" % index,
            path=str(db_dp)))
110 | |
111 | |
def main():
    """Command line entry point of the data manager

    Parses '--index' and '--json', reads the JSON file supplied
    by Galaxy, creates the target directory, downloads the
    requested MetaPhlAn database into it and finally overwrites
    the same JSON file with the resulting data table entries.

    """
    print("Starting...")

    # Read command line
    parser = argparse.ArgumentParser(description='Download and build MetaPhlan database')
    # Both options are needed further down; marking them as
    # required gives a clear usage error instead of a TypeError
    # from Path(None) when one of them is left out
    parser.add_argument('--index', required=True, help="Version of the database")
    parser.add_argument('--json', required=True, help="Path to JSON file")
    args = parser.parse_args()
    print("args : %s" % args)

    # Read the input JSON (only the output directory is needed;
    # the tool parameters are not used by this script)
    json_fp = Path(args.json)
    _, target_dp = read_input_json(json_fp)

    # Make the target directory
    print("Making %s" % target_dp)
    target_dp.mkdir(parents=True, exist_ok=True)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()
    add_data_table(data_tables, "metaphlan_database")

    # Fetch data from specified data sources
    print("Download and build database")
    download_metaphlan_db(
        data_tables,
        args.index,
        "metaphlan_database",
        target_dp)

    # Write output JSON (the input file is overwritten)
    print("Outputting JSON")
    with open(json_fp, 'w') as fh:
        json.dump(data_tables, fh, sort_keys=True)
    print("Done.")


if __name__ == "__main__":
    main()