comparison data_manager/data_manager_metaphlan_download.py @ 0:169b08c9713c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_metaphlan_database_downloader commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author iuc
date Mon, 19 Apr 2021 20:54:06 +0000
parents
children 5fe20c915fa6
comparison
equal deleted inserted replaced
-1:000000000000 0:169b08c9713c
1 #!/usr/bin/env python
2 #
3 # Data manager for reference data for the MetaPhlAn Galaxy tools
4 import argparse
5 import json
6 import subprocess
7 from datetime import date
8 from pathlib import Path
9
10
11 # Utility functions for interacting with Galaxy JSON
def read_input_json(json_fp):
    """Load the JSON file supplied by the data manager tool

    Arguments:
      json_fp: path of the JSON file to read

    Returns a tuple (param_dict,extra_files_path):

    - 'param_dict' is whatever is stored under the top-level
      'param_dict' key (the parameters input into the tool)
    - 'extra_files_path' is a Path built from the
      'extra_files_path' value of the first 'output_data' item;
      output files must be put in this directory for the
      receiving data manager to pick them up

    NB this function does not create the 'extra_files_path'
    directory; it doesn't exist initially, so the calling
    script has to make it if necessary.

    """
    with open(json_fp) as handle:
        payload = json.load(handle)
    # Look up 'param_dict' before 'output_data' so that a malformed
    # file fails on the same key as it always has
    param_dict = payload['param_dict']
    first_output = payload['output_data'][0]
    extra_files_path = Path(first_output['extra_files_path'])
    return (param_dict, extra_files_path)
31
32
33 # Utility functions for creating data table dictionaries
34 #
35 # Example usage:
36 # >>> d = create_data_tables_dict()
37 # >>> add_data_table(d,'my_data')
# >>> add_data_table_entry(d,'my_data',dict(dbkey='hg19',value='human'))
# >>> add_data_table_entry(d,'my_data',dict(dbkey='mm9',value='mouse'))
40 # >>> print(json.dumps(d))
def create_data_tables_dict():
    """Build an empty container for data table information

    Returns a new dictionary with a single 'data_tables' key
    mapped to an empty dictionary. Data tables and their
    entries are stored under that key, and the whole structure
    can be serialised to JSON to be sent back to the data
    manager.

    """
    return {'data_tables': {}}
54
55
def add_data_table(d, table):
    """Register an empty data table in the data tables dictionary

    Stores an empty list under the name 'table' inside
    d['data_tables']. If a table of that name is already
    present it is replaced by the new empty list.

    """
    tables = d['data_tables']
    tables[table] = []
63
64
def add_data_table_entry(d, table, entry):
    """Append one entry to an existing data table

    'entry' is added to the end of the list stored for the
    data table 'table'. It should be a dictionary whose keys
    are the names of columns in the data table.

    Raises an Exception if the lookup of the named data table
    fails with a KeyError (i.e. the table hasn't been added).

    """
    try:
        rows = d['data_tables'][table]
    except KeyError:
        raise Exception("add_data_table_entry: no table '%s'" % table)
    rows.append(entry)
80
81
def download_metaphlan_db(data_tables, index, table_name, target_dp):
    """Download a MetaPhlAn database and record it in a data table

    Runs 'metaphlan --install' for the requested index, telling
    it to use the subdirectory of 'target_dp' named after the
    index as the bowtie2 database directory, and then appends
    an entry describing that database to the data table
    'table_name'.

    The 'data_tables' dictionary should have been created using
    the 'create_data_tables_dict' and 'add_data_table' functions.

    The entry that is added has the columns:

    - dbkey: the index
    - value: the index followed by '-' and today's date
      formatted as DDMMYYYY
    - name: a descriptive label which includes the index
    - path: the database directory, as a string

    Arguments:
      data_tables: a dictionary containing the data table info
      index: version of the database to install
      table_name: name of the table to add the entry to
      target_dp: Path of the directory under which the
        database directory is placed

    Raises subprocess.CalledProcessError if the metaphlan
    command exits with a non-zero status (in which case no
    entry is added), and an Exception from
    'add_data_table_entry' if 'table_name' hasn't been added
    to 'data_tables'.

    """
    db_dp = target_dp / Path(index)
    # Hand the command over as an argument list rather than a shell
    # string: the index and the database path then reach metaphlan
    # verbatim, even if they contain spaces or shell metacharacters
    cmd = [
        "metaphlan",
        "--install",
        "--index", str(index),
        "--bowtie2db", str(db_dp),
    ]
    subprocess.check_call(cmd)
    add_data_table_entry(
        data_tables,
        table_name,
        dict(
            dbkey=index,
            value='%s-%s' % (index, date.today().strftime("%d%m%Y")),
            name="MetaPhlAn clade-specific marker genes (%s)" % index,
            path=str(db_dp)))
110
111
if __name__ == "__main__":
    print("Starting...")

    # Read command line
    # Both options are mandatory: without them the script would only
    # fail later on with an unhelpful TypeError from Path(None), so
    # let argparse report the missing option instead
    parser = argparse.ArgumentParser(description='Download and build MetaPhlan database')
    parser.add_argument('--index', required=True, help="Version of the database")
    parser.add_argument('--json', required=True, help="Path to JSON file")
    args = parser.parse_args()
    print("args : %s" % args)

    # Read the input JSON (only the output directory is needed from it)
    json_fp = Path(args.json)
    _, target_dp = read_input_json(json_fp)

    # Make the target directory
    print("Making %s" % target_dp)
    target_dp.mkdir(parents=True, exist_ok=True)

    # Set up data tables dictionary
    data_tables = create_data_tables_dict()
    add_data_table(data_tables, "metaphlan_database")

    # Fetch data from specified data sources
    print("Download and build database")
    download_metaphlan_db(
        data_tables,
        args.index,
        "metaphlan_database",
        target_dp)

    # Write output JSON
    # NB this overwrites the file that the input JSON was read from
    print("Outputting JSON")
    with open(json_fp, 'w') as fh:
        json.dump(data_tables, fh, sort_keys=True)
    print("Done.")