annotate data_manager/data_manager_semibin.py @ 0:1e4dd26db773 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author bgruening
date Fri, 14 Oct 2022 21:29:47 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
2 #
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
3 # Data manager for reference data for the MetaPhlAn Galaxy tools
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
4 import argparse
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
5 import json
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
6 import subprocess
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
7 from datetime import date
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
8 from pathlib import Path
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
9
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
10
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
11 # Utility functions for interacting with Galaxy JSON
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
12 def read_input_json(json_fp):
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
13 """Read the JSON supplied from the data manager tool
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
14
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
15 Returns a tuple (param_dict,extra_files_path)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
16
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
17 'param_dict' is an arbitrary dictionary of parameters
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
18 input into the tool; 'extra_files_path' is the path
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
19 to a directory where output files must be put for the
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
20 receiving data manager to pick them up.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
21
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
22 NB the directory pointed to by 'extra_files_path'
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
23 doesn't exist initially, it is the job of the script
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
24 to create it if necessary.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
25
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
26 """
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
27 with open(json_fp) as fh:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
28 params = json.load(fh)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
29 return (params['param_dict'],
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
30 Path(params['output_data'][0]['extra_files_path']))
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
31
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
32
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
33 # Utility functions for creating data table dictionaries
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
34 #
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
35 # Example usage:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
36 # >>> d = create_data_tables_dict()
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
37 # >>> add_data_table(d,'my_data')
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
38 # >>> add_data_table_entry(dict(dbkey='hg19',value='human'))
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
39 # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse'))
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
40 # >>> print(json.dumps(d))
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
41 def create_data_tables_dict():
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
42 """Return a dictionary for storing data table information
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
43
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
44 Returns a dictionary that can be used with 'add_data_table'
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
45 and 'add_data_table_entry' to store information about a
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
46 data table. It can be converted to JSON to be sent back to
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
47 the data manager.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
48
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
49 """
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
50 d = {
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
51 'data_tables': {}
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
52 }
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
53 return d
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
54
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
55
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
56 def add_data_table(d, table):
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
57 """Add a data table to the data tables dictionary
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
58
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
59 Creates a placeholder for a data table called 'table'.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
60
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
61 """
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
62 d['data_tables'][table] = []
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
63
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
64
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
65 def add_data_table_entry(d, table, entry):
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
66 """Add an entry to a data table
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
67
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
68 Appends an entry to the data table 'table'. 'entry'
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
69 should be a dictionary where the keys are the names of
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
70 columns in the data table.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
71
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
72 Raises an exception if the named data table doesn't
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
73 exist.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
74
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
75 """
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
76 try:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
77 d['data_tables'][table].append(entry)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
78 except KeyError:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
79 raise Exception("add_data_table_entry: no table '%s'" % table)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
80
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
81
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
82 def download_gtdb(data_tables, table_name, target_dp, test=False):
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
83 """Download GTDB
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
84
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
85 Creates references to the specified file(s) on the Galaxy
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
86 server in the appropriate data table (determined from the
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
87 file extension).
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
88
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
89 The 'data_tables' dictionary should have been created using
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
90 the 'create_data_tables_dict' and 'add_data_table' functions.
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
91
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
92 Arguments:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
93 data_tables: a dictionary containing the data table info
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
94 table_name: name of the table
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
95 target_dp: directory to put copy or link to the data file
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
96
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
97 """
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
98 db_dp = target_dp
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
99 if not test:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
100 cmd = "SemiBin download_GTDB --reference-db-data-dir %s" % (db_dp)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
101 subprocess.check_call(cmd, shell=True)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
102 dbkey = 'gtdb'
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
103 name = "GTDB reference genome generated by MMseqs2 used in SemiBin"
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
104 else:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
105 dbkey = 'test'
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
106 name = "Test"
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
107 empty_fp = db_dp / Path("empty")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
108 empty_fp.touch()
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
109 add_data_table_entry(
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
110 data_tables,
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
111 table_name,
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
112 dict(
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
113 dbkey=dbkey,
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
114 value='%s' % (date.today().strftime("%d%m%Y")),
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
115 name=name,
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
116 path=str(db_dp)))
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
117
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
118
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
119 if __name__ == "__main__":
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
120 print("Starting...")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
121
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
122 # Read command line
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
123 parser = argparse.ArgumentParser(description='Download reference genomes (GTDB)')
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
124 parser.add_argument('--json', help="Path to JSON file")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
125 parser.add_argument('--test', action='store_true', help="Test")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
126 args = parser.parse_args()
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
127 print("args : %s" % args)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
128
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
129 # Read the input JSON
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
130 json_fp = Path(args.json)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
131 params, target_dp = read_input_json(json_fp)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
132
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
133 # Make the target directory
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
134 print("Making %s" % target_dp)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
135 target_dp.mkdir(parents=True, exist_ok=True)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
136
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
137 # Set up data tables dictionary
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
138 data_tables = create_data_tables_dict()
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
139 add_data_table(data_tables, "gtdb")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
140
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
141 # Fetch data from specified data sources
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
142 print("Download and build database")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
143 download_gtdb(
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
144 data_tables,
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
145 "gtdb",
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
146 target_dp,
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
147 args.test)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
148
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
149 # Write output JSON
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
150 print("Outputting JSON")
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
151 with open(json_fp, 'w') as fh:
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
152 json.dump(data_tables, fh, sort_keys=True)
1e4dd26db773 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
bgruening
parents:
diff changeset
153 print("Done.")