changeset 5:b4b2b284230a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 9981ac1338c91a3ab46557ce6b821da3d5b4bc86
author iuc
date Wed, 19 Dec 2018 12:36:22 -0500
parents fe5a9a7d95b0
children f57426daa04d
files data_manager/data_manager_gemini_download.py
diffstat 1 files changed, 28 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/data_manager_gemini_download.py	Fri Dec 14 12:40:15 2018 -0500
+++ b/data_manager/data_manager_gemini_download.py	Wed Dec 19 12:36:22 2018 -0500
@@ -9,37 +9,49 @@
 import yaml
 
 
+def write_gemini_config(config, config_file):
+    with open(config_file, 'w') as fo:
+        yaml.dump(config, fo, allow_unicode=False, default_flow_style=False)
+
+
 def main():
     today = datetime.date.today()
     params = json.loads( open( sys.argv[1] ).read() )
     target_directory = params[ 'output_data' ][0]['extra_files_path']
     os.mkdir( target_directory )
-    # The target_directory needs to be specified twice for the following
-    # invocation of gemini.
-    # In essence, the GEMINI_CONFIG environment variable makes gemini store
-    # its yaml configuration file in that directory, while the
-    # --annotation-dir argument makes it write the same path into the yaml
-    # file, which is then used for determining where the actual annotation
-    # files should be stored.
+
+    # Generate a minimal configuration file for `gemini update`
+    # that instructs the tool to download the annotation data into
+    # the gemini/data subfolder of the target directory.
+    config_file = os.path.join(target_directory, 'gemini-config.yaml')
+    anno_dir = os.path.join(target_directory, 'gemini/data')
+    gemini_bootstrap_config = {'annotation_dir': anno_dir}
+    write_gemini_config(gemini_bootstrap_config, config_file)
+
+    # Now `gemini update` can be called to download the data.
+    # The GEMINI_CONFIG environment variable is set to the directory
+    # holding the gemini-config.yaml file we prepared for the tool.
+    # Note that the tool will rewrite that file, turning it into a
+    # complete gemini configuration file.
     gemini_env = os.environ.copy()
     gemini_env['GEMINI_CONFIG'] = target_directory
-    cmd = "gemini --annotation-dir %s update --dataonly %s %s" % (
-        target_directory,
+    cmd = "gemini update --dataonly %s %s" % (
         params['param_dict']['gerp_bp'],
         params['param_dict']['cadd']
     )
     subprocess.check_call( cmd, shell=True, env=gemini_env )
 
-    # modify the newly created gemini config file to contain a relative
-    # annotation dir path, which will be interpreted as relative to
-    # the job working directory at runtime by any gemini tool
-    config_file = os.path.join(target_directory, 'gemini-config.yaml')
+    # GEMINI tool wrappers that need access to the annotation files
+    # are supposed to symlink them into a gemini/data subfolder of
+    # the job working directory. To have GEMINI discover them there,
+    # we store the relative path 'gemini/data' as the
+    # 'annotation_dir' in the configuration file.
     with open(config_file) as fi:
         config = yaml.load(fi)
     config['annotation_dir'] = 'gemini/data'
-    with open(config_file, 'w') as fo:
-        yaml.dump(config, fo, allow_unicode=False, default_flow_style=False)
+    write_gemini_config(config, config_file)
 
+    # Finally, we prepare the metadata for the new data table record ...
     data_manager_dict = {
         'data_tables': {
             'gemini_versioned_databases': [
@@ -55,7 +67,7 @@
         }
     }
 
-    # save info to json file
+    # ... and save it to the json results file
     with open( sys.argv[1], 'wb' ) as out:
         out.write( json.dumps( data_manager_dict ) )