Mercurial > repos > iuc > data_manager_gemini_database_downloader
changeset 9:27a6a256cd23 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 275b7863ff4f8b0dff9cd7ea6c4b635694f0168d
author | iuc |
---|---|
date | Sat, 03 Dec 2022 10:37:24 +0000 |
parents | 52b6a4d98009 |
children | |
files | data_manager/data_manager_gemini_download.py data_manager/data_manager_gemini_download.xml test-data/gemini_versioned_databases.loc test-data/test.json tool_data_table_conf.xml.test |
diffstat | 5 files changed, 79 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_gemini_download.py Sun Nov 22 12:49:35 2020 +0000 +++ b/data_manager/data_manager_gemini_download.py Sat Dec 03 10:37:24 2022 +0000 @@ -1,4 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python2 + +# IMPORTANT: This will run using Python 2 still! import datetime import json @@ -14,6 +16,11 @@ yaml.dump(config, fo, allow_unicode=False, default_flow_style=False) +def load_gemini_config(config_file): + with open(config_file) as fi: + return yaml.load(fi) + + def main(): today = datetime.date.today() with open(sys.argv[1]) as fh: @@ -21,36 +28,7 @@ target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) - # Generate a minimal configuration file for GEMINI update - # to instruct the tool to download the annotation data into a - # subfolder of the target directory. - config_file = os.path.join(target_directory, 'gemini-config.yaml') - anno_dir = os.path.join(target_directory, 'gemini/data') - gemini_bootstrap_config = {'annotation_dir': anno_dir} - write_gemini_config(gemini_bootstrap_config, config_file) - - # Now gemini update can be called to download the data. - # The GEMINI_CONFIG environment variable lets the tool discover - # the configuration file we prepared for it. - # Note that the tool will rewrite the file turning it into a - # complete gemini configuration file. - gemini_env = os.environ.copy() - gemini_env['GEMINI_CONFIG'] = target_directory - cmd = "gemini update --dataonly %s %s" % ( - params['param_dict']['gerp_bp'], - params['param_dict']['cadd'] - ) - subprocess.check_call(cmd, shell=True, env=gemini_env) - - # GEMINI tool wrappers that need access to the annotation files - # are supposed to symlink them into a gemini/data subfolder of - # the job working directory. To have GEMINI discover them there, - # we need to set this location as the 'annotation_dir' in the - # configuration file. 
- with open(config_file) as fi: - config = yaml.load(fi) - config['annotation_dir'] = 'gemini/data' - write_gemini_config(config, config_file) + # Prepare the metadata for the new data table record # The name of the database should reflect whether it was built with or # without the optional GERP-bp data, the CADD scores, or both. @@ -65,7 +43,6 @@ else: anno_desc = '' - # Finally, we prepare the metadata for the new data table record ... data_manager_dict = { 'data_tables': { 'gemini_versioned_databases': [ @@ -83,10 +60,49 @@ } } - # ... and save it to the json results file + # Save the data table metadata to the json results file with open(sys.argv[1], 'w') as fh: json.dump(data_manager_dict, fh, sort_keys=True) + # Generate a minimal configuration file for GEMINI update + # to instruct the tool to download the annotation data into a + # subfolder of the target directory. + config_file = os.path.join(target_directory, 'gemini-config.yaml') + anno_dir = os.path.join(target_directory, 'gemini/data') + gemini_bootstrap_config = {'annotation_dir': anno_dir} + write_gemini_config(gemini_bootstrap_config, config_file) + + # Verify that we can read the config_file just created as we need to do so + # after the data download has finished and it is very annoying to have this + # fail after dozens of Gbs of data have been downloaded + config = load_gemini_config(config_file) + + # Now gemini update can be called to download the data. + # The GEMINI_CONFIG environment variable lets the tool discover + # the configuration file we prepared for it. + # Note that the tool will rewrite the file turning it into a + # complete gemini configuration file. 
+ gemini_env = os.environ.copy() + gemini_env['GEMINI_CONFIG'] = target_directory + cmd = ['gemini', 'update', '--dataonly'] + if params['param_dict']['gerp_bp']: + cmd += ['--extra', 'gerp_bp'] + if params['param_dict']['cadd']: + cmd += ['--extra', 'cadd_score'] + + if not params['param_dict']['test_data_manager']: + # This is not a test => Going to embark on a massive download now + subprocess.check_call(cmd, env=gemini_env) + + # GEMINI tool wrappers that need access to the annotation files + # are supposed to symlink them into a gemini/data subfolder of + # the job working directory. To have GEMINI discover them there, + # we need to set this location as the 'annotation_dir' in the + # configuration file. + config = load_gemini_config(config_file) + config['annotation_dir'] = 'gemini/data' + write_gemini_config(config, config_file) + if __name__ == "__main__": main()
--- a/data_manager/data_manager_gemini_download.xml Sun Nov 22 12:49:35 2020 +0000 +++ b/data_manager/data_manager_gemini_download.xml Sat Dec 03 10:37:24 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="data_manager_gemini_download" name="GEMINI Download" version="@VERSION@" tool_type="manage_data"> +<tool id="data_manager_gemini_download" name="GEMINI Download" version="@VERSION@+galaxy1" tool_type="manage_data" profile="18.09"> <description>the annotation files required by the GEMINI suite of tools</description> <macros> <token name="@VERSION@">0.20.1</token> @@ -11,16 +11,32 @@ python '$__tool_directory__/data_manager_gemini_download.py' '$out_file' </command> <inputs> - <param name="cadd" type="boolean" truevalue="--extra cadd_score" falsevalue="" checked="True" + <param name="cadd" type="boolean" truevalue="cadd" falsevalue="" checked="True" label="Download CADD scores for GEMINI database annotation" help="(--extra cadd_score)"/> - <param name="gerp_bp" type="boolean" truevalue="--extra gerp_bp" falsevalue="" checked="True" + <param name="gerp_bp" type="boolean" truevalue="gerp_bp" falsevalue="" checked="True" label="Download GERP for GEMINI database annotation" help="(--extra gerp_bp)"/> <param name="gemini_db_version" type="hidden" value="@DB_VERSION@"/> + <param name="test_data_manager" type="hidden" value=""/> </inputs> <outputs> <data name="out_file" format="data_manager_json" label="${tool.name}"/> </outputs> <tests> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test" /> + <param name="cadd" value="false" /> + <param name="gerp_bp" value="false" /> + <output name="out_file" file="test.json" compare="re_match" /> + </test> + <!-- Test 2 with the latest option --> + <test expect_num_outputs="1"> + <param name="test_data_manager" value="--test" /> + <output name="out_file"> + <assert_contents> + <has_text text="&quot;name&quot;: &quot;GEMINI annotations w/ GERP &amp; CADD" /> + </assert_contents> + </output> + </test> </tests> <help> This tool downloads the 
GEMINI annotation files and makes them available to
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_versioned_databases.loc Sat Dec 03 10:37:24 2022 +0000 @@ -0,0 +1,3 @@ +## GEMINI versioned databases +#DownloadDate dbkey DBversion Description +#2018-07-08 hg19 181 GEMINI annotations (2018-07-08 snapshot)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.json Sat Dec 03 10:37:24 2022 +0000 @@ -0,0 +1,1 @@ +\{"data_tables": \{"gemini_versioned_databases": \[\{"dbkey": "hg19", "name": "GEMINI annotations \(.+ snapshot\)", "path": "./.+", "value": ".+", "version": "200"\}\]\}\}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Sat Dec 03 10:37:24 2022 +0000 @@ -0,0 +1,7 @@ +<tables> + <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, version, name, path</columns> + <file path="${__HERE__}/test-data/gemini_versioned_databases.loc" /> + </table> +</tables> +