Mercurial > repos > iuc > data_manager_gemini_database_downloader
changeset 4:fe5a9a7d95b0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_gemini_database_downloader commit 283362494058ed64143b1f27afb447b8a1cb4313
author | iuc |
---|---|
date | Fri, 14 Dec 2018 12:40:15 -0500 |
parents | 172815da3d41 |
children | b4b2b284230a |
files | data_manager/data_manager_gemini_download.py data_manager/data_manager_gemini_download.xml data_manager_conf.xml tool-data/gemini_databases.loc.sample tool-data/gemini_versioned_databases.loc.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 61 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_gemini_download.py Tue Apr 04 18:09:05 2017 -0400 +++ b/data_manager/data_manager_gemini_download.py Fri Dec 14 12:40:15 2018 -0500 @@ -6,18 +6,51 @@ import subprocess import sys +import yaml + def main(): today = datetime.date.today() params = json.loads( open( sys.argv[1] ).read() ) target_directory = params[ 'output_data' ][0]['extra_files_path'] os.mkdir( target_directory ) - cmd = "gemini --annotation-dir %s update --dataonly %s %s" % (target_directory, params['param_dict']['gerp_bp'], params['param_dict']['cadd'] ) - subprocess.check_call( cmd, shell=True ) + # The target_directory needs to be specified twice for the following + # invocation of gemini. + # In essence, the GEMINI_CONFIG environment variable makes gemini store + # its yaml configuration file in that directory, while the + # --annotation-dir argument makes it write the same path into the yaml + # file, which is then used for determining where the actual annotation + # files should be stored. 
+ gemini_env = os.environ.copy() + gemini_env['GEMINI_CONFIG'] = target_directory + cmd = "gemini --annotation-dir %s update --dataonly %s %s" % ( + target_directory, + params['param_dict']['gerp_bp'], + params['param_dict']['cadd'] + ) + subprocess.check_call( cmd, shell=True, env=gemini_env ) + + # modify the newly created gemini config file to contain a relative + # annotation dir path, which will be interpreted as relative to + # the job working directory at runtime by any gemini tool + config_file = os.path.join(target_directory, 'gemini-config.yaml') + with open(config_file) as fi: + config = yaml.load(fi) + config['annotation_dir'] = 'gemini/data' + with open(config_file, 'w') as fo: + yaml.dump(config, fo, allow_unicode=False, default_flow_style=False) + data_manager_dict = { 'data_tables': { - 'gemini_databases': [ - {'value': today.isoformat(), 'dbkey': 'hg19', 'name': 'GEMINI annotations (%s)' % today.isoformat(), 'path': './%s' % today.isoformat() } + 'gemini_versioned_databases': [ + { + 'value': today.isoformat(), + 'dbkey': 'hg19', + 'version': params['param_dict']['gemini_db_version'], + 'name': + 'GEMINI annotations (%s snapshot)' % today.isoformat(), + 'path': './%s' % today.isoformat() + } ] } }
--- a/data_manager/data_manager_gemini_download.xml Tue Apr 04 18:09:05 2017 -0400 +++ b/data_manager/data_manager_gemini_download.xml Fri Dec 14 12:40:15 2018 -0500 @@ -1,7 +1,11 @@ -<tool id="data_manager_gemini_download" name="GEMINI Download" version="0.18.1" tool_type="manage_data"> - <description>Download a new database</description> +<tool id="data_manager_gemini_download" name="GEMINI Download" version="@VERSION@.1" tool_type="manage_data"> + <description>the annotation files required by the GEMINI suite of tools</description> + <macros> + <token name="@VERSION@">0.18.1</token> + <token name="@DB_VERSION@">181</token> + </macros> <requirements> - <requirement type="package" version="0.18.1">gemini</requirement> + <requirement type="package" version="@VERSION@">gemini</requirement> </requirements> <command detect_errors="exit_code"> python '$__tool_directory__/data_manager_gemini_download.py' '$out_file' @@ -11,6 +15,7 @@ label="Download CADD scores for GEMINI database annotation" help="(--extra cadd_score)"/> <param name="gerp_bp" type="boolean" truevalue="--extra gerp_bp" falsevalue="" checked="True" label="Download GERP for GEMINI database annotation" help="(--extra gerp_bp)"/> + <param name="gemini_db_version" type="hidden" value="@DB_VERSION@"/> </inputs> <outputs> <data name="out_file" format="data_manager_json" label="${tool.name}"/> @@ -18,9 +23,13 @@ <tests> </tests> <help> -This tool downloads the GEMINI databases. +This tool downloads the GEMINI annotation files and makes them available to +corresponding versions of the GEMINI suite of tools. Downloading the (very large) CADD_ scores and GERP_ annotation files is optional. -For details about this tool, please go to http://gemini.readthedocs.org +Please visit http://gemini.readthedocs.io for details about GEMINI. + +.. _GERP: http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html +.. _CADD: https://cadd.gs.washington.edu/ </help> <citations> <citation type="doi">10.1371/journal.pcbi.1003153</citation>
--- a/data_manager_conf.xml Tue Apr 04 18:09:05 2017 -0400 +++ b/data_manager_conf.xml Fri Dec 14 12:40:15 2018 -0500 @@ -1,16 +1,17 @@ <?xml version="1.0"?> <data_managers> <data_manager tool_file="data_manager/data_manager_gemini_download.xml" id="data_manager_gemini_download" > - <data_table name="gemini_databases"> <!-- Defines a Data Table to be modified. --> + <data_table name="gemini_versioned_databases"> <!-- Defines a Data Table to be modified. --> <output> <!-- Handle the output of the Data Manager Tool --> <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> <column name="dbkey" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="version" /> <!-- columns that are going to be specified by the Data Manager Tool --> <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> <column name="path" output_ref="out_file" > <move type="directory" relativize_symlinks="True"> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">gemini/data/${dbkey}/${value}</target> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">gemini/${version}/${dbkey}/${value}</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/gemini/data/${dbkey}/${value}/gemini/data/</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/gemini/${version}/${dbkey}/${value}/</value_translation> <value_translation type="function">abspath</value_translation> </column> </output>
--- a/tool-data/gemini_databases.loc.sample Tue Apr 04 18:09:05 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -## GEMINI databases -#Version dbkey Description -#08_08_2014 hg19 Database (08-08-2014)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gemini_versioned_databases.loc.sample Fri Dec 14 12:40:15 2018 -0500 @@ -0,0 +1,3 @@ +## GEMINI versioned databases +#DownloadDate dbkey DBversion Description +#2018-07-08 hg19 181 GEMINI annotations (2018-07-08 snapshot)
--- a/tool_data_table_conf.xml.sample Tue Apr 04 18:09:05 2017 -0400 +++ b/tool_data_table_conf.xml.sample Fri Dec 14 12:40:15 2018 -0500 @@ -1,7 +1,7 @@ <tables> - <table name="gemini_databases" comment_char="#" allow_duplicate_entries="False"> - <columns>value, dbkey, name, path</columns> - <file path="tool-data/gemini_databases.loc" /> + <table name="gemini_versioned_databases" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, version, name, path</columns> + <file path="tool-data/gemini_versioned_databases.loc" /> </table> </tables>