Mercurial > repos > iuc > data_manager_dram_database_downloader
changeset 0:bd342a6a5b0f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dram_database_downloader commit 52575ece22fcdbb6fc3aa3582ea377075aaa4db1
author | iuc |
---|---|
date | Thu, 01 Sep 2022 17:16:07 +0000 |
parents | |
children | 40c8fc0a2bb0 |
files | data_manager/data_manager_dram_download.py data_manager/data_manager_dram_download.xml data_manager_conf.xml test-data/dram_databases.loc tool-data/dram_databases.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 7 files changed, 377 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# data_manager/data_manager_dram_download.py
"""Galaxy data manager script that downloads and prepares the DRAM databases.

The script reads the Galaxy-provided JSON parameter file (``--out_file``),
runs ``DRAM-setup.py prepare_databases`` into the ``extra_files_path`` named
in that JSON, and then overwrites the same file with the ``dram_databases``
data table entry describing the installed databases.
"""

import argparse
import json
import os
import subprocess


def build_parser():
    """Return the argument parser for this data manager script."""
    parser = argparse.ArgumentParser()

    parser.add_argument('--kofam_hmm_loc', action='store', dest='kofam_hmm_loc', default=None, help='hmm file for KOfam')
    parser.add_argument('--kofam_ko_list_loc', action='store', dest='kofam_ko_list_loc', default=None, help='KOfam ko list file')
    parser.add_argument('--skip_uniref', action='store', dest='skip_uniref', default='no', help='Flag to Download and process uniref')
    parser.add_argument('--uniref_loc', action='store', dest='uniref_loc', default=None, help='uniref file')
    parser.add_argument('--uniref_version', action='store', dest='uniref_version', type=int, default=90, help='uniref version to download')
    parser.add_argument('--pfam_loc', action='store', dest='pfam_loc', default=None, help='pfam-A full file')
    parser.add_argument('--pfam_hmm_dat', action='store', dest='pfam_hmm_dat', default=None, help='pfam hmm .dat file to get PF descriptions')
    parser.add_argument('--dbcan_loc', action='store', dest='dbcan_loc', default=None, help='dbCAN file')
    parser.add_argument('--dbcan_fam_activities', action='store', dest='dbcan_fam_activities', default=None, help='CAZY family activities file')
    parser.add_argument('--dbcan_version', action='store', dest='dbcan_version', type=int, default=10, help='Version of dbCAN to use')
    parser.add_argument('--vogdb_loc', action='store', dest='vogdb_loc', default=None, help='hmm file for vogdb')
    parser.add_argument('--vog_annotations', action='store', dest='vog_annotations', default=None, help='vogdb annotations file')
    parser.add_argument('--viral_loc', action='store', dest='viral_loc', default=None, help='merged viral protein faa file')
    parser.add_argument('--peptidase_loc', action='store', dest='peptidase_loc', default=None, help='MEROPS peptidase fasta file')
    parser.add_argument('--genome_summary_form_loc', action='store', dest='genome_summary_form_loc', default=None, help='genome summary form file')
    parser.add_argument('--module_step_form_loc', action='store', dest='module_step_form_loc', default=None, help='module step form file')
    parser.add_argument('--etc_module_database_loc', action='store', dest='etc_module_database_loc', default=None, help='etc module database file')
    parser.add_argument('--function_heatmap_form_loc', action='store', dest='function_heatmap_form_loc', default=None, help='function heatmap form file')
    parser.add_argument('--amg_database_loc', action='store', dest='amg_database_loc', default=None, help='amg database file')
    parser.add_argument('--db_version', action='store', dest='db_version', help='Version of DRAM databases')
    parser.add_argument('--threads', action='store', dest='threads', type=int, help='Number of processes')
    # The file is both read (Galaxy parameters) and rewritten (data table
    # entry), so fail early with a clear usage error when it is missing.
    parser.add_argument('--out_file', action='store', dest='out_file', required=True, help='JSON output file')
    return parser


def build_command(args, target_directory):
    """Build the ``DRAM-setup.py prepare_databases`` argument vector.

    The command is returned as a list so that it can be executed without a
    shell: every user-supplied path stays a single argument even when it
    contains spaces or shell metacharacters.

    Args:
        args: Parsed arguments as produced by ``build_parser()``.
        target_directory: Directory passed to DRAM as ``--output_dir``.

    Returns:
        A list of strings suitable for ``subprocess.check_call``.
    """
    cmd = ['DRAM-setup.py', 'prepare_databases', '--output_dir', target_directory]

    def add_option(name):
        # Forward an option only when a value is available; this also keeps
        # an omitted --threads from being formatted as an integer.
        value = getattr(args, name)
        if value is not None:
            cmd.extend(['--%s' % name, str(value)])

    add_option('kofam_hmm_loc')
    add_option('kofam_ko_list_loc')
    if args.skip_uniref == 'yes':
        cmd.append('--skip_uniref')
    else:
        add_option('uniref_loc')
        add_option('uniref_version')
    for name in (
        'pfam_loc',
        'pfam_hmm_dat',
        'dbcan_loc',
        'dbcan_fam_activities',
        'dbcan_version',
        'vogdb_loc',
        'vog_annotations',
        'viral_loc',
        'peptidase_loc',
        'genome_summary_form_loc',
        'module_step_form_loc',
        'etc_module_database_loc',
        'function_heatmap_form_loc',
        'amg_database_loc',
        'threads',
    ):
        add_option(name)
    return cmd


def main(argv=None):
    """Run the data manager.

    Args:
        argv: Optional argument list; ``None`` means ``sys.argv[1:]``.

    Raises:
        subprocess.CalledProcessError: If ``DRAM-setup.py`` exits non-zero.
    """
    args = build_parser().parse_args(argv)

    with open(args.out_file) as fh:
        params = json.load(fh)

    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate a directory that already exists (e.g. a re-run job).
    os.makedirs(target_directory, exist_ok=True)

    # Download the data.
    subprocess.check_call(build_command(args, target_directory))

    data_manager_entry = {
        'value': args.db_version,
        'name': 'DRAM %s databases' % args.db_version,
        'path': target_directory,
    }
    data_manager_json = {'data_tables': {'dram_databases': data_manager_entry}}

    with open(args.out_file, 'w') as fh:
        json.dump(data_manager_json, fh, sort_keys=True)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_dram_download.xml Thu Sep 01 17:16:07 2022 +0000 @@ -0,0 +1,235 @@ +<tool id="data_manager_dram_download" name="DRAM: Download databases" version="1.3.5" tool_type="manage_data" profile="21.05"> + <description>required by the DRAM suite of tools</description> + <requirements> + <requirement type="package" version="1.3.5">dram</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +python '$__tool_directory__/data_manager_dram_download.py' +--out_file '$out_file' +#if str($kofam_hmm_loc) != '': + --kofam_hmm_loc '$kofam_hmm_loc' +#end if +#if str($kofam_ko_list_loc) != '': + --kofam_ko_list_loc '$kofam_ko_list_loc' +#end if +#if str($skip_uniref_cond.skip_uniref) == 'yes': + --skip_uniref '$skip_uniref_cond.skip_uniref' +#else: + #if str($skip_uniref_cond.uniref_loc) != '': + --uniref_loc '$skip_uniref_cond.uniref_loc' + #end if + --uniref_version $skip_uniref_cond.uniref_version +#end if +#if str($pfam_loc) != '': + --pfam_loc '$pfam_loc' +#end if +#if str($pfam_hmm_dat) != '': + --pfam_hmm_dat '$pfam_hmm_dat' +#end if +#if str($dbcan_loc) != '': + --dbcan_loc '$dbcan_loc' +#end if +#if str($dbcan_fam_activities) != '': + --dbcan_fam_activities '$dbcan_fam_activities' +#end if +--dbcan_version $dbcan_version +#if str($vogdb_loc) != '': + --vogdb_loc '$vogdb_loc' +#end if +#if str($vog_annotations) != '': + --vog_annotations '$vog_annotations' +#end if +#if str($viral_loc) != '': + --viral_loc '$viral_loc' +#end if +#if str($peptidase_loc) != '': + --peptidase_loc '$peptidase_loc' +#end if +#if str($genome_summary_form_loc) != '': + --genome_summary_form_loc '$genome_summary_form_loc' +#end if +#if str($module_step_form_loc) != '': + --module_step_form_loc '$module_step_form_loc' +#end if +#if str($etc_module_database_loc) != '': + --etc_module_database_loc '$etc_module_database_loc' +#end if +#if str($function_heatmap_form_loc) != '': + --function_heatmap_form_loc 
'$function_heatmap_form_loc' +#end if +#if str($amg_database_loc) != '': + --amg_database_loc '$amg_database_loc' +#end if +--threads \${GALAXY_SLOTS:-10} +--db_version '$db_version' + ]]></command> + <inputs> + <param argument="--kofam_hmm_loc" type="text" value="" label="Absolute path to hmm file for KOfam (profiles.tar.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--kofam_ko_list_loc" type="text" value="" label="Absolute path to KOfam ko list file (ko_list.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + + <conditional name="skip_uniref_cond"> + <param argument="--skip_uniref" type="select" label="Skip downloading and processing uniref?" help="Choosing Yes saves time and memory and does not impact DRAM distillation"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"> + <param argument="--uniref_loc" type="text" value="" label="Absolute path to uniref (uniref90.fasta.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--uniref_version" type="integer" value="90" label="UniRef version to download" help="Ignored if uniref is not downloaded and processed"/> + </when> + <when value="yes"/> + </conditional> + <param argument="--pfam_loc" type="text" value="" label="Absolute path to pfam-A full file (Pfam-A.full.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--pfam_hmm_dat" type="text" value="" label="Absolute path to 
pfam hmm .dat file to get PF descriptions (Pfam-A.hmm.dat.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--dbcan_loc" type="text" value="" label="Absolute path to dbCAN file (dbCAN-HMMdb-V9.txt) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--dbcan_fam_activities" type="text" value="" label="Absolute path to CAZY family activities file (CAZyDB.07302020.fam-activities.txt) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--dbcan_version" type="integer" value="10" label="Version of dbCAN to use"/> + <param argument="--vogdb_loc" type="text" value="" label="Absolute path to hmm file for vogdb (vog.hmm.tar.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--vog_annotations" type="text" value="" label="Absolute path to vogdb annotations file (vog.annotations.tsv.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--viral_loc" type="text" value="" label="Absolute path to merged viral protein faa file (viral.x.protein.faa.gz) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--peptidase_loc" type="text" value="" label="Absolute path to MEROPS peptidase fasta file (pepunit.lib) if 
already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--genome_summary_form_loc" type="text" value="" label="Absolute path to genome summary form file (genome_summary_form.YYYYMMDD.tsv) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--module_step_form_loc" type="text" value="" label="Absolute path to module step form file (module_step_form.YYYYMMDD.tsv) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--etc_module_database_loc" type="text" value="" label="Absolute path to etc module database file (etc_mdoule_database.YYYYMMDD.tsv) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--function_heatmap_form_loc" type="text" value="" label="Absolute path to function heatmap form file (function_heatmap_form.YYYYMMDD.tsv) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--amg_database_loc" type="text" value="" label="Absolute path to amg database file (amg_database.YYYYMMDD.tsv) if already installed" help="Leave blank to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + </sanitizer> + </param> + <param argument="--db_version" type="text" value="1.3.5" label="Version of DRAM databases to install"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'"/> + 
</valid> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json"/> + </outputs> + <tests> + <test expect_failure="true"> + <param name="kofam_hmm_loc" value="profiles.tar.gz"/> + <param name="kofam_ko_list_loc" value="ko_list.gz"/> + <param name="skip_uniref" value="yes"/> + <param name="pfam_loc" value="Pfam-A.full.gz"/> + <param name="pfam_hmm_dat" value="Pfam-A.hmm.dat.gz"/> + <param name="dbcan_loc" value="dbCAN-HMMdb-V9.txt"/> + <param name="dbcan_fam_activities" value="CAZyDB.07302020.fam-activities.txt"/> + <param name="vogdb_loc" value="vog.hmm.tar.gz"/> + <param name="vog_annotations" value="vog.annotations.tsv.gz"/> + <param name="viral_loc" value="viral.x.protein.faa.gz"/> + <param name="peptidase_loc" value="pepunit.lib"/> + <param name="genome_summary_form_loc" value="genome_summary_form.20220715.tsv"/> + <param name="module_step_form_loc" value="module_step_form.20220715.tsv"/> + <param name="etc_module_database_loc" value="etc_mdoule_database.20220715.tsv"/> + <param name="function_heatmap_form_loc" value="function_heatmap_form.20220715.tsv"/> + <param name="amg_database_loc" value="amg_database.20220715.tsv"/> + <assert_stderr> + <has_text text="Database location does not exist"/> + </assert_stderr> + </test> + </tests> + <help> +This tool downloads and processes DRAM databases for annotation and makes them available to corresponding versions +of the DRAM suite of tools. + +See https://github.com/WrightonLabCSU/DRAM/wiki for details about DRAM. + </help> + <citations> + <citation type="doi">10.1093/nar/gkaa621</citation> + </citations> +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Thu Sep 01 17:16:07 2022 +0000 @@ -0,0 +1,19 @@ +<data_managers> + <data_manager tool_file="data_manager/data_manager_dram_download.xml" id="data_manager_dram_download"> + <data_table name="dram_databases"> + <output> + <column name="value"/> + <column name="name"/> + <column name="path" output_ref="out_file"> + <move type="directory" relativize_symlinks="True"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">DRAM/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/DRAM/${value}/</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + </data_manager> +</data_managers> + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dram_databases.loc Thu Sep 01 17:16:07 2022 +0000 @@ -0,0 +1,12 @@ +# This is a sample file distributed with Galaxy that enables tools +# to use a directory of DRAM databases. The dram_databases.loc +# file has this format (longer white space characters are TAB characters): +# +# <db_version> <display_name> <directory_path> +# +# So, for example, if you have the DRAM databases stored in +# /depot/data2/galaxy/dram/, +# then the dram_databases.loc entry would look like this: +# +# 3.5.1 DRAM database /depot/data2/galaxy/dram +3.5.1 DRAM 3.5.1 databases ${__HERE__}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/dram_databases.loc.sample Thu Sep 01 17:16:07 2022 +0000 @@ -0,0 +1,3 @@ +## DRAM databases +#Value Name Path +#2022-07-04 DRAM annotations (2022-07-04) /depot/data2/galaxy/tool-data/dram_databases/2022-07-04
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Sep 01 17:16:07 2022 +0000 @@ -0,0 +1,8 @@ +<tables> + <!-- Locations of DRAM databases version 1.3.5 and higher --> + <table name="dram_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/dram_databases.loc" /> + </table> +</tables> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Sep 01 17:16:07 2022 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Location of DRAM databases version 1.3.5 and higher --> + <table name="dram_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/dram_databases.loc" /> + </table> +</tables>