Mercurial > repos > iuc > data_manager_cat
changeset 0:cffd8e2382cf draft
"planemo upload commit 4e5a8d7629d90c41219b48b648dd4ab675b84af0"
author | iuc |
---|---|
date | Mon, 09 Dec 2019 10:28:15 -0500 |
parents | |
children | 74af283d8ebd |
files | data_manager/data_manager_cat.py data_manager/data_manager_cat.xml data_manager_conf.xml test-data/CAT_prepare_test.tar.gz test-data/cat_database.loc tool-data/cat_database.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 8 files changed, 226 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_cat.py Mon Dec 09 10:28:15 2019 -0500 @@ -0,0 +1,116 @@ +#!/usr/bin/env python +from __future__ import print_function + +import argparse +import json +import os.path +import subprocess +import sys +import tarfile +import tempfile +import zipfile +try: + # For Python 3.0 and later + from urllib.request import urlopen +except ImportError: + # Fall back to Python 2 imports + from urllib2 import urlopen + + +def url_download(url, workdir): + file_path = os.path.join(workdir, 'download.dat') + src = None + dst = None + try: + src = urlopen(url) + with open(file_path, 'wb') as dst: + while True: + chunk = src.read(2**10) + if chunk: + dst.write(chunk) + else: + break + finally: + if src: + src.close() + if tarfile.is_tarfile(file_path): + fh = tarfile.open(file_path, 'r:*') + elif zipfile.is_zipfile(file_path): + fh = zipfile.ZipFile(file_path, 'r') + else: + return + fh.extractall(workdir) + os.remove(file_path) + + +def cat_prepare(install_dir, db_dir=None, tax_dir=None): + if db_dir and tax_dir: + cmd = ['CAT', 'prepare', '--existing', '-d', db_dir, '-t', tax_dir] + else: + cmd = ['CAT', 'prepare', '--fresh', '-q'] + cmd_stdout = tempfile.NamedTemporaryFile() + cmd_stderr = tempfile.NamedTemporaryFile() + return_code = subprocess.call(cmd, shell=False, cwd=install_dir, + stdout=cmd_stdout, stderr=cmd_stderr) + if return_code: + msg = "stdout:\n%s\nstderr:\n%s" % (cmd_stdout.read(), + cmd_stderr.read()) + cmd_stdout.close() + cmd_stderr.close() + raise Exception('Error: (%s), returncode=%s %s' + % (' '.join(cmd), return_code, msg)) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--config_file', required=True) + parser.add_argument('--install_path', default=None) + parser.add_argument('--db_url', default=None) + parser.add_argument('--database_folder', default=None) + parser.add_argument('--taxonomy_folder', default=None) + args = parser.parse_args() + + cat_path = 
None + cat_db = None + tax_db = None + if args.database_folder and args.taxonomy_folder: + cat_path = os.path.dirname(args.database_folder) + cat_db = os.path.basename(args.database_folder) + tax_db = os.path.basename(args.taxonomy_folder) + cat_prepare(os.getcwd(), + db_dir=args.database_folder, + tax_dir=args.taxonomy_folder) + elif not args.install_path: + sys.exit(1) + else: + if not os.path.exists(args.install_path): + os.makedirs(args.install_path) + if args.db_url: + url_download(args.db_url, args.install_path) + else: + cat_prepare(args.install_path) + for root, dirs, files in os.walk(args.install_path): + for dname in dirs: + if dname.endswith('CAT_database'): + cat_db = dname + elif dname.endswith('taxonomy'): + tax_db = dname + if cat_db and tax_db: + cat_path = root + break + cat_dir = os.path.basename(cat_path) + dm_dict = {} + dm_dict['data_tables'] = dm_dict.get('data_tables', {}) + data_table = 'cat_database' + dm_dict['data_tables'][data_table]\ + = dm_dict['data_tables'].get(data_table, []) + data_table_entry = dict(value=cat_dir, name=cat_dir, + database_folder=os.path.join(cat_dir, cat_db), + taxonomy_folder=os.path.join(cat_dir, tax_db)) + dm_dict['data_tables'][data_table].append(data_table_entry) + # save info to json file + open(args.config_file, 'w').write(json.dumps(dm_dict)) + + +if __name__ == "__main__": + main()
<!-- File: data_manager/data_manager_cat.xml (added in this changeset) -->
<!-- Galaxy data manager tool that downloads or builds CAT reference data. -->
<tool id="data_manager_cat" name="CAT DB" version="5.0.3.0" tool_type="manage_data" profile="18.09">
    <description>Install a new CAT database</description>
    <requirements>
        <requirement type="package" version="5.0.3">cat</requirement>
    </requirements>
    <version_command><![CDATA[ CAT --version ]]></version_command>
    <!-- The db_url option is only passed when "download" is selected;
         without it the script builds the data with CAT prepare. -->
    <command detect_errors="exit_code"><![CDATA[
mkdir -p '$out_file.extra_files_path' &&
python '${__tool_directory__}/data_manager_cat.py'
    --config_file '$out_file'
    --install_path '$out_file.extra_files_path'
#if $db.src == 'download'
    --db_url '$db.db_url'
#end if
    ]]></command>
    <inputs>
        <conditional name="db">
            <param name="src" type="select" label="Download or Build DBs">
                <option value="download">download</option>
                <option value="build">build</option>
            </param>
            <when value="download">
                <param name="db_url" type="text" label="DB URL at https://tbb.bio.uu.nl/bastiaan/CAT_prepare/"
                       help="example: https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz">
                </param>
            </when>
            <when value="build">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" label="${tool.name}"/>
    </outputs>
    <tests>
        <test>
            <conditional name="db">
                <param name="src" value="download"/>
                <param name="db_url" value="https://github.com/galaxyproject/tools-iuc/blob/ce82f787f1f035debfc86de09a271a9ec8d27e0e/data_managers/data_manager_cat/test-data/CAT_prepare_test.tar.gz?raw=true"/>
            </conditional>
            <output name="out_file">
                <assert_contents>
                    <has_text text="CAT_prepare_test/taxonomy" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
This tool prepares reference data for CAT, the Contig Annotation Tool.
It can either download prebuilt reference data from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/
or build new reference data using the CAT prepare application.

This requires at least 100GB of RAM, 250GB of disk space, and 24 hours.
    ]]></help>
    <!-- Declared inline: this file defines and imports no macros, so an
         <expand macro="citations" /> has nothing to expand.
         NOTE(review): confirm the DOI against the CAT/BAT publication. -->
    <citations>
        <citation type="doi">10.1186/s13059-019-1817-x</citation>
    </citations>
</tool>
<?xml version="1.0"?>
<!-- File: data_manager_conf.xml (added in this changeset) -->
<!-- Connects the data_manager_cat tool to the cat_database data table. -->
<data_managers>
    <data_manager tool_file="data_manager/data_manager_cat.xml" id="data_manager_cat">
        <!-- Data table that receives the new entry. -->
        <data_table name="cat_database">
            <!-- Columns filled from the JSON written by the tool. -->
            <output>
                <column name="value" />
                <column name="name" />
                <!-- The database folder is moved under CAT/ in the data
                     manager data path; the stored value is the absolute
                     path of the moved folder. -->
                <column name="database_folder" output_ref="out_file">
                    <move type="directory" relativize_symlinks="True">
                        <src>${database_folder}</src>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">CAT/${database_folder}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/CAT/${database_folder}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
                <!-- Same handling for the taxonomy folder. -->
                <column name="taxonomy_folder" output_ref="out_file">
                    <move type="directory" relativize_symlinks="True">
                        <src>${taxonomy_folder}</src>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">CAT/${taxonomy_folder}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/CAT/${taxonomy_folder}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cat_database.loc Mon Dec 09 10:28:15 2019 -0500 @@ -0,0 +1,7 @@ +## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz +# ls CAT_prepare_20190719/ +# 2019-07-19.CAT_prepare.fresh.log +# 2019-07-19_CAT_database +# 2019-07-19_taxonomy +#value name database_folder taxonomy_folder +#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/cat_database.loc.sample Mon Dec 09 10:28:15 2019 -0500 @@ -0,0 +1,7 @@ +## A typical download from https://tbb.bio.uu.nl/bastiaan/CAT_prepare/CAT_prepare_20190719.tar.gz +# ls CAT_prepare_20190719/ +# 2019-07-19.CAT_prepare.fresh.log +# 2019-07-19_CAT_database +# 2019-07-19_taxonomy +#value name database_folder taxonomy_folder +#2019-07-19_CAT_database 2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_CAT_database /opt/galaxy/tool-data/cat_database/CAT_prepare_20190719/2019-07-19_taxonomy
<!-- File: tool_data_table_conf.xml.sample (added in this changeset) -->
<tables>
    <!-- Locations of CAT databases -->
    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
        <columns>value, name, database_folder, taxonomy_folder</columns>
        <file path="tool-data/cat_database.loc" />
    </table>
</tables>
<!-- File: tool_data_table_conf.xml.test (added in this changeset) -->
<tables>
    <!-- Locations of CAT databases; the .loc file is read from test-data -->
    <table name="cat_database" comment_char="#" allow_duplicate_entries="False">
        <columns>value, name, database_folder, taxonomy_folder</columns>
        <file path="${__HERE__}/test-data/cat_database.loc" />
    </table>
</tables>