Mercurial > repos > galaxyp > data_manager_eggnog_mapper
changeset 0:6d8144eef202 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper_data_manager commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
author | galaxyp |
---|---|
date | Mon, 11 Nov 2019 11:49:16 -0500 |
parents | |
children | 077cf0a99144 |
files | data_manager/data_manager_eggnog.py data_manager/data_manager_eggnog.xml data_manager/eggnog_macros.xml data_manager_conf.xml test-data/cached_locally/eggnog.db test-data/cached_locally/eggnog_mapper_db.loc test-data/cached_locally/eggnog_mapper_hmm_dbs.loc tool-data/eggnog_mapper_db.loc.sample tool-data/eggnog_mapper_hmm_dbs.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 11 files changed, 377 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_eggnog.py Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import argparse +import json +import os.path +import sqlite3 +import sys +from sqlite3 import OperationalError + + +def _get_db_version(sqlitedb_path): + version = '4.5' + try: + query = 'select version from version' + conn = sqlite3.connect(sqlitedb_path) + cur = conn.cursor() + cur.execute(query) + version = cur.fetchone()[0] + except OperationalError as e: + print('Assuming eggnog version %s because %s %s' % + (version, sqlitedb_path, e), file=sys.stderr) + return version + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--config_file') + parser.add_argument('--install_path') + parser.add_argument('--dbs', default='') + args = parser.parse_args() + + eggnog_db_path = os.path.join(args.install_path, 'eggnog.db') + if not os.path.exists(eggnog_db_path): + print('Can not open: %s' % eggnog_db_path, file=sys.stderr) + exit(1) + db_version = _get_db_version(eggnog_db_path) + + # params = json.loads(open(args.config_file).read()) + dm_dict = {} + dm_dict['data_tables'] = dm_dict.get('data_tables', {}) + data_table = 'eggnog_mapper_db' + dm_dict['data_tables'][data_table]\ + = dm_dict['data_tables'].get(data_table, []) + data_table_entry = dict(value=db_version, name=db_version, + path=args.install_path) + dm_dict['data_tables'][data_table].append(data_table_entry) + data_table = 'eggnog_mapper_hmm_dbs' + dm_dict['data_tables'][data_table]\ + = dm_dict['data_tables'].get(data_table, []) + if args.dbs: + dbs = [x.strip() for x in args.dbs.split(',')] + for db in dbs: + key = '%s_%s' % (db_version, db) + data_table_entry = dict(key=key, db_version=db_version, + value=db, name=db, path=db) + dm_dict['data_tables'][data_table].append(data_table_entry) + + # save info to json file + open(args.config_file, 'wb').write(json.dumps(dm_dict)) + + +if 
__name__ == "__main__": + main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/data_manager_eggnog.xml Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,33 @@ +<tool id="data_manager_eggnog" name="EggNOG DB Download" version="@VERSION@.1" tool_type="manage_data"> + <description>eggnog data</description> + <macros> + <import>eggnog_macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +#import json, os +#set params = json.loads(open(str($out_file)).read()) +#set install_path = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace') +@DOWNLOAD_CMD@ + ]]></command> + <inputs> + <expand macro="data_manager_params"/> + </inputs> + <expand macro="data_manager_outputs"/> + <tests> + <expand macro="data_manager_test"/> + </tests> + <help><![CDATA[ +This tool downloads eggnog data using download_eggnog_data.py +and populates the data tables: eggnog_mapper_db and eggnog_mapper_hmm_dbs. +The data is located at: http://eggnogdb.embl.de/download/emapperdb-4.5.1/ +The optional eggNOG HMM databases: http://eggnogdb.embl.de/download/emapperdb-4.5.1/hmmdb_levels/ +can vary from 1G to 80G in size. + +This will install data relative to the galaxy_data_manager_data_path declared in config/galaxy.yml +in directory eggnog_data/<eggnog data version>/. +The eggnog version is read from the "version" table of the downloaded SQLite file eggnog.db. + ]]></help> + <expand macro="citations"/> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/eggnog_macros.xml Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,173 @@ +<?xml version="1.0"?> +<macros> + <token name="@VERSION@">1.0.3</token> + <xml name="citations"> + <citations> + <citation type="doi">10.1093/nar/gkv1248</citation> + </citations> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@VERSION@">eggnog-mapper</requirement> + </requirements> + </xml> + <xml name="version_command"> + <version_command>emapper.py --version</version_command> + </xml> + <xml name="data_manager_params"> + <param name="test" type="hidden" value="false" /> + <param name="diamond_database" type="boolean" truevalue="" falsevalue="-D" checked="true" label="Install the diamond database"/> + <param argument="dbs" type="select" multiple="true" label="eggNOG HMM databases to download. If none are selected only diamond can be used'"> + <option value="arch" selected="true">Archea arch_1 (arch)</option> + <option value="bact" selected="true">Bacteria bact_50 (bact)</option> + <option value="euk" selected="true">Eukaryote euk_500 (euk)</option> + <option value="NOG" selected="true">All organisms (NOG)</option> + <option value="aciNOG">Acidobacteria (aciNOG)</option> + <option value="acidNOG">Acidobacteriia (acidNOG)</option> + <option value="acoNOG">Aconoidasida (acoNOG)</option> + <option value="actNOG">Actinobacteria (actNOG)</option> + <option value="agaNOG">Agaricales (agaNOG)</option> + <option value="agarNOG">Agaricomycetes (agarNOG)</option> + <option value="apiNOG">Apicomplexa (apiNOG)</option> + <option value="aproNOG">Proteobacteria_alpha (aproNOG)</option> + <option value="aquNOG">Aquificae (aquNOG)</option> + <option value="arNOG">Archaea (arNOG)</option> + <option value="arcNOG">Archaeoglobi (arcNOG)</option> + <option value="artNOG">Arthropoda (artNOG)</option> + <option value="arthNOG">Arthrodermataceae (arthNOG)</option> + <option value="ascNOG">Ascomycota 
(ascNOG)</option> + <option value="aveNOG">Aves (aveNOG)</option> + <option value="bacNOG">Bacilli (bacNOG)</option> + <option value="bactNOG">Bacteria (bactNOG)</option> + <option value="bacteNOG">Bacteroidia (bacteNOG)</option> + <option value="basNOG">Basidiomycota (basNOG)</option> + <option value="bctoNOG">Bacteroidetes (bctoNOG)</option> + <option value="biNOG">Bilateria (biNOG)</option> + <option value="bproNOG">Proteobacteria_beta (bproNOG)</option> + <option value="braNOG">Brassicales (braNOG)</option> + <option value="carNOG">Carnivora (carNOG)</option> + <option value="chaNOG">Chaetomiaceae (chaNOG)</option> + <option value="chlNOG">Chlorobi (chlNOG)</option> + <option value="chlaNOG">Chlamydiae (chlaNOG)</option> + <option value="chloNOG">Chloroflexi (chloNOG)</option> + <option value="chlorNOG">Chloroflexi (chlorNOG)</option> + <option value="chloroNOG">Chlorophyta (chloroNOG)</option> + <option value="chorNOG">Chordata (chorNOG)</option> + <option value="chrNOG">Chromadorea (chrNOG)</option> + <option value="cloNOG">Clostridia (cloNOG)</option> + <option value="cocNOG">Coccidia (cocNOG)</option> + <option value="creNOG">Crenarchaeota (creNOG)</option> + <option value="cryNOG">Cryptosporidiidae (cryNOG)</option> + <option value="cyaNOG">Cyanobacteria (cyaNOG)</option> + <option value="cytNOG">Cytophagia (cytNOG)</option> + <option value="debNOG">Debaryomycetaceae (debNOG)</option> + <option value="defNOG">Deferribacteres (defNOG)</option> + <option value="dehNOG">Dehalococcoidetes (dehNOG)</option> + <option value="deiNOG">Deinococcusthermus (deiNOG)</option> + <option value="delNOG">delta/epsilon (delNOG)</option> + <option value="dipNOG">Diptera (dipNOG)</option> + <option value="dotNOG">Dothideomycetes (dotNOG)</option> + <option value="dproNOG">Proteobacteria_delta (dproNOG)</option> + <option value="droNOG">Drosophilidae (droNOG)</option> + <option value="eproNOG">Proteobacteria_epsilon (eproNOG)</option> + <option value="eryNOG">Erysipelotrichi 
(eryNOG)</option> + <option value="euNOG">Eukaryotes (euNOG)</option> + <option value="eurNOG">Euryarchaeota (eurNOG)</option> + <option value="euroNOG">Eurotiomycetes (euroNOG)</option> + <option value="eurotNOG">Eurotiales (eurotNOG)</option> + <option value="fiNOG">Fishes (fiNOG)</option> + <option value="firmNOG">Firmicutes (firmNOG)</option> + <option value="flaNOG">Flavobacteriia (flaNOG)</option> + <option value="fuNOG">Fungi (fuNOG)</option> + <option value="fusoNOG">Fusobacteria (fusoNOG)</option> + <option value="gproNOG">Proteobacteria_gamma (gproNOG)</option> + <option value="haeNOG">Haemosporida (haeNOG)</option> + <option value="halNOG">Halobacteria (halNOG)</option> + <option value="homNOG">Hominidae (homNOG)</option> + <option value="hymNOG">Hymenoptera (hymNOG)</option> + <option value="hypNOG">Hypocreales (hypNOG)</option> + <option value="inNOG">Insects (inNOG)</option> + <option value="kinNOG">Kinetoplastida (kinNOG)</option> + <option value="lepNOG">Lepidoptera (lepNOG)</option> + <option value="lilNOG">Liliopsida (lilNOG)</option> + <option value="maNOG">Mammals (maNOG)</option> + <option value="magNOG">Magnaporthales (magNOG)</option> + <option value="meNOG">Animals (meNOG)</option> + <option value="metNOG">Methanobacteria (metNOG)</option> + <option value="methNOG">Methanococci (methNOG)</option> + <option value="methaNOG">Methanomicrobia (methaNOG)</option> + <option value="necNOG">Nectriaceae (necNOG)</option> + <option value="negNOG">Negativicutes (negNOG)</option> + <option value="nemNOG">Nematodes (nemNOG)</option> + <option value="onyNOG">Onygenales (onyNOG)</option> + <option value="opiNOG">Opisthokonts (opiNOG)</option> + <option value="perNOG">Peronosporales (perNOG)</option> + <option value="plaNOG">Planctomycetes (plaNOG)</option> + <option value="pleNOG">Pleosporales (pleNOG)</option> + <option value="poaNOG">Poales (poaNOG)</option> + <option value="prNOG">Primates (prNOG)</option> + <option value="proNOG">Proteobacteria 
(proNOG)</option> + <option value="rhaNOG">Rhabditida (rhaNOG)</option> + <option value="roNOG">Rodents (roNOG)</option> + <option value="sacNOG">Saccharomycetaceae (sacNOG)</option> + <option value="saccNOG">Saccharomycetes (saccNOG)</option> + <option value="sorNOG">Sordariales (sorNOG)</option> + <option value="sordNOG">Sordariomycetes (sordNOG)</option> + <option value="sphNOG">Sphingobacteriia (sphNOG)</option> + <option value="spiNOG">Spirochaetes (spiNOG)</option> + <option value="spriNOG">Supraprimates (spriNOG)</option> + <option value="strNOG">Streptophyta (strNOG)</option> + <option value="synNOG">Synergistetes (synNOG)</option> + <option value="tenNOG">Tenericutes (tenNOG)</option> + <option value="thaNOG">Thaumarchaeota (thaNOG)</option> + <option value="theNOG">Thermoplasmata (theNOG)</option> + <option value="therNOG">Thermotogae (therNOG)</option> + <option value="thermNOG">Thermococci (thermNOG)</option> + <option value="treNOG">Tremellales (treNOG)</option> + <option value="veNOG">Vertebrates (veNOG)</option> + <option value="verNOG">Verrucomicrobia (verNOG)</option> + <option value="verrNOG">Verrucomicrobiae (verrNOG)</option> + <option value="virNOG">Viridiplantae (virNOG)</option> + </param> + </xml> + <xml name="data_manager_outputs"> + <outputs> + <data name="out_file" format="data_manager_json" label="${tool.name}"/> + </outputs> + </xml> + <token name="@DOWNLOAD_CMD@"><![CDATA[ +## tool should set install_path +#if $test == 'true' +#import os.path +#set $install_path = $os.path.join($os.path.dirname($__tool_directory__), 'test-data/cached_locally') +#end if +#if $dbs: +#set $eggnogdbs = ' '.join(str($dbs).split(',')) +#else +#set $eggnogdbs = 'none' +#end if +mkdir -p '${install_path}' && +download_eggnog_data.py + $diamond_database -y -q +#if $test == 'true' + -s +#end if + --data_dir '$install_path' + $eggnogdbs && +python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file' --install_path '$install_path' --dbs '$dbs' 
+ ]]></token> + <xml name="data_manager_test"> + <!-- + <test> + <param name="test" value="true"/> + <param name="diamond_database" value="false"/> + <param name="dbs" value="thaNOG"/> + <output name="out_file"> + <assert_contents> + <has_text text="eggnog_mapper_db" /> + </assert_contents> + </output> + </test> + --> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,27 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/data_manager_eggnog.xml" id="data_manager_eggnog" > + <data_table name="eggnog_mapper_db"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" output_ref="out_file" > + <move type="directory" relativize_symlinks="True"> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">eggnog_data/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/eggnog_data/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> + <data_table name="eggnog_mapper_hmm_dbs"> <!-- Defines a Data Table to be modified. --> + <output> <!-- Handle the output of the Data Manager Tool --> + <column name="key" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="db_version" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="name" /> <!-- columns that are going to be specified by the Data Manager Tool --> + <column name="path" /> <!-- columns that are going to be specified by the Data Manager Tool --> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/eggnog_mapper_db.loc Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,2 @@ +#value name path +4.5 eggNOG_4.5 ${__HERE__}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/eggnog_mapper_hmm_dbs.loc Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,2 @@ +#key db_version value name path +4.5_ENOG411CB2I 4.5 ENOG411CB2I ENOG411CB2I ${__HERE__}/hmmdb_levels/ENOG411CB2I/ENOG411CB2I
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/eggnog_mapper_db.loc.sample Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,25 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of eggnog_mapper data files. +# +# eggnog-mapper requires the following files to be installed in the data directory: +# https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog.db.gz +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/OG_fasta.tar.gz +# In addition, individual HMM DBs can be installed from: +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/hmmdb_levels/ +# A complete diamond database is available from: +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog_proteins.dmnd.gz +# +# The python script download_eggnog_data.py, +# included with eggnog_mapper, can be used to download the files to the correct directory +# +# The near-equivalence of columns "value" and "db" is needed for the tests to work, +# and for the setting of --data_dir to the parent directory of eggnog.db +# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. +# See test-data/cached_locally/eggnog_mapper_hmm_dbs.loc for how this was done with the included test databases +# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, +# value and db should be the same (in the example, both should be "thaNOG") +# +# +#value name path +#4.5.1 eggnog_4.5.1 /path/to/directory/that/contains/eggnog.db
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/eggnog_mapper_hmm_dbs.loc.sample Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of eggnog_mapper data files. +# +# eggnog-mapper requires the following files to be installed in the data directory: +# https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog.db.gz +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/OG_fasta.tar.gz +# In addition, individual HMM DBs can be installed from: +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/hmmdb_levels/ +# A complete diamond database is available from: +# http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog_proteins.dmnd.gz +# +# The python script download_eggnog_data.py, +# included with eggnog_mapper, can be used to download the files to the correct directory +# +# The near-equivalence of columns "value" and "db" is needed for the tests to work, +# and for the setting of --data_dir to the parent directory of eggnog.db +# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. +# See test-data/cached_locally/eggnog_mapper_hmm_dbs.loc for how this was done with the included test databases +# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, +# value and db should be the same (in the example, both should be "thaNOG") +# +# +#key db_version value name path +#4.5.1_NOG 4.5.1 NOG Full eggNOG database (NOG) +#4.5.1_euk 4.5.1 euk Eukaryotes (euk) +#4.5.1_aproNOG 4.5.1 aproNOG Proteobacteria_alpha (aproNOG) +#4.5.1_ENOG411CB2I 4.5.1 ENOG411CB2I ENOG411CB2I (custom) /path/to/custom/hmmdb/ENOG411CB2I
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,11 @@ +<tables> + <!-- Locations of all eggnog_mapper data --> + <table name="eggnog_mapper_db" comment_char="#" allow_duplicate_entries="False"> + <columns>value,name,path</columns> + <file path="tool-data/eggnog_mapper_db.loc" /> + </table> + <table name="eggnog_mapper_hmm_dbs" comment_char="#" allow_duplicate_entries="False"> + <columns>key,db_version,value,name,path</columns> + <file path="tool-data/eggnog_mapper_hmm_dbs.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Mon Nov 11 11:49:16 2019 -0500 @@ -0,0 +1,11 @@ +<tables> + <!-- Locations of all eggnog_mapper data --> + <table name="eggnog_mapper_db" comment_char="#"> + <columns>value,name,path</columns> + <file path="${__HERE__}/test-data/cached_locally/eggnog_mapper_db.loc" /> + </table> + <table name="eggnog_mapper_hmm_dbs" comment_char="#"> + <columns>key,db_version,value,name,path</columns> + <file path="${__HERE__}/test-data/cached_locally/eggnog_mapper_hmm_dbs.loc" /> + </table> +</tables>