changeset 0:6d8144eef202 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/eggnog_mapper/eggnog_mapper_data_manager commit ba81f4bfe31157aa6b986e81d5e4405ae1372b3b"
author galaxyp
date Mon, 11 Nov 2019 11:49:16 -0500
parents
children 077cf0a99144
files data_manager/data_manager_eggnog.py data_manager/data_manager_eggnog.xml data_manager/eggnog_macros.xml data_manager_conf.xml test-data/cached_locally/eggnog.db test-data/cached_locally/eggnog_mapper_db.loc test-data/cached_locally/eggnog_mapper_hmm_dbs.loc tool-data/eggnog_mapper_db.loc.sample tool-data/eggnog_mapper_hmm_dbs.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 11 files changed, 377 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_eggnog.py	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import json
+import os.path
+import sqlite3
+import sys
+from sqlite3 import OperationalError
+
+
+def _get_db_version(sqlitedb_path):
+    """Return the eggNOG version stored in an eggnog.db SQLite file.
+
+    Runs ``select version from version`` and returns the first column of
+    the first row.  If connecting or querying raises
+    ``sqlite3.OperationalError`` (for example the table does not exist), or
+    the table holds no rows, a notice is printed to stderr and the default
+    version '4.5' is returned instead.
+
+    :param sqlitedb_path: path to the SQLite database file
+    :return: the stored version, or '4.5' as a fallback
+    """
+    version = '4.5'
+    notice = 'Assuming eggnog version %s because %s   %s'
+    conn = None
+    try:
+        conn = sqlite3.connect(sqlitedb_path)
+        row = conn.execute('select version from version').fetchone()
+        if row is None:
+            # fetchone() yields None for an empty table; keep the default
+            # rather than failing on row[0].
+            print(notice % (version, sqlitedb_path, 'version table is empty'),
+                  file=sys.stderr)
+        else:
+            version = row[0]
+    except OperationalError as e:
+        print(notice % (version, sqlitedb_path, e), file=sys.stderr)
+    finally:
+        # Release the database handle on every path, including failures.
+        if conn is not None:
+            conn.close()
+    return version
+
+
+def main():
+    """Write the data manager JSON describing an eggNOG data directory.
+
+    Command line options:
+
+    --config_file   path of the JSON file to write
+    --install_path  directory that must already contain ``eggnog.db``
+    --dbs           optional comma-separated list of HMM database names
+
+    The JSON holds one ``eggnog_mapper_db`` entry (value and name are the
+    version read from ``eggnog.db``, path is ``--install_path``) and one
+    ``eggnog_mapper_hmm_dbs`` entry per name given in ``--dbs``.
+    Exits with status 1 if ``eggnog.db`` is not found in ``--install_path``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config_file', required=True)
+    parser.add_argument('--install_path', required=True)
+    parser.add_argument('--dbs', default='')
+    args = parser.parse_args()
+
+    eggnog_db_path = os.path.join(args.install_path, 'eggnog.db')
+    if not os.path.exists(eggnog_db_path):
+        print('Can not open: %s' % eggnog_db_path, file=sys.stderr)
+        # sys.exit is always available; the bare exit() helper is only
+        # injected by the site module.
+        sys.exit(1)
+    db_version = _get_db_version(eggnog_db_path)
+
+    db_entries = [dict(value=db_version, name=db_version,
+                       path=args.install_path)]
+
+    hmm_entries = []
+    for db in (x.strip() for x in args.dbs.split(',')):
+        # Skip blanks (empty --dbs, stray commas).  'None' is also skipped:
+        # the calling template passes '$dbs' verbatim, which presumably
+        # renders as 'None' when no database is selected -- TODO confirm.
+        if not db or db == 'None':
+            continue
+        hmm_entries.append(dict(key='%s_%s' % (db_version, db),
+                                db_version=db_version,
+                                value=db, name=db, path=db))
+
+    dm_dict = {
+        'data_tables': {
+            'eggnog_mapper_db': db_entries,
+            'eggnog_mapper_hmm_dbs': hmm_entries,
+        },
+    }
+
+    # Save info to the JSON file.  Text mode is required: json produces str,
+    # which cannot be written to a binary handle on Python 3.
+    with open(args.config_file, 'w') as handle:
+        json.dump(dm_dict, handle)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager_eggnog.xml	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,33 @@
+<tool id="data_manager_eggnog" name="EggNOG DB Download" version="@VERSION@.1" tool_type="manage_data">
+    <description>eggnog data</description>
+    <macros>
+        <import>eggnog_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+#import json, os
+#set params = json.loads(open(str($out_file)).read())
+#set install_path = $params['output_data'][0]['extra_files_path'].encode('ascii', 'replace')
+@DOWNLOAD_CMD@
+    ]]></command>
+    <inputs>
+        <expand macro="data_manager_params"/>
+    </inputs>
+    <expand macro="data_manager_outputs"/>
+    <tests>
+        <expand macro="data_manager_test"/>
+    </tests>
+    <help><![CDATA[
+This tool downloads eggnog data using download_eggnog_data.py 
+and populates the data tables: eggnog_mapper_db and eggnog_mapper_hmm_dbs.
+The data is located at: http://eggnogdb.embl.de/download/emapperdb-4.5.1/
+The optional eggNOG HMM databases: http://eggnogdb.embl.de/download/emapperdb-4.5.1/hmmdb_levels/ 
+can vary from 1G to 80G in size.
+
+This will install data relative to the galaxy_data_manager_data_path declared in config/galaxy.yml 
+in directory eggnog_data/<eggnog data version>/.
+The eggNOG version is read from the "version" table of the downloaded SQLite eggnog.db.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/eggnog_macros.xml	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,173 @@
+<?xml version="1.0"?>
+<macros>
+   <token name="@VERSION@">1.0.3</token>
+   <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/nar/gkv1248</citation>
+        </citations>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@VERSION@">eggnog-mapper</requirement>
+        </requirements>
+    </xml>
+    <xml name="version_command">
+        <version_command>emapper.py --version</version_command>
+    </xml>
+    <xml name="data_manager_params">
+        <param name="test" type="hidden" value="false" />
+        <param name="diamond_database" type="boolean" truevalue="" falsevalue="-D" checked="true" label="Install the diamond database"/>
+        <param argument="dbs" type="select" multiple="true" label="eggNOG HMM databases to download. If none are selected only diamond can be used">
+            <option value="arch" selected="true">Archaea arch_1 (arch)</option>
+            <option value="bact" selected="true">Bacteria bact_50 (bact)</option>
+            <option value="euk" selected="true">Eukaryote euk_500 (euk)</option>
+            <option value="NOG" selected="true">All organisms (NOG)</option>
+            <option value="aciNOG">Acidobacteria (aciNOG)</option>
+            <option value="acidNOG">Acidobacteriia (acidNOG)</option>
+            <option value="acoNOG">Aconoidasida (acoNOG)</option>
+            <option value="actNOG">Actinobacteria (actNOG)</option>
+            <option value="agaNOG">Agaricales (agaNOG)</option>
+            <option value="agarNOG">Agaricomycetes (agarNOG)</option>
+            <option value="apiNOG">Apicomplexa (apiNOG)</option>
+            <option value="aproNOG">Proteobacteria_alpha (aproNOG)</option>
+            <option value="aquNOG">Aquificae (aquNOG)</option>
+            <option value="arNOG">Archaea (arNOG)</option>
+            <option value="arcNOG">Archaeoglobi (arcNOG)</option>
+            <option value="artNOG">Arthropoda (artNOG)</option>
+            <option value="arthNOG">Arthrodermataceae (arthNOG)</option>
+            <option value="ascNOG">Ascomycota (ascNOG)</option>
+            <option value="aveNOG">Aves (aveNOG)</option>
+            <option value="bacNOG">Bacilli (bacNOG)</option>
+            <option value="bactNOG">Bacteria (bactNOG)</option>
+            <option value="bacteNOG">Bacteroidia (bacteNOG)</option>
+            <option value="basNOG">Basidiomycota (basNOG)</option>
+            <option value="bctoNOG">Bacteroidetes (bctoNOG)</option>
+            <option value="biNOG">Bilateria (biNOG)</option>
+            <option value="bproNOG">Proteobacteria_beta (bproNOG)</option>
+            <option value="braNOG">Brassicales (braNOG)</option>
+            <option value="carNOG">Carnivora (carNOG)</option>
+            <option value="chaNOG">Chaetomiaceae (chaNOG)</option>
+            <option value="chlNOG">Chlorobi (chlNOG)</option>
+            <option value="chlaNOG">Chlamydiae (chlaNOG)</option>
+            <option value="chloNOG">Chloroflexi (chloNOG)</option>
+            <option value="chlorNOG">Chloroflexi (chlorNOG)</option>
+            <option value="chloroNOG">Chlorophyta (chloroNOG)</option>
+            <option value="chorNOG">Chordata (chorNOG)</option>
+            <option value="chrNOG">Chromadorea (chrNOG)</option>
+            <option value="cloNOG">Clostridia (cloNOG)</option>
+            <option value="cocNOG">Coccidia (cocNOG)</option>
+            <option value="creNOG">Crenarchaeota (creNOG)</option>
+            <option value="cryNOG">Cryptosporidiidae (cryNOG)</option>
+            <option value="cyaNOG">Cyanobacteria (cyaNOG)</option>
+            <option value="cytNOG">Cytophagia (cytNOG)</option>
+            <option value="debNOG">Debaryomycetaceae (debNOG)</option>
+            <option value="defNOG">Deferribacteres (defNOG)</option>
+            <option value="dehNOG">Dehalococcoidetes (dehNOG)</option>
+            <option value="deiNOG">Deinococcusthermus (deiNOG)</option>
+            <option value="delNOG">delta/epsilon (delNOG)</option>
+            <option value="dipNOG">Diptera (dipNOG)</option>
+            <option value="dotNOG">Dothideomycetes (dotNOG)</option>
+            <option value="dproNOG">Proteobacteria_delta (dproNOG)</option>
+            <option value="droNOG">Drosophilidae (droNOG)</option>
+            <option value="eproNOG">Proteobacteria_epsilon (eproNOG)</option>
+            <option value="eryNOG">Erysipelotrichi (eryNOG)</option>
+            <option value="euNOG">Eukaryotes (euNOG)</option>
+            <option value="eurNOG">Euryarchaeota (eurNOG)</option>
+            <option value="euroNOG">Eurotiomycetes (euroNOG)</option>
+            <option value="eurotNOG">Eurotiales (eurotNOG)</option>
+            <option value="fiNOG">Fishes (fiNOG)</option>
+            <option value="firmNOG">Firmicutes (firmNOG)</option>
+            <option value="flaNOG">Flavobacteriia (flaNOG)</option>
+            <option value="fuNOG">Fungi (fuNOG)</option>
+            <option value="fusoNOG">Fusobacteria (fusoNOG)</option>
+            <option value="gproNOG">Proteobacteria_gamma (gproNOG)</option>
+            <option value="haeNOG">Haemosporida (haeNOG)</option>
+            <option value="halNOG">Halobacteria (halNOG)</option>
+            <option value="homNOG">Hominidae (homNOG)</option>
+            <option value="hymNOG">Hymenoptera (hymNOG)</option>
+            <option value="hypNOG">Hypocreales (hypNOG)</option>
+            <option value="inNOG">Insects (inNOG)</option>
+            <option value="kinNOG">Kinetoplastida (kinNOG)</option>
+            <option value="lepNOG">Lepidoptera (lepNOG)</option>
+            <option value="lilNOG">Liliopsida (lilNOG)</option>
+            <option value="maNOG">Mammals (maNOG)</option>
+            <option value="magNOG">Magnaporthales (magNOG)</option>
+            <option value="meNOG">Animals (meNOG)</option>
+            <option value="metNOG">Methanobacteria (metNOG)</option>
+            <option value="methNOG">Methanococci (methNOG)</option>
+            <option value="methaNOG">Methanomicrobia (methaNOG)</option>
+            <option value="necNOG">Nectriaceae (necNOG)</option>
+            <option value="negNOG">Negativicutes (negNOG)</option>
+            <option value="nemNOG">Nematodes (nemNOG)</option>
+            <option value="onyNOG">Onygenales (onyNOG)</option>
+            <option value="opiNOG">Opisthokonts (opiNOG)</option>
+            <option value="perNOG">Peronosporales (perNOG)</option>
+            <option value="plaNOG">Planctomycetes (plaNOG)</option>
+            <option value="pleNOG">Pleosporales (pleNOG)</option>
+            <option value="poaNOG">Poales (poaNOG)</option>
+            <option value="prNOG">Primates (prNOG)</option>
+            <option value="proNOG">Proteobacteria (proNOG)</option>
+            <option value="rhaNOG">Rhabditida (rhaNOG)</option>
+            <option value="roNOG">Rodents (roNOG)</option>
+            <option value="sacNOG">Saccharomycetaceae (sacNOG)</option>
+            <option value="saccNOG">Saccharomycetes (saccNOG)</option>
+            <option value="sorNOG">Sordariales (sorNOG)</option>
+            <option value="sordNOG">Sordariomycetes (sordNOG)</option>
+            <option value="sphNOG">Sphingobacteriia (sphNOG)</option>
+            <option value="spiNOG">Spirochaetes (spiNOG)</option>
+            <option value="spriNOG">Supraprimates (spriNOG)</option>
+            <option value="strNOG">Streptophyta (strNOG)</option>
+            <option value="synNOG">Synergistetes (synNOG)</option>
+            <option value="tenNOG">Tenericutes (tenNOG)</option>
+            <option value="thaNOG">Thaumarchaeota (thaNOG)</option>
+            <option value="theNOG">Thermoplasmata (theNOG)</option>
+            <option value="therNOG">Thermotogae (therNOG)</option>
+            <option value="thermNOG">Thermococci (thermNOG)</option>
+            <option value="treNOG">Tremellales (treNOG)</option>
+            <option value="veNOG">Vertebrates (veNOG)</option>
+            <option value="verNOG">Verrucomicrobia (verNOG)</option>
+            <option value="verrNOG">Verrucomicrobiae (verrNOG)</option>
+            <option value="virNOG">Viridiplantae (virNOG)</option>
+        </param>
+    </xml>
+    <xml name="data_manager_outputs">
+        <outputs>
+            <data name="out_file" format="data_manager_json" label="${tool.name}"/>
+        </outputs>
+    </xml>
+    <token name="@DOWNLOAD_CMD@"><![CDATA[
+## tool should set install_path
+#if $test == 'true'
+#import os.path
+#set $install_path = $os.path.join($os.path.dirname($__tool_directory__), 'test-data/cached_locally')
+#end if
+#if $dbs:
+#set $eggnogdbs = ' '.join(str($dbs).split(','))
+#else
+#set $eggnogdbs = 'none'
+#end if
+mkdir -p '${install_path}' &&
+download_eggnog_data.py 
+  $diamond_database -y -q 
+#if $test == 'true'
+  -s
+#end if
+  --data_dir '$install_path' 
+  $eggnogdbs &&
+python '${__tool_directory__}/data_manager_eggnog.py' --config_file '$out_file' --install_path '$install_path' --dbs '$dbs'
+    ]]></token>
+    <xml name="data_manager_test">
+        <!--
+        <test>
+            <param name="test" value="true"/>
+            <param name="diamond_database" value="false"/>
+            <param name="dbs" value="thaNOG"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="eggnog_mapper_db" />
+                </assert_contents>
+            </output>
+        </test>
+        -->
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+<data_managers>
+  <data_manager tool_file="data_manager/data_manager_eggnog.xml" id="data_manager_eggnog" >
+    <data_table name="eggnog_mapper_db">  <!-- Defines a Data Table to be modified. -->
+      <output> <!-- Handle the output of the Data Manager Tool -->
+        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="path" output_ref="out_file" >
+          <move type="directory" relativize_symlinks="True">
+            <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">eggnog_data/${value}</target>
+          </move>
+          <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/eggnog_data/${value}</value_translation>
+          <value_translation type="function">abspath</value_translation>
+        </column>
+      </output>
+    </data_table>
+    <data_table name="eggnog_mapper_hmm_dbs">  <!-- Defines a Data Table to be modified. -->
+      <output> <!-- Handle the output of the Data Manager Tool -->
+        <column name="key" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="db_version" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="value" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="name" />  <!-- columns that are going to be specified by the Data Manager Tool -->
+        <column name="path" /> <!-- columns that are going to be specified by the Data Manager Tool -->
+      </output>
+    </data_table>
+  </data_manager>
+</data_managers>
Binary file test-data/cached_locally/eggnog.db has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/eggnog_mapper_db.loc	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,2 @@
+#value	name	path
+4.5	eggNOG_4.5	${__HERE__}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cached_locally/eggnog_mapper_hmm_dbs.loc	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,2 @@
+#key	db_version	value	name	path
+4.5_ENOG411CB2I	4.5	ENOG411CB2I	ENOG411CB2I	${__HERE__}/hmmdb_levels/ENOG411CB2I/ENOG411CB2I
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/eggnog_mapper_db.loc.sample	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,25 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of eggnog_mapper data files. 
+#
+# eggnog-mapper requires the following files to be installed in the data directory:
+#  https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog.db.gz
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/OG_fasta.tar.gz
+# In addition individual HMM DBs can be installed from:
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/hmmdb_levels/
+# A complete diamond database is available from:
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog_proteins.dmnd.gz
+#
+# The python script download_eggnog_data.py, 
+# included with eggnog_mapper, can be used to download the files to the correct directory
+#
+# The near-equivalence of columns "value" and "db" is needed for the tests to work,
+# and for the setting of --data_dir to the parent directory of eggnog.db
+# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. 
+# See test-data/cached_locally/eggnog_mapper_hmm_dbs.loc for how this was done with the included test databases
+# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, 
+# value and db should be the same (in the example, both should be "thaNOG")
+#
+#
+#value	name	path
+#4.5.1	eggnog_4.5.1	/path/to/directory/that/contains/eggnog.db
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/eggnog_mapper_hmm_dbs.loc.sample	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,28 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of eggnog_mapper data files. 
+#
+# eggnog-mapper requires the following files to be installed in the data directory:
+#  https://github.com/jhcepas/eggnog-mapper/blob/master/data/og2level.tsv.gz
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog.db.gz
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/OG_fasta.tar.gz
+# In addition individual HMM DBs can be installed from:
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/hmmdb_levels/
+# A complete diamond database is available from:
+#  http://eggnogdb.embl.de/download/eggnog_4.5/eggnog-mapper-data/eggnog_proteins.dmnd.gz
+#
+# The python script download_eggnog_data.py, 
+# included with eggnog_mapper, can be used to download the files to the correct directory
+#
+# The near-equivalence of columns "value" and "db" is needed for the tests to work,
+# and for the setting of --data_dir to the parent directory of eggnog.db
+# The complicated eggNOG database structure makes passing custom HMM databases somewhat tricky. 
+# See test-data/cached_locally/eggnog_mapper_hmm_dbs.loc for how this was done with the included test databases
+# In all other cases, when the appropriate HMM database (for example, "thaNOG") was downloaded from eggnogdb.embl.de, 
+# value and db should be the same (in the example, both should be "thaNOG")
+#
+#
+#key	db_version	value	name	path
+#4.5.1_NOG	4.5.1	NOG	Full eggNOG database (NOG)	
+#4.5.1_euk	4.5.1	euk	Eukaryotes (euk)	
+#4.5.1_aproNOG	4.5.1	aproNOG	Proteobacteria_alpha (aproNOG)	
+#4.5.1_ENOG411CB2I	4.5.1	ENOG411CB2I	ENOG411CB2I (custom)	/path/to/custom/hmmdb/ENOG411CB2I
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,11 @@
+<tables>
+    <!-- Locations of all eggnog_mapper data -->
+    <table name="eggnog_mapper_db" comment_char="#" allow_duplicate_entries="False">
+        <columns>value,name,path</columns>
+        <file path="tool-data/eggnog_mapper_db.loc" />
+    </table>
+    <table name="eggnog_mapper_hmm_dbs" comment_char="#" allow_duplicate_entries="False">
+        <columns>key,db_version,value,name,path</columns>
+        <file path="tool-data/eggnog_mapper_hmm_dbs.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Mon Nov 11 11:49:16 2019 -0500
@@ -0,0 +1,11 @@
+<tables>
+    <!-- Locations of all eggnog_mapper data -->
+    <table name="eggnog_mapper_db" comment_char="#">
+        <columns>value,name,path</columns>
+        <file path="${__HERE__}/test-data/cached_locally/eggnog_mapper_db.loc" />
+    </table>
+    <table name="eggnog_mapper_hmm_dbs" comment_char="#">
+        <columns>key,db_version,value,name,path</columns>
+        <file path="${__HERE__}/test-data/cached_locally/eggnog_mapper_hmm_dbs.loc" />
+    </table>
+</tables>