changeset 0:f57c13f5878b draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:47:32 -0500
parents
children bf7b2c14cabc
files data_manager/dada2_fetcher.xml data_manager/data_manager.py data_manager_conf.xml test-data/PR24.11.1_json test-data/RefSeq_RDP2018_json test-data/dada2_species.loc test-data/dada2_taxonomy.loc test-data/greengenes13.84_json test-data/gtdb2018_json test-data/hitdb1_json test-data/rdp16_json test-data/silva132_json test-data/silvaeuk132_json test-data/unite8fungi_json test-data/unite8fungisingletons_json tool-data/dada2_species.loc.sample tool-data/dada2_taxonomy.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 19 files changed, 416 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/dada2_fetcher.xml	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,181 @@
+<?xml version="1.0"?>
+<tool id="dada2_fetcher" name="dada2 data manager" tool_type="manage_data" version="0.0.7">
+    <description>Download reference databases</description>
+    <!-- Runs data_manager.py with the dataset key "<db_select>_<version_select>" (e.g. silva_132).
+         The key must exist in the FILE2NAME / FILE2TAXURL tables of data_manager.py. -->
+    <command detect_errors="exit_code"><![CDATA[
+    python '$__tool_directory__/data_manager.py'
+    --out '$out_file'
+    #set dataset = str($db_cond.db_select) + '_' + str($db_cond.version_select)
+    --dataset '$dataset'
+    ]]>
+    </command>
+    <inputs>
+        <!-- db_select is the first part of the dataset key; each <when> offers the versions
+             of one database and its option values form the second part of the key -->
+        <conditional name="db_cond">
+            <param name="db_select" type="select" label="Taxonomic database">
+                <option value="silva">Silva</option>
+                <option value="rdp">RDP</option>
+                <option value="greengenes">GreenGenes</option>
+                <option value="unite">UNITE Fungi: General Fasta</option>
+                <!-- UNITE Eukaryotes not yet supported https://github.com/benjjneb/dada2/issues/702 -->
+                <option value="RefSeq_RDP">NCBI RefSeq 16S rRNA database supplemented by RDP</option>
+                <option value="gtdb">GTDB: Genome Taxonomy Database (Bacteria &amp; Archaea)</option>
+                <option value="hitdb">HitDB (Human InTestinal 16S)</option>
+                <option value="silva_euk_18S">Silva Eukaryotic 18S</option>
+                <option value="PR2">Protist Ribosomal Reference database (PR2)</option>
+            </param>
+            <when value="silva">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="132">132</option>
+                    <option value="128">128</option>
+                </param>
+            </when>
+            <when value="rdp">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="16">16</option>
+                    <option value="14">14</option>
+                </param>
+            </when>
+            <when value="greengenes">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="13.84">13.84</option>
+                </param>
+            </when>
+            <when value="unite">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="8.0_fungi">release 8.0 for Fungi</option>
+                    <option value="8.0_fungi_singletons">release 8.0 for Fungi including global and 97% singletons</option>
+                </param>
+            </when>
+            <when value="RefSeq_RDP">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="2018_05">05/2018</option>
+                </param>
+            </when>
+            <when value="gtdb">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="2018_11">11/2018</option>
+                </param>
+            </when>
+            <when value="hitdb">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="1">1</option>
+                </param>
+            </when>
+            <when value="silva_euk_18S">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="132">132</option>
+                </param>
+            </when>
+            <when value="PR2">
+                <param name="version_select" type="select" label="Database version">
+                    <option value="4.11.1">4.11.1</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <!-- out_file receives the data manager JSON written by data_manager.py -->
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <!-- One test per database; the expected JSON files are in test-data/.
+         The selectable versions silva 128 and rdp 14 have no test. -->
+    <tests>
+        <test>
+            <param name="db_cond|db_select" value="silva"/>
+            <param name="db_cond|version_select" value="132"/>
+            <output name="out_file" file="silva132_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="rdp"/>
+            <param name="db_cond|version_select" value="16"/>
+            <output name="out_file" file="rdp16_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="greengenes"/>
+            <param name="db_cond|version_select" value="13.84"/>
+            <output name="out_file" file="greengenes13.84_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="unite"/>
+            <param name="db_cond|version_select" value="8.0_fungi"/>
+            <output name="out_file" file="unite8fungi_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="unite"/>
+            <param name="db_cond|version_select" value="8.0_fungi_singletons"/>
+            <output name="out_file" file="unite8fungisingletons_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="RefSeq_RDP"/>
+            <param name="db_cond|version_select" value="2018_05"/>
+            <output name="out_file" file="RefSeq_RDP2018_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="gtdb"/>
+            <param name="db_cond|version_select" value="2018_11"/>
+            <output name="out_file" file="gtdb2018_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="hitdb"/>
+            <param name="db_cond|version_select" value="1"/>
+            <output name="out_file" file="hitdb1_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="silva_euk_18S"/>
+            <param name="db_cond|version_select" value="132"/>
+            <output name="out_file" file="silvaeuk132_json"/>
+        </test>
+        <test>
+            <param name="db_cond|db_select" value="PR2"/>
+            <param name="db_cond|version_select" value="4.11.1"/>
+            <output name="out_file" file="PR24.11.1_json"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Public Reference databases maintained by the DADA2 project
+..........................................................
+
+The following reference databases, which are described as maintained by the DADA2 project (https://benjjneb.github.io/dada2/training.html), are available:
+
+- Silva (https://www.arb-silva.de/)
+- RDP (http://rdp.cme.msu.edu/)
+- GreenGenes (http://greengenes.secondgenome.com/)
+- UNITE general FASTA (https://unite.ut.ee/repository.php)
+
+While Silva and RDP contain reference databases for taxonomy and species assignment, the GreenGenes and UNITE databases only contain a reference database for taxonomy assignment.
+
+For the Silva databases check the license information: http://www.arb-silva.de/silva-license-information.
+
+Except for UNITE, all reference databases are downloaded from the corresponding zenodo links that are listed on the DADA2 website. The UNITE databases are taken from the links provided on the UNITE website.
+
+More detailed information on the reference databases can be found on the DADA2 website and the links it contains: https://benjjneb.github.io/dada2/training.html.
+
+Further public Reference databases listed by the DADA2 project
+..............................................................
+
+Several contributed reference databases are listed on the DADA2 project website (https://benjjneb.github.io/dada2/training.html):
+
+- RefSeq + RDP (NCBI RefSeq 16S rRNA database supplemented by RDP)
+- GTDB: Genome Taxonomy Database (More info: http://gtdb.ecogenomic.org/)
+- HitDB version 1 (Human InTestinal 16S rRNA) (https://github.com/microbiome/HITdb)
+- RDP fungi LSU
+- Silva Eukaryotic 18S
+- PR2 (https://github.com/pr2database/pr2database)
+
+Except for PR2, all reference databases are downloaded from the corresponding zenodo links that are listed on the DADA2 website. The PR2 database is taken from their github page.
+
+More detailed information on the reference databases can be found on the DADA2 website and the links it contains: https://benjjneb.github.io/dada2/training.html.
+    ]]></help>
+    <!-- one DOI per reference database; the comment above each entry names the database -->
+    <citations>
+        <!-- silva -->
+        <citation type="doi">10.1093/nar/gks1219</citation>
+        <!-- rdp -->
+        <citation type="doi">10.1093/nar/gkt1244</citation>
+        <!-- greengenes -->
+        <citation type="doi">10.1128/AEM.03006-05</citation>
+        <!-- unite -->
+        <citation type="doi">10.15156/BIO/786343</citation>
+        <!-- TODO gtdb ??? -->
+        <!-- hitdb -->
+        <citation type="doi">10.1186/s12864-015-2265-y</citation>
+        <!-- PR2 -->
+        <citation type="doi">10.1093/nar/gks1160</citation>
+    </citations>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/data_manager.py	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,133 @@
+import argparse
+import json
+import os
+try:
+    # For Python 3.0 and later
+    from urllib.request import Request, urlopen
+except ImportError:
+    # Fall back to Python 2 imports
+    from urllib2 import Request, urlopen
+
+# taxonomic levels used for every dataset that has no entry in FILE2TAXLEVELS
+DEFAULT_TAXLEVELS = "Kingdom,Phylum,Class,Order,Family,Genus,Species"
+
+# dataset key ("<database>_<version>", as built in dada2_fetcher.xml) -> display name
+FILE2NAME = {
+    "silva_132": "Silva version 132",
+    "silva_128": "Silva version 128",
+    "rdp_16": "RDP trainset 16",
+    "rdp_14": "RDP trainset 14",
+    "greengenes_13.84": "GreenGenes version 13.84",
+    "unite_8.0_fungi": "UNITE: General Fasta release 8.0 for Fungi",
+    "unite_8.0_fungi_singletons": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons",
+    "RefSeq_RDP_2018_05": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)",
+    # NOTE(review): "&amp;" is an XML entity inside a Python string, so the literal text
+    # "&amp;" is written to the data table name. test-data/gtdb2018_json expects exactly
+    # this text, so the string and the test file have to be changed together.
+    "gtdb_2018_11": "GTDB: Genome Taxonomy Database (Bacteria &amp; Archaea) (11/2018)",
+    "hitdb_1": "HitDB version 1 (Human InTestinal 16S rRNA)",
+    "silva_euk_18S_132": "Silva version 132 Eukaryotic 18S",
+    "PR2_4.11.1": "Protist Ribosomal Reference database (PR2) 4.11.1"
+}
+
+# dataset key -> download URL of the taxonomy assignment reference
+# (gzipped fasta files, except for the UNITE entries which are zip archives)
+FILE2TAXURL = {
+    "silva_132": "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz?download=1",
+    "silva_128": "https://zenodo.org/record/824551/files/silva_nr_v128_train_set.fa.gz?download=1",
+    "rdp_16": "https://zenodo.org/record/801828/files/rdp_train_set_16.fa.gz?download=1",
+    "rdp_14": "https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1",
+    "unite_8.0_fungi": "https://files.plutof.ut.ee/public/orig/EB/0C/EB0CCB3A871B77EA75E472D13926271076904A588D2E1C1EA5AFCF7397D48378.zip",
+    "unite_8.0_fungi_singletons": "https://files.plutof.ut.ee/doi/06/A2/06A2C86256EED64085670EB0C54B7115F6DAC8F311C656A9CB33E386CFABA0D0.zip",
+    "greengenes_13.84": "https://zenodo.org/record/158955/files/gg_13_8_train_set_97.fa.gz?download=1",
+    "RefSeq_RDP_2018_05": "https://zenodo.org/record/2541239/files/RefSeq-RDP16S_v2_May2018.fa.gz?download=1",
+    "gtdb_2018_11": "https://zenodo.org/record/2541239/files/GTDB_bac-arc_ssu_r86.fa.gz?download=1",
+    "hitdb_1": "https://zenodo.org/record/159205/files/hitdb_v1.00.fa.gz?download=1",
+    "silva_euk_18S_132": "https://zenodo.org/record/1447330/files/silva_132.18s.99_rep_set.dada2.fa.gz?download=1",
+    "PR2_4.11.1": "https://github.com/pr2database/pr2database/releases/download/4.11.1/pr2_version_4.11.1_dada2.fasta.gz"
+}
+
+# dataset key -> download URL of the species assignment reference;
+# datasets without an entry get no dada2_species data table row
+FILE2SPECIESURL = {
+    "silva_132": "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz?download=1",
+    "silva_128": "https://zenodo.org/record/824551/files/silva_species_assignment_v128.fa.gz?download=1",
+    "rdp_16": "https://zenodo.org/record/801828/files/rdp_species_assignment_16.fa.gz?download=1",
+    "rdp_14": "https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1"
+}
+
+# dataset key -> taxonomic levels, only for datasets that deviate from DEFAULT_TAXLEVELS
+FILE2TAXLEVELS = {
+    "PR2_4.11.1": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species"
+}
+
+
+def url_download(url, fname, workdir):
+    """
+    Download url to workdir/fname, creating workdir if it does not exist.
+
+    UNITE downloads (fname starting with "unite") are zip files containing
+    two fasta (xyz.fasta and developer/xyz.fasta). For those the archive is
+    extracted into workdir and workdir/fname is overwritten with a gzipped
+    copy of the single top level fasta file.
+
+    Raises an Exception if a UNITE archive does not contain exactly one top
+    level fasta file; errors raised by urlopen propagate to the caller.
+    """
+    # function-scope imports (standard library only), as in the original UNITE branch
+    import glob
+    import gzip
+    import shutil
+    import zipfile
+
+    file_path = os.path.join(workdir, fname)
+    if not os.path.exists(workdir):
+        os.makedirs(workdir)
+    src = None
+    try:
+        src = urlopen(Request(url))
+        with open(file_path, 'wb') as dst:
+            # streams the response in chunks, so the download is never held in memory as a whole
+            shutil.copyfileobj(src, dst)
+    finally:
+        if src:
+            src.close()
+
+    if not fname.startswith("unite"):
+        return
+
+    # unzip download; the context manager closes the archive even if extraction fails
+    with zipfile.ZipFile(file_path, 'r') as zip_ref:
+        zip_ref.extractall(workdir)
+    # gzip top level fasta file (the pattern does not descend into developer/)
+    fastas = glob.glob(os.path.join(workdir, "*fasta"))
+    if len(fastas) != 1:
+        msg = "UNITE download %s contained %d fasta file(s): %s" % (url, len(fastas), " ".join(fastas))
+        raise Exception(msg)
+    with open(fastas[0], 'rb') as f_in:
+        with gzip.open(file_path, 'wb') as f_out:
+            shutil.copyfileobj(f_in, f_out)
+
+
+def remote_dataset(dataset, outjson):
+    """
+    Download the reference file(s) of a dataset and write the data manager JSON.
+
+    dataset: key into FILE2TAXURL / FILE2NAME (e.g. "silva_132"); an unknown
+        key raises KeyError.
+    outjson: path of a JSON file. It is read first to get the
+        extra_files_path of the first entry in 'output_data' (used as download
+        directory) and is then overwritten with the data table entries.
+    """
+    with open(outjson) as jf:
+        params = json.load(jf)
+
+    workdir = params['output_data'][0]['extra_files_path']
+    os.mkdir(workdir)
+    url_download(FILE2TAXURL[dataset], dataset + ".taxonomy", workdir)
+
+    data_manager_json = {"data_tables": {}}
+    data_manager_entry = {}
+    data_manager_entry['value'] = dataset
+    data_manager_entry['name'] = FILE2NAME[dataset]
+    data_manager_entry['path'] = dataset + ".taxonomy"
+    data_manager_entry['taxlevels'] = FILE2TAXLEVELS.get(dataset, DEFAULT_TAXLEVELS)
+    data_manager_json["data_tables"]["dada2_taxonomy"] = data_manager_entry
+
+    # a species assignment file exists only for the datasets listed in FILE2SPECIESURL
+    if FILE2SPECIESURL.get(dataset, False):
+        url_download(FILE2SPECIESURL[dataset], dataset + ".species", workdir)
+        data_manager_entry = {}
+        data_manager_entry['value'] = dataset
+        data_manager_entry['name'] = FILE2NAME[dataset]
+        data_manager_entry['path'] = dataset + ".species"
+        data_manager_json["data_tables"]["dada2_species"] = data_manager_entry
+
+    # open() instead of file(): file() is a Python 2 only builtin and raises NameError on Python 3
+    with open(outjson, 'w') as jf:
+        jf.write(json.dumps(data_manager_json))
+
+
+if __name__ == '__main__':
+    # command line interface used by dada2_fetcher.xml: JSON file and dataset key
+    cli = argparse.ArgumentParser(description='Create data manager json.')
+    for flag, description in (('--out', 'JSON filename'),
+                              ('--dataset', 'Download data set name')):
+        cli.add_argument(flag, action='store', help=description)
+    options = cli.parse_args()
+
+    remote_dataset(options.dataset, options.out)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<data_managers>
+    <!-- maps the "data_tables" entries of the JSON written by data_manager/data_manager.py
+         onto the dada2_taxonomy and dada2_species data tables -->
+    <data_manager tool_file="data_manager/dada2_fetcher.xml" id="dada2_fetcher">
+        <!-- taxonomy assignment references: value, name, path, taxlevels -->
+        <data_table name="dada2_taxonomy">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <column name="path" output_ref="out_file">
+                    <!-- the downloaded file is moved below ${GALAXY_DATA_MANAGER_DATA_PATH}/dada2/
+                         and the stored path is translated to that location as an absolute path -->
+                    <move type="file" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">dada2/${path}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/dada2/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+                <column name="taxlevels" />
+            </output>
+        </data_table>
+        <!-- species assignment references: value, name, path; data_manager.py only writes
+             an entry for datasets listed in its FILE2SPECIESURL table -->
+        <data_table name="dada2_species">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <column name="path" output_ref="out_file">
+                    <!-- same move and path translation as for dada2_taxonomy -->
+                    <move type="file" relativize_symlinks="True">
+                        <source>${path}</source>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">dada2/${path}</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/dada2/${path}</value_translation>
+                    <value_translation type="function">abspath</value_translation>
+                </column>
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/PR24.11.1_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "PR2_4.11.1.taxonomy", "name": "Protist Ribosomal Reference database (PR2) 4.11.1", "value": "PR2_4.11.1", "taxlevels": "Kingdom,Supergroup,Division,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/RefSeq_RDP2018_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "RefSeq_RDP_2018_05.taxonomy", "name": "NCBI RefSeq 16S rRNA database supplemented by RDP (05/2018)", "value": "RefSeq_RDP_2018_05", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dada2_species.loc	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,9 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of dada2 reference data sets for species assignment, using three
+# tab separated columns:
+#
+# <unique_build_id>	<display_name>	<fasta_file_path>
+#
+# Datasets can be retrieved from http://busco.ezlab.org/frame_wget.html
+#
+# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dada2_taxonomy.loc	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,9 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of dada2 reference data sets for taxonomy assignment, using four
+# tab separated columns:
+#
+# <unique_build_id>	<display_name>	<fasta_file_path>	<taxlevels>
+#
+# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
+# 
+# taxlevels is a comma separated list of taxonomy levels
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/greengenes13.84_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "greengenes_13.84.taxonomy", "name": "GreenGenes version 13.84", "value": "greengenes_13.84", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gtdb2018_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "gtdb_2018_11.taxonomy", "name": "GTDB: Genome Taxonomy Database (Bacteria &amp; Archaea) (11/2018)", "value": "gtdb_2018_11", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hitdb1_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "hitdb_1.taxonomy", "name": "HitDB version 1 (Human InTestinal 16S rRNA)", "value": "hitdb_1", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rdp16_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_species": {"path": "rdp_16.species", "name": "RDP trainset 16", "value": "rdp_16"}, "dada2_taxonomy": {"path": "rdp_16.taxonomy", "name": "RDP trainset 16", "value": "rdp_16", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/silva132_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_species": {"path": "silva_132.species", "name": "Silva version 132", "value": "silva_132"}, "dada2_taxonomy": {"path": "silva_132.taxonomy", "name": "Silva version 132", "value": "silva_132", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/silvaeuk132_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "silva_euk_18S_132.taxonomy", "name": "Silva version 132 Eukaryotic 18S", "value": "silva_euk_18S_132", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unite8fungi_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "unite_8.0_fungi.taxonomy", "name": "UNITE: General Fasta release 8.0 for Fungi", "value": "unite_8.0_fungi", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unite8fungisingletons_json	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,1 @@
+{"data_tables": {"dada2_taxonomy": {"path": "unite_8.0_fungi_singletons.taxonomy", "name": "UNITE: General Fasta release 8.0 for Fungi including global and 97% singletons", "value": "unite_8.0_fungi_singletons", "taxlevels": "Kingdom,Phylum,Class,Order,Family,Genus,Species"}}}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dada2_species.loc.sample	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,9 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of dada2 reference data sets for species assignment, using three
+# tab separated columns:
+#
+# <unique_build_id>	<display_name>	<fasta_file_path>
+#
+# Datasets can be retrieved from http://busco.ezlab.org/frame_wget.html
+#
+# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dada2_taxonomy.loc.sample	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,9 @@
+# This is a sample file distributed with Galaxy that is used to define a
+# list of dada2 reference data sets for taxonomy assignment, using four
+# tab separated columns:
+#
+# <unique_build_id>	<display_name>	<fasta_file_path>	<taxlevels>
+#
+# Datasets can be retrieved from https://benjjneb.github.io/dada2/training.html
+# 
+# taxlevels is a comma separated list of taxonomy levels
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- species assignment references; columns as described in tool-data/dada2_species.loc.sample -->
+    <table name="dada2_species" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/dada2_species.loc" />
+    </table>
+    <!-- taxonomy assignment references; columns as described in tool-data/dada2_taxonomy.loc.sample -->
+    <table name="dada2_taxonomy" comment_char="#">
+        <columns>value, name, path, taxlevels</columns>
+        <file path="tool-data/dada2_taxonomy.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Nov 08 18:47:32 2019 -0500
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- test variant of tool_data_table_conf.xml.sample: both tables read the .loc files in test-data -->
+    <table name="dada2_species" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/dada2_species.loc" />
+    </table>
+    <table name="dada2_taxonomy" comment_char="#">
+        <columns>value, name, path, taxlevels</columns>
+        <file path="${__HERE__}/test-data/dada2_taxonomy.loc" />
+    </table>
+</tables>