changeset 2:0e532fc0a0a6 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit b824ec6d0faa5353c5893b5a2a540b8251cae309"
author iuc
date Thu, 17 Sep 2020 09:37:41 +0000
parents aec831b54a5b
children 9d09724f2bf1
files data_manager/data_manager_fetch_mothur_reference_data.xml data_manager/fetch_mothur_reference_data.py
diffstat 2 files changed, 257 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/data_manager_fetch_mothur_reference_data.xml	Thu Nov 28 15:47:32 2019 -0500
+++ b/data_manager/data_manager_fetch_mothur_reference_data.xml	Thu Sep 17 09:37:41 2020 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.1.4" tool_type="manage_data" profile="19.05">
+<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.1.5" tool_type="manage_data" profile="19.05">
     <description>Fetch and install reference data for Mothur</description>
     <requirements>
         <requirement type="package" version="2.7">python</requirement>
@@ -74,6 +74,223 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="data_source|ref_data" value="lookup_gsflx"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="GSFLX" />
+                    <has_text text="LookUp_GSFLX.pat" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="lookup_gs20"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="GS20" />
+                    <has_text text="LookUp_GS20.pat" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="RDP_v16"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="16S rRNA RDP training set 16" />
+                    <has_text text="trainset16_022016.rdp.fasta" />
+                    <has_text text="trainset16_022016.rdp.tax" />
+                    <has_text text="trainset16_022016.pds.fasta" />
+                    <has_text text="trainset16_022016.pds.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="RDP_v14"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="16S rRNA RDP training set 14" />
+                    <has_text text="trainset14_032015.rdp.fasta" />
+                    <has_text text="trainset14_032015.rdp.tax" />
+                    <has_text text="trainset14_032015.pds.fasta" />
+                    <has_text text="trainset14_032015.pds.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="RDP_v10"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="16S rRNA RDP training set 10" />
+                    <has_text text="trainset10_082014.rdp.fasta" />
+                    <has_text text="trainset10_082014.rdp.tax" />
+                    <has_text text="trainset10_082014.pds.fasta" />
+                    <has_text text="trainset10_082014.pds.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="RDP_v9"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="16S rRNA PDS training set 9" />
+                    <has_text text="trainset9_032012.rdp.fasta" />
+                    <has_text text="trainset9_032012.rdp.tax" />
+                    <has_text text="trainset9_032012.pds.fasta" />
+                    <has_text text="trainset9_032012.pds.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="RDP_v7"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="16S rRNA RDP training set 7" />
+                    <has_text text="FungiLSU_train_1400bp_8506_mod.fasta" />
+                    <has_text text="FungiLSU_train_1400bp_8506_mod.tax" />
+                    <has_text text="trainset7_112011.rdp.fasta" />
+                    <has_text text="trainset7_112011.rdp.tax" />
+                    <has_text text="trainset7_112011.pds.fasta" />
+                    <has_text text="trainset7_112011.pds.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="RDP_v6"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="RDP training set 6" />
+                    <has_text text="trainset6_032010.rdp.fasta" />
+                    <has_text text="trainset6_032010.rdp.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- SILVA data is too large (>1GB each) for CI testing on GitHub Actions,
+             so we skip them -->
+        <!--<test>
+            <param name="data_source|ref_data" value="silva_release_128"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="SILVA release 128" />
+                    <has_text text="silva.nr_v128.tax" />
+                    <has_text text="silva.seed_v128.tax" />
+                    <has_text text="silva.nr_v128.align" />
+                    <has_text text="silva.seed_v128.align" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="silva_release_123"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="SILVA release 123" />
+                    <has_text text="silva.nr_v123.align" />
+                    <has_text text="silva.seed_v123.align" />
+                    <has_text text="silva.nr_v123.tax" />
+                    <has_text text="silva.seed_v123.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="silva_release_119"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="SILVA release 119" />
+                    <has_text text="silva.nr_v119.align" />
+                    <has_text text="silva.seed_v119.align" />
+                    <has_text text="silva.nr_v119.tax" />
+                    <has_text text="silva.seed_v119.tax" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="silva_release_102"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="SILVA release 102" />
+                    <has_text text="silva.bacteria.fasta" />
+                    <has_text text="silva.gold.ng.fasta" />
+                    <has_text text="nogap.archaea.fasta" />
+                    <has_text text="silva.archaea.fasta" />
+                    <has_text text="nogap.eukarya.fasta" />
+                    <has_text text="silva.eukarya.fasta" />
+                    <has_text text="silva.bacteria.gg.tax" />
+                    <has_text text="silva.bacteria.ncbi.tax" />
+                    <has_text text="silva.bacteria.rdp.tax" />
+                    <has_text text="silva.bacteria.rdp6.tax" />
+                    <has_text text="silva.bacteria.silva.tax" />
+                    <has_text text="silva.archaea.gg.tax" />
+                    <has_text text="silva.archaea.ncbi.tax" />
+                    <has_text text="silva.archaea.rdp.tax" />
+                    <has_text text="silva.archaea.silva.tax" />
+                    <has_text text="silva.eukarya.ncbi.tax" />
+                    <has_text text="silva.eukarya.silva.tax" />
+                </assert_contents>
+            </output>
+        </test>-->
+
+        <!-- Greengenes is also large (400MB-1.5GB), so only the tests for the
+             older (smaller) releases are executed -->
+        <!--<test>
+            <param name="data_source|ref_data" value="greengenes_August2013"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="Greengenes August 2013" />
+                    <has_text text="gg_13_8_99.gg.tax" />
+                    <has_text text="gg_13_8_99.fasta" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="greengenes_May2013"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="Greengenes May 2013" />
+                    <has_text text="gg_13_5_99.pds.tax" />
+                    <has_text text="gg_13_5_99.gg.tax" />
+                    <has_text text="gg_13_5_99.align" />
+                    <has_text text="gg_13_5_99.fasta" />
+                </assert_contents>
+            </output>
+        </test>-->
+        <test>
+            <param name="data_source|ref_data" value="greengenes_old"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="Greengenes pre-May 2013" />
+                    <has_text text="gg_99.pds.tax" />
+                    <has_text text="core_set_aligned.imputed.fasta" />
+                    <has_text text="gg_99.pds.ng.fasta" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="greengenes_gold_alignment"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="Greengenes gold alignment" />
+                    <has_text text="rRNA16S.gold.NAST_ALIGNED.fasta" />
+                </assert_contents>
+            </output>
+        </test>
+
+        <test>
+            <param name="data_source|ref_data" value="secondary_structure_maps_silva"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="SILVA" />
+                    <has_text text="silva.ss.map" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="data_source|ref_data" value="secondary_structure_maps_greengenes"/>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="Greengenes" />
+                    <has_text text="gg.ss.map" />
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help>
 .. class:: infomark
--- a/data_manager/fetch_mothur_reference_data.py	Thu Nov 28 15:47:32 2019 -0500
+++ b/data_manager/fetch_mothur_reference_data.py	Thu Sep 17 09:37:41 2020 +0000
@@ -10,6 +10,7 @@
 import tempfile
 import urllib2
 import zipfile
+from functools import reduce
 
 # When extracting files from archives, skip names that
 # start with the following strings
@@ -27,118 +28,118 @@
     # Look up data
     # http://www.mothur.org/wiki/Lookup_files
     "lookup_titanium": {
-        "GS FLX Titanium": ["http://www.mothur.org/w/images/9/96/LookUp_Titanium.zip", ]
+        "GS FLX Titanium": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_titanium.zip", ]
     },
     "lookup_gsflx": {
-        "GSFLX": ["http://www.mothur.org/w/images/8/84/LookUp_GSFLX.zip", ]
+        "GSFLX": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gsflx.zip", ]
     },
     "lookup_gs20": {
-        "GS20": ["http://www.mothur.org/w/images/7/7b/LookUp_GS20.zip", ]
+        "GS20": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gs20.zip", ]
     },
     # RDP reference files
     # http://www.mothur.org/wiki/RDP_reference_files
     "RDP_v16": {
         "16S rRNA RDP training set 16":
-        ["https://mothur.org/w/images/d/dc/Trainset16_022016.rdp.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ],
         "16S rRNA PDS training set 16":
-        ["https://mothur.org/w/images/c/c3/Trainset16_022016.pds.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.pds.tgz", ],
     },
     "RDP_v14": {
         "16S rRNA RDP training set 14":
-        ["https://mothur.org/w/images/6/6c/Trainset14_032015.rdp.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset14_032015.rdp.tgz", ],
         "16S rRNA PDS training set 14":
-        ["https://mothur.org/w/images/8/88/Trainset14_032015.pds.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset14_032015.pds.tgz", ],
     },
     "RDP_v10": {
         "16S rRNA RDP training set 10":
-        ["http://www.mothur.org/w/images/b/b5/Trainset10_082014.rdp.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset10_082014.rdp.tgz", ],
         "16S rRNA PDS training set 10":
-        ["http://www.mothur.org/w/images/2/24/Trainset10_082014.pds.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset10_082014.pds.tgz", ],
     },
     "RDP_v9": {
         "16S rRNA RDP training set 9":
-        ["http://www.mothur.org/w/images/7/72/Trainset9_032012.rdp.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset9_032012.rdp.zip", ],
         "16S rRNA PDS training set 9":
-        ["http://www.mothur.org/w/images/5/59/Trainset9_032012.pds.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset9_032012.pds.zip", ],
     },
     "RDP_v7": {
         "16S rRNA RDP training set 7":
-        ["http://www.mothur.org/w/images/2/29/Trainset7_112011.rdp.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset7_112011.rdp.zip", ],
         "16S rRNA PDS training set 7":
-        ["http://www.mothur.org/w/images/4/4a/Trainset7_112011.pds.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset7_112011.pds.zip", ],
         "8S rRNA Fungi training set 7":
-        ["http://www.mothur.org/w/images/3/36/FungiLSU_train_v7.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/fungilsu_train_v7.zip", ],
     },
     "RDP_v6": {
         "RDP training set 6":
-        ["http://www.mothur.org/w/images/4/49/RDPTrainingSet.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/rdptrainingset.zip", ],
     },
     # Silva reference files
     # http://www.mothur.org/wiki/Silva_reference_files
     "silva_release_128": {
         "SILVA release 128":
-        ["https://mothur.org/w/images/b/b4/Silva.nr_v128.tgz",
-         "https://mothur.org/w/images/a/a4/Silva.seed_v128.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v128.tgz", ],
     },
     "silva_release_123": {
         "SILVA release 123":
-        ["https://mothur.org/w/images/b/be/Silva.nr_v123.tgz",
-         "https://mothur.org/w/images/1/15/Silva.seed_v123.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v123.tgz",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v123.tgz", ],
     },
     "silva_release_119": {
         "SILVA release 119":
-        ["http://www.mothur.org/w/images/2/27/Silva.nr_v119.tgz",
-         "http://www.mothur.org/w/images/5/56/Silva.seed_v119.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v119.tgz",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v119.tgz", ],
     },
     "silva_release_102": {
         "SILVA release 102":
-        ["http://www.mothur.org/w/images/9/98/Silva.bacteria.zip",
-         "http://www.mothur.org/w/images/3/3c/Silva.archaea.zip",
-         "http://www.mothur.org/w/images/1/1a/Silva.eukarya.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.bacteria.zip",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.archaea.zip",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.eukarya.zip", ],
     },
     "silva_gold_bacteria": {
         "SILVA gold":
-        ["http://www.mothur.org/w/images/f/f1/Silva.gold.bacteria.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.gold.bacteria.zip", ],
     },
     # Greengenes
     # http://www.mothur.org/wiki/Greengenes-formatted_databases
     "greengenes_August2013": {
         "Greengenes August 2013":
-        ["http://www.mothur.org/w/images/1/19/Gg_13_8_99.refalign.tgz",
-         "http://www.mothur.org/w/images/6/68/Gg_13_8_99.taxonomy.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_8_99.refalign.tgz",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_8_99.taxonomy.tgz", ],
     },
     "greengenes_May2013": {
         "Greengenes May 2013":
-        ["http://www.mothur.org/w/images/c/cd/Gg_13_5_99.refalign.tgz",
-         "http://www.mothur.org/w/images/9/9d/Gg_13_5_99.taxonomy.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_5_99.refalign.tgz",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_5_99.taxonomy.tgz", ],
     },
     "greengenes_old": {
         "Greengenes pre-May 2013":
-        ["http://www.mothur.org/w/images/7/72/Greengenes.alignment.zip",
-         "http://www.mothur.org/w/images/1/16/Greengenes.tax.tgz", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/greengenes.alignment.zip",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/greengenes.tax.tgz", ],
     },
     "greengenes_gold_alignment": {
         "Greengenes gold alignment":
-        ["http://www.mothur.org/w/images/2/21/Greengenes.gold.alignment.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/greengenes.gold.alignment.zip", ],
     },
     # Secondary structure maps
     # http://www.mothur.org/wiki/Secondary_structure_map
     "secondary_structure_maps_silva": {
         "SILVA":
-        ["http://www.mothur.org/w/images/6/6d/Silva_ss_map.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva_ss_map.zip", ],
     },
     "secondary_structure_maps_greengenes": {
         "Greengenes":
-        ["http://www.mothur.org/w/images/4/4b/Gg_ss_map.zip", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_ss_map.zip", ],
     },
     # Lane masks: not used here?
     "lane_masks": {
         "Greengenes-compatible":
-        ["http://www.mothur.org/w/images/2/2a/Lane1241.gg.filter",
-         "http://www.mothur.org/w/images/a/a0/Lane1287.gg.filter",
-         "http://www.mothur.org/w/images/3/3d/Lane1349.gg.filter", ],
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/Lane1241.gg.filter",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/lane1287.gg.filter",
+         "https://mothur.s3.us-east-2.amazonaws.com/wiki/lane1349.gg.filter", ],
         "SILVA-compatible":
-        ["http://www.mothur.org/w/images/6/6d/Lane1349.silva.filter", ]
+        ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lane1349.silva.filter", ]
     },
 }