Mercurial > repos > iuc > data_manager_mothur_toolsuite
changeset 2:0e532fc0a0a6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_mothur_toolsuite/ commit b824ec6d0faa5353c5893b5a2a540b8251cae309"
author | iuc |
---|---|
date | Thu, 17 Sep 2020 09:37:41 +0000 |
parents | aec831b54a5b |
children | 9d09724f2bf1 |
files | data_manager/data_manager_fetch_mothur_reference_data.xml data_manager/fetch_mothur_reference_data.py |
diffstat | 2 files changed, 257 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_fetch_mothur_reference_data.xml Thu Nov 28 15:47:32 2019 -0500 +++ b/data_manager/data_manager_fetch_mothur_reference_data.xml Thu Sep 17 09:37:41 2020 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.1.4" tool_type="manage_data" profile="19.05"> +<tool id="data_manager_fetch_mothur_reference_data" name="Fetch Mothur toolsuite reference data" version="0.1.5" tool_type="manage_data" profile="19.05"> <description>Fetch and install reference data for Mothur</description> <requirements> <requirement type="package" version="2.7">python</requirement> @@ -74,6 +74,223 @@ </assert_contents> </output> </test> + <test> + <param name="data_source|ref_data" value="lookup_gsflx"/> + <output name="out_file"> + <assert_contents> + <has_text text="GSFLX" /> + <has_text text="LookUp_GSFLX.pat" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="lookup_gs20"/> + <output name="out_file"> + <assert_contents> + <has_text text="GS20" /> + <has_text text="LookUp_GS20.pat" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="RDP_v16"/> + <output name="out_file"> + <assert_contents> + <has_text text="16S rRNA RDP training set 16" /> + <has_text text="trainset16_022016.rdp.fasta" /> + <has_text text="trainset16_022016.rdp.tax" /> + <has_text text="trainset16_022016.pds.fasta" /> + <has_text text="trainset16_022016.pds.tax" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="RDP_v14"/> + <output name="out_file"> + <assert_contents> + <has_text text="16S rRNA RDP training set 14" /> + <has_text text="trainset14_032015.rdp.fasta" /> + <has_text text="trainset14_032015.rdp.tax" /> + <has_text text="trainset14_032015.pds.fasta" /> + <has_text text="trainset14_032015.pds.tax" /> + </assert_contents> + </output> + </test> + 
<test> + <param name="data_source|ref_data" value="RDP_v10"/> + <output name="out_file"> + <assert_contents> + <has_text text="16S rRNA RDP training set 10" /> + <has_text text="trainset10_082014.rdp.fasta" /> + <has_text text="trainset10_082014.rdp.tax" /> + <has_text text="trainset10_082014.pds.fasta" /> + <has_text text="trainset10_082014.pds.tax" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="RDP_v9"/> + <output name="out_file"> + <assert_contents> + <has_text text="16S rRNA PDS training set 9" /> + <has_text text="trainset9_032012.rdp.fasta" /> + <has_text text="trainset9_032012.rdp.tax" /> + <has_text text="trainset9_032012.pds.fasta" /> + <has_text text="trainset9_032012.pds.tax" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="RDP_v7"/> + <output name="out_file"> + <assert_contents> + <has_text text="16S rRNA RDP training set 7" /> + <has_text text="FungiLSU_train_1400bp_8506_mod.fasta" /> + <has_text text="FungiLSU_train_1400bp_8506_mod.tax" /> + <has_text text="trainset7_112011.rdp.fasta" /> + <has_text text="trainset7_112011.rdp.tax" /> + <has_text text="trainset7_112011.pds.fasta" /> + <has_text text="trainset7_112011.pds.tax" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="RDP_v6"/> + <output name="out_file"> + <assert_contents> + <has_text text="RDP training set 6" /> + <has_text text="trainset6_032010.rdp.fasta" /> + <has_text text="trainset6_032010.rdp.tax" /> + </assert_contents> + </output> + </test> + <!-- SILVA data is too large (>1GB each) for CI testing on github actions + so we skip them --> + <!--<test> + <param name="data_source|ref_data" value="silva_release_128"/> + <output name="out_file"> + <assert_contents> + <has_text text="SILVA release 128" /> + <has_text text="silva.nr_v128.tax" /> + <has_text text="silva.seed_v128.tax" /> + <has_text text="silva.nr_v128.align" /> + <has_text 
text="silva.seed_v128.align" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="silva_release_123"/> + <output name="out_file"> + <assert_contents> + <has_text text="SILVA release 123" /> + <has_text text="silva.nr_v123.align" /> + <has_text text="silva.seed_v123.align" /> + <has_text text="silva.nr_v123.tax" /> + <has_text text="silva.seed_v123.tax" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="silva_release_119"/> + <output name="out_file"> + <assert_contents> + <has_text text="SILVA release 119" /> + <has_text text="silva.nr_v119.align" /> + <has_text text="silva.seed_v119.align" /> + <has_text text="silva.nr_v119.tax" /> + <has_text text="silva.seed_v119.tax" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="silva_release_102"/> + <output name="out_file"> + <assert_contents> + <has_text text="SILVA release 102" /> + <has_text text="silva.bacteria.fasta" /> + <has_text text="silva.gold.ng.fasta" /> + <has_text text="nogap.archaea.fasta" /> + <has_text text="silva.archaea.fasta" /> + <has_text text="nogap.eukarya.fasta" /> + <has_text text="silva.eukarya.fasta" /> + <has_text text="silva.bacteria.gg.tax" /> + <has_text text="silva.bacteria.ncbi.tax" /> + <has_text text="silva.bacteria.rdp.tax" /> + <has_text text="silva.bacteria.rdp6.tax" /> + <has_text text="silva.bacteria.silva.tax" /> + <has_text text="silva.archaea.gg.tax" /> + <has_text text="silva.archaea.ncbi.tax" /> + <has_text text="silva.archaea.rdp.tax" /> + <has_text text="silva.archaea.silva.tax" /> + <has_text text="silva.eukarya.ncbi.tax" /> + <has_text text="silva.eukarya.silva.tax" /> + </assert_contents> + </output> + </test>--> + + <!-- also greengenes is large (400MB-1.5GB) so only tests for older + (smaller) releases are executed --> + <!--<test> + <param name="data_source|ref_data" value="greengenes_August2013"/> + <output name="out_file"> + 
<assert_contents> + <has_text text="Greengenes August 2013" /> + <has_text text="gg_13_8_99.gg.tax" /> + <has_text text="gg_13_8_99.fasta" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="greengenes_May2013"/> + <output name="out_file"> + <assert_contents> + <has_text text="Greengenes May 2013" /> + <has_text text="gg_13_5_99.pds.tax" /> + <has_text text="gg_13_5_99.gg.tax" /> + <has_text text="gg_13_5_99.align" /> + <has_text text="gg_13_5_99.fasta" /> + </assert_contents> + </output> + </test>--> + <test> + <param name="data_source|ref_data" value="greengenes_old"/> + <output name="out_file"> + <assert_contents> + <has_text text="Greengenes pre-May 2013" /> + <has_text text="gg_99.pds.tax" /> + <has_text text="core_set_aligned.imputed.fasta" /> + <has_text text="gg_99.pds.ng.fasta" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="greengenes_gold_alignment"/> + <output name="out_file"> + <assert_contents> + <has_text text="Greengenes gold alignment" /> + <has_text text="rRNA16S.gold.NAST_ALIGNED.fasta" /> + </assert_contents> + </output> + </test> + + <test> + <param name="data_source|ref_data" value="secondary_structure_maps_silva"/> + <output name="out_file"> + <assert_contents> + <has_text text="SILVA" /> + <has_text text="silva.ss.map" /> + </assert_contents> + </output> + </test> + <test> + <param name="data_source|ref_data" value="secondary_structure_maps_greengenes"/> + <output name="out_file"> + <assert_contents> + <has_text text="Greengenes" /> + <has_text text="gg.ss.map" /> + </assert_contents> + </output> + </test> </tests> <help> .. class:: infomark
--- a/data_manager/fetch_mothur_reference_data.py Thu Nov 28 15:47:32 2019 -0500 +++ b/data_manager/fetch_mothur_reference_data.py Thu Sep 17 09:37:41 2020 +0000 @@ -10,6 +10,7 @@ import tempfile import urllib2 import zipfile +from functools import reduce # When extracting files from archives, skip names that # start with the following strings @@ -27,118 +28,118 @@ # Look up data # http://www.mothur.org/wiki/Lookup_files "lookup_titanium": { - "GS FLX Titanium": ["http://www.mothur.org/w/images/9/96/LookUp_Titanium.zip", ] + "GS FLX Titanium": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_titanium.zip", ] }, "lookup_gsflx": { - "GSFLX": ["http://www.mothur.org/w/images/8/84/LookUp_GSFLX.zip", ] + "GSFLX": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gsflx.zip", ] }, "lookup_gs20": { - "GS20": ["http://www.mothur.org/w/images/7/7b/LookUp_GS20.zip", ] + "GS20": ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lookup_gs20.zip", ] }, # RDP reference files # http://www.mothur.org/wiki/RDP_reference_files "RDP_v16": { "16S rRNA RDP training set 16": - ["https://mothur.org/w/images/d/dc/Trainset16_022016.rdp.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.rdp.tgz", ], "16S rRNA PDS training set 16": - ["https://mothur.org/w/images/c/c3/Trainset16_022016.pds.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset16_022016.pds.tgz", ], }, "RDP_v14": { "16S rRNA RDP training set 14": - ["https://mothur.org/w/images/6/6c/Trainset14_032015.rdp.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset14_032015.rdp.tgz", ], "16S rRNA PDS training set 14": - ["https://mothur.org/w/images/8/88/Trainset14_032015.pds.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset14_032015.pds.tgz", ], }, "RDP_v10": { "16S rRNA RDP training set 10": - ["http://www.mothur.org/w/images/b/b5/Trainset10_082014.rdp.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset10_082014.rdp.tgz", ], "16S 
rRNA PDS training set 10": - ["http://www.mothur.org/w/images/2/24/Trainset10_082014.pds.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset10_082014.pds.tgz", ], }, "RDP_v9": { "16S rRNA RDP training set 9": - ["http://www.mothur.org/w/images/7/72/Trainset9_032012.rdp.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset9_032012.rdp.zip", ], "16S rRNA PDS training set 9": - ["http://www.mothur.org/w/images/5/59/Trainset9_032012.pds.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset9_032012.pds.zip", ], }, "RDP_v7": { "16S rRNA RDP training set 7": - ["http://www.mothur.org/w/images/2/29/Trainset7_112011.rdp.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset7_112011.rdp.zip", ], "16S rRNA PDS training set 7": - ["http://www.mothur.org/w/images/4/4a/Trainset7_112011.pds.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/trainset7_112011.pds.zip", ], "8S rRNA Fungi training set 7": - ["http://www.mothur.org/w/images/3/36/FungiLSU_train_v7.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/fungilsu_train_v7.zip", ], }, "RDP_v6": { "RDP training set 6": - ["http://www.mothur.org/w/images/4/49/RDPTrainingSet.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/rdptrainingset.zip", ], }, # Silva reference files # http://www.mothur.org/wiki/Silva_reference_files "silva_release_128": { "SILVA release 128": - ["https://mothur.org/w/images/b/b4/Silva.nr_v128.tgz", - "https://mothur.org/w/images/a/a4/Silva.seed_v128.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v128.tgz", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v128.tgz", ], }, "silva_release_123": { "SILVA release 123": - ["https://mothur.org/w/images/b/be/Silva.nr_v123.tgz", - "https://mothur.org/w/images/1/15/Silva.seed_v123.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v123.tgz", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v123.tgz", ], }, 
"silva_release_119": { "SILVA release 119": - ["http://www.mothur.org/w/images/2/27/Silva.nr_v119.tgz", - "http://www.mothur.org/w/images/5/56/Silva.seed_v119.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.nr_v119.tgz", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.seed_v119.tgz", ], }, "silva_release_102": { "SILVA release 102": - ["http://www.mothur.org/w/images/9/98/Silva.bacteria.zip", - "http://www.mothur.org/w/images/3/3c/Silva.archaea.zip", - "http://www.mothur.org/w/images/1/1a/Silva.eukarya.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.bacteria.zip", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.archaea.zip", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.eukarya.zip", ], }, "silva_gold_bacteria": { "SILVA gold": - ["http://www.mothur.org/w/images/f/f1/Silva.gold.bacteria.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva.gold.bacteria.zip", ], }, # Greengenes # http://www.mothur.org/wiki/Greengenes-formatted_databases "greengenes_August2013": { "Greengenes August 2013": - ["http://www.mothur.org/w/images/1/19/Gg_13_8_99.refalign.tgz", - "http://www.mothur.org/w/images/6/68/Gg_13_8_99.taxonomy.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_8_99.refalign.tgz", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_8_99.taxonomy.tgz", ], }, "greengenes_May2013": { "Greengenes May 2013": - ["http://www.mothur.org/w/images/c/cd/Gg_13_5_99.refalign.tgz", - "http://www.mothur.org/w/images/9/9d/Gg_13_5_99.taxonomy.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_5_99.refalign.tgz", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_13_5_99.taxonomy.tgz", ], }, "greengenes_old": { "Greengenes pre-May 2013": - ["http://www.mothur.org/w/images/7/72/Greengenes.alignment.zip", - "http://www.mothur.org/w/images/1/16/Greengenes.tax.tgz", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/greengenes.alignment.zip", + 
"https://mothur.s3.us-east-2.amazonaws.com/wiki/greengenes.tax.tgz", ], }, "greengenes_gold_alignment": { "Greengenes gold alignment": - ["http://www.mothur.org/w/images/2/21/Greengenes.gold.alignment.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/greengenes.gold.alignment.zip", ], }, # Secondary structure maps # http://www.mothur.org/wiki/Secondary_structure_map "secondary_structure_maps_silva": { "SILVA": - ["http://www.mothur.org/w/images/6/6d/Silva_ss_map.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/silva_ss_map.zip", ], }, "secondary_structure_maps_greengenes": { "Greengenes": - ["http://www.mothur.org/w/images/4/4b/Gg_ss_map.zip", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/gg_ss_map.zip", ], }, # Lane masks: not used here? "lane_masks": { "Greengenes-compatible": - ["http://www.mothur.org/w/images/2/2a/Lane1241.gg.filter", - "http://www.mothur.org/w/images/a/a0/Lane1287.gg.filter", - "http://www.mothur.org/w/images/3/3d/Lane1349.gg.filter", ], + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/Lane1241.gg.filter", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/lane1287.gg.filter", + "https://mothur.s3.us-east-2.amazonaws.com/wiki/lane1349.gg.filter", ], "SILVA-compatible": - ["http://www.mothur.org/w/images/6/6d/Lane1349.silva.filter", ] + ["https://mothur.s3.us-east-2.amazonaws.com/wiki/lane1349.silva.filter", ] }, }