changeset 6:9002633b4737 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit 5d74f13e59c4a2862c108ac1a08c067b0cfb2d97
author iuc
date Fri, 24 Jun 2022 12:44:33 +0000
parents 2f27f3b86827
children ed1518ce2237
files data_manager/kraken2_build_database.py data_manager/kraken2_build_database.xml
diffstat 2 files changed, 220 insertions(+), 80 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/kraken2_build_database.py	Mon Nov 08 15:40:34 2021 +0000
+++ b/data_manager/kraken2_build_database.py	Fri Jun 24 12:44:33 2022 +0000
@@ -54,9 +54,17 @@
 
 
 class StandardPrebuiltSizes(Enum):
-    full = 'full'
-    gb_16 = '16'
-    gb_8 = '8'
+    viral = "viral"
+    minusb = "minusb"
+    standard = "standard"
+    standard_08gb = "standard_08gb"
+    standard_16gb = "standard_16gb"
+    pluspf = "pluspf"
+    pluspf_08gb = "pluspf_08gb"
+    pluspf_16gb = "pluspf_16gb"
+    pluspfp = "pluspfp"
+    pluspfp_08gb = "pluspfp_08gb"
+    pluspfp_16gb = "pluspfp_16gb"
 
     def __str__(self):
         return self.value
@@ -122,36 +130,47 @@
     return data_table_entry
 
 
-def kraken2_build_standard_prebuilt(standard_prebuilt_size, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME):
+def kraken2_build_standard_prebuilt(prebuilt_db, prebuilt_date, target_directory, data_table_name=DATA_TABLE_NAME):
 
     now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ")
 
+    prebuild_name = {
+        'viral': "Viral",
+        'minusb': "MinusB (archaea, viral, plasmid, human, UniVec_Core)",
+        'standard': "Standard-Full (archaea, bacteria, viral, plasmid, human,UniVec_Core)",
+        'standard_08gb': "Standard-8 (Standard with DB capped at 8 GB)",
+        'standard_16gb': "Standard-16 (Standard with DB capped at 16 GB)",
+        'pluspf': "PlusPF (Standard plus protozoa and fungi)",
+        'pluspf_08gb': "PlusPF-8 (PlusPF with DB capped at 8 GB)",
+        'pluspf_16gb': "PlusPF-16 (PlusPF with DB capped at 16 GB)",
+        'pluspfp': "PlusPFP (Standard plus protozoa, fungi and plant)",
+        'pluspfp_08gb': "PlusPFP-8 (PlusPFP with DB capped at 8 GB)",
+        'pluspfp_16gb': "PlusPFP-16 (PlusPFP with DB capped at 16 GB)"
+    }
+
     database_value = "_".join([
         now,
         "standard_prebuilt",
-        standard_prebuilt_size
+        prebuilt_db,
+        prebuilt_date
     ])
 
     database_name = " ".join([
-        "Standard (Prebuilt)",
-        standard_prebuilt_size,
-        "(Downloaded:",
+        "Prebuilt Refseq indexes: ",
+        prebuild_name[prebuilt_db],
+        "(Version: ",
+        prebuilt_date,
+        "- Downloaded:",
         now + ")"
     ])
 
     database_path = database_value
 
-    size_to_url_str = {
-        'full': '',
-        '16': '_16gb',
-        '8': '_8gb',
-    }
     # we may need to let the user choose the date when new DBs are posted.
     date_url_str = prebuilt_date.replace('-', '')
-    standard_prebuilt_size_url = size_to_url_str[standard_prebuilt_size]
     # download the pre-built database
     try:
-        download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard%s_%s.tar.gz' % (standard_prebuilt_size_url, date_url_str)
+        download_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_%s_%s.tar.gz' % (prebuilt_db, date_url_str)
         src = urlopen(download_url)
     except URLError as e:
         print('url: ' + download_url, file=sys.stderr)
@@ -369,7 +388,7 @@
     parser.add_argument('--threads', dest='threads', default=1, help='threads')
     parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build')
     parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)')
-    parser.add_argument('--standard-prebuilt-size', dest='standard_prebuilt_size', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Size of standard prebuilt database to download (only applies to --database-type standard_prebuilt. Options are: "8", "16", "full".)')
+    parser.add_argument('--prebuilt-db', dest='prebuilt_db', type=StandardPrebuiltSizes, choices=list(StandardPrebuiltSizes), help='Prebuilt database to download. Only applies to --database-type standard_prebuilt.')
     parser.add_argument('--prebuilt-date', dest='prebuilt_date', help='Database build date (YYYY-MM-DD). Only applies to --database-type standard_prebuilt.')
     parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)')
     parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)')
@@ -408,7 +427,7 @@
         )
     elif str(args.database_type) == 'standard_prebuilt':
         data_manager_output = kraken2_build_standard_prebuilt(
-            str(args.standard_prebuilt_size),
+            str(args.prebuilt_db),
             str(args.prebuilt_date),
             target_directory
         )
--- a/data_manager/kraken2_build_database.xml	Mon Nov 08 15:40:34 2021 +0000
+++ b/data_manager/kraken2_build_database.xml	Fri Jun 24 12:44:33 2022 +0000
@@ -1,7 +1,9 @@
 <?xml version="1.0"?>
-<tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@" profile="18.09">
+<tool id="kraken2_build_database" name="Kraken2" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <macros>
-        <token name="@TOOL_VERSION@">2.1.1</token>
+        <token name="@TOOL_VERSION@">2.1.2</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@PROFILE@">21.01</token>
         <xml name="common_params">
             <param name="kmer_len" type="integer" value="35" label="K-mer length in BP" />
             <param name="minimizer_len" type="integer" value="31" label="Minimizer length" />
@@ -9,6 +11,39 @@
             <param name="load_factor" type="float" value="0.7" min="0" max="1" label="Load factor" help="Proportion of the hash table to be populated" />
             <param name="clean" type="boolean" truevalue="--clean" falsevalue="" checked="true" label="Clean up extra files" />
         </xml>
+        <xml name="viral">
+            <option value="viral">Viral (viral; ~50 GB)</option>
+        </xml>
+        <xml name="minusb">
+            <option value="minusb">MinusB (archaea, viral, plasmid, human, UniVec_Core; ~8 GB)</option>
+        </xml>
+        <xml name="standard">
+            <option value="standard">Standard-Full (archaea, bacteria, viral, plasmid, human, UniVec_Core; ~50 GB)</option>
+        </xml>
+        <xml name="standard_08gb">
+            <option value="standard_08gb">Standard-8 (Standard with DB capped at 8 GB)</option>
+        </xml>
+        <xml name="standard_16gb">
+            <option value="standard_16gb">Standard-16 (Standard with DB capped at 16 GB)</option>
+        </xml>
+        <xml name="pluspf">
+            <option value="pluspf">PlusPF (Standard plus protozoa and fungi; ~50 GB)</option>
+        </xml>
+        <xml name="pluspf_08gb">
+            <option value="pluspf_08gb">PlusPF-8 (PlusPF with DB capped at 8 GB; ~7.5 GB)</option>
+        </xml>
+        <xml name="pluspf_16gb">
+            <option value="pluspf_16gb">PlusPF-16 (PlusPF with DB capped at 16 GB; ~15 GB)</option>
+        </xml>
+        <xml name="pluspfp">
+            <option value="pluspfp">PlusPFP (Standard plus protozoa, fungi and plant; ~129 GB)</option>
+        </xml>
+        <xml name="pluspfp_08gb">
+            <option value="pluspfp_08gb">PlusPFP-8 (PlusPFP with DB capped at 8 GB; ~7.5 GB)</option>
+        </xml>
+        <xml name="pluspfp_16gb">
+            <option value="pluspfp_16gb">PlusPFP-16 (PlusPFP with DB capped at 16 GB; ~15 GB)</option>
+        </xml>
     </macros>
     <description>database builder</description>
     <requirements>
@@ -16,53 +51,48 @@
         <requirement type="package" version="3.7">python</requirement>
     </requirements>
     <version_command>kraken2 -version | head -n 1 | awk '{print $NF}'</version_command>
-    <command detect_errors="exit_code">
-    <![CDATA[
-        python '$__tool_directory__/kraken2_build_database.py'
-          '${out_file}'
-          --database-type ${database_type.database_type}
-          #if $database_type.database_type == "minikraken"
-            --minikraken2-version ${database_type.minikraken2_version}
-          #else if $database_type.database_type == "standard_prebuilt"
-            --standard-prebuilt-size ${database_type.standard_prebuilt_size}
-            #if $database_type.prebuilt_date_custom != ""
-              --prebuilt-date ${database_type.prebuilt_date_custom}
-            #else
-              --prebuilt-date ${database_type.prebuilt_date}
-            #end if
-          #else if $database_type.database_type == "standard_local_build"
-            --threads \${GALAXY_SLOTS:-1}
-            --kmer-len ${database_type.kmer_len}
-            --minimizer-len ${database_type.minimizer_len}
-            --minimizer-spaces ${database_type.minimizer_spaces}
-            --load-factor ${database_type.load_factor}
-            ${database_type.clean}
-          #else if $database_type.database_type == "special"
-            --threads \${GALAXY_SLOTS:-1}
-            --special-database-type ${database_type.special_database_type}
-            --kmer-len ${database_type.kmer_len}
-            --minimizer-len ${database_type.minimizer_len}
-            --minimizer-spaces ${database_type.minimizer_spaces}
-            --load-factor ${database_type.load_factor}
-            ${database_type.clean}
-          #else if $database_type.database_type == "custom"
-            --threads \${GALAXY_SLOTS:-1}
-            ${database_type.skip_maps}
-            --custom-fasta ${database_type.custom_fasta}
-            --custom-database-name ${database_type.custom_database_name}
-            --kmer-len ${database_type.kmer_len}
-            --minimizer-len ${database_type.minimizer_len}
-            --minimizer-spaces ${database_type.minimizer_spaces}
-            --load-factor ${database_type.load_factor}
-            ${database_type.clean}
-          #end if
-    ]]>
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/kraken2_build_database.py'
+    '$out_file'
+    --database-type '$database_type.database_type'
+#if $database_type.database_type == "standard_local_build"
+    --threads \${GALAXY_SLOTS:-1}
+    --kmer-len $database_type.kmer_len
+    --minimizer-len $database_type.minimizer_len
+    --minimizer-spaces $database_type.minimizer_spaces
+    --load-factor $database_type.load_factor
+    $database_type.clean
+#else if $database_type.database_type == "standard_prebuilt"
+    --prebuilt-db '$database_type.prebuild.prebuilt_db'
+    --prebuilt-date '$database_type.prebuild.prebuilt_date'
+#else if $database_type.database_type == "minikraken"
+    --minikraken2-version '$database_type.minikraken2_version'
+#else if $database_type.database_type == "special"
+    --threads \${GALAXY_SLOTS:-1}
+    --special-database-type '$database_type.special_database_type'
+    --kmer-len $database_type.kmer_len
+    --minimizer-len $database_type.minimizer_len
+    --minimizer-spaces $database_type.minimizer_spaces
+    --load-factor $database_type.load_factor
+    $database_type.clean
+#else if $database_type.database_type == "custom"
+    --threads \${GALAXY_SLOTS:-1}
+    --custom-fasta '$database_type.custom_fasta'
+    --custom-database-name '$database_type.custom_database_name'
+    $database_type.skip_maps
+    --kmer-len $database_type.kmer_len
+    --minimizer-len $database_type.minimizer_len
+    --minimizer-spaces $database_type.minimizer_spaces
+    --load-factor $database_type.load_factor
+    $database_type.clean
+#end if
+]]>
     </command>
     <inputs>
         <conditional name="database_type">
             <param name="database_type" type="select" multiple="false" label="Database Type">
                 <option value="standard_local_build">Standard, Local Build</option>
-                <option value="standard_prebuilt">Standard, Pre-Built</option>      
+                <option value="standard_prebuilt">Pre-Built Refseq indexes</option>
                 <option value="minikraken">MiniKraken</option>
                 <option value="special">Special</option>
                 <option value="custom">Custom</option>
@@ -71,16 +101,75 @@
                 <expand macro="common_params" />
             </when>
             <when value="standard_prebuilt">
-                <param name="standard_prebuilt_size" type="select" multiple="false" label="Select size of prebuilt database to download">
-                    <option value="full">Standard-Full (~50 GB)</option>
-                    <option value="16">Standard-16 (~16 GB)</option>
-                    <option value="8">Standard-8 (~8 GB)</option>
-                </param>
-                <param name="prebuilt_date" type="select" multiple="false" optional="true" label="Select database build date">
-                    <option value="2021-05-17">May 17, 2021</option>
-                    <option value="2020-12-02">December 2, 2020</option>
-                </param>
-                <param name="prebuilt_date_custom" type="text" label="Custom date (YYYY-MM-DD)" help="Any text here will overwrite the selected date above." />
+                <conditional name="prebuild">
+                    <param name="prebuilt_date" type="select" label="Select index build date">
+                        <option value="2022-06-07">June 7, 2022</option>
+                        <option value="2021-05-17">May 17, 2021</option>
+                        <option value="2021-01-27">January 27, 2021</option>
+                        <option value="2020-12-02">December 2, 2020</option>
+                        <option value="2020-09-19">September 19, 2020</option>
+                    </param>
+                    <when value="2022-06-07">
+                        <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
+                            <expand macro="viral"/>
+                            <expand macro="minusb"/>
+                            <expand macro="standard"/>
+                            <expand macro="standard_08gb"/>
+                            <expand macro="standard_16gb"/>
+                            <expand macro="pluspf"/>
+                            <expand macro="pluspf_08gb"/>
+                            <expand macro="pluspf_16gb"/>
+                            <expand macro="pluspfp"/>
+                            <expand macro="pluspfp_08gb"/>
+                            <expand macro="pluspfp_16gb"/>
+                        </param>
+                    </when>
+                    <when value="2021-05-17">
+                        <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
+                            <expand macro="viral"/>
+                            <expand macro="minusb"/>
+                            <expand macro="standard"/>
+                            <expand macro="standard_08gb"/>
+                            <expand macro="standard_16gb"/>
+                            <expand macro="pluspf"/>
+                            <expand macro="pluspf_08gb"/>
+                            <expand macro="pluspf_16gb"/>
+                            <expand macro="pluspfp_08gb"/>
+                            <expand macro="pluspfp_16gb"/>
+                        </param>
+                    </when>
+                    <when value="2021-01-27">
+                        <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
+                            <expand macro="pluspf"/>
+                            <expand macro="pluspf_08gb"/>
+                            <expand macro="pluspf_16gb"/>
+                            <expand macro="pluspfp_08gb"/>
+                            <expand macro="pluspfp_16gb"/>
+                        </param>
+                    </when>
+                    <when value="2020-12-02">
+                        <param name="prebuilt_db" type="select" label="Select a prebuilt Refseq index to download">
+                            <expand macro="viral"/>
+                            <expand macro="minusb"/>
+                            <expand macro="standard"/>
+                            <expand macro="standard_08gb"/>
+                            <expand macro="standard_16gb"/>
+                        </param>
+                    </when>
+                    <when value="2020-09-19">
+                        <param name="prebuilt_db" type="select" multiple="false" label="Select a prebuilt Refseq index to download">
+                            <expand macro="minusb"/>
+                            <expand macro="standard"/>
+                            <expand macro="standard_08gb"/>
+                            <expand macro="standard_16gb"/>
+                            <expand macro="pluspf"/>
+                            <expand macro="pluspf_08gb"/>
+                            <expand macro="pluspf_16gb"/>
+                            <expand macro="pluspfp_08gb"/>
+                            <expand macro="pluspfp_16gb"/>
+                        </param>
+                    </when>
+                </conditional>
             </when>
             <when value="minikraken">
                 <param name="minikraken2_version" type="select" multiple="false" label="Select MiniKraken2 database version to download">
@@ -97,9 +186,9 @@
                 <expand macro="common_params" />
             </when>
             <when value="custom">
-                <param name="custom_fasta" type="data" format="fasta" multiple="False" optional="true" label="Select history item" />
+                <param name="custom_fasta" type="data" format="fasta" multiple="False" label="Select history item" />
                 <param name="custom_database_name" type="text" label="Name for this database" />
-                <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." /> 
+                <param name="skip_maps" type="boolean" truevalue="--skip-maps" falsevalue="" label="Skip downloading accession number to taxid maps during taxonomy download." />
                 <expand macro="common_params" />
             </when>
         </conditional>
@@ -108,16 +197,48 @@
         <data name="out_file" format="data_manager_json" />
     </outputs>
     <tests>
+        <test expect_num_outputs="1">
+            <conditional name="database_type">
+                <param name="database_type" value="custom" />
+                <param name="custom_fasta" value="adapter.fa" />
+                <param name="custom_database_name" value="custom_database" />
+                <param name="skip_maps" value="true" />
+                <param name="kmer_len" value="35" />
+                <param name="minimizer_spaces" value="6"/>
+                <param name="load_factor" value="0.7" />
+                <param name="clean" truevalue="--clean"/>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="custom_database"/>
+                </assert_contents>
+            </output>
+        </test>
         <test>
-            <param name="database_type" value="custom" />
-            <param name="custom_fasta" value="adapter.fa" />
-            <param name="custom_database_name" value="database" />
-            <param name="skip_maps" value="true" />
-            <output name="out_file" value="kraken2_custom_data_manager.json" />
+            <conditional name="database_type">
+                <param name="database_type" value="standard_prebuilt" />
+                <conditional name="prebuild">
+                    <param name="prebuilt_date" value="2022-06-07"/>
+                    <param name="prebuilt_db" value="standard_08gb"/>
+                </conditional>
+            </conditional>
+            <output name="out_file">
+                <assert_contents>
+                    <has_text text="kraken2_databases"/>
+                    <has_text text="path"/>
+                    <has_text text="Standard-8"/>
+                    <has_text text="_standard_prebuilt_standard_08gb_2022-06-07"/>
+                    <has_text text="Version"/>
+                    <has_text text="Downloaded"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
-    <help>
-    </help>
+    <help><![CDATA[
+Build Kraken2 databases or download `prebuilt Kraken2 RefSeq indexes <https://benlangmead.github.io/aws-indexes/k2>`__.
+    ]]></help>
     <citations>
         <citation type="doi">10.1186/gb-2014-15-3-r46</citation>
     </citations>