Mercurial > repos > iuc > drep_dereplicate

diff macros.xml @ 1:ef7cd2e7bc05 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/drep commit 5e6e589002d554be180e575080e9ad66cc78ed74"
author: iuc
date: Sat, 12 Feb 2022 17:40:42 +0000
parents: 8dfcdbeaeed8
children: 368cb4bef9d8
--- a/macros.xml	Tue May 05 06:12:47 2020 -0400
+++ b/macros.xml	Sat Feb 12 17:40:42 2022 +0000
@@ -1,8 +1,16 @@
+<?xml version="1.0"?>
 <macros>
-    <token name="@VERSION@">2.5.4</token>
+    <token name="@TOOL_VERSION@">3.2.2</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">20.01</token>
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">drep</xref>
+        </xrefs>
+    </xml>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="@VERSION@">drep</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">drep</requirement>
             <yield/>
         </requirements>
     </xml>
@@ -13,250 +21,295 @@
         </citations>
     </xml>
 
+    <xml name="genomes">
+        <param argument="--genomes" type="data" format="fasta" multiple="true" label="Genomes to filter"/>
+    </xml>
 
-    <xml name="genomes">
-        <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/>
-    </xml>
+<!-- Append ".fasta" to the genome names so that purely numeric names are not read as integers.
+Works around a bug in dRep that will probably be fixed in the next version. -->
     <token name="@PREPARE_GENOMES@"><![CDATA[
-    #import re 
-    #set $genomefiles = [] 
-    #for $genome in $genomes
-        #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1]))
-        ln -s '${genome}' '${input_name}' &&
-        $genomefiles.append($input_name)
-    #end for
-]]></token>
+#import re
+#set $genomefiles = []
+#for $genome in $genomes
+    #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1]))
+ln -s '${genome}' '${input_name}.fasta' &&
+$genomefiles.append($input_name)
+#end for
+    ]]></token>
     <token name="@GENOMES@"><![CDATA[
-    -g 
-    #for $genomefile in $genomefiles
-    '${genomefile}' 
-    #end for
-]]></token>
-
-
-    <xml name="checkm_method">
-        <param argument="--checkM_method" type="select" label="checkm method" optional="true">
-           <option value="taxonomy_wf">taxonomy_wf (faster)</option>
-           <option value="lineage_wf">lineage_wf (more accurate)</option>
-        </param>
-    </xml>
-    <token name="@CHECKM_METHOD@"><![CDATA[
-    #if $checkM_method:
-    --checkM_method $checkM_method 
-    #end if
-]]></token>
+    -g
+#for $genomefile in $genomefiles
+    '${genomefile}.fasta'
+#end for
+    ]]></token>
 
     <xml name="filtering_options">
-        <conditional name="filter">
-            <param name="set_options" type="select" label="set filtering options">
-                <option value="yes">Yes</option>
-                <option value="no" selected="true">No (use --checkM_method taxonomy_wf)</option>
-            </param>
-            <when value="yes">
-                <param argument="--length" type="integer" value="50000" label="Minimum genome length"/>
-                <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/>
-                <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/>
-                 
-                <conditional name="quality">
-                    <param argument="source" type="select" label="genome quality">
-                        <help>
-                            --ignoreGenomeQuality is useful with
-                            bacteriophages or eukaryotes or things where checkM
-                            scoring does not work. Will only choose genomes based
-                            on length and N50. 
-                        </help>
-                        <option value="checkm" selected="true">Run checkM</option>
-                        <option value="genomeInfo">User supplied genomeInfo csv file</option>
-                        <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option>
-                    </param>
-                    <when value="checkm">
-                        <param argument="--checkM_method" type="select" label="checkm method" optional="true">
-                            <help>
-                                Using the checkm method of lineage_wf can require more than 40Gb of RAM.
-                            </help>
-                            <option value="taxonomy_wf">taxonomy_wf (faster)</option>
-                            <option value="lineage_wf">lineage_wf (more accurate)</option>
-                        </param>
-                    </when>
-                    <when value="genomeInfo">
-                        <param argument="--genomeInfo" type="data" format="csv" label="genomes fasta files">
-                            <help><![CDATA[
-                            A CSV dataset that must contain: [
-                            "genome"(history dataset name of .fasta dataset of that genome), 
-                            "completeness"(0-100 value for completeness of the genome), 
-                            "contamination"(0-100 value of the contamination of the genome)] 
-                            ]]></help>
-                        </param>
-                    </when>
-                    <when value="ignoreGenomeQuality"/>
-                </conditional>
-            </when>
-            <when value="no"/>
-        </conditional>
+        <section name="filter" title="Genome filtering" expanded="true">
+            <param argument="--length" type="integer" value="50000" label="Minimum genome length"/>
+            <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/>
+            <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/>
+        </section>
+    </xml>
+    <xml name="test_default_filtering_options">
+        <section name="filter">
+            <param name="length" value="50000"/>
+            <param name="completeness" value="75"/>
+            <param name="contamination" value="100"/>
+        </section>
     </xml>
     <token name="@FILTER_OPTIONS@"><![CDATA[
-        #if $filter.set_options == 'yes':
-            --length $filter.length
-            --completeness $filter.completeness
-            --contamination $filter.contamination
-            #if $filter.quality.source == 'checkm'
-                --checkM_method $filter.quality.checkM_method
-            #elif $filter.quality.source == 'genomeInfo'
-                --genomeInfo $filter.quality.genomeInfo 
-            #elif $filter.quality.source == 'ignoreGenomeQuality'
-                --ignoreGenomeQuality
-            #end if
-        #else
-            --checkM_method taxonomy_wf
-        #end if
+    --length $filter.length
+    --completeness $filter.completeness
+    --contamination $filter.contamination
 ]]></token>
 
-    <xml name="genome_comparison_options">
-        <conditional name="genome_comparison">
-            <param name="set_options" type="select" label="set genome comparison options">
-                <option value="yes">Yes</option>
-                <option value="no" selected="true">No</option>
+    <xml name="quality_assessment_options">
+        <conditional name="quality">
+            <param name="source" type="select" label="Genome quality filtering" help="Skipping checkM and quality filtering is not recommended, except for bacteriophages, eukaryotes or other genomes where checkM scoring does not work. When skipped, genomes will only be chosen based on length and N50.">
+                <option value="checkm" selected="true">Run checkM</option>
+                <option value="genomeInfo">Provide quality information on the genome (CSV file)</option>
+                <option value="ignoreGenomeQuality">Don't run checkM or do any quality filtering (--ignoreGenomeQuality) - NOT RECOMMENDED!</option>
             </param>
-            <when value="yes">
-                <param argument="--MASH_sketch" type="integer" value="1000" label="MASH sketch size"/>
-                <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comaprisons">
-                    <option value="ANImf" selected="true">ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions</option>
-                    <option value="ANIn">ANIn  = Align whole genomes with nucmer; compare aligned regions</option>
-                    <option value="gANI">gANI  = Identify and align ORFs; compare aligned ORFS</option>
+            <when value="checkm">
+                <param argument="--checkM_method" type="select" label="CheckM method">
+                    <option value="lineage_wf" selected="true">lineage_wf: Lineage-specific Workflow - quality estimates with lineage-specific markers (more accurate)</option>
+                    <option value="taxonomy_wf">taxonomy_wf: Taxonomic-specific Workflow - quality estimates with taxonomic-specific markers (faster)</option>
                 </param>
-                <param argument="-n_PRESET" type="select" label="Presets to pass to nucmer">
-                    <option value="normal" selected="true">normal  = default ANIn parameters (default: normal)</option>
-                    <option value="tight">tight   = only align highly conserved regions</option>
+                <param argument="--set_recursion" type="integer" optional="true" label="Increases the python recursion limit" help="NOT RECOMMENDED unless checkM is crashing due to recursion issues. Recommended to set to 2000 if needed, but setting this could crash Python"/>
+                <param argument="--checkm_group_size" type="integer" value="2000" min="1" label="Number of genomes passed to checkM at a time" help="Increasing this increases RAM but makes checkM faster"/>
+            </when>
+            <when value="genomeInfo">
+                <param argument="--genomeInfo" type="data" format="csv" label="Quality information on the genomes">
+                    <help><![CDATA[
+                    A CSV dataset that must contain: [
+                    "genome"(history dataset name of .fasta dataset of that genome),
+                    "completeness"(0-100 value for completeness of the genome),
+                    "contamination"(0-100 value of the contamination of the genome)]
+                    ]]></help>
                 </param>
             </when>
-            <when value="no"/>
+            <when value="ignoreGenomeQuality"/>
+        </conditional>
+    </xml>
+    <xml name="test_default_quality_assessment_options">
+        <conditional name="quality">
+            <param name="source" value="checkm"/>
+            <param name="checkM_method" value="taxonomy_wf"/>
+            <param name="checkm_group_size" value="2000"/>
         </conditional>
     </xml>
-    <token name="@GENOME_COMPARISON_OPTIONS@"><![CDATA[
-        #if $genome_comparison.set_options == 'yes':
-            --MASH_sketch $genome_comparison.MASH_sketch
-            --S_algorithm $genome_comparison.S_algorithm
-            -n_PRESET $genome_comparison.n_PRESET
-        #end if
+    <token name="@QUALITY_ASSESSMENT_OPTIONS@"><![CDATA[
+#if $quality.source == 'checkm'
+    --checkM_method '$quality.checkM_method'
+    #if str($quality.set_recursion) != ''
+    --set_recursion $quality.set_recursion
+    #end if
+    --checkm_group_size $quality.checkm_group_size
+#else if $quality.source == 'genomeInfo'
+    --genomeInfo '$quality.genomeInfo'
+#else if $quality.source == 'ignoreGenomeQuality'
+    --ignoreGenomeQuality
+#end if
+]]></token>
+
+    <xml name="mash">
+        <param argument="--MASH_sketch" type="integer" value="1000" min="0" label="MASH sketch size"/>
+        <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary clusters"/>
+        <param argument="--multiround_primary_clustering" type='boolean' checked="false" truevalue='--multiround_primary_clustering' falsevalue='' label="Cluster each primary chunk separately and merge at the end with single linkage?" help="Decreases RAM usage and increases speed, at the cost of a minor loss in precision and the inability to plot primary_clustering_dendrograms. Especially helpful when clustering 5000+ genomes. Will be done with single linkage clustering"/>
+        <param argument="--primary_chunksize" type="integer" value="5000" min="1" label="Impacts multiround_primary_clusterings" help="If you have more than this many genomes, process them in chunks of this size"/>
+    </xml>
+    <xml name="test_default_mash">
+        <param name="MASH_sketch" value="1000"/>
+        <param name="P_ani" value="0.9"/>
+        <param name="multiround_primary_clustering" value=''/>
+        <param name="primary_chunksize" value="5000"/>
+    </xml>
+    <token name="@MASH@"><![CDATA[
+    --MASH_sketch '$comp_clust.steps.MASH_sketch'
+    --P_ani $comp_clust.steps.P_ani
+    $comp_clust.steps.multiround_primary_clustering
+    --primary_chunksize $comp_clust.steps.primary_chunksize
+]]></token>
+
+    <xml name="nucmer">
+        <param argument="--n_PRESET" type="select" label="Presets to pass to nucmer">
+            <option value="normal" selected="true">normal: default ANIn parameters</option>
+            <option value="tight">tight: only align highly conserved regions</option>
+        </param>
+    </xml>
+    <xml name="test_default_nucmer">
+        <param name="n_PRESET" value="normal"/>
+    </xml>
+    <token name="@NUCMER@"><![CDATA[
+    --n_PRESET '$comp_clust.steps.clustering.n_PRESET'
+]]></token>
+
+    <xml name="coverage_method">
+        <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment">
+            <option value="larger" selected="true">Larger = max((aligned length / genome 1), (aligned_length / genome2))</option>
+            <option value="total">Total = 2*(aligned length) / (sum of total genome lengths)</option>
+        </param>
+    </xml>
+    <xml name="test_default_coverage_method">
+        <param name="coverage_method" value="larger"/>
+    </xml>
+    <token name="@COVERAGE_METHOD@"><![CDATA[
+    --coverage_method '$comp_clust.steps.clustering.coverage_method'
 ]]></token>
 
-    <xml name="clustering_options">
+    <xml name="secondary_clustering">
         <conditional name="clustering">
-            <param name="set_options" type="select" label="set clustering options">
-                <option value="yes">Yes</option>
-                <option value="no" selected="true">No</option>
+            <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comparisons">
+                <option value="fastANI">fastANI: Kmer-based approach - very fast</option>
+                <option value="ANImf" selected="true">ANImf: Align whole genomes with nucmer; filter alignment; compare aligned regions - RECOMMENDED</option>
+                <option value="ANIn">ANIn: Align whole genomes with nucmer; compare aligned regions</option>
+                <option value="gANI">gANI: Identify and align ORFs; compare aligned ORFS</option>
+                <option value="goANI">goANI: Open source version of gANI; requires nsimscan</option>
             </param>
-            <when value="yes">
-                <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary (MASH) clusters"/>
-                <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/>
+            <when value="fastANI">
+                <param argument="--greedy_secondary_clustering" type='boolean' checked="false" truevalue='--greedy_secondary_clustering' falsevalue='' label="Use a heuristic to avoid pair-wise comparisons when doing secondary clustering?" help="Will be done with single linkage clustering"/>
+            </when>
+            <when value="ANImf">
+                <expand macro="nucmer"/>
+                <expand macro="coverage_method"/>
+            </when>
+            <when value="ANIn">
+                <expand macro="nucmer"/>
+                <expand macro="coverage_method"/>
+            </when>
+            <when value="gANI"/>
+            <when value="goANI"/>
+        </conditional>
+        <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/>
+        <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minimum level of overlap between genomes when doing secondary comparisons"/>
+    </xml>
+    <xml name="test_default_secondary_clustering">
+        <conditional name="clustering">
+            <param name="S_algorithm" value="ANImf"/>
+            <expand macro="test_default_nucmer"/>
+            <expand macro="test_default_coverage_method"/>
+        </conditional>
+        <param name="S_ani" value="0.99"/>
+        <param name="cov_thresh" value="0.1"/>
+    </xml>
+    <token name="@SECONDARY_CLUSTERING@"><![CDATA[
+    --S_algorithm '$comp_clust.steps.clustering.S_algorithm'
+    #if $comp_clust.steps.clustering.S_algorithm == 'fastANI'
+    $comp_clust.steps.clustering.greedy_secondary_clustering
+    #else if $comp_clust.steps.clustering.S_algorithm == 'ANImf'
+    @NUCMER@
+    @COVERAGE_METHOD@
+    #else if $comp_clust.steps.clustering.S_algorithm == 'ANIn'
+    @NUCMER@
+    @COVERAGE_METHOD@
+    #end if
+    --S_ani $comp_clust.steps.S_ani
+    --cov_thresh $comp_clust.steps.cov_thresh
+]]></token>
 
-                <param argument="--SkipMash" type="boolean" truevalue="--SkipMash" falsevalue="" checked="false" label="Skip MASH clustering, just do secondary clustering on all genomes"/>
-                <param argument="--SkipSecondary" type="boolean" truevalue="--SkipSecondary" falsevalue="" checked="false" label="Skip secondary clustering, just perform MASH clustering"/>
-                <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minmum level of overlap between genomes when doing secondary comparisons"/>
-                <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment">
-                    <help>(for ANIn/ANImf only; gANI can only do larger method)</help>
-                    <option value="larger" selected="true">arger  = max((aligned length / genome 1), (aligned_length / genome2))</option>
-                    <option value="total">total   = 2*(aligned length) / (sum of total genome lengths)</option>
+    <xml name="comparison_clustering_options">
+        <section name="comp_clust" title="Genome comparison and clustering" expanded="false">
+            <conditional name="steps">
+                <param name="select" type="select" label="Steps in genome comparison">
+                    <option value="default" selected="true">Default: Run MASH clustering and a secondary clustering</option>
+                    <option value="SkipMash">Skip MASH clustering, just do secondary clustering on all genomes</option>
+                    <option value="SkipSecondary">Skip secondary clustering, just perform MASH clustering</option>
                 </param>
-                <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes">
-                    <help>(passed to  scipy.cluster.hierarchy.linkage)</help>
-                    <option value="average" selected="true">average</option>
-                </param>
-            </when>
-            <when value="no"/>
-        </conditional>
+                <when value="default">
+                    <expand macro="mash"/>
+                    <expand macro="secondary_clustering"/>
+                </when>
+                <when value="SkipMash">
+                    <expand macro="secondary_clustering"/>
+                </when>
+                <when value="SkipSecondary">
+                    <expand macro="mash"/>
+                </when>
+            </conditional>
+            <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes" help="Passed to scipy.cluster.hierarchy.linkage">
+                <option value="average" selected="true">average</option>
+                <option value="ward">ward</option>
+                <option value="single">single</option>
+                <option value="median">median</option>
+                <option value="centroid">centroid</option>
+                <option value="weighted">weighted</option>
+            </param>
+            <param argument="--run_tertiary_clustering" type='boolean' checked="false" truevalue='--run_tertiary_clustering' falsevalue='' label="Run an additional round of clustering on the final genome set?" help="This is especially useful when greedy clustering is performed and/or to handle cases where similar genomes end up in different primary clusters."/>
+        </section>
     </xml>
-    <token name="@CLUSTERING_OPTIONS@"><![CDATA[
-        #if $clustering.set_options == 'yes':
-            --P_ani $clustering.P_ani
-            --S_ani $clustering.S_ani
-            $clustering.SkipMash
-            $clustering.SkipSecondary
-            --cov_thresh $clustering.cov_thresh
-            --coverage_method $clustering.coverage_method
-            --clusterAlg $clustering.clusterAlg
-        #end if
+    <xml name="test_default_comparison_clustering_options">
+        <section name="comp_clust">
+            <conditional name="steps">
+                <param name="select" value="default" />
+                <expand macro="test_default_mash"/>
+                <expand macro="test_default_secondary_clustering"/>
+            </conditional>
+            <param name="clusterAlg" value="average"/>
+            <param name="run_tertiary_clustering" value=''/>
+        </section>
+    </xml>
+    <token name="@COMPARISON_CLUSTERING_OPTIONS@"><![CDATA[
+#if $comp_clust.steps.select == 'default'
+    @MASH@
+    @SECONDARY_CLUSTERING@
+#else if $comp_clust.steps.select == 'SkipMash'
+    --SkipMash
+    @SECONDARY_CLUSTERING@
+#else
+    @MASH@
+    --SkipSecondary
+#end if
+    --clusterAlg '$comp_clust.clusterAlg'
+    $comp_clust.run_tertiary_clustering
 ]]></token>
 
     <xml name="scoring_options">
-        <conditional name="scoring">
-            <param name="set_options" type="select" label="set scoring options">
-                <option value="yes">Yes</option>
-                <option value="no" selected="true">No</option>
-            </param>
-            <when value="yes">
-                <param argument="--completeness_weight" type="float" value="1" label="completeness weight">
-                    <help>
-Based off of the formula:
-A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size)
-A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight;
-                    </help>
-                </param>
-                <param argument="--contamination_weight" type="float" value="5" label="contamination weight"/>
-                <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." label="strain heterogeneity weight"/>
-                <param argument="--N50_weight" type="float" value=".5" label="weight of log(genome N50)"/>
-                <param argument="--size_weight" type="float" value="0" label="weight of log(genome size)"/>
-            </when>
-            <when value="no"/>
-        </conditional>
+        <section name="scoring" title="Scoring criteria" expanded="false" help="Based off of the formula: A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) + F*(centrality - S_ani). With A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; F = centrality_weight">
+            <param argument="--completeness_weight" type="float" value="1" label="Completeness weight"/>
+            <param argument="--contamination_weight" type="float" value="5" label="Contamination weight"/>
+            <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." label="Strain heterogeneity weight"/>
+            <param argument="--N50_weight" type="float" value=".5" label="Weight of log(genome N50)"/>
+            <param argument="--size_weight" type="float" value="0" label="Weight of log(genome size)"/>
+            <param argument="--centrality_weight" type="float" value="1" label="Weight of (centrality - S_ani)"/>
+        </section>
+    </xml>
+    <xml name="test_default_scoring_options">
+        <section name="scoring">
+            <param name="completeness_weight" value="1"/>
+            <param name="contamination_weight" value="5"/>
+            <param name="strain_heterogeneity_weight" value="1"/>
+            <param name="N50_weight" value=".5" />
+            <param name="size_weight" value="0"/>
+            <param name="centrality_weight" value="1"/>
+        </section>
     </xml>
     <token name="@SCORING_OPTIONS@"><![CDATA[
-        #if $scoring.set_options == 'yes':
-            --completeness_weight $scoring.completeness_weight
-            --contamination_weight $scoring.contamination_weight
-            --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight
-            --N50_weight $scoring.N50_weight
-            --size_weight $scoring.size_weight
-        #end if
+    --completeness_weight $scoring.completeness_weight
+    --contamination_weight $scoring.contamination_weight
+    --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight
+    --N50_weight $scoring.N50_weight
+    --size_weight $scoring.size_weight
+    --centrality_weight $scoring.centrality_weight
 ]]></token>
 
-    <xml name="taxonomy_options">
-        <conditional name="taxonomy">
-            <param name="set_options" type="select" label="generate taxonomy information">
-                <option value="yes">Yes</option>
-                <option value="no" selected="true">No</option>
-            </param>
-            <when value="yes">
-                <param argument="--tax_method" type="select" label="Method of determining taxonomy">
-                    <help>(for ANIn/ANImf only; gANI can only do larger method)</help>
-                    <option value="percent" selected="true">percent = The most descriptive taxonimic level with at least (per) hits</option>
-                    <option value="max">max = The centrifuge taxonomic level with the most overall hits</option>
-                </param>
-                <param argument="--percent" type="float" value="50" min="0" max="100" label="minimum percent for percent method"/>
-                <param argument="--cent_index" type="data" format="" label="centrifuge index"/>
-            </when>
-            <when value="no"/>
-        </conditional>
+    <xml name="warning_options">
+        <section name="warning" title="Warnings" expanded="false">
+            <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/>
+            <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/>
+            <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/>
+        </section>
     </xml>
-    <token name="@TAXONOMY_OPTIONS@"><![CDATA[
-        #if $taxonomy.set_options == 'yes':
-            --run_tax
-            --tax_method $taxonomy.tax_method
-            --percent $taxonomy.percent
-            --cent_index $taxonomy.cent_index
-        #end if
-]]></token>
-
-   <xml name="warning_options">
-        <conditional name="warning">
-            <param name="set_options" type="select" label="set warning options">
-                <option value="yes">Yes</option>
-                <option value="no" selected="true">No</option>
-            </param>
-            <when value="yes">
-                <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/>
-                <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/>
-                <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/>
-            </when>
-            <when value="no"/>
-        </conditional>
+    <xml name="test_default_warning_options">
+        <section name="warning">
+            <param name="warn_dist" value="0.25"/>
+            <param name="warn_sim" value="0.98"/>
+            <param name="warn_aln" value="0.25"/>
+        </section>
     </xml>
     <token name="@WARNING_OPTIONS@"><![CDATA[
-        #if $warning.set_options == 'yes':
-            --warn_dist $warning.warn_dist
-            --warn_sim $warning.warn_sim
-            --warn_aln $warning.warn_aln
-        #end if
+    --warn_dist $warning.warn_dist
+    --warn_sim $warning.warn_sim
+    --warn_aln $warning.warn_aln
 ]]></token>
 
     <xml name="select_outputs">
@@ -278,8 +331,14 @@
             <option value="Chdb">Chdb.tsv</option>
         </expand>
     </xml>
+    <xml name="test_default_select_drep_outputs">
+        <param name="select_outputs" value="log,warnings,Primary_clustering_dendrogram,Clustering_scatterplots,Cluster_scoring,Winning_genomes,Widb" />
+    </xml>
+    <xml name="test_default_select_outputs">
+        <param name="select_outputs" value="log,warnings,Primary_clustering_dendrogram,Clustering_scatterplots" />
+    </xml>
 
-   <xml name="common_outputs">
+    <xml name="common_outputs">
         <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outdir/log/logger.log">
             <filter>'log' in select_outputs or not select_outputs</filter>
         </data>
@@ -299,8 +358,6 @@
             <filter>'Clustering_scatterplots' in select_outputs</filter>
         </data>
     </xml>
-
-
     <xml name="drep_outputs">
         <expand macro="common_outputs"/>
         <data name="Cluster_scoring" format="pdf" label="${tool.name} on ${on_string}: Cluster_scoring.pdf" from_work_dir="outdir/figures/Cluster_scoring.pdf">
@@ -316,19 +373,19 @@
             <filter>'Chdb' in select_outputs</filter>
         </data>
     </xml>
-
-    
-    <xml name="test_defaults_log">
-        <test>
-            <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/>
-            <output name="log">
-                <assert_contents>
-                    <yield/>
-                </assert_contents>
-            </output>
-        </test>
+    <xml name="test_string_inputs">
+        <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/>
+    </xml>
+    <xml name="test_integer_inputs">
+        <param name="genomes" ftype="fasta" value="001,002,003"/>
     </xml>
-
+    <xml name="test_log_output">
+        <output name="log">
+            <assert_contents>
+                <yield/>
+            </assert_contents>
+        </output>
+    </xml>
     <token name="@GENOMES_HELP@"><![CDATA[
 I/O PARAMETERS:
   -g [GENOMES [GENOMES ...]], --genomes [GENOMES [GENOMES ...]]
@@ -337,7 +394,6 @@
 
 
 ]]></token>
-
     <token name="@FILTERING_HELP@"><![CDATA[
 FILTERING OPTIONS:
   -l LENGTH, --length LENGTH
@@ -364,7 +420,6 @@
 
 
 ]]></token>
-
     <token name="@GENOME_COMPARISON_HELP@"><![CDATA[
 GENOME COMPARISON PARAMETERS:
   -ms MASH_SKETCH, --MASH_sketch MASH_SKETCH
@@ -383,7 +438,6 @@
                         normal  = default ANIn parameters (default: normal)
 
 ]]></token>
-
     <token name="@CLUSTERING_HELP@"><![CDATA[
 CLUSTERING PARAMETERS:
   -pa P_ANI, --P_ani P_ANI
@@ -413,10 +467,9 @@
                         scipy.cluster.hierarchy.linkage (default: average)
 
 ]]></token>
-
     <token name="@SCORING_HELP@"><![CDATA[
 SCORING CRITERIA
-Based off of the formula: 
+Based off of the formula:
 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size)
 
 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight:
@@ -433,7 +486,6 @@
 
 
 ]]></token>
-
     <token name="@TAXONOMY_HELP@"><![CDATA[
 TAXONOMY:
   --run_tax             generate taxonomy information (Tdb)
@@ -457,7 +509,6 @@
                         (default: None)
 
 ]]></token>
-
     <token name="@WARNINGS_HELP@"><![CDATA[
 WARNINGS:
   --warn_dist WARN_DIST
@@ -469,6 +520,4 @@
                         dereplicated genomes (ANIn) (default: 0.25)
 
 ]]></token>
-
-
 </macros>
author	iuc
date	Sat, 12 Feb 2022 17:40:42 +0000
parents	8dfcdbeaeed8
children	368cb4bef9d8