Mercurial > repos > iuc > drep_dereplicate
diff macros.xml @ 1:ef7cd2e7bc05 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/drep commit 5e6e589002d554be180e575080e9ad66cc78ed74"
author | iuc |
---|---|
date | Sat, 12 Feb 2022 17:40:42 +0000 |
parents | 8dfcdbeaeed8 |
children | 368cb4bef9d8 |
line wrap: on
line diff
--- a/macros.xml Tue May 05 06:12:47 2020 -0400 +++ b/macros.xml Sat Feb 12 17:40:42 2022 +0000 @@ -1,8 +1,16 @@ +<?xml version="1.0"?> <macros> - <token name="@VERSION@">2.5.4</token> + <token name="@TOOL_VERSION@">3.2.2</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">20.01</token> + <xml name="biotools"> + <xrefs> + <xref type="bio.tools">drep</xref> + </xrefs> + </xml> <xml name="requirements"> <requirements> - <requirement type="package" version="@VERSION@">drep</requirement> + <requirement type="package" version="@TOOL_VERSION@">drep</requirement> <yield/> </requirements> </xml> @@ -13,250 +21,295 @@ </citations> </xml> + <xml name="genomes"> + <param argument="--genomes" type="data" format="fasta" multiple="true" label="Genomes to filter"/> + </xml> - <xml name="genomes"> - <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/> - </xml> +<!-- Addition of ".fasta" after names to avoid string to be read as integer +Bug in dRep: probably fixed in next version --> <token name="@PREPARE_GENOMES@"><![CDATA[ - #import re - #set $genomefiles = [] - #for $genome in $genomes - #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1])) - ln -s '${genome}' '${input_name}' && - $genomefiles.append($input_name) - #end for -]]></token> +#import re +#set $genomefiles = [] +#for $genome in $genomes + #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1])) +ln -s '${genome}' '${input_name}.fasta' && +$genomefiles.append($input_name) +#end for + ]]></token> <token name="@GENOMES@"><![CDATA[ - -g - #for $genomefile in $genomefiles - '${genomefile}' - #end for -]]></token> - - - <xml name="checkm_method"> - <param argument="--checkM_method" type="select" label="checkm method" optional="true"> - <option value="taxonomy_wf">taxonomy_wf (faster)</option> - <option value="lineage_wf">lineage_wf (more accurate)</option> - </param> - </xml> - 
<token name="@CHECKM_METHOD@"><![CDATA[ - #if $checkM_method: - --checkM_method $checkM_method - #end if -]]></token> + -g +#for $genomefile in $genomefiles + '${genomefile}.fasta' +#end for + ]]></token> <xml name="filtering_options"> - <conditional name="filter"> - <param name="set_options" type="select" label="set filtering options"> - <option value="yes">Yes</option> - <option value="no" selected="true">No (use --checkM_method taxonomy_wf)</option> - </param> - <when value="yes"> - <param argument="--length" type="integer" value="50000" label="Minimum genome length"/> - <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/> - <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/> - - <conditional name="quality"> - <param argument="source" type="select" label="genome quality"> - <help> - --ignoreGenomeQuality is useful with - bacteriophages or eukaryotes or things where checkM - scoring does not work. Will only choose genomes based - on length and N50. - </help> - <option value="checkm" selected="true">Run checkM</option> - <option value="genomeInfo">User supplied genomeInfo csv file</option> - <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option> - </param> - <when value="checkm"> - <param argument="--checkM_method" type="select" label="checkm method" optional="true"> - <help> - Using the checkm method of lineage_wf can require more than 40Gb of RAM. 
- </help> - <option value="taxonomy_wf">taxonomy_wf (faster)</option> - <option value="lineage_wf">lineage_wf (more accurate)</option> - </param> - </when> - <when value="genomeInfo"> - <param argument="--genomeInfo" type="data" format="csv" label="genomes fasta files"> - <help><![CDATA[ - A CSV dataset that must contain: [ - "genome"(history dataset name of .fasta dataset of that genome), - "completeness"(0-100 value for completeness of the genome), - "contamination"(0-100 value of the contamination of the genome)] - ]]></help> - </param> - </when> - <when value="ignoreGenomeQuality"/> - </conditional> - </when> - <when value="no"/> - </conditional> + <section name="filter" title="Genome filtering" expanded="true"> + <param argument="--length" type="integer" value="50000" label="Minimum genome length"/> + <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/> + <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/> + </section> + </xml> + <xml name="test_default_filtering_options"> + <section name="filter"> + <param name="length" value="50000"/> + <param name="completeness" value="75"/> + <param name="contamination" value="100"/> + </section> </xml> <token name="@FILTER_OPTIONS@"><![CDATA[ - #if $filter.set_options == 'yes': - --length $filter.length - --completeness $filter.completeness - --contamination $filter.contamination - #if $filter.quality.source == 'checkm' - --checkM_method $filter.quality.checkM_method - #elif $filter.quality.source == 'genomeInfo' - --genomeInfo $filter.quality.genomeInfo - #elif $filter.quality.source == 'ignoreGenomeQuality' - --ignoreGenomeQuality - #end if - #else - --checkM_method taxonomy_wf - #end if + --length $filter.length + --completeness $filter.completeness + --contamination $filter.contamination ]]></token> - <xml name="genome_comparison_options"> - <conditional 
name="genome_comparison"> - <param name="set_options" type="select" label="set genome comparison options"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> + <xml name="quality_assessment_options"> + <conditional name="quality"> + <param name="source" type="select" label="Genome quality filtering" help="Skipping checkM and quality filtering is not recommended, except for bacteriophages, eukaryotes or other genomes where checkM scoring does not work. Genomes will then only be chosen based on length and N50."> + <option value="checkm" selected="true">Run checkM</option> + <option value="genomeInfo">Provide quality information on the genome (CSV file)</option> + <option value="ignoreGenomeQuality">Don't run checkM or do any quality filtering (--ignoreGenomeQuality) - NOT RECOMMENDED!</option> </param> - <when value="yes"> - <param argument="--MASH_sketch" type="integer" value="1000" label="MASH sketch size"/> - <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comaprisons"> - <option value="ANImf" selected="true">ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions</option> - <option value="ANIn">ANIn = Align whole genomes with nucmer; compare aligned regions</option> - <option value="gANI">gANI = Identify and align ORFs; compare aligned ORFS</option> + <when value="checkm"> + <param argument="--checkM_method" type="select" label="CheckM method"> + <option value="lineage_wf" selected="true">lineage_wf: Lineage-specific Workflow - quality estimates with lineage-specific markers (more accurate)</option> + <option value="taxonomy_wf">taxonomy_wf: Taxonomic-specific Workflow - quality estimates with taxonomic-specific markers (faster)</option> </param> - <param argument="-n_PRESET" type="select" label="Presets to pass to nucmer"> - <option value="normal" selected="true">normal = default ANIn parameters (default: normal)</option> - <option value="tight">tight = only align highly 
conserved regions</option> + <param argument="--set_recursion" type="integer" optional="true" label="Increases the python recursion limit" help="NOT RECOMMENDED unless checkM is crashing due to recursion issues. Recommended to set to 2000 if needed, but setting this could crash Python"/> + <param argument="--checkm_group_size" type="integer" value="2000" min="1" label="Number of genomes passed to checkM at a time" help="Increasing this increases RAM but makes checkM faster"/> + </when> + <when value="genomeInfo"> + <param argument="--genomeInfo" type="data" format="csv" label="Quality information on the genomes"> + <help><![CDATA[ + A CSV dataset that must contain: [ + "genome"(history dataset name of .fasta dataset of that genome), + "completeness"(0-100 value for completeness of the genome), + "contamination"(0-100 value of the contamination of the genome)] + ]]></help> </param> </when> - <when value="no"/> + <when value="ignoreGenomeQuality"/> + </conditional> + </xml> + <xml name="test_default_quality_assessment_options"> + <conditional name="quality"> + <param name="source" value="checkm"/> + <param name="checkM_method" value="taxonomy_wf"/> + <param name="checkm_group_size" value="2000"/> </conditional> </xml> - <token name="@GENOME_COMPARISON_OPTIONS@"><![CDATA[ - #if $genome_comparison.set_options == 'yes': - --MASH_sketch $genome_comparison.MASH_sketch - --S_algorithm $genome_comparison.S_algorithm - -n_PRESET $genome_comparison.n_PRESET - #end if + <token name="@QUALITY_ASSESSMENT_OPTIONS@"><![CDATA[ +#if $quality.source == 'checkm' + --checkM_method '$quality.checkM_method' + #if str($quality.set_recursion) != '' + --set_recursion $quality.set_recursion + #end if + --checkm_group_size $quality.checkm_group_size +#else if $quality.source == 'genomeInfo' + --genomeInfo '$quality.genomeInfo' +#else if $quality.source == 'ignoreGenomeQuality' + --ignoreGenomeQuality +#end if +]]></token> + + <xml name="mash"> + <param argument="--MASH_sketch" type="integer" 
value="1000" min="0" label="MASH sketch size"/> + <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary clusters"/> + <param argument="--multiround_primary_clustering" type='boolean' checked="false" truevalue='--multiround_primary_clustering' falsevalue='' label="Cluster each primary chunk separately and merge at the end with single linkage?" help="Decreases RAM usage and increases speed, at the cost of a minor loss in precision and the inability to plot primary_clustering_dendrograms. Especially helpful when clustering 5000+ genomes. Will be done with single linkage clustering"/> + <param argument="--primary_chunksize" type="integer" value="5000" min="1" label="Impacts multiround_primary_clusterings" help=" If you have more than this many genomes, process them in chunks of this size"/> + </xml> + <xml name="test_default_mash"> + <param name="MASH_sketch" value="1000"/> + <param name="P_ani" value="0.9"/> + <param name="multiround_primary_clustering" value=''/> + <param name="primary_chunksize" value="5000"/> + </xml> + <token name="@MASH@"><![CDATA[ + --MASH_sketch '$comp_clust.steps.MASH_sketch' + --P_ani $comp_clust.steps.P_ani + $comp_clust.steps.multiround_primary_clustering + --primary_chunksize $comp_clust.steps.primary_chunksize +]]></token> + + <xml name="nucmer"> + <param argument="--n_PRESET" type="select" label="Presets to pass to nucmer"> + <option value="normal" selected="true">normal: default ANIn parameters</option> + <option value="tight">tight: only align highly conserved regions</option> + </param> + </xml> + <xml name="test_default_nucmer"> + <param name="n_PRESET" value="normal"/> + </xml> + <token name="@NUCMER@"><![CDATA[ + --n_PRESET '$comp_clust.steps.clustering.n_PRESET' +]]></token> + + <xml name="coverage_method"> + <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment"> + <option value="larger" selected="true">Larger = max((aligned 
length / genome 1), (aligned_length / genome2))</option> + <option value="total">Total = 2*(aligned length) / (sum of total genome lengths)</option> + </param> + </xml> + <xml name="test_default_coverage_method"> + <param name="coverage_method" value="larger"/> + </xml> + <token name="@COVERAGE_METHOD@"><![CDATA[ + --coverage_method '$comp_clust.steps.clustering.coverage_method' ]]></token> - <xml name="clustering_options"> + <xml name="secondary_clustering"> <conditional name="clustering"> - <param name="set_options" type="select" label="set clustering options"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> + <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comparisons"> + <option value="fastANI">fastANI: Kmer-based approach - very fast</option> + <option value="ANImf" selected="true">ANImf: Align whole genomes with nucmer; filter alignment; compare aligned regions - RECOMMENDED</option> + <option value="ANIn">ANIn: Align whole genomes with nucmer; compare aligned regions</option> + <option value="gANI">gANI: Identify and align ORFs; compare aligned ORFS</option> + <option value="goANI">goANI: Open source version of gANI; requires nsimscan</option> </param> - <when value="yes"> - <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary (MASH) clusters"/> - <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/> + <when value="fastANI"> + <param argument="--greedy_secondary_clustering" type='boolean' checked="false" truevalue='--greedy_secondary_clustering' falsevalue='' label="Use a heuristic to avoid pair-wise comparisons when doing secondary clustering?" 
help="Will be done with single linkage clustering"/> + </when> + <when value="ANImf"> + <expand macro="nucmer"/> + <expand macro="coverage_method"/> + </when> + <when value="ANIn"> + <expand macro="nucmer"/> + <expand macro="coverage_method"/> + </when> + <when value="gANI"/> + <when value="goANI"/> + </conditional> + <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/> + <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minimum level of overlap between genomes when doing secondary comparisons"/> + </xml> + <xml name="test_default_secondary_clustering"> + <conditional name="clustering"> + <param name="S_algorithm" value="ANImf"/> + <expand macro="test_default_nucmer"/> + <expand macro="test_default_coverage_method"/> + </conditional> + <param name="S_ani" value="0.99"/> + <param name="cov_thresh" value="0.1"/> + </xml> + <token name="@SECONDARY_CLUSTERING@"><![CDATA[ + --S_algorithm '$comp_clust.steps.clustering.S_algorithm' + #if $comp_clust.steps.clustering.S_algorithm == 'fastANI' + $comp_clust.steps.clustering.greedy_secondary_clustering + #else if $comp_clust.steps.clustering.S_algorithm == 'ANImf' + @NUCMER@ + @COVERAGE_METHOD@ + #else if $comp_clust.steps.clustering.S_algorithm == 'ANIn' + @NUCMER@ + @COVERAGE_METHOD@ + #end if + --S_ani $comp_clust.steps.S_ani + --cov_thresh $comp_clust.steps.cov_thresh +]]></token> - <param argument="--SkipMash" type="boolean" truevalue="--SkipMash" falsevalue="" checked="false" label="Skip MASH clustering, just do secondary clustering on all genomes"/> - <param argument="--SkipSecondary" type="boolean" truevalue="--SkipSecondary" falsevalue="" checked="false" label="Skip secondary clustering, just perform MASH clustering"/> - <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." 
label="Minmum level of overlap between genomes when doing secondary comparisons"/> - <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment"> - <help>(for ANIn/ANImf only; gANI can only do larger method)</help> - <option value="larger" selected="true">arger = max((aligned length / genome 1), (aligned_length / genome2))</option> - <option value="total">total = 2*(aligned length) / (sum of total genome lengths)</option> + <xml name="comparison_clustering_options"> + <section name="comp_clust" title="Genome comparison and clustering" expanded="false"> + <conditional name="steps"> + <param name="select" type="select" label="Steps in genome comparison"> + <option value="default" selected="true">Default: Run MASH clustering and a secondary clustering</option> + <option value="SkipMash">Skip MASH clustering, just do secondary clustering on all genomes</option> + <option value="SkipSecondary">Skip secondary clustering, just perform MASH clustering</option> </param> - <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes"> - <help>(passed to scipy.cluster.hierarchy.linkage)</help> - <option value="average" selected="true">average</option> - </param> - </when> - <when value="no"/> - </conditional> + <when value="default"> + <expand macro="mash"/> + <expand macro="secondary_clustering"/> + </when> + <when value="SkipMash"> + <expand macro="secondary_clustering"/> + </when> + <when value="SkipSecondary"> + <expand macro="mash"/> + </when> + </conditional> + <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes" help="Passed to scipy.cluster.hierarchy.linkage"> + <option value="average" selected="true">average</option> + <option value="ward">ward</option> + <option value="single">single</option> + <option value="median">median</option> + <option value="centroid">centroid</option> + <option value="weighted">weighted</option> + </param> + <param 
argument="--run_tertiary_clustering" type='boolean' checked="false" truevalue='--run_tertiary_clustering' falsevalue='' label="Run an additional round of clustering on the final genome set?" help="This is especially useful when greedy clustering is performed and/or to handle cases where similar genomes end up in different primary clusters."/> + </section> </xml> - <token name="@CLUSTERING_OPTIONS@"><![CDATA[ - #if $clustering.set_options == 'yes': - --P_ani $clustering.P_ani - --S_ani $clustering.S_ani - $clustering.SkipMash - $clustering.SkipSecondary - --cov_thresh $clustering.cov_thresh - --coverage_method $clustering.coverage_method - --clusterAlg $clustering.clusterAlg - #end if + <xml name="test_default_comparison_clustering_options"> + <section name="comp_clust"> + <conditional name="steps"> + <param name="select" value="default" /> + <expand macro="test_default_mash"/> + <expand macro="test_default_secondary_clustering"/> + </conditional> + <param name="clusterAlg" value="average"/> + <param name="run_tertiary_clustering" value=''/> + </section> + </xml> + <token name="@COMPARISON_CLUSTERING_OPTIONS@"><![CDATA[ +#if $comp_clust.steps.select == 'default' + @MASH@ + @SECONDARY_CLUSTERING@ +#else if $comp_clust.steps.select == 'SkipMash' + --SkipMash + @SECONDARY_CLUSTERING@ +#else + @MASH@ + --SkipSecondary +#end if + --clusterAlg '$comp_clust.clusterAlg' + $comp_clust.run_tertiary_clustering ]]></token> <xml name="scoring_options"> - <conditional name="scoring"> - <param name="set_options" type="select" label="set scoring options"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - <when value="yes"> - <param argument="--completeness_weight" type="float" value="1" label="completeness weight"> - <help> -Based off of the formula: -A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) -A = completeness_weight; B = contamination_weight; C = 
strain_heterogeneity_weight; D = N50_weight; E = size_weight; - </help> - </param> - <param argument="--contamination_weight" type="float" value="5" label="contamination weight"/> - <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." label="strain heterogeneity weight"/> - <param argument="--N50_weight" type="float" value=".5" label="weight of log(genome N50)"/> - <param argument="--size_weight" type="float" value="0" label="weight of log(genome size)"/> - </when> - <when value="no"/> - </conditional> + <section name="scoring" title="Scoring criteria" expanded="false" help="Based off of the formula: A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) + F*(centrality - S_ani). With A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; F = centrality_weight"> + <param argument="--completeness_weight" type="float" value="1" label="Completeness weight"/> + <param argument="--contamination_weight" type="float" value="5" label="Contamination weight"/> + <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." 
label="Strain heterogeneity weight"/> + <param argument="--N50_weight" type="float" value=".5" label="Weight of log(genome N50)"/> + <param argument="--size_weight" type="float" value="0" label="Weight of log(genome size)"/> + <param argument="--centrality_weight" type="float" value="1" label="Weight of (centrality - S_ani)"/> + </section> + </xml> + <xml name="test_default_scoring_options"> + <section name="scoring"> + <param name="completeness_weight" value="1"/> + <param name="contamination_weight" value="5"/> + <param name="strain_heterogeneity_weight" value="1"/> + <param name="N50_weight" value=".5" /> + <param name="size_weight" value="0"/> + <param name="centrality_weight" value="1"/> + </section> </xml> <token name="@SCORING_OPTIONS@"><![CDATA[ - #if $scoring.set_options == 'yes': - --completeness_weight $scoring.completeness_weight - --contamination_weight $scoring.contamination_weight - --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight - --N50_weight $scoring.N50_weight - --size_weight $scoring.size_weight - #end if + --completeness_weight $scoring.completeness_weight + --contamination_weight $scoring.contamination_weight + --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight + --N50_weight $scoring.N50_weight + --size_weight $scoring.size_weight + --centrality_weight $scoring.centrality_weight ]]></token> - <xml name="taxonomy_options"> - <conditional name="taxonomy"> - <param name="set_options" type="select" label="generate taxonomy information"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - <when value="yes"> - <param argument="--tax_method" type="select" label="Method of determining taxonomy"> - <help>(for ANIn/ANImf only; gANI can only do larger method)</help> - <option value="percent" selected="true">percent = The most descriptive taxonimic level with at least (per) hits</option> - <option value="max">max = The centrifuge taxonomic level with the most overall 
hits</option> - </param> - <param argument="--percent" type="float" value="50" min="0" max="100" label="minimum percent for percent method"/> - <param argument="--cent_index" type="data" format="" label="centrifuge index"/> - </when> - <when value="no"/> - </conditional> + <xml name="warning_options"> + <section name="warning" title="Warnings" expanded="false"> + <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/> + <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/> + <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/> + </section> </xml> - <token name="@TAXONOMY_OPTIONS@"><![CDATA[ - #if $taxonomy.set_options == 'yes': - --run_tax - --tax_method $taxonomy.tax_method - --percent $taxonomy.percent - --cent_index $taxonomy.cent_index - #end if -]]></token> - - <xml name="warning_options"> - <conditional name="warning"> - <param name="set_options" type="select" label="set warning options"> - <option value="yes">Yes</option> - <option value="no" selected="true">No</option> - </param> - <when value="yes"> - <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/> - <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/> - <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/> - </when> - <when value="no"/> - </conditional> + <xml name="test_default_warning_options"> + <section name="warning"> + <param name="warn_dist" value="0.25"/> + <param name="warn_sim" value="0.98"/> + <param name="warn_aln" value="0.25"/> + </section> </xml> 
<token name="@WARNING_OPTIONS@"><![CDATA[ - #if $warning.set_options == 'yes': - --warn_dist $warning.warn_dist - --warn_sim $warning.warn_sim - --warn_aln $warning.warn_aln - #end if + --warn_dist $warning.warn_dist + --warn_sim $warning.warn_sim + --warn_aln $warning.warn_aln ]]></token> <xml name="select_outputs"> @@ -278,8 +331,14 @@ <option value="Chdb">Chdb.tsv</option> </expand> </xml> + <xml name="test_default_select_drep_outputs"> + <param name="select_outputs" value="log,warnings,Primary_clustering_dendrogram,Clustering_scatterplots,Cluster_scoring,Winning_genomes,Widb" /> + </xml> + <xml name="test_default_select_outputs"> + <param name="select_outputs" value="log,warnings,Primary_clustering_dendrogram,Clustering_scatterplots" /> + </xml> - <xml name="common_outputs"> + <xml name="common_outputs"> <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outdir/log/logger.log"> <filter>'log' in select_outputs or not select_outputs</filter> </data> @@ -299,8 +358,6 @@ <filter>'Clustering_scatterplots' in select_outputs</filter> </data> </xml> - - <xml name="drep_outputs"> <expand macro="common_outputs"/> <data name="Cluster_scoring" format="pdf" label="${tool.name} on ${on_string}: Cluster_scoring.pdf" from_work_dir="outdir/figures/Cluster_scoring.pdf"> @@ -316,19 +373,19 @@ <filter>'Chdb' in select_outputs</filter> </data> </xml> - - - <xml name="test_defaults_log"> - <test> - <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/> - <output name="log"> - <assert_contents> - <yield/> - </assert_contents> - </output> - </test> + <xml name="test_string_inputs"> + <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/> + </xml> + <xml name="test_integer_inputs"> + <param name="genomes" ftype="fasta" value="001,002,003"/> </xml> - + <xml 
name="test_log_output"> + <output name="log"> + <assert_contents> + <yield/> + </assert_contents> + </output> + </xml> <token name="@GENOMES_HELP@"><![CDATA[ I/O PARAMETERS: -g [GENOMES [GENOMES ...]], --genomes [GENOMES [GENOMES ...]] @@ -337,7 +394,6 @@ ]]></token> - <token name="@FILTERING_HELP@"><![CDATA[ FILTERING OPTIONS: -l LENGTH, --length LENGTH @@ -364,7 +420,6 @@ ]]></token> - <token name="@GENOME_COMPARISON_HELP@"><![CDATA[ GENOME COMPARISON PARAMETERS: -ms MASH_SKETCH, --MASH_sketch MASH_SKETCH @@ -383,7 +438,6 @@ normal = default ANIn parameters (default: normal) ]]></token> - <token name="@CLUSTERING_HELP@"><![CDATA[ CLUSTERING PARAMETERS: -pa P_ANI, --P_ani P_ANI @@ -413,10 +467,9 @@ scipy.cluster.hierarchy.linkage (default: average) ]]></token> - <token name="@SCORING_HELP@"><![CDATA[ SCORING CRITERIA -Based off of the formula: +Based off of the formula: A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight: @@ -433,7 +486,6 @@ ]]></token> - <token name="@TAXONOMY_HELP@"><![CDATA[ TAXONOMY: --run_tax generate taxonomy information (Tdb) @@ -457,7 +509,6 @@ (default: None) ]]></token> - <token name="@WARNINGS_HELP@"><![CDATA[ WARNINGS: --warn_dist WARN_DIST @@ -469,6 +520,4 @@ dereplicated genomes (ANIn) (default: 0.25) ]]></token> - - </macros>