Mercurial > repos > iuc > checkm_plot
view plot.xml @ 2:9916308301da draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 3ae2bd72f789518e95ef65b97e0e5ac90165e113
author | iuc |
---|---|
date | Mon, 02 Sep 2024 13:51:37 +0000 |
parents | 356839cd89d2 |
children |
line wrap: on
line source
<tool id="checkm_plot" name="CheckM plot" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>for assessing the quality of genome bins</description>
    <!--
        Galaxy wrapper around the CheckM plotting sub-commands. A single
        conditional ("plot") selects the sub-command; each branch of the
        command template stages the inputs that sub-command needs and then
        calls `checkm <sub-command>`, writing images into the `output`
        directory, from which the matching output collection is discovered.
        Shared macros/tokens (bin_inputs, @BIN_INPUTS@, requirements, ...) are
        imported from macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
        <!-- Per-bin GFF collection used by the coding/tetra/dist plots -->
        <xml name="gff_inputs">
            <param name="gff" type="data_collection" collection_type="list" format="gff" label="Gene feature files for each bin"/>
        </xml>
        <!-- Links every GFF of the collection to inputs/bins/<identifier>/genes.gff -->
        <token name="@PLOT_GFF_INPUTS@"><![CDATA[
#for $i in $plot.gff
    ## replace every character that is not whitespace, word character, dash or dot
    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($i.element_identifier))
mkdir -p 'inputs/bins/${identifier}' &&
ln -s '$i' 'inputs/bins/${identifier}/genes.gff' &&
#end for
        ]]></token>
        <xml name="tetra_profile">
            <param name="tetra_profile" type="data" format="tabular" multiple="true" label="Tetranucleotide profiles for each bin" help="This can be generated using the tetra tool"/>
        </xml>
        <xml name="dist_value">
            <param argument="--dist_value" type="integer" min="0" max="100" value="" label="Reference distribution(s) to plot" help="Percentile of the reference distribution to plot. A good default value is 95"/>
        </xml>
        <xml name="gc_params">
            <param argument="--gc_window_size" type="integer" min="0" value="5000" label="Window size used to calculate GC histogram" />
            <param argument="--gc_bin_width" type="float" min="0" value="0.01" label="Width of GC bars in histogram" />
        </xml>
        <xml name="cd_params">
            <param argument="--cd_window_size" type="integer" min="0" value="10000" label="Window size used to calculate CD histogram" />
            <param argument="--cd_bin_width" type="float" min="0" value="0.01" label="Width of CD bars in histogram" />
        </xml>
        <xml name="td_params">
            <param argument="--td_window_size" type="integer" min="0" value="5000" label="Window size used to calculate TD histogram" />
            <param argument="--td_bin_width" type="float" min="0" value="0.01" label="Width of TD bars in histogram" />
        </xml>
        <xml name="fig_padding">
            <param argument="--fig_padding" type="float" min="0" value="0.2" label="White space to place around figure" help="In inches"/>
        </xml>
        <!-- Inputs for the gc_bias_plot sub-command; not expanded below because that option is disabled -->
        <xml name="gc_bias_plot">
            <when value="gc_bias_plot">
                <param name="bam_file" type="data" format="bam" label="BAM file to interrogate for coverage information" help="The file should be sorted"/>
                <param argument="--window_size" type="integer" min="0" value="5000" label="Window size used to calculate plot statistics" />
                <param argument="--all_reads" type="boolean" truevalue="--all_reads" falsevalue="" checked="false" label="Use all reads to estimate coverage instead of just those in proper pairs?" />
                <param argument="--min_align" type="float" min="0" max="1" value="0.98" label="Minimum alignment length as percentage of read length"/>
                <param argument="--max_edit_dist" type="float" min="0" max="1" value="0.02" label="Maximum edit distance as percentage of read length"/>
            </when>
        </xml>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements">
        <requirement type="package" version="1.20">samtools</requirement>
    </expand>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
@BIN_INPUTS@

#if $plot.command == 'gc_plot'
checkm gc_plot
    'bins'
    'output'
    $plot.dist_value
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    ## forward the GC histogram settings exposed in the gc_plot section
    --gc_window_size $plot.gc_window_size
    --gc_bin_width $plot.gc_bin_width
#else if $plot.command == 'coding_plot'
@PLOT_GFF_INPUTS@
checkm coding_plot
    'inputs'
    'bins'
    'output'
    $plot.dist_value
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    --cd_window_size $plot.cd_window_size
    --cd_bin_width $plot.cd_bin_width
#else if $plot.command == 'tetra_plot'
@PLOT_GFF_INPUTS@
checkm tetra_plot
    'inputs'
    'bins'
    'output'
    '$plot.tetra_profile'
    $plot.dist_value
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    --td_window_size $plot.td_window_size
    --td_bin_width $plot.td_bin_width
#else if $plot.command == 'dist_plot'
@PLOT_GFF_INPUTS@
checkm dist_plot
    'inputs'
    'bins'
    'output'
    '$plot.tetra_profile'
    $plot.dist_value
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    --gc_window_size $plot.gc_window_size
    --gc_bin_width $plot.gc_bin_width
    --cd_window_size $plot.cd_window_size
    --cd_bin_width $plot.cd_bin_width
    --td_window_size $plot.td_window_size
    --td_bin_width $plot.td_bin_width
#else if $plot.command == 'nx_plot'
checkm nx_plot
    'bins'
    'output'
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    --step_size $plot.step_size
#else if $plot.command == 'len_hist'
checkm len_hist
    'bins'
    'output'
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
#else if $plot.command == 'marker_plot'
## stage the CheckM result files under inputs/storage
mkdir -p 'inputs/storage/' &&
cp '$plot.marker_gene_stats' 'inputs/storage/marker_gene_stats.tsv' &&
cp '$plot.bin_stats_ext' 'inputs/storage/bin_stats_ext.tsv' &&
## NOTE(review): the collection is labelled as nucleotide gene sequences (genes_fna)
## but each element is staged as genes.faa - confirm which file CheckM expects
    #for $b in $plot.genes_fna
        #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($b.element_identifier))
mkdir -p 'inputs/bins/${identifier}' &&
cp '$b.file_name' 'inputs/bins/${identifier}/genes.faa' &&
    #end for
checkm marker_plot
    'inputs'
    'bins'
    'output'
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    --fig_padding $plot.fig_padding
#else if $plot.command == 'gc_bias_plot'
## this branch is only reachable once the gc_bias_plot option is enabled in the inputs
mkdir 'mapping' &&
ln -s '$plot.bam_file' 'mapping.bam' &&
samtools index 'mapping.bam' 'mapping.bam.bai' &&
checkm gc_bias_plot
    'bins'
    'output'
    'mapping.bam'
    --extension 'fasta'
    --image_type '$image_type'
    --dpi $dpi
    --font_size $font_size
    --width $width
    --height $height
    --window_size $plot.window_size
    $plot.all_reads
    --min_align $plot.min_align
    --max_edit_dist $plot.max_edit_dist
    --threads \${GALAXY_SLOTS:-1}
#end if
    ]]></command>
    <inputs>
        <expand macro="bin_inputs"/>
        <conditional name="plot">
            <param name="command" type="select" label="Plot to generate">
                <option value="gc_plot">gc_plot: Create GC histogram and delta-GC plot</option>
                <option value="coding_plot">Create coding density (CD) histogram and delta-CD plot</option>
                <option value="tetra_plot">Create tetranucleotide distance (TD) histogram and delta-TD plot</option>
                <option value="dist_plot">Create image with GC, coding density (CD), and tetranucleotide distance (TD) distribution plots together</option>
                <option value="nx_plot">Create Nx-plots</option>
                <option value="len_hist">Sequence length histogram</option>
                <option value="marker_plot">Plot position of marker genes on sequences</option>
                <!--<option value="gc_bias_plot">Plot bin coverage as a function of GC</option>-->
            </param>
            <when value="gc_plot">
                <expand macro="dist_value"/>
                <expand macro="gc_params"/>
            </when>
            <when value="coding_plot">
                <expand macro="gff_inputs"/>
                <expand macro="dist_value"/>
                <expand macro="cd_params"/>
            </when>
            <when value="tetra_plot">
                <expand macro="gff_inputs"/>
                <expand macro="tetra_profile"/>
                <expand macro="dist_value"/>
                <expand macro="td_params"/>
            </when>
            <when value="dist_plot">
                <expand macro="gff_inputs"/>
                <expand macro="tetra_profile"/>
                <expand macro="dist_value"/>
                <expand macro="gc_params"/>
                <expand macro="cd_params"/>
                <expand macro="td_params"/>
            </when>
            <when value="nx_plot">
                <param argument="--step_size" type="float" min="0" value="0.05" label="x step size for calculating Nx" />
            </when>
            <when value="len_hist">
                <!-- NOTE(review): fig_padding is exposed here but the len_hist command line never passes it; confirm whether CheckM supports it for this plot -->
                <expand macro="fig_padding" />
            </when>
            <when value="marker_plot">
                <param name="genes_fna" type="data_collection" collection_type="list" format="fasta" label="Nucleotide gene sequences for each bin" help="Optional output of the CheckM tree or lineage_wf tools"/>
                <param name="marker_gene_stats" type="data" format="tabular" label="Marker gene stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
                <param name="bin_stats_ext" type="data" format="tabular" label="Marker gene bin extensive stats" help="Output of the CheckM qa tool or optional output of the lineage_wf or taxonomy_wf tools"/>
                <expand macro="fig_padding" />
            </when>
        </conditional>
        <param argument="--image_type" type="select" label="Image type">
            <option value="eps">EPS</option>
            <option value="pdf">PDF</option>
            <option value="png" selected="true">PNG</option>
            <option value="ps">PS</option>
            <option value="svg">SVG</option>
        </param>
        <param argument="--dpi" type="integer" min="0" value="600" label="DPI of output image" />
        <param argument="--font_size" type="integer" min="0" value="8" label="Font size" />
        <param argument="--width" type="float" min="0" value="6.5" label="Width of output image" />
        <param argument="--height" type="float" min="0" value="3.5" label="Height of output image" />
    </inputs>
    <!-- One collection per plot type; files are matched as <designation>.<plot suffix>.<ext> in output/ -->
    <outputs>
        <collection name="gc_plot" type="list" label="${tool.name} on ${on_string}: GC distribution plot">
            <filter>plot['command'] == 'gc_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <collection name="coding_plot" type="list" label="${tool.name} on ${on_string}: Coding density (CD) distribution plot">
            <filter>plot['command'] == 'coding_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.coding_density_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <collection name="tetra_plot" type="list" label="${tool.name} on ${on_string}: Tetranucleotide distance (TD) distribution plot">
            <filter>plot['command'] == 'tetra_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tetra_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <collection name="dist_plot" type="list" label="${tool.name} on ${on_string}: GC, Coding density (CD) and Tetranucleotide distance (TD) distribution plot">
            <filter>plot['command'] == 'dist_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.ref_dist_plots\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <collection name="nx_plot" type="list" label="${tool.name} on ${on_string}: Nx-plot">
            <filter>plot['command'] == 'nx_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.nx_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <collection name="len_hist" type="list" label="${tool.name} on ${on_string}: Sequence length histogram">
            <filter>plot['command'] == 'len_hist'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.len_hist\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <collection name="marker_plot" type="list" label="${tool.name} on ${on_string}: Marker gene position plot">
            <filter>plot['command'] == 'marker_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.marker_pos_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
        <!-- NOTE(review): pattern previously duplicated marker_pos_plot; gc_bias_plot is assumed from the naming of the other plots - confirm against CheckM before enabling this plot -->
        <collection name="gc_bias_plot" type="list" label="${tool.name} on ${on_string}: Bin coverage as a function of GC">
            <filter>plot['command'] == 'gc_bias_plot'</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gc_bias_plot\.(?P&lt;ext&gt;.+)" directory="output/"/>
        </collection>
    </outputs>
    <tests>
        <!-- gc_plot, EPS -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="gc_plot"/>
                <param name="dist_value" value="100" />
                <param name="gc_window_size" value="5000"/>
                <param name="gc_bin_width" value="0.01"/>
            </conditional>
            <param name="image_type" value="eps"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="gc_plot" count="1">
                <element name="637000110" ftype="eps">
                    <assert_contents>
                        <has_size value="46343" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- coding_plot, PNG -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="coding_plot"/>
                <param name="gff">
                    <collection type="list">
                        <element name="637000110" ftype="gff" value="637000110.gff"/>
                    </collection>
                </param>
                <param name="dist_value" value="100" />
                <param name="cd_window_size" value="10000"/>
                <param name="cd_bin_width" value="0.01"/>
            </conditional>
            <param name="image_type" value="png"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="coding_plot" count="1">
                <element name="637000110" ftype="png">
                    <assert_contents>
                        <has_size value="224229" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- tetra_plot, PDF -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="tetra_plot"/>
                <param name="gff">
                    <collection type="list">
                        <element name="637000110" ftype="gff" value="637000110.gff"/>
                    </collection>
                </param>
                <param name="tetra_profile" ftype="tabular" value="tetra"/>
                <param name="dist_value" value="100" />
                <param name="td_window_size" value="5000"/>
                <param name="td_bin_width" value="0.01"/>
            </conditional>
            <param name="image_type" value="pdf"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="tetra_plot" count="1">
                <element name="637000110" ftype="pdf">
                    <assert_contents>
                        <has_size value="17443" delta="10"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- dist_plot, PNG -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="dist_plot"/>
                <param name="gff">
                    <collection type="list">
                        <element name="637000110" ftype="gff" value="637000110.gff"/>
                    </collection>
                </param>
                <param name="tetra_profile" ftype="tabular" value="tetra"/>
                <param name="dist_value" value="100" />
                <param name="gc_window_size" value="5000"/>
                <param name="gc_bin_width" value="0.01"/>
                <param name="cd_window_size" value="10000"/>
                <param name="cd_bin_width" value="0.01"/>
                <param name="td_window_size" value="5000"/>
                <param name="td_bin_width" value="0.01"/>
            </conditional>
            <param name="image_type" value="png"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="dist_plot" count="1">
                <element name="637000110" ftype="png">
                    <assert_contents>
                        <has_size value="375137" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- nx_plot, PS -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="nx_plot"/>
                <param name="step_size" value="0.05"/>
            </conditional>
            <param name="image_type" value="ps"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="nx_plot" count="1">
                <element name="637000110" ftype="ps">
                    <assert_contents>
                        <has_size value="18700" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- len_hist, SVG -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="len_hist"/>
                <param name="fig_padding" value="0.2"/>
            </conditional>
            <param name="image_type" value="svg"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="len_hist" count="1">
                <element name="637000110" ftype="svg">
                    <assert_contents>
                        <has_size value="11147" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- marker_plot, PNG -->
        <test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="marker_plot"/>
                <param name="genes_fna">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.faa"/>
                    </collection>
                </param>
                <param name="marker_gene_stats" ftype="tabular" value="marker_gene_stats.tsv"/>
                <param name="bin_stats_ext" ftype="tabular" value="bin_stats_ext.tsv"/>
                <param name="fig_padding" value="0.2"/>
            </conditional>
            <param name="image_type" value="png"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="marker_plot" count="1">
                <element name="637000110" ftype="png">
                    <assert_contents>
                        <has_size value="137320" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!--<test expect_num_outputs="1">
            <conditional name="bins">
                <param name="select" value="collection"/>
                <param name="bins_coll">
                    <collection type="list">
                        <element name="637000110" ftype="fasta" value="637000110.fna"/>
                    </collection>
                </param>
            </conditional>
            <conditional name="plot">
                <param name="command" value="gc_bias_plot"/>
                <param name="bam_file" ftype="bam" value="637000110.bam"/>
                <param name="window_size" value="5000"/>
                <param name="all_reads" value="false" />
                <param name="min_align" value="0.98"/>
                <param name="max_edit_dist" value="0.02"/>
            </conditional>
            <param name="image_type" value="png"/>
            <param name="dpi" value="600" />
            <param name="font_size" value="8"/>
            <param name="width" value="6.5"/>
            <param name="height" value="3.5"/>
            <output_collection name="gc_bias_plot" count="1">
                <element name="637000110" ftype="png">
                    <assert_contents>
                        <has_size value="10000" delta="100"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>-->
    </tests>
    <help><![CDATA[
@HELP_HEADER@

This command produces a number of plots for assessing the quality of genome bins. Here we describe each of these plots and provide an example.

- gc_plot: Provides a 3 pane plot suitable for assessing the GC distribution of sequences within a genome bin. The first pane is a histogram of the number of non-overlapping 5 kbp windows with a given percent GC. A typical genome will produce a unimodal distribution. The second pane plots each sequence in the genome bin as a function of its deviation from the average GC of the entire genome (x-axis) and sequence length (y-axis). The dashed red lines indicate the expected deviation from the mean GC as a function of length. This expected deviation is pre-calculated from a set of trusted reference genomes and the percentile plotted is provided as an argument to this command. A good default value to use for this distribution parameter is 95.

- coding_plot: Provides a plot analogous to the gc_plot suitable for assessing the coding density of sequences within a genome bin.

- tetra_plot: Provides a plot analogous to the gc_plot suitable for assessing the tetranucleotide signatures of sequences within a genome bin. The Manhattan distance is used to determine the difference between each sequence's tetranucleotide signature and the tetranucleotide signature of the entire genome bin. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command.

- dist_plot: Produces a single figure combining the plots produced by gc_plot, coding_plot, and tetra_plot. This plot requires a file indicating the tetranucleotide signature of all sequences within the genome bins. This file can be created with the tetra command.

- nx_plot: Produces a plot indicating the Nx value of a genome bin for all values of x. This provides a more comprehensive view of the quality of an assembly than simply considering N50.

- len_hist: Produce a histogram of the number of sequences within a genome bin at different sequence length intervals. This provides additional information regarding the quality of an assembled genome.

- marker_plot: Plots the position of marker genes on sequences within a genome bin. This provides information regarding the extent to which marker genes are collocated. The number of marker genes within a fixed size window (2.8 kbps in this example) is indicated with different colours. Sequences without any marker genes are not shown.

- gc_bias_plot: Plots bin coverage as a function of GC. This plot is currently not selectable in this tool.
    ]]></help>
    <expand macro="citations"/>
</tool>