Mercurial > repos > iuc > metabat2_jgi_summarize_bam_contig_depths

diff jgi_summarize_bam_contig_depths.xml @ 0:d27a4ed006ee draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metabat2/ commit 4baddc43c4fdbe9fa7fe056bc3f9213de01516dd"
author: iuc
date: Fri, 28 Jan 2022 12:21:33 +0000
children: 1592150e38d2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jgi_summarize_bam_contig_depths.xml	Fri Jan 28 12:21:33 2022 +0000
@@ -0,0 +1,178 @@
+<tool id="metabat2_jgi_summarize_bam_contig_depths" name="Calculate contig depths" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>for MetaBAT2</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+jgi_summarize_bam_contig_depths
+--outputDepth '$outputDepth'
+--percentIdentity $advanced.percentIdentity
+#if str($advanced.output_paired_contigs) == 'yes':
+    --pairedContigs '$outputPairedContigs'
+#end if
+$advanced.noIntraDepthVariance
+$advanced.showDepth
+--minMapQual $advanced.minMapQual
+--weightMapQual $advanced.weightMapQual
+$advanced.includeEdgeBases
+--maxEdgeBases $advanced.maxEdgeBases
+#if str($advanced.use_reference_cond.use_reference) == 'yes':
+    #if str($advanced.use_reference_cond.reference_cond.reference_source) == 'cached':
+        --referenceFasta '$advanced.use_reference_cond.reference_cond.referenceFasta.fields.path'
+    #else:
+        --referenceFasta '$advanced.use_reference_cond.reference_cond.referenceFasta'
+    #end if
+    --outputGC '$outputGC'
+    --gcWindow $advanced.use_reference_cond.gcWindow
+    --outputReadStats '$outputReadStats'
+    --outputKmers '$outputKmers'
+#end if
+--shredLength $shredding.shredLength
+--shredDepth $shredding.shredDepth
+--minContigLength $shredding.minContigLength
+--minContigDepth $shredding.minContigDepth
+#for bam_input in $bam_inputs:
+    '$bam_input'
+#end for
+    ]]></command>
+    <inputs>
+        <param name="bam_inputs" type="data" format="bam" multiple="true" label="Sorted bam files"/>
+        <section name="advanced" title="Advanced options">
+            <param argument="--percentIdentity" type="integer" value="97" label="Minimum end-to-end percent identity of qualifying reads"/>
+            <param name="output_paired_contigs" type="select" display="radio" label="Output the sparse matrix of contigs which paired reads span?">
+                <option value="no" selected="true">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <param argument="--noIntraDepthVariance" type="boolean" truevalue="--noIntraDepthVariance" falsevalue="" checked="false" label="Remove variance from mean depth along the contig?"/>
+            <param argument="--showDepth" type="boolean" truevalue="--showDepth" falsevalue="" checked="false" label="Output a depth file per bam for each contig base?"/>
+            <param argument="--minMapQual" type="integer" value="0" label="Minimum mapping quality necessary to count a read as mapped"/>
+            <param argument="--weightMapQual" type="float" value="0.0" label="Weight per-base depth based on the MQ of the read" help="Zero value disables"/>
+            <param argument="--includeEdgeBases" type="boolean" truevalue="--includeEdgeBases" falsevalue="" checked="false" label="Include 1-readlength edges when calculating depth and variance?"/>
+            <param argument="--maxEdgeBases" type="integer" value="75" label="Maximum length when calculating depth and variance" help="Ignored when including 1-readlength edges when calculating depth and variance"/>
+            <conditional name="use_reference_cond"> <!-- choosing "yes" also enables the GC, read statistics and kmer outputs -->
+                <param name="use_reference" type="select" label="Select a reference genome?">
+                    <option value="no" selected="true">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <when value="no"/>
+                <when value="yes">
+                    <conditional name="reference_cond">
+                        <param name="reference_source" type="select" label="Choose the source for the reference genome">
+                            <option value="cached" selected="true">locally cached</option>
+                            <option value="history">from history</option>
+                        </param>
+                        <when value="cached">
+                            <param argument="--referenceFasta" type="select" label="Using reference genome">
+                                <options from_data_table="fasta_indexes">
+                                    <filter type="data_meta" column="1" key="dbkey" ref="bam_inputs"/> <!-- keeps only entries matching the dbkey of the selected BAM input(s) -->
+                                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file(s)"/>
+                                </options>
+                            </param>
+                        </when>
+                        <when value="history">
+                            <param argument="--referenceFasta" type="data" format="fasta,fasta.gz" label="Using reference genome" help="Must be the reference used to map the input bam files"/>
+                        </when>
+                    </conditional>
+                    <param argument="--gcWindow" type="integer" value="100" label="Sliding window size for GC calculations"/>
+                </when>
+            </conditional>
+        </section>
+        <section name="shredding" title="Options to control shredding contigs that are under-represented by the reads">
+            <param argument="--shredLength" type="integer" value="16000" label="Maximum length of the shreds"/>
+            <param argument="--shredDepth" type="integer" value="5" label="Depth to generate overlapping shreds"/>
+            <param argument="--minContigLength" type="integer" value="1" label="Minimum length of contig to include for mapping and shredding"/>
+            <param argument="--minContigDepth" type="float" value="0.0" label="Minimum depth along the contig at which to break the contig"/>
+        </section>
+    </inputs>
+    <outputs> <!-- each dataset name below is referenced as a path in the command template -->
+        <data name="outputDepth" format="tabular" label="${tool.name} on ${on_string} (depth matrix)"/> <!-- no filter: always created; passed to --outputDepth -->
+        <data name="outputPairedContigs" format="fasta" label="${tool.name} on ${on_string} (paired contigs)"> <!-- passed to --pairedContigs -->
+            <filter>advanced["output_paired_contigs"] == "yes"</filter>
+        </data>
+        <data name="outputGC" format="tabular" label="${tool.name} on ${on_string} (gc coverage histogram)"> <!-- passed to --outputGC; this and the next two outputs share the same reference-genome filter -->
+            <filter>advanced["use_reference_cond"]["use_reference"] == "yes"</filter>
+        </data>
+        <data name="outputReadStats" format="tabular" label="${tool.name} on ${on_string} (read statistics)"> <!-- passed to --outputReadStats -->
+            <filter>advanced["use_reference_cond"]["use_reference"] == "yes"</filter>
+        </data>
+        <data name="outputKmers" format="tabular" label="${tool.name} on ${on_string} (perfect kmer counts)"> <!-- passed to --outputKmers -->
+            <filter>advanced["use_reference_cond"]["use_reference"] == "yes"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Single BAM input with default settings; only the depth matrix is expected -->
+        <test expect_num_outputs="1">
+            <param name="bam_inputs" value="input1.bam" ftype="bam"/>
+            <output name="outputDepth" file="jgi_output1.tabular" ftype="tabular" compare="contains"/>
+        </test>
+        <!-- Multiple inputs (input1.bam supplied twice) with default settings; only the depth matrix is expected -->
+        <test expect_num_outputs="1">
+            <param name="bam_inputs" value="input1.bam,input1.bam" ftype="bam"/>
+            <output name="outputDepth" file="jgi_output2.tabular" ftype="tabular" compare="contains"/>
+        </test>
+        <!-- Single input with paired-contigs output and a reference FASTA taken from the history; all five outputs are expected -->
+        <test expect_num_outputs="5">
+            <param name="bam_inputs" value="input2.bam" ftype="bam" dbkey="89"/>
+            <param name="output_paired_contigs" value="yes"/>
+            <param name="use_reference" value="yes"/>
+            <param name="reference_source" value="history"/>
+            <param name="referenceFasta" value="NC_002945v4.fasta" ftype="fasta"/>
+            <output name="outputDepth" file="jgi_output_depth1.tabular" ftype="tabular" compare="contains"/>
+            <output name="outputPairedContigs" file="jgi_output_paired_contigs1.fasta" ftype="fasta"/>
+            <output name="outputGC" file="jgi_output_gc1.tabular" ftype="tabular"/>
+            <output name="outputReadStats" file="jgi_output_read_stats1.tabular" ftype="tabular"/>
+            <output name="outputKmers" file="jgi_output_kmers1.tabular" ftype="tabular"/>
+        </test>
+        <!-- Same inputs and expected files as the previous test, but with a locally cached reference; no referenceFasta is set, so the choice presumably comes from the dbkey filter on the test data table (TODO confirm) -->
+        <test expect_num_outputs="5">
+            <param name="bam_inputs" value="input2.bam" ftype="bam" dbkey="89"/>
+            <param name="output_paired_contigs" value="yes"/>
+            <param name="use_reference" value="yes"/>
+            <param name="reference_source" value="cached"/>
+            <output name="outputDepth" file="jgi_output_depth1.tabular" ftype="tabular" compare="contains"/>
+            <output name="outputPairedContigs" file="jgi_output_paired_contigs1.fasta" ftype="fasta"/>
+            <output name="outputGC" file="jgi_output_gc1.tabular" ftype="tabular"/>
+            <output name="outputReadStats" file="jgi_output_read_stats1.tabular" ftype="tabular"/>
+            <output name="outputKmers" file="jgi_output_kmers1.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Calculates coverage depth for each sequence in one or more selected BAM files, producing a tabular file (for each input)
+having mean and variance of base coverage depth that can be used as one of the inputs for the MetaBAT2 metagenome binning
+tool.
+
+The algorithm used for calculating the coverage depth is adjusted by a few factors to improve the fidelity of the metrics
+when correlating abundance coverage in the binning stage.  By default the following adjustments are applied.
+
+**Edge bases are ignored**
+
+Edge bases are not counted as coverage, by the lesser of 1 AverageReadLength or (--maxEdgeBases=75).  This is because most
+mappers can not reliably place a read that would extend off the edge of a sequence, and coverage depth tends to drop towards
+0 at the edge of a contig or scaffold.  Use --includeEdgeBases to include the coverage in this region.
+
+**Reads with high mapping errors are skipped**
+
+Reads that map imperfectly are excluded when the %ID of the mapping drops below a threshold (--percentIdentity=97).
+MetaBAT2 is designed to resolve strain variation, and a read mapping with low %ID indicates that the read actually came from
+a different strain/species.
+
+%ID is calculated from the CIGAR string and/or NM/MD fields and == 100 * MatchedBases / (MatchedBases + Substitutions +
+Insertions + Deletions).  This ensures that clips, insertions, deletions and mismatches are excluded from the coverage count.
+Only the read bases that exactly match the reference are counted as coverage. This generally has a small effect, except in
+the case of long reads from PacBio and Nanopore.
+
+**More information**
+
+https://bitbucket.org/berkeleylab/metabat/src/master/
+
+**Options**
+
+ * **Select a reference genome** - optionally select the reference genome that was used to map the input bam file(s) and 3 additional outputs will be produced; gc coverage histogram, read statistics and perfect kmer counts.
+
+    </help>
+    <expand macro="citations"/>
+</tool>
+
author	iuc
date	Fri, 28 Jan 2022 12:21:33 +0000
parents
children	1592150e38d2