comparison jgi_summarize_bam_contig_depths.xml @ 0:d27a4ed006ee draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metabat2/ commit 4baddc43c4fdbe9fa7fe056bc3f9213de01516dd"
author iuc
date Fri, 28 Jan 2022 12:21:33 +0000
parents
children 1592150e38d2
comparison
equal deleted inserted replaced
-1:000000000000 0:d27a4ed006ee
1 <tool id="metabat2_jgi_summarize_bam_contig_depths" name="Calculate contig depths" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>for MetaBAT2</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[
8 jgi_summarize_bam_contig_depths
9 --outputDepth '$outputDepth'
10 --percentIdentity $advanced.percentIdentity
11 #if str($advanced.output_paired_contigs) == 'yes':
12 --pairedContigs '$outputPairedContigs'
13 #end if
14 $advanced.noIntraDepthVariance
15 $advanced.showDepth
16 --minMapQual $advanced.minMapQual
17 --weightMapQual $advanced.weightMapQual
18 $advanced.includeEdgeBases
19 --maxEdgeBases $advanced.maxEdgeBases
20 #if str($advanced.use_reference_cond.use_reference) == 'yes':
21 #if str($advanced.use_reference_cond.reference_cond.reference_source) == 'cached'
22 --referenceFasta '$advanced.use_reference_cond.reference_cond.referenceFasta.fields.path'
23 #else:
24 --referenceFasta '$advanced.use_reference_cond.reference_cond.referenceFasta'
25 #end if
26 --outputGC '$outputGC'
27 --gcWindow $advanced.use_reference_cond.gcWindow
28 --outputReadStats '$outputReadStats'
29 --outputKmers '$outputKmers'
30 #end if
31 --shredLength $shredding.shredLength
32 --shredDepth $shredding.shredDepth
33 --minContigLength $shredding.minContigLength
34 --minContigDepth $shredding.minContigDepth
35 #for bam_input in $bam_inputs:
36 '$bam_input'
37 #end for
38 ]]></command> <!-- Command template for jgi_summarize_bam_contig_depths: the depth matrix is always requested; the paired contigs output is requested only when output_paired_contigs is 'yes'; the reference FASTA (data table path when cached, history dataset otherwise), GC, read statistics and kmer outputs are passed only when use_reference is 'yes'; every selected BAM is appended last as a quoted positional argument. -->
39 <inputs> <!-- Tool form: BAM inputs plus two option sections whose names (advanced, shredding) are referenced by the command template and the output filters -->
40 <param name="bam_inputs" type="data" format="bam" multiple="true" label="Sorted bam files"/>
41 <section name="advanced" title="Advanced options">
42 <param argument="--percentIdentity" type="integer" value="97" min="0" max="100" label="Minimum end-to-end percent identity of qualifying reads"/> <!-- a percentage, so the form restricts it to 0..100 -->
43 <param name="output_paired_contigs" type="select" display="radio" label="Output the sparse matrix of contigs which paired reads span?">
44 <option value="no" selected="true">No</option>
45 <option value="yes">Yes</option>
46 </param>
47 <param argument="--noIntraDepthVariance" type="boolean" truevalue="--noIntraDepthVariance" falsevalue="" checked="false" label="Remove variance from mean depth along the contig?"/>
48 <param argument="--showDepth" type="boolean" truevalue="--showDepth" falsevalue="" checked="false" label="Output a depth file per bam for each contig base?"/>
49 <param argument="--minMapQual" type="integer" value="0" min="0" label="Minimum mapping quality necessary to count a read as mapped"/> <!-- negative mapping qualities are rejected by the form -->
50 <param argument="--weightMapQual" type="float" value="0.0" label="Weight per-base depth based on the MQ of the read" help="Zero value disables"/>
51 <param argument="--includeEdgeBases" type="boolean" truevalue="--includeEdgeBases" falsevalue="" checked="false" label="Include 1-readlength edges when calculating depth and variance?"/>
52 <param argument="--maxEdgeBases" type="integer" value="75" label="Maximum length when calculating depth and variance" help="Ignored when including 1-readlength edges when calculating depth and variance"/>
53 <conditional name="use_reference_cond"> <!-- choosing 'yes' here also enables the GC, read statistics and kmer outputs via their filters -->
54 <param name="use_reference" type="select" label="Select a reference genome?">
55 <option value="no" selected="true">No</option>
56 <option value="yes">Yes</option>
57 </param>
58 <when value="no"/>
59 <when value="yes">
60 <conditional name="reference_cond">
61 <param name="reference_source" type="select" label="Choose the source for the reference genome">
62 <option value="cached" selected="true">locally cached</option>
63 <option value="history">from history</option>
64 </param>
65 <when value="cached">
66 <param argument="--referenceFasta" type="select" label="Using reference genome">
67 <options from_data_table="fasta_indexes">
68 <filter type="data_meta" column="1" key="dbkey" ref="bam_inputs"/>
69 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file(s)"/>
70 </options>
71 </param>
72 </when>
73 <when value="history">
74 <param argument="--referenceFasta" type="data" format="fasta,fasta.gz" label="Using reference genome" help="Must be the reference used to map the input bam files"/>
75 </when>
76 </conditional>
77 <param argument="--gcWindow" type="integer" value="100" label="Sliding window size for GC calculations"/>
78 </when>
79 </conditional>
80 </section>
81 <section name="shredding" title="Options to control shredding contigs that are under-represented by the reads">
82 <param argument="--shredLength" type="integer" value="16000" label="Maximum length of the shreds"/>
83 <param argument="--shredDepth" type="integer" value="5" label="Depth to generate overlapping shreds"/>
84 <param argument="--minContigLength" type="integer" value="1" label="Minimum length of contig to include for mapping and shredding"/>
85 <param argument="--minContigDepth" type="float" value="0.0" label="Minimum depth along the contig at which to break the contig"/>
86 </section>
87 </inputs>
88 <outputs> <!-- One unconditional dataset plus four that appear only when the matching option is set to 'yes' -->
89 <data format="tabular" name="outputDepth" label="${tool.name} on ${on_string} (depth matrix)"/> <!-- no filter: always produced -->
90 <data format="fasta" name="outputPairedContigs" label="${tool.name} on ${on_string} (paired contigs)"> <!-- NOTE(review): declared as fasta although the input label describes a sparse matrix; confirm the datatype -->
91 <filter>advanced['output_paired_contigs'] == 'yes'</filter>
92 </data>
93 <data format="tabular" name="outputGC" label="${tool.name} on ${on_string} (gc coverage histogram)"> <!-- reference-dependent output -->
94 <filter>advanced['use_reference_cond']['use_reference'] == 'yes'</filter>
95 </data>
96 <data format="tabular" name="outputReadStats" label="${tool.name} on ${on_string} (read statistics)"> <!-- reference-dependent output -->
97 <filter>advanced['use_reference_cond']['use_reference'] == 'yes'</filter>
98 </data>
99 <data format="tabular" name="outputKmers" label="${tool.name} on ${on_string} (perfect kmer counts)"> <!-- reference-dependent output -->
100 <filter>advanced['use_reference_cond']['use_reference'] == 'yes'</filter>
101 </data>
102 </outputs>
103 <tests>
104 <!-- Single BAM input with default settings: only the depth matrix is expected -->
105 <test expect_num_outputs="1">
106 <param name="bam_inputs" value="input1.bam" ftype="bam"/>
107 <output name="outputDepth" file="jgi_output1.tabular" ftype="tabular" compare="contains"/>
108 </test>
109 <!-- Multiple inputs (the same BAM file supplied twice) with default settings: only the depth matrix is expected -->
110 <test expect_num_outputs="1">
111 <param name="bam_inputs" value="input1.bam,input1.bam" ftype="bam"/>
112 <output name="outputDepth" file="jgi_output2.tabular" ftype="tabular" compare="contains"/>
113 </test>
114 <!-- Single input with paired contigs enabled and a reference FASTA taken from the history: all five outputs are expected -->
115 <test expect_num_outputs="5">
116 <param name="bam_inputs" value="input2.bam" ftype="bam" dbkey="89"/>
117 <param name="output_paired_contigs" value="yes"/>
118 <param name="use_reference" value="yes"/>
119 <param name="reference_source" value="history"/>
120 <param name="referenceFasta" value="NC_002945v4.fasta" ftype="fasta"/>
121 <output name="outputDepth" file="jgi_output_depth1.tabular" ftype="tabular" compare="contains"/>
122 <output name="outputPairedContigs" file="jgi_output_paired_contigs1.fasta" ftype="fasta"/>
123 <output name="outputGC" file="jgi_output_gc1.tabular" ftype="tabular"/>
124 <output name="outputReadStats" file="jgi_output_read_stats1.tabular" ftype="tabular"/>
125 <output name="outputKmers" file="jgi_output_kmers1.tabular" ftype="tabular"/>
126 </test>
127 <!-- Single input with paired contigs enabled and a cached reference: no referenceFasta value is set here, and the outputs are compared against the same five expected files as the history-reference test -->
128 <test expect_num_outputs="5">
129 <param name="bam_inputs" value="input2.bam" ftype="bam" dbkey="89"/>
130 <param name="output_paired_contigs" value="yes"/>
131 <param name="use_reference" value="yes"/>
132 <param name="reference_source" value="cached"/>
133 <output name="outputDepth" file="jgi_output_depth1.tabular" ftype="tabular" compare="contains"/>
134 <output name="outputPairedContigs" file="jgi_output_paired_contigs1.fasta" ftype="fasta"/>
135 <output name="outputGC" file="jgi_output_gc1.tabular" ftype="tabular"/>
136 <output name="outputReadStats" file="jgi_output_read_stats1.tabular" ftype="tabular"/>
137 <output name="outputKmers" file="jgi_output_kmers1.tabular" ftype="tabular"/>
138 </test>
139 </tests>
140 <help>
141 **What it does**
142
143 Calculates coverage depth for each sequence in one or more selected BAM files, producing a tabular file (for each input)
144 having mean and variance of base coverage depth that can be used as one of the inputs for the MetaBAT2 metagenome binning
145 tool.
146
147 The algorithm used for calculating the coverage depth is adjusted by a few factors to improve the fidelity of the metrics
148 when correlating abundance coverage in the binning stage. By default the following adjustments are applied.
149
150 **Edge bases are ignored**
151
152 Edge bases are not counted as coverage, by the lesser of 1 AverageReadLength or (--maxEdgeBases=75). This is because most
153 mappers cannot reliably place a read that would extend off the edge of a sequence, and coverage depth tends to drop towards
154 0 at the edge of a contig or scaffold. Use --includeEdgeBases to include the coverage in this region.
155
156 **Reads with high mapping errors are skipped**
157
158 Reads that map imperfectly are excluded when the %ID of the mapping drops below a threshold (--percentIdentity=97).
159 MetaBAT2 is designed to resolve strain variation and mapping reads with low %ID indicate that the read actually came from
160 a different strain/species.
161
162 %ID is calculated from the CIGAR string and/or NM/MD fields and equals 100 * MatchedBases / (MatchedBases + Substitutions +
163 Insertions + Deletions). This ensures that clips, insertions, deletions and mismatches are excluded from the coverage count.
164 Only the read bases that exactly match the reference are counted as coverage. This generally has a small effect, except in
165 the case of long reads from PacBio and Nanopore.
166
167 **More information**
168
169 https://bitbucket.org/berkeleylab/metabat/src/master/
170
171 **Options**
172
173 * **Select a reference genome** - optionally select the reference genome that was used to map the input bam file(s) and three additional outputs will be produced: gc coverage histogram, read statistics and perfect kmer counts.
174
175 </help>
176 <expand macro="citations"/>
177 </tool>
178