Mercurial > repos > iuc > metabat2_jgi_summarize_bam_contig_depths
comparison jgi_summarize_bam_contig_depths.xml @ 0:d27a4ed006ee draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metabat2/ commit 4baddc43c4fdbe9fa7fe056bc3f9213de01516dd"
author | iuc |
---|---|
date | Fri, 28 Jan 2022 12:21:33 +0000 |
parents | |
children | 1592150e38d2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d27a4ed006ee |
---|---|
1 <tool id="metabat2_jgi_summarize_bam_contig_depths" name="Calculate contig depths" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>for MetaBAT2</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 jgi_summarize_bam_contig_depths | |
9 --outputDepth '$outputDepth' | |
10 --percentIdentity $advanced.percentIdentity | |
11 #if str($advanced.output_paired_contigs) == 'yes': | |
12 --pairedContigs '$outputPairedContigs' | |
13 #end if | |
14 $advanced.noIntraDepthVariance | |
15 $advanced.showDepth | |
16 --minMapQual $advanced.minMapQual | |
17 --weightMapQual $advanced.weightMapQual | |
18 $advanced.includeEdgeBases | |
19 --maxEdgeBases $advanced.maxEdgeBases | |
20 #if str($advanced.use_reference_cond.use_reference) == 'yes': | |
21 #if str($advanced.use_reference_cond.reference_cond.reference_source) == 'cached': | |
22 --referenceFasta '$advanced.use_reference_cond.reference_cond.referenceFasta.fields.path' | |
23 #else: | |
24 --referenceFasta '$advanced.use_reference_cond.reference_cond.referenceFasta' | |
25 #end if | |
26 --outputGC '$outputGC' | |
27 --gcWindow $advanced.use_reference_cond.gcWindow | |
28 --outputReadStats '$outputReadStats' | |
29 --outputKmers '$outputKmers' | |
30 #end if | |
31 --shredLength $shredding.shredLength | |
32 --shredDepth $shredding.shredDepth | |
33 --minContigLength $shredding.minContigLength | |
34 --minContigDepth $shredding.minContigDepth | |
35 #for bam_input in $bam_inputs: | |
36 '$bam_input' | |
37 #end for | |
38 ]]></command> | |
39 <inputs> | |
40 <param name="bam_inputs" type="data" format="bam" multiple="true" label="Sorted bam files"/> | |
41 <section name="advanced" title="Advanced options"> | |
42 <param argument="--percentIdentity" type="integer" value="97" label="Minimum end-to-end percent identity of qualifying reads"/> | |
43 <param name="output_paired_contigs" type="select" display="radio" label="Output the sparse matrix of contigs which paired reads span?"> | |
44 <option value="no" selected="true">No</option> | |
45 <option value="yes">Yes</option> | |
46 </param> | |
47 <param argument="--noIntraDepthVariance" type="boolean" truevalue="--noIntraDepthVariance" falsevalue="" checked="false" label="Remove variance from mean depth along the contig?"/> | |
48 <param argument="--showDepth" type="boolean" truevalue="--showDepth" falsevalue="" checked="false" label="Output a depth file per bam for each contig base?"/> | |
49 <param argument="--minMapQual" type="integer" value="0" label="Minimum mapping quality necessary to count a read as mapped"/> | |
50 <param argument="--weightMapQual" type="float" value="0.0" label="Weight per-base depth based on the MQ of the read" help="Zero value disables"/> | |
51 <param argument="--includeEdgeBases" type="boolean" truevalue="--includeEdgeBases" falsevalue="" checked="false" label="Include 1-readlength edges when calculating depth and variance?"/> | |
52 <param argument="--maxEdgeBases" type="integer" value="75" label="Maximum length when calculating depth and variance" help="Ignored when including 1-readlength edges when calculating depth and variance"/> | |
53 <conditional name="use_reference_cond"> | |
54 <param name="use_reference" type="select" label="Select a reference genome?"> | |
55 <option value="no" selected="true">No</option> | |
56 <option value="yes">Yes</option> | |
57 </param> | |
58 <when value="no"/> | |
59 <when value="yes"> | |
60 <conditional name="reference_cond"> | |
61 <param name="reference_source" type="select" label="Choose the source for the reference genome"> | |
62 <option value="cached" selected="true">locally cached</option> | |
63 <option value="history">from history</option> | |
64 </param> | |
65 <when value="cached"> | |
66 <param argument="--referenceFasta" type="select" label="Using reference genome"> | |
67 <options from_data_table="fasta_indexes"> | |
68 <filter type="data_meta" column="1" key="dbkey" ref="bam_inputs"/> | |
69 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file(s)"/> | |
70 </options> | |
71 </param> | |
72 </when> | |
73 <when value="history"> | |
74 <param argument="--referenceFasta" type="data" format="fasta,fasta.gz" label="Using reference genome" help="Must be the reference used to map the input bam files"/> | |
75 </when> | |
76 </conditional> | |
77 <param argument="--gcWindow" type="integer" value="100" label="Sliding window size for GC calculations"/> | |
78 </when> | |
79 </conditional> | |
80 </section> | |
81 <section name="shredding" title="Options to control shredding contigs that are under-represented by the reads"> | |
82 <param argument="--shredLength" type="integer" value="16000" label="Maximum length of the shreds"/> | |
83 <param argument="--shredDepth" type="integer" value="5" label="Depth to generate overlapping shreds"/> | |
84 <param argument="--minContigLength" type="integer" value="1" label="Minimum length of contig to include for mapping and shredding"/> | |
85 <param argument="--minContigDepth" type="float" value="0.0" label="Minimum depth along the contig at which to break the contig"/> | |
86 </section> | |
87 </inputs> | |
88 <outputs> | |
89 <data name="outputDepth" format="tabular" label="${tool.name} on ${on_string} (depth matrix)"/> | |
90 <data name="outputPairedContigs" format="fasta" label="${tool.name} on ${on_string} (paired contigs)"> | |
91 <filter>advanced["output_paired_contigs"] == "yes"</filter> | |
92 </data> | |
93 <data name="outputGC" format="tabular" label="${tool.name} on ${on_string} (gc coverage histogram)"> | |
94 <filter>advanced["use_reference_cond"]["use_reference"] == "yes"</filter> | |
95 </data> | |
96 <data name="outputReadStats" format="tabular" label="${tool.name} on ${on_string} (read statistics)"> | |
97 <filter>advanced["use_reference_cond"]["use_reference"] == "yes"</filter> | |
98 </data> | |
99 <data name="outputKmers" format="tabular" label="${tool.name} on ${on_string} (perfect kmer counts)"> | |
100 <filter>advanced["use_reference_cond"]["use_reference"] == "yes"</filter> | |
101 </data> | |
102 </outputs> | |
103 <tests> | |
104 <!-- Single input, default settings --> | |
105 <test expect_num_outputs="1"> | |
106 <param name="bam_inputs" value="input1.bam" ftype="bam"/> | |
107 <output name="outputDepth" file="jgi_output1.tabular" ftype="tabular" compare="contains"/> | |
108 </test> | |
109 <!-- Multiple inputs, default settings --> | |
110 <test expect_num_outputs="1"> | |
111 <param name="bam_inputs" value="input1.bam,input1.bam" ftype="bam"/> | |
112 <output name="outputDepth" file="jgi_output2.tabular" ftype="tabular" compare="contains"/> | |
113 </test> | |
114 <!-- Single input, output paired contigs, reference from history --> | |
115 <test expect_num_outputs="5"> | |
116 <param name="bam_inputs" value="input2.bam" ftype="bam" dbkey="89"/> | |
117 <param name="output_paired_contigs" value="yes"/> | |
118 <param name="use_reference" value="yes"/> | |
119 <param name="reference_source" value="history"/> | |
120 <param name="referenceFasta" value="NC_002945v4.fasta" ftype="fasta"/> | |
121 <output name="outputDepth" file="jgi_output_depth1.tabular" ftype="tabular" compare="contains"/> | |
122 <output name="outputPairedContigs" file="jgi_output_paired_contigs1.fasta" ftype="fasta"/> | |
123 <output name="outputGC" file="jgi_output_gc1.tabular" ftype="tabular"/> | |
124 <output name="outputReadStats" file="jgi_output_read_stats1.tabular" ftype="tabular"/> | |
125 <output name="outputKmers" file="jgi_output_kmers1.tabular" ftype="tabular"/> | |
126 </test> | |
127 <!-- Single input, output paired contigs, cached reference --> | |
128 <test expect_num_outputs="5"> | |
129 <param name="bam_inputs" value="input2.bam" ftype="bam" dbkey="89"/> | |
130 <param name="output_paired_contigs" value="yes"/> | |
131 <param name="use_reference" value="yes"/> | |
132 <param name="reference_source" value="cached"/> | |
133 <output name="outputDepth" file="jgi_output_depth1.tabular" ftype="tabular" compare="contains"/> | |
134 <output name="outputPairedContigs" file="jgi_output_paired_contigs1.fasta" ftype="fasta"/> | |
135 <output name="outputGC" file="jgi_output_gc1.tabular" ftype="tabular"/> | |
136 <output name="outputReadStats" file="jgi_output_read_stats1.tabular" ftype="tabular"/> | |
137 <output name="outputKmers" file="jgi_output_kmers1.tabular" ftype="tabular"/> | |
138 </test> | |
139 </tests> | |
140 <help> | |
141 **What it does** | |
142 | |
143 Calculates coverage depth for each sequence in one or more selected BAM files, producing a single tabular depth matrix | |
144 with the mean and variance of base coverage depth for each input, which can be used as one of the inputs for the MetaBAT2 | |
145 metagenome binning tool. | |
146 | |
147 The algorithm used for calculating the coverage depth is adjusted by a few factors to improve the fidelity of the metrics | |
148 when correlating abundance coverage in the binning stage. By default the following adjustments are applied. | |
149 | |
150 **Edge bases are ignored** | |
151 | |
152 Bases within the lesser of one average read length or --maxEdgeBases (default 75) of a sequence edge are not counted as | |
153 coverage. This is because most mappers cannot reliably place a read that would extend off the edge of a sequence, and | |
154 coverage depth tends to drop towards 0 at the edge of a contig or scaffold. Use --includeEdgeBases to include the coverage in this region. | |
155 | |
156 **Reads with high mapping errors are skipped** | |
157 | |
158 Reads that map imperfectly are excluded when the %ID of the mapping drops below a threshold (--percentIdentity=97). | |
159 MetaBAT2 is designed to resolve strain variation and mapping reads with low %ID indicate that the read actually came from | |
160 a different strain/species. | |
161 | |
162 %ID is calculated from the CIGAR string and/or NM/MD fields as 100 * MatchedBases / (MatchedBases + Substitutions + | |
163 Insertions + Deletions). This ensures that clips, insertions, deletions and mismatches are excluded from the coverage count. | |
164 Only the read bases that exactly match the reference are counted as coverage. This generally has a small effect, except in | |
165 the case of long reads from PacBio and Nanopore. | |
166 | |
167 **More information** | |
168 | |
169 https://bitbucket.org/berkeleylab/metabat/src/master/ | |
170 | |
171 **Options** | |
172 | |
173 * **Select a reference genome** - optionally select the reference genome that was used to map the input bam file(s) and 3 additional outputs will be produced: gc coverage histogram, read statistics and perfect kmer counts. | |
174 | |
175 </help> | |
176 <expand macro="citations"/> | |
177 </tool> | |
178 |