comparison maxbin2.xml @ 0:35aa0df55a62 draft

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tree/master/tools/maxbin2 commit 8e118a4d24047e2c62912b962e854f789d6ff559-dirty
author mbernt
date Thu, 28 Jun 2018 08:49:29 -0400
parents
children 864279a0d64b
comparison
equal deleted inserted replaced
-1:000000000000 0:35aa0df55a62
1 <tool id="maxbin2" name="MaxBin2" version="2.2.4">
2 <requirements>
3 <requirement type="package" version="2.2.4">maxbin2</requirement> <!-- same version number as the tool's version attribute -->
4 </requirements>
5 <version_command><![CDATA[run_MaxBin.pl -version | head -n 1]]></version_command> <!-- only the first line of the version output is kept -->
6 <command detect_errors="exit_code"><![CDATA[
7 ## write the paths of the selected read or abundance files to a list file, one path per line
8 #if $intype_cond.intype_select == 'rds':
9 #for $r in $intype_cond.reads
10 #if $r
11 echo '$r' >> reads_list &&
12 #end if
13 #end for
14 #else if $intype_cond.intype_select == 'abdc':
15 #for $a in $intype_cond.abund
16 #if $a
17 echo '$a' >> abund_list &&
18 #end if
19 #end for
20 #end if
21
22 ## in case of reassembly the IDBA out and err are appended (see below);
23 ## to tell them apart, a header is also printed before the
24 ## MaxBin2 outputs
25 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != ""
26 echo "==== MaxBin2 stdout ====" &&
27 echo "==== MaxBin2 stderr ====" 1>&2 &&
28 #end if
29
30 run_MaxBin.pl
31 -contig '$contig'
32 -out out
33 #if $intype_cond.intype_select == 'rds':
34 -reads_list reads_list
35 $intype_cond.reassembly
36 #else if $intype_cond.intype_select == 'abdc':
37 -abund_list abund_list
38 #end if
39 #if $adv_cond.adv_select == 'yes':
40 -min_contig_length $adv_cond.min_contig_length
41 -max_iteration $adv_cond.max_iteration
42 -prob_threshold $adv_cond.prob_threshold
43 $adv_cond.plotmarker
44 -markerset $adv_cond.markerset
45 #end if
46 -thread \${GALAXY_SLOTS:-1}
47
48 && tar -xf out.marker_of_each_bin.tar.gz
49
50 ## copy the content of the IDBA out and err files to stdout and stderr
51 #if $intype_cond.intype_select == 'rds' and $intype_cond.reassembly != ""
52 && echo "==== IDBA stdout ===="
53 && cat out.idba.out
54 && echo "==== IDBA stderr ====" 1>&2
55 && cat out.idba.err 1>&2
56 #end if
57 ]]></command>
58 <inputs>
59 <param argument="-contig" type="data" format="fasta,fasta.gz" label="Contig file"/>
60 <conditional name="intype_cond"> <!-- abundance source: sequencing reads or precomputed abundance files -->
61 <param name="intype_select" type="select" label="Input type">
62 <option value="rds" selected="true">Sequencing Reads</option>
63 <option value="abdc">Abundances</option>
64 </param>
65 <when value="rds">
66 <param name="reads" argument="-read/-read2/..." type="data" format="fasta,fastq" multiple="true" label="Reads file(s)"/>
67 <param argument="-reassembly" type="boolean" truevalue="-reassembly" falsevalue="" checked="false" label="Reassembly" help="Reassembly option is still highly experimental. To use this function, you need to feed MaxBin interleaved paired-end fastq or fasta file if you were to use this option." /> <!-- argument matches the single-dash flag written to the command line by truevalue -->
68 </when>
69 <when value="abdc">
70 <param name="abund" argument="-abund/-abund2/..." type="data" format="tabular" multiple="true" label="Abundance file(s)"/>
71 </when>
72 </conditional>
73 <conditional name="adv_cond"> <!-- the advanced flags are only added to the command line when "yes" is selected -->
74 <param name="adv_select" type="select" label="Advanced options">
75 <option value="yes">Yes</option>
76 <option value="no" selected="true">No</option>
77 </param>
78 <when value="no"/>
79 <when value="yes">
80 <param argument="-min_contig_length" type="integer" min="0" value="1000" label="minimum contig length" />
81 <param argument="-max_iteration" type="integer" min="0" value="50" label="Maximum Expectation-Maximization algorithm iteration number" />
82 <param argument="-prob_threshold" type="float" min="0" max="1.0" value="0.9" label="Probability threshold for EM final classification" />
83 <param name="plotmarker" type="boolean" truevalue="-plotmarker" falsevalue="" checked="false" label="Generate visualization of the marker gene presence numbers" />
84 <param argument="-markerset" type="select" label="Marker gene set">
85 <option value="107" selected="true">107 marker genes present in >95% of bacteria</option>
86 <option value="40">40 marker gene sets that are universal among bacteria and archaea</option>
87 </param>
88 </when>
89 </conditional>
90 </inputs>
91 <outputs>
92 <collection name="bins" type="list" label="${tool.name} on ${on_string} (bins)">
93 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" />
94 </collection>
95 <data name="summary" format="tabular" label="${tool.name} on ${on_string} (summary)" from_work_dir="out.summary"/>
96 <data name="log" format="txt" label="${tool.name} on ${on_string} (log)" from_work_dir="out.log"/>
97 <data name="abundout" format="tabular" label="${tool.name} on ${on_string} (abundances)" from_work_dir="out.abund1"> <!-- only created when reads are the input -->
98 <filter>intype_cond['intype_select']=='rds'</filter>
99 </data>
100 <data name="marker" format="tabular" label="${tool.name} on ${on_string} (marker gene presence)" from_work_dir="out.marker"/>
101 <data name="plot" format="pdf" label="${tool.name} on ${on_string} (marker gene presence plot)" from_work_dir="out.marker.pdf">
102 <filter>adv_cond['adv_select']=='yes' and adv_cond['plotmarker']</filter>
103 </data>
104 <data name="noclass" format="fasta" label="${tool.name} on ${on_string} (unclassified sequences)" from_work_dir="out.noclass"/>
105 <data name="toshort" format="fasta" label="${tool.name} on ${on_string} (too short sequences)" from_work_dir="out.tooshort"/>
106 <collection name="markers" type="list" label="${tool.name} on ${on_string} (markers predicted for bins)">
107 <discover_datasets pattern="out.(?P&lt;designation&gt;[0-9]+).marker.fasta" format="fasta" visible="false" />
108 </collection>
109 <!-- additional outputs in case of reassembly; the boolean is tested by truthiness (as for plotmarker above), which holds whether the filter sees a bool or the truevalue string -->
110 <collection name="reassembly_bins" type="list" label="${tool.name} on ${on_string} (reassembly bins)">
111 <discover_datasets directory="out.reassem" pattern="out.(?P&lt;designation&gt;[0-9]+).fasta" format="fasta" visible="false" />
112 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
113 </collection>
114 <collection name="reassembly_reads" type="list" label="${tool.name} on ${on_string} (reassembly reads)">
115 <discover_datasets directory="out.reassem" pattern="out.reads.(?P&lt;designation&gt;[0-9]+)" format="fasta" visible="false" />
116 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
117 </collection>
118 <data name="reassembly_noclass" format="fasta" label="${tool.name} on ${on_string} (reassembly unclassified sequences)" from_work_dir="out.reassem/out.reads.noclass">
119 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
120 </data>
121 <data name="reassembly_n50" format="txt" label="${tool.name} on ${on_string} (reassembly N50)" from_work_dir="out.reassem/N50.txt">
122 <filter>intype_cond['intype_select']=='rds' and intype_cond['reassembly']</filter>
123 </data>
124 </outputs>
125 <tests>
126 <test><!-- test w contigs and reads as input -->
127 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
128 <conditional name="intype_cond">
129 <param name="intype_select" value="rds"/>
130 <param name="reads" value="interleavedPE_unmapped_Sample3_total.fasta" ftype="fasta"/>
131 </conditional>
132 <conditional name="adv_cond">
133 <param name="adv_select" value="no"/>
134 </conditional>
135 <output_collection name="bins" type="list" count="2">
136 <element name="001" file="1/out.001.fasta" ftype="fasta"/>
137 <element name="002" file="1/out.002.fasta" ftype="fasta"/>
138 </output_collection>
139 <output name="summary" file="1/out.summary" ftype="tabular" />
140 <output name="log" file="1/out.log" ftype="txt" compare="diff" lines_diff="17" />
141 <output name="abundout" file="1/out.abund1" ftype="tabular" />
142 <output name="marker" file="1/out.marker" ftype="tabular" />
143 <output name="noclass" file="1/out.noclass" ftype="fasta" />
144 <output name="toshort" file="1/out.tooshort" ftype="fasta" />
145 <output_collection name="markers" type="list" count="1">
146 <element name="001" file="1/out.001.marker.fasta" ftype="fasta"/>
147 </output_collection>
148 </test>
149 <test><!--test w contigs and abundances as input + advanced options -->
150 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
151 <conditional name="intype_cond">
152 <param name="intype_select" value="abdc"/>
153 <param name="abund" value="abundances.tsv" ftype="tabular"/>
154 </conditional>
155 <conditional name="adv_cond">
156 <param name="adv_select" value="yes"/>
157 <param name="min_contig_length" value="500"/>
158 <param name="max_iteration" value="10"/>
159 <param name="prob_threshold" value="0.95"/>
160 <param name="plotmarker" value="true"/>
161 <param name="markerset" value="107"/>
162 </conditional>
163 <output_collection name="bins" type="list" count="2">
164 <element name="001" file="2/out.001.fasta" ftype="fasta"/>
165 <element name="002" file="2/out.002.fasta" ftype="fasta"/>
166 </output_collection>
167 <output name="summary" file="2/out.summary" ftype="tabular" />
168 <output name="log" file="2/out.log" ftype="txt" compare="diff" lines_diff="17" />
169 <output name="marker" file="2/out.marker" ftype="tabular" />
170 <output name="plot" file="2/out.marker.pdf" ftype="pdf" compare="sim_size" />
171 <output name="noclass" file="2/out.noclass" ftype="fasta" />
172 <output name="toshort" file="2/out.tooshort" ftype="fasta" />
173 <output_collection name="markers" type="list" count="1">
174 <element name="001" file="2/out.001.marker.fasta" ftype="fasta"/>
175 </output_collection>
176 </test>
177 <test><!-- test w contigs and reads as input + reassembly-->
178 <param name="contig" value="Bin_Sample3_contigs.fasta" ftype="fasta" />
179 <conditional name="intype_cond">
180 <param name="intype_select" value="rds"/>
181 <param name="reads" value="interleavedPE_unmapped_Sample3_total.fasta" ftype="fasta"/>
182 <param name="reassembly" value="true"/>
183 </conditional>
184 <conditional name="adv_cond">
185 <param name="adv_select" value="no"/>
186 </conditional>
187 <output_collection name="bins" type="list" count="2">
188 <element name="001" file="3/out.001.fasta" ftype="fasta"/>
189 <element name="002" file="3/out.002.fasta" ftype="fasta"/>
190 </output_collection>
191 <output name="summary" file="3/out.summary" ftype="tabular" />
192 <output name="log" file="3/out.log" ftype="txt" compare="diff" lines_diff="17" />
193 <output name="abundout" file="3/out.abund1" ftype="tabular" />
194 <output name="marker" file="3/out.marker" ftype="tabular" />
195 <output name="noclass" file="3/out.noclass" ftype="fasta" />
196 <output name="toshort" file="3/out.tooshort" ftype="fasta" />
197 <output_collection name="markers" type="list" count="1">
198 <element name="001" file="3/out.001.marker.fasta" ftype="fasta"/>
199 </output_collection>
200 <output_collection name="reassembly_bins" type="list" count="2">
201 <element name="001" file="3/out.reassem/out.001.fasta" ftype="fasta"/>
202 <element name="002" file="3/out.reassem/out.002.fasta" ftype="fasta"/>
203 </output_collection>
204 <output_collection name="reassembly_reads" type="list" count="2">
205 <element name="001" file="3/out.reassem/out.reads.001" ftype="fasta"/>
206 <element name="002" file="3/out.reassem/out.reads.002" ftype="fasta"/>
207 </output_collection>
208 <output name="reassembly_noclass" file="3/out.reassem/out.reads.noclass" ftype="fasta" />
209 <output name="reassembly_n50" file="3/out.reassem/N50.txt" ftype="txt" />
210 </test>
211 </tests>
212 <help><![CDATA[
213 MaxBin is software that clusters metagenomic contigs into different bins,
214 each consisting (hopefully) of contigs from one species. MaxBin uses the
215 nucleotide composition information and contig abundance information to
216 achieve binning through an Expectation-Maximization algorithm.
217
218
219 **Input**:
220
221 MaxBin needs the contigs and contig abundance information. The contig abundance
222 information can be provided in two ways: the user can choose to provide
223
224 - the abundance file or
225 - the sequencing reads in fasta format (and MaxBin will use Bowtie2 to map the
226 sequencing reads against the contigs and generate the abundance information)
227
228 The abundance information can be provided as a tabular file:
229
230 For example, assume I have three contigs named A0001, A0002, and A0003, then my abundance file will look like
231
232 A0001 30.89
233 A0002 20.02
234 A0003 78.93
235
236 Reads/abundance information can be given in multiple files.
237
238 By default MaxBin will look for 107 marker genes present in >95% of bacteria.
239 Alternatively you can also choose 40 marker gene sets that are universal among
240 bacteria and archaea (Wu et al., PLoS ONE 2013). This option may be better
241 suited for environments dominated by archaea; however, it tends to split genomes
242 into more bins. You can choose between different marker gene sets and see which
243 one works better.
244
245 **Outputs**
246
247 - bins: binned sequences
248 - summary: a summary file describing which contigs are being classified into which bin.
249 - log: a log file recording the core steps of MaxBin algorithm
250 - abundances (only if reads are used as input): the contig abundance information that MaxBin generated by mapping the reads against the contigs
251 - marker: marker gene presence numbers for each bin. This table is ready to be plotted by R or other 3rd-party software.
252 - marker plot (only present if -plotmarker is selected in the advanced options): visualization of the marker gene presence numbers using R.
253 - unclassified sequences: this file stores all sequences that pass the minimum length threshold but are not classified successfully.
254 - too short sequences: this file stores all sequences that do not meet the minimum length threshold.
255 - markers predicted for bins: these data sets store all markers predicted from the individual bins.
256
257 **Reassembly**
258
259 This is an experimental feature of MaxBin. For each read bin it calls IDBA_UD with the pre_correction parameter. This IDBA_UD call can of course also be done with the corresponding Galaxy tool.
260
261 ]]></help>
262 <citations> <!-- one DOI citation plus a bibtex entry pointing at the project web page -->
263 <citation type="doi">10.1093/bioinformatics/btv638</citation>
264 <citation type="bibtex">
265 @misc{maxbin_website,
268 title = {MaxBin},
269 url = {https://downloads.jbei.org/data/microbial_communities/MaxBin/MaxBin.html},
270 }</citation>
271 </citations>
272 </tool>