comparison generate_sequence_features.xml @ 0:07bf5268724f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author iuc
date Fri, 14 Oct 2022 21:45:54 +0000
parents
children 0ae1a2636de5
comparison
equal deleted inserted replaced
-1:000000000000 0:07bf5268724f
1 <tool id="semibin_generate_sequence_features" name="SemiBin: Generate sequence features" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<!-- Tagline; it reads as a continuation of the name given on the enclosing tool element. -->
<description>
    (kmer and abundance) as training data for semi-supervised deep learning model training
</description>
<!--
    macros.xml is the only import of this file. The expand macros and the
    @TOKEN@ placeholders used in this file are not defined here, so they
    presumably all come from that import.
-->
<macros>
    <import>macros.xml</import>
</macros>
<!-- Three imported macros expanded in place: biotools, requirements, version. -->
<expand macro="biotools"/>
<expand macro="requirements"/>
<expand macro="version"/>
<!--
    Command template. @BAM_FILES@, @FASTA_FILES@ and @MIN_LEN@ are placeholders
    that are not defined in this file; they presumably come from macros.xml.
    $separator is not assigned anywhere in this block either
    (TODO confirm that one of those placeholders sets it).
-->
<command detect_errors="exit_code"><![CDATA[
#import re
@BAM_FILES@
@FASTA_FILES@

SemiBin
## 'single' and 'co' share one subcommand; any other mode takes the multi variant
#if str($mode.select) in ('single', 'co')
    generate_sequence_features_single
#else
    generate_sequence_features_multi
    --separator '$separator'
#end if
    --input-fasta 'contigs.fasta'
    --input-bam *.bam
    --output 'output'
    --threads \${GALAXY_SLOTS:-1}
    @MIN_LEN@
## forward the threshold only when the parameter renders to a non-empty string
#if str($ml_threshold)
    --ml-threshold $ml_threshold
#end if
]]></command>
<!--
    Tool form. The first three entries expand macros that are not defined in
    this file; extra_output is the only parameter declared inline. It is a
    multi-select with two choices, "coverage" and "contigs".
-->
<inputs>
    <expand macro="mode_fasta_bam"/>
    <expand macro="min_len"/>
    <expand macro="ml-threshold"/>
    <param name="extra_output"
           type="select"
           multiple="true"
           label="Extra outputs"
           help="In addition to the training data">
        <option value="coverage">Coverage files</option>
        <option value="contigs">Contigs (if multiple sample)</option>
    </param>
</inputs>
<!--
    Every output comes from a macro expansion; nothing is declared inline.
    The three macros are not defined in this file.
-->
<outputs>
    <expand macro="data_output_single"/>
    <expand macro="data_output_multi"/>
    <expand macro="generate_sequence_features_extra_outputs"/>
</outputs>
46 <tests>
<!--
    Mode "single": one FASTA and one BAM, automatic min_len, ml_threshold 4000,
    extra output "coverage". Four outputs are expected and all four are checked.
-->
<test expect_num_outputs="4">
    <conditional name="mode">
        <param name="select" value="single"/>
        <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
        <param name="input_bam" ftype="bam" value="input_single.bam"/>
    </conditional>
    <conditional name="min_len">
        <param name="method" value="automatic"/>
    </conditional>
    <param name="ml_threshold" value="4000"/>
    <param name="extra_output" value="coverage"/>
    <!-- training data -->
    <output name="single_data" ftype="csv">
        <assert_contents>
            <has_n_lines n="41"/>
            <has_text text="g1k_0"/>
            <has_text text="g4k_9"/>
        </assert_contents>
    </output>
    <output name="single_data_split" ftype="csv">
        <assert_contents>
            <has_n_lines n="81"/>
            <has_text text="g1k_0_1"/>
            <has_text text="g3k_2_2"/>
            <has_text text="g4k_7_2"/>
        </assert_contents>
    </output>
    <!-- coverage files requested through extra_output -->
    <output name="single_cov" ftype="csv">
        <assert_contents>
            <has_n_lines n="41"/>
            <has_text text="g1k_0"/>
        </assert_contents>
    </output>
    <!-- only the line count is asserted here, with a tolerance (n=1, delta=1) -->
    <output name="single_split_cov" ftype="csv">
        <assert_contents>
            <has_n_lines n="1" delta="1"/>
        </assert_contents>
    </output>
</test>
<!--
    Mode "co": the same FASTA as the single-sample test, combined with five
    BAM files. Extra output "coverage" is requested; both coverage collections
    must hold five elements, of which elements "0" and "4" are inspected.
-->
<test expect_num_outputs="4">
    <conditional name="mode">
        <param name="select" value="co"/>
        <param name="input_fasta" ftype="fasta" value="input_single.fasta"/>
        <param name="input_bam"
               ftype="bam"
               value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/>
    </conditional>
    <conditional name="min_len">
        <param name="method" value="automatic"/>
    </conditional>
    <param name="ml_threshold" value="4000"/>
    <param name="extra_output" value="coverage"/>
    <!-- training data: same expectations as in the single-sample test -->
    <output name="single_data" ftype="csv">
        <assert_contents>
            <has_n_lines n="41"/>
            <has_text text="g1k_0"/>
            <has_text text="g4k_9"/>
        </assert_contents>
    </output>
    <output name="single_data_split" ftype="csv">
        <assert_contents>
            <has_n_lines n="81"/>
            <has_text text="g1k_0_1"/>
            <has_text text="g3k_2_2"/>
            <has_text text="g4k_7_2"/>
        </assert_contents>
    </output>
    <!-- coverage collections, five elements each -->
    <output_collection name="co_cov" count="5">
        <element name="0" ftype="csv">
            <assert_contents>
                <has_n_lines n="41"/>
                <has_text text="g1k_0"/>
            </assert_contents>
        </element>
        <element name="4" ftype="csv">
            <assert_contents>
                <has_n_lines n="41"/>
                <has_text text="g1k_0"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="co_split_cov" count="5">
        <element name="0" ftype="csv">
            <assert_contents>
                <has_n_lines n="81"/>
                <has_text text="g1k_0_1"/>
            </assert_contents>
        </element>
        <element name="4" ftype="csv">
            <assert_contents>
                <has_n_lines n="81"/>
                <has_text text="g1k_0_1"/>
            </assert_contents>
        </element>
    </output_collection>
</test>
<!--
    Mode "multi" with a single concatenated FASTA (input_multi.fasta.gz) and
    ten BAM files. Both extra outputs ("coverage" and "contigs") are requested;
    seven output collections are expected, each with ten elements.
    NOTE(review): multi_cov inspects elements "0" and "9" whereas
    multi_split_cov inspects "1" and "9"; confirm the asymmetry is intended.
-->
<test expect_num_outputs="7">
    <conditional name="mode">
        <param name="select" value="multi"/>
        <conditional name="multi_fasta">
            <param name="select" value="concatenated"/>
            <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/>
        </conditional>
        <param name="input_bam"
               ftype="bam"
               value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
    </conditional>
    <conditional name="min_len">
        <param name="method" value="automatic"/>
    </conditional>
    <param name="ml_threshold" value="4000"/>
    <param name="extra_output" value="coverage,contigs"/>
    <!-- training data collections, elements named after the sample -->
    <output_collection name="multi_data" count="10">
        <element name="S1" ftype="csv">
            <assert_contents>
                <has_n_lines n="21"/>
                <has_text text="g1k_0"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="multi_data_split" count="10">
        <element name="S1" ftype="csv">
            <assert_contents>
                <has_n_lines n="41"/>
                <has_text text="g1k_0_1"/>
            </assert_contents>
        </element>
    </output_collection>
    <!-- coverage collections with numerically named elements -->
    <output_collection name="multi_cov" count="10">
        <element name="0" ftype="csv">
            <assert_contents>
                <has_n_lines n="201"/>
                <has_text text="S1:g1k_5"/>
            </assert_contents>
        </element>
        <element name="9" ftype="csv">
            <assert_contents>
                <has_n_lines n="201"/>
                <has_text text="S1:g1k_5"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="multi_cov_sample" count="10">
        <element name="S1" ftype="csv">
            <assert_contents>
                <has_n_lines n="21"/>
                <has_text text="g1k_0"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="multi_split_cov" count="10">
        <element name="1" ftype="csv">
            <assert_contents>
                <has_n_lines n="401"/>
                <has_text text="S1:g1k_5_1"/>
            </assert_contents>
        </element>
        <element name="9" ftype="csv">
            <assert_contents>
                <has_n_lines n="401"/>
                <has_text text="S1:g1k_5_1"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="multi_split_cov_sample" count="10">
        <element name="S1" ftype="csv">
            <assert_contents>
                <has_n_lines n="41"/>
                <has_text text="g1k_5_1"/>
            </assert_contents>
        </element>
    </output_collection>
    <!-- per-sample contigs requested through extra_output -->
    <output_collection name="multi_contigs" count="10">
        <element name="S1" ftype="fasta">
            <assert_contents>
                <has_text text=">g1k_0"/>
            </assert_contents>
        </element>
        <element name="S9" ftype="fasta">
            <assert_contents>
                <has_text text=">g1k_0"/>
            </assert_contents>
        </element>
    </output_collection>
</test>
<!--
    Mode "multi" with ten per-sample FASTA files (S1 to S10) and ten BAM files.
    extra_output is not set in this test; two outputs are expected, namely the
    two training data collections checked below.
-->
<test expect_num_outputs="2">
    <conditional name="mode">
        <param name="select" value="multi"/>
        <conditional name="multi_fasta">
            <param name="select" value="multi"/>
            <param name="input_fasta"
                   ftype="fasta"
                   value="S1.fasta,S2.fasta,S3.fasta,S4.fasta,S5.fasta,S6.fasta,S7.fasta,S8.fasta,S9.fasta,S10.fasta"/>
        </conditional>
        <param name="input_bam"
               ftype="bam"
               value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/>
    </conditional>
    <conditional name="min_len">
        <param name="method" value="automatic"/>
    </conditional>
    <param name="ml_threshold" value="4000"/>
    <output_collection name="multi_data" count="10">
        <element name="S1" ftype="csv">
            <assert_contents>
                <has_n_lines n="21"/>
                <has_text text="g1k_0"/>
            </assert_contents>
        </element>
    </output_collection>
    <output_collection name="multi_data_split" count="10">
        <element name="S1" ftype="csv">
            <assert_contents>
                <has_n_lines n="41"/>
                <has_text text="g1k_0_1"/>
            </assert_contents>
        </element>
    </output_collection>
</test>
257 </tests>
<!--
    Help text. The body is kept at column 0 on purpose, since indentation is
    significant in the help markup. The @HELP_...@ placeholders are not defined
    in this file; they presumably come from macros.xml.
-->
<help><![CDATA[
@HELP_HEADER@

This tool generates sequence features (kmer and abundance) as training data for semi-supervised deep learning model training.

Inputs
======

@HELP_INPUT_FASTA@

Outputs
=======

@HELP_DATA@

]]></help>
274 <expand macro="citations"/>
275 </tool>