comparison gfastats.xml @ 0:5799092ffdff draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gfastats commit 115f451c7c9e7e30fd1b8df26bfc5362832a6eb7"
author bgruening
date Wed, 09 Mar 2022 10:29:20 +0000
parents
children 2b8b4cacb83d
comparison
equal deleted inserted replaced
-1:000000000000 0:5799092ffdff
1 <tool id="gfastats" name="gfastats" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="20.01">
2 <description>the swiss army knife for genome assembly</description>
3 <macros> <!-- shared definitions are imported from macros.xml, which is not part of this view -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" /> <!-- presumably defined in macros.xml; confirm there -->
7 <expand macro="biotools"/> <!-- presumably defined in macros.xml; confirm there -->
8 <version_command>gfastats --version</version_command>
9 <command detect_errors="exit_code"><![CDATA[
10 ## Positional arguments come first: input file, then the optional expected genome size and target.
11 gfastats
12 '$input_file'
13 #if $mode_condition.selector == 'statistics'
14 #if $mode_condition.statistics_condition.selector == 'assembly'
15 $mode_condition.statistics_condition.expected_genomesize
16 #end if
17 #end if
18 #if $target_condition.target_option == 'true'
19 ## Quoted: the sanitizer lets '|' and spaces through, and the shell must not interpret them.
20 '$target_condition.target_sequence'
21 #if $target_condition.include_bed
22 --include-bed '$target_condition.include_bed'
23 #end if
24 #if $target_condition.exclude_bed
25 --exclude-bed '$target_condition.exclude_bed'
26 #end if
27 #end if
28 #if $mode_condition.selector == 'manipulation'
29 #if $mode_condition.swiss_army_knife
30 -k '$mode_condition.swiss_army_knife'
31 #end if
32 #if $mode_condition.sort
33 --sort $mode_condition.sort
34 #end if
35 $mode_condition.homopolymer_compress
36 ## The selected format doubles as the extension of the file name passed to -o.
37 -o dataset.$mode_condition.output_condition.out_format
38 ## line_length is only offered for the two FASTA flavours, so guard on the format first.
39 #if str($mode_condition.output_condition.out_format) in ('fasta', 'fasta.gz')
40 #if $mode_condition.output_condition.line_length
41 --line-length $mode_condition.output_condition.line_length
42 #end if
43 #end if
44 #else
45 #if $mode_condition.statistics_condition.selector == 'size'
46 --out-size $mode_condition.statistics_condition.out_size
47 #else if $mode_condition.statistics_condition.selector == 'coordinates'
48 --out-coord $mode_condition.statistics_condition.out_coord
49 #else if $mode_condition.statistics_condition.selector == 'assembly'
50 --nstar-report
51 #else
52 --seq-report
53 $mode_condition.statistics_condition.out_sequence
54 #end if
55 $mode_condition.tabular > '$stats'
56 #end if
57 #if $mode_condition.selector == 'manipulation'
58 && mv dataset* output_dataset
59 #end if
60 ]]></command>
61 <inputs>
62 <param name="input_file" type="data" argument="--fasta"
63 label="Input file"
64 format="fasta,fastq,fastqsanger,gfa1,fasta.gz,fastq.gz,fastqsanger.gz,gfa1.gz"/> <!-- the command block passes this dataset as the first positional argument -->
65 <conditional name="target_condition"> <!-- optional restriction of the run to one header and/or BED intervals -->
66 <param name="target_option" type="select" label="Specify target sequences">
67 <option value="false">Disabled</option>
68 <option value="true">Enabled</option>
69 </param>
70 <when value="false"/>
71 <when value="true">
72 <param name="target_sequence" type="text" value="" label="Target sequence"
73 help="Target specific sequence by header, optionally with coordinates: header[:start-end]">
74 <sanitizer invalid_char=""> <!-- characters outside the valid set are dropped (replaced by nothing) -->
75 <valid initial="string.digits,string.letters">
76 <add value=":"/>
77 <add value="-"/>
78 <add value="_"/>
79 <add value="|"/>
80 <add value=" "/>
81 </valid>
82 </sanitizer>
83 <validator type="regex" message="Only letters, digits, ':', '-', '_', '|' and spaces are allowed">^[0-9A-Za-z:_| -]+$</validator> <!-- '-' is last in the class so it is a literal hyphen, not a range; anchored to mirror the sanitizer's character set -->
84 </param>
85 <param argument="--include-bed" type="data" optional="true"
86 format="bed" label="Include specific intervals"
87 help="Generates output on a subset list of headers or coordinates
88 in 0-based bed format. It can be combined with --exclude-bed. Optional"/>
89 <param argument="--exclude-bed" type="data" format="bed" optional="true"
90 label="Exclude specific intervals"
91 help="Exclude a subset of headers or coordinates in 0-based bed format. It can be combined with --include-bed. Optional"/>
92 </when>
93 </conditional>
94 <conditional name="mode_condition"> <!-- switches between writing an edited assembly and writing a statistics report -->
95 <param name="selector" type="select" label="Tool mode">
96 <option value="statistics">Summary statistics generation</option>
97 <option value="manipulation">Genome assembly manipulation</option>
98 </param>
99 <when value="manipulation">
100 <param argument="--swiss-army-knife" type="data"
101 format="txt" label="SAK input file" optional="true"
102 help="Set of instructions provided as an ordered list"/> <!-- "txt" is Galaxy's plain-text datatype extension -->
103 <conditional name="output_condition">
104 <param argument="--out-format" type="select"
105 label="Output format" help="Outputs selected sequences.">
106 <option value="fasta">FASTA</option>
107 <option value="fasta.gz">FASTA.gz</option>
108 <option value="fastq">FASTQ</option>
109 <option value="fastq.gz" selected="true">FASTQ.gz</option>
110 <option value="gfa">GFA</option>
111 <option value="gfa.gz">GFA.gz</option>
112 </param>
113 <when value="fasta">
114 <expand macro="length_macro"/> <!-- presumably defines the line_length parameter read by the command block; macro body is in macros.xml -->
115 </when>
116 <when value="fasta.gz">
117 <expand macro="length_macro"/>
118 </when>
119 <when value="fastq"/>
120 <when value="fastq.gz"/>
121 <when value="gfa"/>
122 <when value="gfa.gz"/>
123 </conditional>
124 <param argument="--sort" type="select" label="Sort sequences" help="Specify how to sort the sequences. Ascending/descending use the sequence/path header.">
125 <option value="" selected="true">Disabled</option> <!-- empty value: the command block then omits the sort flag -->
126 <option value="ascending">Ascending</option>
127 <option value="descending">Descending</option>
128 <option value="largest">Largest</option>
129 <option value="smallest">Smallest</option>
130 </param>
131 <param argument="--homopolymer-compress" type="boolean" truevalue="--homopolymer-compress" falsevalue="" checked="false"
132 label="Homopolymer compression" help="Compress all the homopolymers in the input"/>
133 </when>
134 <when value="statistics">
135 <conditional name="statistics_condition">
136 <param name="selector" type="select" label="Report mode">
137 <option value="assembly" selected="true">Genome assembly statistics (--nstar-report)</option>
138 <option value="size">Scaffold, contig or gap sizes (--out-size)</option>
139 <option value="coordinates">AGP, contig or gap coordinates (--out-coord)</option>
140 <option value="sequence">Sequence statistics (--seq-report)</option>
141 </param>
142 <when value="size">
143 <param argument="--out-size" type="select" label="Feature for reporting sizes"
144 help="Generate a tabular file with the sequence sizes">
145 <option value="s">Scaffolds</option>
146 <option value="c">Contigs</option>
147 <option value="g">Gaps</option>
148 </param>
149 </when>
150 <when value="coordinates">
151 <param argument="--out-coord" type="select" label="BED coordinates feature"
152 help="Generates bed coordinates of given feature. Default: agp">
153 <option value="a">AGP</option>
154 <option value="c">Contigs</option>
155 <option value="g">Gaps</option>
156 </param>
157 </when>
158 <when value="assembly">
159 <param name="expected_genomesize" type="integer" min="0" optional="true"
160 label="Expected genome size" help="Estimated genome size. This parameter is optional, but required for NG* statistics."/> <!-- emitted as a bare positional value by the command block -->
161 </when>
162 <when value="sequence">
163 <param argument="--out-sequence" type="boolean" truevalue="--out-sequence" falsevalue="" checked="false"
164 label="Report actual sequence" help="It reports also the actual sequence"/>
165 </when>
166 </conditional>
167 <param argument="--tabular" type="boolean" truevalue="--tabular" falsevalue="" checked="true"
168 label="Tabular-format output" help="Generate output in tabular format"/>
169 </when>
170 </conditional>
171 </inputs>
172 <outputs> <!-- exactly one of the two datasets below is produced, chosen by the tool mode filters -->
173 <data name="stats" format="tabular" label="${tool.name} on ${on_string}: stats">
174 <filter>mode_condition['selector'] == 'statistics'</filter> <!-- report dataset exists only in statistics mode -->
175 <change_format>
176 <when input="tabular" value="false" format="text"/> <!-- NOTE(review): the tabular parameter is nested under mode_condition and its falsevalue is an empty string, and Galaxy's plain-text extension is normally "txt"; confirm this rule ever fires (test 07 still asserts ftype tabular) -->
177 </change_format>
178 </data>
179 <data name="output" format="fastq" from_work_dir="output_dataset" label="${tool.name} on ${on_string}: edited sequences"> <!-- collects the file the command block renames to output_dataset -->
180 <filter>mode_condition['selector'] == 'manipulation'</filter> <!-- sequence dataset exists only in manipulation mode -->
181 <change_format> <!-- one rule per out_format option; gfa and gfa.gz map to the gfa1 datatypes -->
182 <when input="mode_condition.output_condition.out_format" value="fasta" format="fasta"/>
183 <when input="mode_condition.output_condition.out_format" value="fasta.gz" format="fasta.gz"/>
184 <when input="mode_condition.output_condition.out_format" value="fastq" format="fastq"/>
185 <when input="mode_condition.output_condition.out_format" value="fastq.gz" format="fastq.gz"/>
186 <when input="mode_condition.output_condition.out_format" value="gfa" format="gfa1"/>
187 <when input="mode_condition.output_condition.out_format" value="gfa.gz" format="gfa1.gz"/>
188 </change_format>
189 </data>
190 </outputs>
191 <tests>
192 <!-- Test 01: SAK-driven manipulation restricted to target S1_1, written as gzipped FASTA -->
193 <test expect_num_outputs="1">
194 <param name="input_file" value="dataset_01.fastq.gz"/>
195 <conditional name="target_condition">
196 <param name="target_option" value="true"/> <!-- selector is named target_option; NOTE(review): test_01.fasta.gz may need regenerating now that targeting is really enabled -->
197 <param name="target_sequence" value="S1_1"/>
198 </conditional>
199 <conditional name="mode_condition">
200 <param name="selector" value="manipulation"/>
201 <param name="swiss_army_knife" value="swiss_army.sak"/>
202 <conditional name="output_condition">
203 <param name="out_format" value="fasta.gz"/>
204 </conditional>
205 </conditional>
206 <output name="output" value="test_01.fasta.gz" ftype="fasta.gz"/>
207 </test>
208 <!-- Test 02: contig size report restricted to target S1_1 -->
209 <test expect_num_outputs="1">
210 <param name="input_file" value="dataset_01.fastq.gz"/>
211 <conditional name="target_condition">
212 <param name="target_option" value="true"/> <!-- selector is named target_option; NOTE(review): test_02_stats.tabular may need regenerating now that targeting is really enabled -->
213 <param name="target_sequence" value="S1_1"/>
214 </conditional>
215 <conditional name="mode_condition">
216 <param name="selector" value="statistics"/>
217 <conditional name="statistics_condition">
218 <param name="selector" value="size"/>
219 <param name="out_size" value="c"/>
220 </conditional>
221 </conditional>
222 <output name="stats" value="test_02_stats.tabular" ftype="tabular"/>
223 </test>
224 <!-- Test 03: sequence report on a gzipped FASTA input, default options -->
225 <test expect_num_outputs="1">
226 <param name="input_file" value="dataset_02.fasta.gz"/>
227 <conditional name="mode_condition">
228 <param name="selector" value="statistics"/>
229 <conditional name="statistics_condition">
230 <param name="selector" value="sequence"/>
231 </conditional>
232 </conditional>
233 <output name="stats" value="test_03_stats.tabular" ftype="tabular"/>
234 </test>
235 <!-- Test 04: assembly report with an expected genome size of 600000 -->
236 <test expect_num_outputs="1">
237 <param name="input_file" value="dataset_03.fasta"/>
238 <conditional name="mode_condition">
239 <param name="selector" value="statistics"/>
240 <conditional name="statistics_condition">
241 <param name="selector" value="assembly"/>
242 <param name="expected_genomesize" value="600000"/>
243 </conditional>
244 </conditional>
245 <output name="stats" value="test_04_stats.tabular" ftype="tabular"/>
246 </test>
247 <!-- Test 05: AGP coordinates report from a GFA input -->
248 <test expect_num_outputs="1">
249 <param name="input_file" value="dataset_04.gfa"/>
250 <conditional name="mode_condition">
251 <param name="selector" value="statistics"/>
252 <conditional name="statistics_condition">
253 <param name="selector" value="coordinates"/>
254 <param name="out_coord" value="a"/>
255 </conditional>
256 </conditional>
257 <output name="stats" value="test_05_stats.tabular" ftype="tabular"/>
258 </test>
259 <!-- Test 06: format conversion, GFA input written as gzipped FASTA -->
260 <test expect_num_outputs="1">
261 <param name="input_file" value="dataset_04.gfa"/>
262 <conditional name="mode_condition">
263 <param name="selector" value="manipulation"/>
264 <conditional name="output_condition">
265 <param name="out_format" value="fasta.gz"/>
266 </conditional>
267 </conditional>
268 <output name="output" value="test_06.fasta.gz" ftype="fasta.gz"/>
269 </test>
270 <!-- Test 07: assembly report with tabular output switched off; ftype is still asserted as tabular -->
271 <test expect_num_outputs="1">
272 <param name="input_file" value="dataset_03.fasta"/>
273 <conditional name="mode_condition">
274 <param name="selector" value="statistics"/>
275 <conditional name="statistics_condition">
276 <param name="selector" value="assembly"/>
277 </conditional>
278 <param name="tabular" value="false"/>
279 </conditional>
280 <output name="stats" value="test_07_stats.tabular" ftype="tabular"/>
281 </test>
282 <!-- Test 08: manipulation with ascending sort and homopolymer compression, gzipped FASTA output -->
283 <test expect_num_outputs="1">
284 <param name="input_file" value="dataset_01.fastq.gz"/>
285 <conditional name="mode_condition">
286 <param name="selector" value="manipulation"/>
287 <conditional name="output_condition">
288 <param name="out_format" value="fasta.gz"/>
289 </conditional>
290 <param name="sort" value="ascending"/>
291 <param name="homopolymer_compress" value="true"/>
292 </conditional>
293 <output name="output" value="test_08.fasta.gz" ftype="fasta.gz"/>
294 </test>
295 </tests>
296 <help><![CDATA[
297
298 .. class:: infomark
299
300 **Purpose**
301
302 gfastats is a single fast and exhaustive tool for summary statistics and simultaneous genome assembly file manipulation. gfastats also allows seamless format conversion.
303
304
305 .. class:: infomark
306
307 **Metrics details**
308
309 Typical fast* metrics include:
310
311 - Scaffold, contig and gap size
312 - Number of scaffolds, contigs and gaps
313 - Total length of scaffolds, contigs and gaps
314 - Scaffold, contig, gap N50 and statistics (full N*/NG* statistics with the --nstar-report flag)
315 - Area under the curve (AuN/AuNG) values for scaffolds, contigs and gaps
316 - Average scaffold, contig, gap size
317 - Largest scaffold, contig and gap
318 - Base composition and GC content
319 - Soft-masked base counts (lower case bases)
320
321
322 Typical gfa metrics include:
323
324 - Number of nodes and edges
325 - Average degree
326 - Number of connected components, and length of the largest connected component
327 - Number of dead ends
328 - Number of disconnected components, and their total length
329
330
331 .. class:: infomark
332
333 **Assembly manipulation**
334
335 gfastats allows extensive assembly manipulation at the sequence level. Manipulation is achieved using a set of instructions provided as an ordered list in a file to the option **swiss army knife**. See the `instruction wiki <https://github.com/vgl-hub/gfastats/tree/main/instructions>`_ for a full list of instructions.
336
337 ]]></help>
338 <expand macro="citations" />
339 </tool>