Mercurial > repos > nml > csvtk_summary
comparison summary.xml @ 0:ceb70f0dd898 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author | nml |
---|---|
date | Tue, 19 May 2020 17:23:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ceb70f0dd898 |
---|---|
1 <tool id="csvtk_summary" name="csvtk-summary" version="@VERSION@+@GALAXY_VERSION@"> | |
2 <description> statistics of selected fields</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <expand macro="version_cmd" /> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 | |
10 ## Set Up Input ## | |
11 ################## | |
12 | |
13 #set input_list = list() | |
14 #for $repeat in $field | |
15 #silent $input_list.append(str($repeat.column_text.in_text) + ":" + str($repeat.analysis_type)) | |
16 #end for | |
17 | |
18 #set input_total = ",".join($input_list) | |
19 | |
20 ################### | |
21 ## Start Command ## | |
22 ################### | |
23 | |
24 csvtk summary --num-cpus "\${GALAXY_SLOTS:-1}" | |
25 | |
26 ## Add additional flags as specified ## | |
27 ####################################### | |
28 $global_param.illegal_rows | |
29 $global_param.empty_rows | |
30 $global_param.header | |
31 $global_param.lazy_quotes | |
32 | |
33 ## Set Tabular input/output flag if input is tabular ## | |
34 ####################################################### | |
35 #if $in_1.is_of_type("tabular"): | |
36 -t -T | |
37 #end if | |
38 | |
39 ## Set Input ## | |
40 ############### | |
41 '$in_1' | |
42 | |
43 ## Specify fields ## | |
44 #################### | |
45 -f '$input_total' | |
46 | |
47 ## other ## | |
48 ########### | |
49 | |
50 #if $group_field.select_group != "none" | |
51 -g '$group_field.in_text' | |
52 #end if | |
53 | |
54 -s '$extra.separator' | |
55 -S '$extra.rand_int' | |
56 -n '$decimal_width' | |
57 $ignore_non_digits | |
58 | |
59 | |
60 ## To output ## | |
61 ############### | |
62 > summary | |
63 | |
64 ]]></command> | |
65 <inputs> | |
66 <expand macro="singular_input" /> | |
67 <repeat name="field" title="Select Column and Operator" min="1"> | |
68 <expand macro="singular_fields_input" /> | |
69 <param name="analysis_type" type="select" label="Analysis Type" help="Select analysis type to do on the chosen field"> | |
70 <option value="collapse">Collapse</option> | |
71 <option value="count">Count</option> | |
72 <option value="countn">Count of Digits (countn)</option> | |
73 <option value="countunique">Count Unique</option> | |
74 <option value="first">First Value</option> | |
75 <option value="last">Last Value</option> | |
76 <option value="max">Maximum</option> | |
77 <option value="mean">Mean</option> | |
78 <option value="median">Median</option> | |
79 <option value="min">Minimum</option> | |
80 <option value="prod">Product of the Elements</option> | |
81 <option value="q1">q1</option> | |
82 <option value="q2">q2</option> | |
83 <option value="q3">q3</option> | |
84 <option value="rand">Random Value</option> | |
85 <option value="entropy">Shannon Entropy</option> | |
86 <option value="stdev">Standard Deviation</option> | |
87 <option value="sum">Sum</option> | |
88 <option value="uniq">Unique</option> | |
89 <option value="variance">Variance</option> | |
90 </param> | |
91 </repeat> | |
92 <expand macro="groups_input" /> | |
93 <param name="decimal_width" type="integer" value="2" argument="-n" | |
94 label="Number of Decimals" | |
95 help="Limit float to N decimal places" | |
96 /> | |
97 <param name="ignore_non_digits" type="boolean" checked="false" argument="-i" | |
98 truevalue="-i" | |
99 falsevalue="" | |
100 label="Ignore non-digits" | |
101 help="Ignore non-digit values in columns. Ex. NA or N/A" | |
102 /> | |
103 <section name="extra" title="Specific Optional Analysis Modifiers" expanded="false"> | |
104 <param name="separator" type="text" value="; " argument="-s" | |
105 label="Collapse Separator String" | |
106 help="Input string of characters that will separate collapsed columns. The ' character is not allowed"> | |
107 <expand macro="text_sanitizer" /> | |
108 </param> | |
109 <param name="rand_int" type="integer" value="11" argument="-S" | |
110 label="Random Value Seed" | |
111 help="specify an integer" | |
112 /> | |
113 </section> | |
114 <expand macro="global_parameters" /> | |
115 </inputs> | |
116 <outputs> | |
117 <data format_source="in_1" name="summary" from_work_dir="summary" label="${in_1.name} summary of analyses" /> | |
118 </outputs> | |
119 <tests> | |
120 <test> | |
121 <param name="in_1" value="plot.csv" /> | |
122 <repeat name="field"> | |
123 <conditional name="column_text"> | |
124 <param name="select" value="string" /> | |
125 <param name="in_text" value="2" /> | |
126 </conditional> | |
127 <param name="analysis_type" value="collapse" /> | |
128 </repeat> | |
129 <repeat name="field"> | |
130 <conditional name="column_text"> | |
131 <param name="select" value="string" /> | |
132 <param name="in_text" value="3" /> | |
133 </conditional> | |
134 <param name="analysis_type" value="count" /> | |
135 </repeat> | |
136 <repeat name="field"> | |
137 <conditional name="column_text"> | |
138 <param name="select" value="string" /> | |
139 <param name="in_text" value="2" /> | |
140 </conditional> | |
141 <param name="analysis_type" value="countn" /> | |
142 </repeat> | |
143 <repeat name="field"> | |
144 <conditional name="column_text"> | |
145 <param name="select" value="string" /> | |
146 <param name="in_text" value="3" /> | |
147 </conditional> | |
148 <param name="analysis_type" value="countunique" /> | |
149 </repeat> | |
150 <repeat name="field"> | |
151 <conditional name="column_text"> | |
152 <param name="select" value="string" /> | |
153 <param name="in_text" value="2" /> | |
154 </conditional> | |
155 <param name="analysis_type" value="entropy" /> | |
156 </repeat> | |
157 <repeat name="field"> | |
158 <conditional name="column_text"> | |
159 <param name="select" value="string" /> | |
160 <param name="in_text" value="3" /> | |
161 </conditional> | |
162 <param name="analysis_type" value="first" /> | |
163 </repeat> | |
164 <repeat name="field"> | |
165 <conditional name="column_text"> | |
166 <param name="select" value="string" /> | |
167 <param name="in_text" value="2" /> | |
168 </conditional> | |
169 <param name="analysis_type" value="last" /> | |
170 </repeat> | |
171 <repeat name="field"> | |
172 <conditional name="column_text"> | |
173 <param name="select" value="string" /> | |
174 <param name="in_text" value="3" /> | |
175 </conditional> | |
176 <param name="analysis_type" value="max" /> | |
177 </repeat> | |
178 <repeat name="field"> | |
179 <conditional name="column_text"> | |
180 <param name="select" value="string" /> | |
181 <param name="in_text" value="2" /> | |
182 </conditional> | |
183 <param name="analysis_type" value="mean" /> | |
184 </repeat> | |
185 <repeat name="field"> | |
186 <conditional name="column_text"> | |
187 <param name="select" value="string" /> | |
188 <param name="in_text" value="3" /> | |
189 </conditional> | |
190 <param name="analysis_type" value="median" /> | |
191 </repeat> | |
192 <repeat name="field"> | |
193 <conditional name="column_text"> | |
194 <param name="select" value="string" /> | |
195 <param name="in_text" value="2" /> | |
196 </conditional> | |
197 <param name="analysis_type" value="min" /> | |
198 </repeat> | |
199 <repeat name="field"> | |
200 <conditional name="column_text"> | |
201 <param name="select" value="string" /> | |
202 <param name="in_text" value="3" /> | |
203 </conditional> | |
204 <param name="analysis_type" value="prod" /> | |
205 </repeat> | |
206 <repeat name="field"> | |
207 <conditional name="column_text"> | |
208 <param name="select" value="string" /> | |
209 <param name="in_text" value="2" /> | |
210 </conditional> | |
211 <param name="analysis_type" value="q1" /> | |
212 </repeat> | |
213 <repeat name="field"> | |
214 <conditional name="column_text"> | |
215 <param name="select" value="string" /> | |
216 <param name="in_text" value="3" /> | |
217 </conditional> | |
218 <param name="analysis_type" value="q2" /> | |
219 </repeat> | |
220 <repeat name="field"> | |
221 <conditional name="column_text"> | |
222 <param name="select" value="string" /> | |
223 <param name="in_text" value="2" /> | |
224 </conditional> | |
225 <param name="analysis_type" value="q3" /> | |
226 </repeat> | |
227 <repeat name="field"> | |
228 <conditional name="column_text"> | |
229 <param name="select" value="string" /> | |
230 <param name="in_text" value="3" /> | |
231 </conditional> | |
232 <param name="analysis_type" value="rand" /> | |
233 </repeat> | |
234 <repeat name="field"> | |
235 <conditional name="column_text"> | |
236 <param name="select" value="string" /> | |
237 <param name="in_text" value="2" /> | |
238 </conditional> | |
239 <param name="analysis_type" value="stdev" /> | |
240 </repeat> | |
241 <repeat name="field"> | |
242 <conditional name="column_text"> | |
243 <param name="select" value="string" /> | |
244 <param name="in_text" value="3" /> | |
245 </conditional> | |
246 <param name="analysis_type" value="sum" /> | |
247 </repeat> | |
248 <repeat name="field"> | |
249 <conditional name="column_text"> | |
250 <param name="select" value="string" /> | |
251 <param name="in_text" value="2" /> | |
252 </conditional> | |
253 <param name="analysis_type" value="uniq" /> | |
254 </repeat> | |
255 <repeat name="field"> | |
256 <conditional name="column_text"> | |
257 <param name="select" value="string" /> | |
258 <param name="in_text" value="3" /> | |
259 </conditional> | |
260 <param name="analysis_type" value="variance" /> | |
261 </repeat> | |
262 <conditional name="group_field"> | |
263 <param name="select_group" value="string" /> | |
264 <param name="in_text" value="1" /> | |
265 </conditional> | |
266 <output name="summary" > | |
267 <assert_contents> | |
268 <has_text text="collapse" /> | |
269 <has_text text="count" /> | |
270 <has_text text="countn" /> | |
271 <has_text text="countunique" /> | |
272 <has_text text="entropy" /> | |
273 <has_text text="first" /> | |
274 <has_text text="last" /> | |
275 <has_text text="max" /> | |
276 <has_text text="mean" /> | |
277 <has_text text="median" /> | |
278 <has_text text="min" /> | |
279 <has_text text="prod" /> | |
280 <has_text text="q1" /> | |
281 <has_text text="q2" /> | |
282 <has_text text="q3" /> | |
283 <has_text text="rand" /> | |
284 <has_text text="stdev" /> | |
285 <has_text text="sum" /> | |
286 <has_text text="uniq" /> | |
287 <has_text text="variance" /> | |
288 </assert_contents> | |
289 </output> | |
290 </test> | |
291 </tests> | |
292 <help><![CDATA[ | |
293 | |
294 Csvtk - Summary Help | |
295 -------------------- | |
296 | |
297 Info | |
298 #### | |
299 | |
300 Csvtk Summary allows the use of a variety of analysis tools on the selected column(s) and displays one output at the end | |
301 | |
302 .. class:: warningmark | |
303 | |
304 Single quotes are not allowed in text inputs! | |
305 | |
306 @HELP_INPUT_DATA@ | |
307 | |
308 | |
309 Usage | |
310 ##### | |
311 | |
312 To run csvtk-summary, all you need is a valid (as defined above) CSV or TSV file with any column(s) that you want to | |
313 run one of the analyses on. | |
314 | |
315 Analyses include: | |
316 | |
317 - Collapse | |
318 | |
319 - Count | |
320 | |
321 - Count Numbers (countn) | |
322 | |
323 - Count Unique | |
324 | |
325 - First Value Selection | |
326 | |
327 - Last Value Selection | |
328 | |
329 - Maximum | |
330 | |
331 - Mean | |
332 | |
333 - Median | |
334 | |
335 - Minimum | |
336 | |
337 - q1 | |
338 | |
339 - q2 | |
340 | |
341 - q3 | |
342 | |
343 - Random Value Selection | |
344 | |
345 - Shannon Entropy | |
346 | |
347 - Sum | |
348 | |
349 - Unique Values | |
350 | |
351 - Variance | |
352 | |
353 More information on these can be found on the `csvtk website <https://bioinf.shenwei.me/csvtk/usage/#summary>`_. | |
354 | |
355 **Example Summary Input** | |
356 | |
357 Input table: | |
358 | |
359 +-------+--------+ | |
360 | Group | Length | | |
361 +=======+========+ | |
362 | A | 1500 | | |
363 +-------+--------+ | |
364 | B | 1000 | | |
365 +-------+--------+ | |
366 | B | 1500 | | |
367 +-------+--------+ | |
368 | B | 2000 | | |
369 +-------+--------+ | |
370 | |
371 Suppose you wanted to group the values based on column 1 of the input table and then find out the mean length and maximum length for each group. | |
372 You would input this into csvtk-summary by creating 2 input repeats where the first one selects "column 2" and an analysis of "mean" and the | |
373 second one selects "column 2" with an analysis of "maximum". | |
374 | |
375 Running this would generate the following output: | |
376 | |
377 +-------+-------------+------------+ | |
378 | Group | Length:mean | Length:max | | |
379 +=======+=============+============+ | |
380 | A | 1500 | 1500 | | |
381 +-------+-------------+------------+ | |
382 | B | 1500 | 2000 | | |
383 +-------+-------------+------------+ | |
384 | |
385 -------- | |
386 | |
387 | |
388 @HELP_COLUMNS@ | |
389 | |
390 | |
391 @HELP_END_STATEMENT@ | |
392 | |
393 | |
394 ]]></help> | |
395 <expand macro="citations" /> | |
396 </tool> |