Mercurial > repos > brinkmanlab > awkscript
comparison awkscript.xml @ 0:7222917a3948 draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/awkscript commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
author | brinkmanlab |
---|---|
date | Fri, 24 Jan 2020 17:30:15 -0500 |
parents | |
children | 87c5033654a6 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7222917a3948 |
---|---|
1 <tool id="awkscript" name="AWK Script" version="1.0"> | |
2 <description>Transform, modify, or generate data</description> | |
3 <edam_topics> | |
4 <edam_topic>topic_0769</edam_topic> | |
5 </edam_topics> | |
6 <edam_operations> | |
7 <edam_operation>operation_3429</edam_operation> | |
8 <edam_operation>operation_3778</edam_operation> | |
9 <edam_operation>operation_3434</edam_operation> | |
10 <edam_operation>operation_2409</edam_operation> | |
11 </edam_operations> | |
12 <requirements> | |
13 <requirement type="package" version="5.0.1">gawk</requirement> | |
14 </requirements> | |
15 <version_command>awk --version | head -n 1</version_command> | |
16 <command><![CDATA[ | |
17 #set $tool_input = 0 | |
18 env -i | |
19 #for env in $envs | |
20 '$env.name'='$env.val' | |
21 #end for | |
22 awk | |
23 --sandbox | |
24 -v FS=' ' | |
25 -v OFS=' ' | |
26 -f '$awk_script' | |
27 #for group in $infiles | |
28 tool_input=$tool_input | |
29 #set $tool_input+=1 | |
30 #if $group.input_type_selection.multiple | |
31 #set $inputs = $group.input_type_selection.infile | |
32 ## Allow capturing the collection id in the program; single quotes are stripped from identifiers below so they cannot break out of the shell quoting | |
33 ## tool_input_id='$inputs.element_identifier' FIXME collections don't expose their label to tools. | |
34 #else | |
35 #set $inputs = [$group.input_type_selection.infile] | |
36 #end if | |
37 #for file in $inputs | |
38 tool_input_id='${str($file.element_identifier).replace("'", "")}' | |
39 '$file' | |
40 #end for | |
41 #end for | |
42 > '$outfile' | |
43 ]]></command> | |
44 <configfiles> | |
45 <configfile name="awk_script">$code</configfile> | |
46 </configfiles> | |
47 <inputs> | |
48 <repeat name="infiles" title="Inputs"> | |
49 <conditional name="input_type_selection"> | |
50 <param name="multiple" type="boolean" checked="false" label="Accept multiple" /> | |
51 <when value="false"> | |
52 <param name="infile" format="txt" type="data" label="Single file to process" help="A separate execution will occur for each provided file"/> | |
53 </when> | |
54 <when value="true"> | |
55 <param name="infile" format="txt" type="data" multiple="true" label="File or collection to process" /> | |
56 </when> | |
57 </conditional> | |
58 </repeat> | |
59 <param name="code" type="text" area="true" size="5x35" label="AWK Program" help=""> | |
60 <sanitizer sanitize="false" /> | |
61 </param> | |
62 <repeat name="envs" title="Environment Variables"> | |
63 <param name="name" type="text" label="Name"> | |
64 <sanitizer> | |
65 <valid initial="string.printable"> | |
66 <remove value="'" /> | |
67 </valid> | |
68 </sanitizer> | |
69 </param> | |
70 <param name="val" type="text" label="Value"> | |
71 <sanitizer> | |
72 <valid initial="string.printable"> | |
73 <remove value="'" /> | |
74 </valid> | |
75 </sanitizer> | |
76 </param> | |
77 </repeat> | |
78 </inputs> | |
79 <outputs> | |
80 <data name="outfile" format="txt" /> | |
81 </outputs> | |
82 <tests> | |
83 <test> <!-- Smoke test with no input files: the program echoes an environment variable supplied through the "envs" repeat --> | |
84 <repeat name="envs"> | |
85 <param name="name" value="message" /> | |
86 <param name="val" value="Success!" /> | |
87 </repeat> | |
88 <param name="code" value="BEGIN { print ENVIRON[&quot;message&quot;]; }" /> | |
89 <output name="outfile"> | |
90 <assert_contents> | |
91 <has_text text="Success!" /> | |
92 </assert_contents> | |
93 </output> | |
94 </test> | |
95 </tests> | |
96 <help><![CDATA[ | |
97 GNU AWK Script | |
98 | |
99 Several kinds of tasks occur repeatedly when working with text files. | |
100 You might want to extract certain lines and discard the rest. Or you may need to make changes wherever | |
101 certain patterns appear, but leave the rest of the file alone. Such jobs are often easy with awk. | |
102 The awk utility interprets a special-purpose programming language that makes it easy to handle simple data-reformatting jobs. | |
103 | |
104 Each input can be set to single, causing any input collection to be mapped over the tool, running a new instance for each element. | |
105 Setting the input to multiple will pass the entire collection to a single instance of the tool, allowing aggregation of the input data. | |
106 A mixture of single and multiple inputs is allowed: the single inputs are mapped over and the multiple inputs are passed to each instance. | |
107 This tool is also able to operate with no inputs, generating data for whatever purpose. | |
108 | |
109 See the [GNU AWK User's Guide](https://www.gnu.org/software/gawk/manual/gawk.html) for more information. | |
110 | |
111 A variable 'tool_input' is set to the zero-based index of the current input, in the order the inputs are listed. | |
112 You can combine this with ARGIND to determine which file you are currently operating on and its position in any possible input collection. | |
113 A variable 'tool_input_id' is also set for each file, specifying the current input's dataset name or, within a collection, its element identifier. | |
114 Beware that ARGIND increments by 3 between inputs, as one argument is consumed setting tool_input and another setting tool_input_id; between files of the same multiple input it increments by 2, since only tool_input_id is set again. | |
115 | |
116 The environment inputs allow you to generalise your scripts, specifying constants with the tool invocation, or allow attaching simple workflow inputs. | |
117 Environment variables are accessible via [ENVIRON](https://www.gnu.org/software/gawk/manual/gawk.html#index-environment-variables_002c-in-ENVIRON-array). | |
118 | |
119 Due to a limitation in Galaxy, all output is assigned the 'txt' type. This can be changed in the tool settings in a workflow or by modifying the dataset after invocation. | |
120 | |
121 gawk is run with the [sandbox](https://www.gnu.org/software/gawk/manual/gawk.html#index-sandbox-mode) argument, disabling some functionality. | |
122 ]]></help> | |
123 <citations> | |
124 <citation type="doi">10.5281/zenodo.3364789</citation> | |
125 </citations> | |
126 </tool> |