Mercurial > repos > brinkmanlab > awkscript
changeset 0:7222917a3948 draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/awkscript commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
author | brinkmanlab |
---|---|
date | Fri, 24 Jan 2020 17:30:15 -0500 |
parents | |
children | 87c5033654a6 |
files | awkscript.xml |
diffstat | 1 files changed, 126 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/awkscript.xml Fri Jan 24 17:30:15 2020 -0500
@@ -0,0 +1,126 @@
+<tool id="awkscript" name="AWK Script" version="1.0"> <!-- Galaxy wrapper: runs a user-supplied awk program over zero or more input datasets -->
+    <description>Transform, modify, or generate data</description>
+    <edam_topics>
+        <edam_topic>topic_0769</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_3429</edam_operation>
+        <edam_operation>operation_3778</edam_operation>
+        <edam_operation>operation_3434</edam_operation>
+        <edam_operation>operation_2409</edam_operation>
+    </edam_operations>
+    <requirements>
+        <requirement type="package" version="5.0.1">gawk</requirement>
+    </requirements>
+    <version_command>awk --version | head -n 1</version_command>
+    <command><![CDATA[
+        #set $tool_input = 0
+        env -i ## start from an empty environment so ENVIRON only holds the variables declared in the "envs" repeat
+        #for env in $envs
+            '$env.name'='$env.val'
+        #end for
+        "\$(command -v awk)" ## resolved by the shell before env runs: env -i also drops PATH, so a bare "awk" would not find the packaged gawk
+        --sandbox
+        -v FS=' ' ## NOTE(review): separator shows as a single space in this view; confirm whether a literal tab was intended
+        -v OFS=' ' ## NOTE(review): same question as FS above
+        -f '$awk_script'
+        #for group in $infiles
+            tool_input=$tool_input ## awk command-line assignment giving the 0-based index of this input group
+            #set $tool_input+=1
+            #if $group.input_type_selection.multiple
+                #set $inputs = $group.input_type_selection.infile
+                ## Allow capturing the collection id in the program
+                ## tool_input_id='$inputs.element_identifier' FIXME collections don't expose their label to tools.
+            #else
+                #set $inputs = [$group.input_type_selection.infile]
+            #end if
+            #for file in $inputs
+                tool_input_id='$file.element_identifier' ## NOTE(review): no sanitizer is visible for element_identifier; confirm it cannot contain a single quote
+                '$file'
+            #end for
+        #end for
+        > '$outfile'
+    ]]></command>
+    <configfiles>
+        <configfile name="awk_script">$code</configfile> <!-- the program text is written to a file and passed to awk with -f -->
+    </configfiles>
+    <inputs>
+        <repeat name="infiles" title="Inputs">
+            <conditional name="input_type_selection">
+                <param name="multiple" type="boolean" checked="false" label="Accept multiple" />
+                <when value="false">
+                    <param name="infile" format="txt" type="data" label="Single file to process" help="A separate execution will occur for each provided file"/>
+                </when>
+                <when value="true">
+                    <param name="infile" format="txt" type="data" multiple="true" label="File or collection to process" />
+                </when>
+            </conditional>
+        </repeat>
+        <param name="code" type="text" area="true" size="5x35" label="AWK Program" help="">
+            <sanitizer sanitize="false" /> <!-- the awk program must reach the configfile unmodified -->
+        </param>
+        <repeat name="envs" title="Environment Variables">
+            <param name="name" type="text" label="Name">
+                <sanitizer>
+                    <valid initial="string.printable">
+                        <remove value="'" /> <!-- the command single-quotes this value, so a quote would end the shell string -->
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="val" type="text" label="Value">
+                <sanitizer>
+                    <valid initial="string.printable">
+                        <remove value="'" /> <!-- same reason as for the name parameter -->
+                    </valid>
+                </sanitizer>
+            </param>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="txt" />
+    </outputs>
+    <tests>
+        <test> <!-- no input files: checks that a declared environment variable is visible through ENVIRON -->
+            <repeat name="envs">
+                <param name="name" value="message" />
+                <param name="val" value="Success!" />
+            </repeat>
+            <param name="code" value="BEGIN { print ENVIRON[&quot;message&quot;]; }" />
+            <output name="outfile">
+                <assert_contents>
+                    <has_text text="Success!" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        GNU AWK Script
+
+        Several kinds of tasks occur repeatedly when working with text files.
+        You might want to extract certain lines and discard the rest. Or you may need to make changes wherever
+        certain patterns appear, but leave the rest of the file alone. Such jobs are often easy with awk.
+        The awk utility interprets a special-purpose programming language that makes it easy to handle simple data-reformatting jobs.
+
+        Each input can be set to single, causing any input collections to map over the tool running, a new instance for each.
+        Setting the input to multiple will pass the entire collection to a single instance of the tool, allowing aggregation of the input data.
+        A mixture of single and multiple inputs is allowed, the single inputs being mapped over and the multiple inputs passed to each instance.
+        This tool is also able to operate with no inputs, generating data for whatever purpose.
+
+        See `GNU AWK Users Guide <https://www.gnu.org/software/gawk/manual/gawk.html>`_ for more information.
+
+        A variable 'tool_input' will be set to the index of the inputs, in order.
+        You can combine this with ARGIND to determine which file you are currently operating on and its position in any possible input collection.
+        A variable 'tool_input_id' is also set specifying the current inputs dataset name or collection id.
+        Beware that ARGIND will increment 3 between inputs as one is consumed setting tool_input and another setting tool_input_id.
+
+        The environment inputs allow you to generalise your scripts, specifying constants with the tool invocation, or allow attaching simple workflow inputs.
+        Environment variables are accessible via `ENVIRON <https://www.gnu.org/software/gawk/manual/gawk.html#index-environment-variables_002c-in-ENVIRON-array>`_.
+
+        Due to a limitation in Galaxy, all output is assigned the 'txt' type. This can be changed in the tool settings in a workflow or modifying the dataset after invocation.
+
+        gawk is run with the `sandbox <https://www.gnu.org/software/gawk/manual/gawk.html#index-sandbox-mode>`_ argument, disabling some functionality.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.3364789</citation>
+    </citations>
+</tool>