Mercurial > repos > brinkmanlab > awkscript
view awkscript.xml @ 3:ceac6ffb3865 draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/awkscript commit 00868930fc05f48a702fe8357b58f004cf899238"
author | brinkmanlab |
---|---|
date | Wed, 08 Jul 2020 18:03:50 -0400 |
parents | c6c81e6e1ca1 |
children | 7966a43dbc9e |
line wrap: on
line source
<tool id="awkscript" name="AWK Script" version="1.0">
    <!-- Galaxy tool wrapper: runs a user-supplied gawk program (the "code" parameter,
         written to a config file) over zero or more input datasets and captures
         standard output in a single "txt" dataset. -->
    <description>Transform, modify, or generate data</description>
    <edam_topics>
        <edam_topic>topic_0769</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3429</edam_operation>
        <edam_operation>operation_3778</edam_operation>
        <edam_operation>operation_3434</edam_operation>
        <edam_operation>operation_2409</edam_operation>
    </edam_operations>
    <requirements>
        <requirement type="package" version="5.0.1">gawk</requirement>
    </requirements>
    <version_command>awk --version | head -n 1</version_command>
    <!-- Command template (Cheetah). Shape of the generated command line:
           env -i NAME=VALUE... AWK [options] -f SCRIPT [tool_input=N [tool_input_id=ID FILE]...]... > OUTPUT
         * "env -i" starts awk with only the user-declared environment variables.
         * The awk path is resolved by the calling shell through command substitution,
           i.e. before "env -i" clears the environment.
         * tool_input / tool_input_id are awk command-line variable assignments placed
           in front of the file operands they describe.
         * Every interpolated value is wrapped in single quotes, so single quotes are
           removed from dataset identifiers here, as the sanitizers already do for the
           environment variable names and values.
         NOTE(review): FS and OFS are given as a single space in this listing. The listing
         may have collapsed whitespace; confirm against the repository whether literal
         tab characters were intended. -->
    <command><![CDATA[
        #set $tool_input = 0
        env -i
        #for env in $envs
            '$env.name'='$env.val'
        #end for
        \$(which awk) --sandbox -v FS=' ' -v OFS=' ' -f '$awk_script'
        #for group in $infiles
            tool_input=$tool_input
            #set $tool_input+=1
            #if $group.input_type_selection.multiple
                #set $inputs = $group.input_type_selection.infile
                ## Allow capturing the collection id in the program
                ## tool_input_id='$inputs.element_identifier' FIXME collections dont expose their label to tools.
            #else
                #set $inputs = [$group.input_type_selection.infile]
            #end if
            #for file in $inputs
                ## Dataset names are user controlled: drop single quotes so the value
                ## cannot terminate the surrounding shell quoting.
                #set $input_id = str($file.element_identifier).replace("'", "")
                tool_input_id='$input_id' '$file'
            #end for
        #end for
        > '$outfile'
    ]]></command>
    <configfiles>
        <!-- The AWK program is passed through a file (-f) rather than on the command line. -->
        <configfile name="awk_script">$code</configfile>
    </configfiles>
    <inputs>
        <repeat name="infiles" title="Inputs">
            <conditional name="input_type_selection">
                <param name="multiple" type="boolean" checked="false" label="Accept multiple" />
                <when value="false">
                    <param name="infile" format="txt" type="data" label="Single file to process" help="A separate execution will occur for each provided file"/>
                </when>
                <when value="true">
                    <param name="infile" format="txt" type="data" multiple="true" label="File or collection to process" />
                </when>
            </conditional>
        </repeat>
        <param name="code" type="text" area="true" size="5x35" label="AWK Program" help="">
            <!-- The program text must reach the config file unmodified. -->
            <sanitizer sanitize="false" />
        </param>
        <repeat name="envs" title="Environment Variables">
            <!-- Names and values are interpolated inside single quotes in the command,
                 so the single quote character is removed from both. -->
            <param name="name" type="text" label="Name">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'" />
                    </valid>
                </sanitizer>
            </param>
            <param name="val" type="text" label="Value">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'" />
                    </valid>
                </sanitizer>
            </param>
        </repeat>
    </inputs>
    <outputs>
        <data name="outfile" format="txt" />
    </outputs>
    <tests>
        <!-- No inputs: the program only echoes an environment variable supplied through "envs". -->
        <test>
            <repeat name="envs">
                <param name="name" value="message" />
                <param name="val" value="Success!" />
            </repeat>
            <param name="code" value="BEGIN { print ENVIRON[&quot;message&quot;]; }" />
            <output name="outfile">
                <assert_contents>
                    <has_text text="Success!" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
GNU AWK Script

Several kinds of tasks occur repeatedly when working with text files. You might want to extract certain lines and discard the rest. Or you may need to make changes wherever certain patterns appear, but leave the rest of the file alone. Such jobs are often easy with awk.
The awk utility interprets a special-purpose programming language that makes it easy to handle simple data-reformatting jobs.

Each input can be set to single, causing any input collections to map over the tool running, a new instance for each.
Setting the input to multiple will pass the entire collection to a single instance of the tool, allowing aggregation of the input data.
A mixture of single and multiple inputs is allowed, the single inputs being mapped over and the multiple inputs passed to each instance.
This tool is also able to operate with no inputs, generating data for whatever purpose.

See `GNU AWK Users Guide <https://www.gnu.org/software/gawk/manual/gawk.html>`_ for more information.

A variable 'tool_input' will be set to the index of the inputs, in order. You can combine this with ARGIND to determine which file you are currently operating on and its position in any possible input collection.
A variable 'tool_input_id' is also set specifying the current inputs dataset name or collection id. Single quotes are removed from this value.
Beware that ARGIND will increment 3 between inputs as one is consumed setting tool_input and another setting tool_input_id.

The environment inputs allow you to generalise your scripts, specifying constants with the tool invocation, or allow attaching simple workflow inputs.
Environment variables are accessible via `ENVIRON <https://www.gnu.org/software/gawk/manual/gawk.html#index-environment-variables_002c-in-ENVIRON-array>`_.

Due to a limitation in Galaxy, all output is assigned the 'txt' type. This can be changed in the tool settings in a workflow or modifying the dataset after invocation.

gawk is run with the `sandbox <https://www.gnu.org/software/gawk/manual/gawk.html#index-sandbox-mode>`_ argument, disabling some functionality.
    ]]></help>
    <citations>
        <citation type="doi">10.5281/zenodo.3364789</citation>
    </citations>
</tool>