changeset 0:7222917a3948 draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/awkscript commit 33b02e08cbc8f76fb4b8537f8c968393f85a1b5e"
author brinkmanlab
date Fri, 24 Jan 2020 17:30:15 -0500
parents
children 87c5033654a6
files awkscript.xml
diffstat 1 files changed, 126 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/awkscript.xml	Fri Jan 24 17:30:15 2020 -0500
@@ -0,0 +1,126 @@
+<tool id="awkscript" name="AWK Script" version="1.0">
+    <description>Transform, modify, or generate data</description>
+    <edam_topics>
+        <edam_topic>topic_0769</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_3429</edam_operation>
+        <edam_operation>operation_3778</edam_operation>
+        <edam_operation>operation_3434</edam_operation>
+        <edam_operation>operation_2409</edam_operation>
+    </edam_operations>
+    <requirements>
+        <requirement type="package" version="5.0.1">gawk</requirement>
+    </requirements>
+    <version_command>awk --version | head -n 1</version_command>
+    <command><![CDATA[
+        #set $tool_input = 0
+        env -i
+        #for env in $envs
+            '$env.name'='$env.val'
+        #end for
+        awk
+            --sandbox
+            -v FS='	'
+            -v OFS='	'
+            -f '$awk_script'
+            #for group in $infiles
+                tool_input=$tool_input
+                #set $tool_input+=1
+                #if $group.input_type_selection.multiple
+                    #set $inputs = $group.input_type_selection.infile
+                    ## Allow capturing the collection id in the program
+                    ## tool_input_id='$inputs.element_identifier'  FIXME collections dont expose their label to tools.
+                #else
+                    #set $inputs = [$group.input_type_selection.infile]
+                #end if
+                #for file in $inputs
+                    tool_input_id='$file.element_identifier'
+                    '$file'
+                #end for
+            #end for
+        > '$outfile'
+    ]]></command>
+    <configfiles>
+        <configfile name="awk_script">$code</configfile>
+    </configfiles>
+    <inputs>
+        <repeat name="infiles" title="Inputs">
+            <conditional name="input_type_selection">
+                <param name="multiple" type="boolean" checked="false" label="Accept multiple" />
+                <when value="false">
+                    <param name="infile" format="txt" type="data" label="Single file to process" help="A separate execution will occur for each provided file"/>
+                </when>
+                <when value="true">
+                    <param name="infile" format="txt" type="data" multiple="true" label="File or collection to process" />
+                </when>
+            </conditional>
+        </repeat>
+        <param name="code" type="text" area="true" size="5x35" label="AWK Program" help="">
+            <sanitizer sanitize="false" />
+        </param>
+        <repeat name="envs" title="Environment Variables">
+            <param name="name" type="text" label="Name">
+                <sanitizer>
+                    <valid initial="string.printable">
+                        <remove value="&apos;" />
+                    </valid>
+                </sanitizer>
+            </param>
+            <param name="val" type="text" label="Value">
+                <sanitizer>
+                    <valid initial="string.printable">
+                        <remove value="&apos;" />
+                    </valid>
+                </sanitizer>
+            </param>
+        </repeat>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="txt" />
+    </outputs>
+    <tests>
+        <test>
+            <repeat name="envs">
+                <param name="name" value="message" />
+                <param name="val" value="Success!" />
+            </repeat>
+            <param name="code" value="BEGIN { print ENVIRON[&quot;message&quot;]; }" />
+	        <output name="outfile">
+	            <assert_contents>
+	                <has_text text="Success!" />
+	            </assert_contents>
+	        </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+    GNU AWK Script
+
+    Several kinds of tasks occur repeatedly when working with text files. 
+    You might want to extract certain lines and discard the rest. Or you may need to make changes wherever 
+    certain patterns appear, but leave the rest of the file alone. Such jobs are often easy with awk. 
+    The awk utility interprets a special-purpose programming language that makes it easy to handle simple data-reformatting jobs. 
+    
+    Each input can be set to single, causing any input collections to map over the tool running, a new instance for each.
+    Setting the input to multiple will pass the entire collection to a single instance of the tool, allowing aggregation of the input data.
+    A mixture of single and multiple inputs is allowed, the single inputs being mapped over and the multiple inputs passed to each instance.
+    This tool is also able to operate with no inputs, generating data for whatever purpose.
+
+    See [GNU AWK Users Guide](https://www.gnu.org/software/gawk/manual/gawk.html) for more information.
+
+    A variable 'tool_input' will be set to the index of the inputs, in order.
+    You can combine this with ARGIND to determine which file you are currently operating on and its position in any possible input collection.
+    A variable 'tool_input_id' is also set specifying the current inputs dataset name or collection id.
+    Beware that ARGIND will increment 3 between inputs as one is consumed setting tool_input and another setting tool_input_id.
+
+    The environment inputs allow you to generalise your scripts, specifying constants with the tool invocation, or allow attaching simple workflow inputs.
+    Environment variables are accessible via [ENVIRON](https://www.gnu.org/software/gawk/manual/gawk.html#index-environment-variables_002c-in-ENVIRON-array).
+
+    Due to a limitation in Galaxy, all output is assigned the 'txt' type. This can be changed in the tool settings in a workflow or modifying the dataset after invocation.
+
+    gawk is run with the [sandbox](https://www.gnu.org/software/gawk/manual/gawk.html#index-sandbox-mode) argument, disabling some functionality.
+    ]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.3364789</citation>
+    </citations>
+</tool>