Mercurial > repos > nml > csvtk_mutate

diff mutate.xml @ 0:452fd1614f09 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author: nml
date: Tue, 19 May 2020 17:11:54 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutate.xml	Tue May 19 17:11:54 2020 -0400
@@ -0,0 +1,255 @@
+<tool id="csvtk_mutate" name="csvtk-mutate" version="@VERSION@+@GALAXY_VERSION@">
+    <description>new column by regular expression</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_cmd" />
+    <command detect_errors="exit_code"><![CDATA[
+
+###################
+## Start Command ##
+###################
+csvtk mutate --num-cpus "\${GALAXY_SLOTS:-1}"
+
+    ## Add additional flags as specified ##
+    #######################################
+    $ignore_case
+    $global_param.illegal_rows
+    $global_param.empty_rows
+    $global_param.header
+    $global_param.lazy_quotes
+
+    ## Set Tabular input/output flag if first input is tabular ##
+    #############################################################
+    #if $in_1.is_of_type("tabular"):
+        -t -T
+    #end if
+
+    ## Set input files ##
+    #####################
+    '$in_1'
+
+    ## Specify fields to use ##
+    ###########################
+    -f '${column_text.in_text}'
+    
+    ## Column Name and pattern ##
+    ############################
+    -n '$column_name_input'
+    -p '($pattern_input)'
+    $remove
+    $fill_na
+    
+    ## To output ##
+    ###############
+    > mutated
+
+    ]]></command>
+    <inputs>
+        <expand macro="singular_input"/>
+        <conditional name="column_text" >
+            <param type="select" name="select" label="Select column based on" argument="-f">
+                <option value="string">Column Name</option>
+                <option value="column">Column Number</option>
+            </param>
+            <when value="column">
+                <param type="data_column" name="in_text"
+                        data_ref="in_1"
+                        multiple="False" force_select="True"
+                        label="Analyze column number"
+                        help="Select column to mutate data from"
+                        />
+            </when>
+            <when value="string">
+                <param type="text" name="in_text"
+                    optional="False"
+                    label="Analyze column name"
+                    help="Specify column name to pull data out of with Regex"
+                    />
+            </when>
+        </conditional>
+        <param type="text" name="pattern_input"
+            value=".+"
+            argument="-p"
+            label="Set regex search pattern"
+            optional="false"
+            help="Use regex to match input column information. Example: ^(.+)$ will match all characters.
+            Regex help can be found below. The ' character is invalid"
+            >
+            <expand macro="text_sanitizer" />
+        </param>
+        <param type="text" name="column_name_input"
+            value="new_column"
+            argument="-n"
+            label="Set new column name"
+            optional="false"
+            help="Specify output column name for the matched data">
+            <expand macro="text_sanitizer" />
+        </param>
+        <param type="boolean" name="fill_na"
+            falsevalue="--na" truevalue=""
+            checked="true"
+            label="Fill Non-matches"
+            help="If NO, mutate will create a blank cell where no regex matches (--na). If YES the full cell value will be copied of non-matched columns"
+        />
+        <param name="remove" type="boolean" checked="false"
+            falsevalue=""
+            truevalue="-R"
+            argument="-R"
+            label="Remove input column"
+        />
+        <expand macro="ignore_case" />
+        <expand macro="global_parameters" />
+    </inputs>
+    <outputs>
+        <data format_source="in_1" name="mutated" from_work_dir="mutated" 
+        label='${in_1.name} mutated by column ${column_text.in_text} with pattern ${pattern_input}' />
+    </outputs>
+    <tests>
+        <test>
+            <param name="in_1" value="blood_type.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="1" />
+            </conditional>
+            <param name="column_name_input" value="new_column" />
+            <param name="pattern_input" value=".*" />
+            <output name="mutated" file="mutated.tsv" ftype="tabular" />
+        </test>
+        <test>
+            <param name="in_1" value="blood_type.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="1" />
+            </conditional>
+            <param name="column_name_input" value="new_column" />
+            <param name="pattern_input" value="Darian" />
+            <param name="remove" value="true" />
+            <param name="fill_na" value="false" />
+            <output name="mutated" file="mutate_removed.tsv" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+    
+Csvtk - Mutate Help
+-------------------
+
+Info
+####
+Csvtk-mutate is a tool that uses Regular Expressions (Regex) to match data in the specified column. Using this matched data, a
+new column is created using that matched data.
+
+The regex input for this tool is structured such that your regular expression **does not** need to start with with quotes or brackets. You can
+start your expression with a `^` or just go straight into it
+
+For example:
+
+::
+
+    Using `.+` as an input would be used in the code as '(.+)'
+
+    Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)'
+
+.. class:: warningmark
+
+    Single quotes are not allowed in text inputs!
+
+-----
+
+
+@HELP_INPUT_DATA@
+
+
+Usage
+#####
+You can use csvtk to mutate a new column with data matched through regular expressions(regex).
+
+A good Regular expressions cheat sheet that you can use to help yourself build regular expressions can be found at:
+https://regexr.com/
+
+**Mutate Examples**
+
+::
+
+    Mutate with Filling empty columns when no regex match:
+
+    Suppose we have the following table and we want to pull out all of the exponent in the column "Colonies"
+    without pulling out any of the other characters to make a new column called "Exponent":
+    
+    +-------------+----------+-----------+
+    | Colonies    | Catalase | Coagulase |
+    +=============+==========+===========+
+    | 1x10^15 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 1x10^14 cfu | No       | No        |
+    +-------------+----------+-----------+
+    | 1x10^18 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 100 cfu     | No       | Yes       |
+    +-------------+----------+-----------+ 
+
+    We would use a Regex expression similar to (\^)(\d+) to get the "^" and the exponent into a new
+    column giving the following result:
+
+    +-------------+----------+-----------+----------+
+    | Colonies    | Catalase | Coagulase | Exponent |
+    +=============+==========+===========+==========+
+    | 1x10^15 cfu | Yes      | No        | ^15      |
+    +-------------+----------+-----------+----------+
+    | 1x10^14 cfu | No       | No        | ^14      |
+    +-------------+----------+-----------+----------+
+    | 1x10^18 cfu | Yes      | No        | ^18      |
+    +-------------+----------+-----------+----------+
+    | 100 cfu     | No       | Yes       | 100 cfu  | 
+    +-------------+----------+-----------+----------+
+
+    As you can see we still have "100 cfu" at the bottom even though it doesn't contain a "^" as we
+    did not specify that we wanted to make non-matches blank, we copied over the 100 cfu.
+
+    ----------------------------------------------------------------------------------------------------------------
+
+    Mutate leaving columns blank with no regex match:
+
+    Suppose we had the same chart as above:
+
+    +-------------+----------+-----------+
+    | Colonies    | Catalase | Coagulase |
+    +=============+==========+===========+
+    | 1x10^15 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 1x10^14 cfu | No       | No        |
+    +-------------+----------+-----------+
+    | 1x10^18 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 100 cfu     | No       | Yes       |
+    +-------------+----------+-----------+ 
+
+    Now, if we were to set "Fill Non-matches" to "No", then we would get the following table using the same inputs
+    other than the change to fill non-matches:
+
+    +-------------+----------+-----------+----------+
+    | Colonies    | Catalase | Coagulase | Exponent |
+    +=============+==========+===========+==========+
+    | 1x10^15 cfu | Yes      | No        | ^15      |
+    +-------------+----------+-----------+----------+
+    | 1x10^14 cfu | No       | No        | ^14      |
+    +-------------+----------+-----------+----------+
+    | 1x10^18 cfu | Yes      | No        | ^18      |
+    +-------------+----------+-----------+----------+
+    | 100 cfu     | No       | Yes       |          | 
+    +-------------+----------+-----------+----------+
+
+----
+
+If your having trouble with the regular expressions, please play around with a builder, there are many others online 
+and they are great resources to improve your regex statements or test them before use!
+
+----
+
+@HELP_END_STATEMENT@
+
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file