Mercurial > repos > nml > csvtk_mutate
diff mutate.xml @ 0:452fd1614f09 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author | nml |
---|---|
date | Tue, 19 May 2020 17:11:54 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mutate.xml Tue May 19 17:11:54 2020 -0400
@@ -0,0 +1,260 @@
+<tool id="csvtk_mutate" name="csvtk-mutate" version="@VERSION@+@GALAXY_VERSION@">
+    <description>new column by regular expression</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_cmd" />
+    <!-- Builds the csvtk mutate call from the parameters below; the user pattern is passed to -p wrapped in parentheses, and stdout goes to the file "mutated" that the output collects via from_work_dir. -->
+    <command detect_errors="exit_code"><![CDATA[
+
+###################
+## Start Command ##
+###################
+csvtk mutate --num-cpus "\${GALAXY_SLOTS:-1}"
+
+    ## Add additional flags as specified ##
+    #######################################
+    $ignore_case
+    $global_param.illegal_rows
+    $global_param.empty_rows
+    $global_param.header
+    $global_param.lazy_quotes
+
+    ## Set Tabular input/output flag if first input is tabular ##
+    #############################################################
+    #if $in_1.is_of_type("tabular"):
+        -t -T
+    #end if
+
+    ## Set input files ##
+    #####################
+    '$in_1'
+
+    ## Specify fields to use ##
+    ###########################
+    -f '${column_text.in_text}'
+
+    ## Column Name and pattern ##
+    #############################
+    -n '$column_name_input'
+    -p '($pattern_input)'
+    $remove
+    $fill_na
+
+    ## To output ##
+    ###############
+    > mutated
+
+    ]]></command>
+    <inputs>
+        <expand macro="singular_input"/>
+        <!-- Source column, given either as a header name (free text) or as a column number of in_1; both branches store it in in_text. -->
+        <conditional name="column_text">
+            <param type="select" name="select" label="Select column based on" argument="-f">
+                <option value="string">Column Name</option>
+                <option value="column">Column Number</option>
+            </param>
+            <when value="column">
+                <param type="data_column" name="in_text"
+                    data_ref="in_1"
+                    multiple="false" force_select="true"
+                    label="Analyze column number"
+                    help="Select column to mutate data from"
+                />
+            </when>
+            <when value="string">
+                <param type="text" name="in_text"
+                    optional="false"
+                    label="Analyze column name"
+                    help="Specify column name to pull data out of with Regex"
+                />
+            </when>
+        </conditional>
+        <param type="text" name="pattern_input"
+            value=".+"
+            argument="-p"
+            label="Set regex search pattern"
+            optional="false"
+            help="Use regex to match input column information. Example: ^(.+)$ will match all characters.
+            Regex help can be found below. The ' character is invalid"
+        >
+            <expand macro="text_sanitizer" />
+        </param>
+        <param type="text" name="column_name_input"
+            value="new_column"
+            argument="-n"
+            label="Set new column name"
+            optional="false"
+            help="Specify output column name for the matched data">
+            <expand macro="text_sanitizer" />
+        </param>
+        <!-- Inverted mapping: checked emits nothing, unchecked emits the na flag (see falsevalue). -->
+        <param type="boolean" name="fill_na"
+            falsevalue="--na" truevalue=""
+            checked="true"
+            label="Fill Non-matches"
+            help="If NO, mutate will create a blank cell where the regex does not match (--na). If YES, the full cell value is copied into the new column for rows where the regex does not match"
+        />
+        <param name="remove" type="boolean" checked="false"
+            falsevalue=""
+            truevalue="-R"
+            argument="-R"
+            label="Remove input column"
+        />
+        <expand macro="ignore_case" />
+        <expand macro="global_parameters" />
+    </inputs>
+    <outputs>
+        <data format_source="in_1" name="mutated" from_work_dir="mutated"
+            label='${in_1.name} mutated by column ${column_text.in_text} with pattern ${pattern_input}' />
+    </outputs>
+    <tests>
+        <!-- Default behaviour: pattern .* on field 1, non-matches filled, input column kept. -->
+        <test>
+            <param name="in_1" value="blood_type.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="1" />
+            </conditional>
+            <param name="column_name_input" value="new_column" />
+            <param name="pattern_input" value=".*" />
+            <output name="mutated" file="mutated.tsv" ftype="tabular" />
+        </test>
+        <!-- Literal pattern with the input column removed and non-matches left blank. -->
+        <test>
+            <param name="in_1" value="blood_type.tsv" />
+            <conditional name="column_text">
+                <param name="select" value="string" />
+                <param name="in_text" value="1" />
+            </conditional>
+            <param name="column_name_input" value="new_column" />
+            <param name="pattern_input" value="Darian" />
+            <param name="remove" value="true" />
+            <param name="fill_na" value="false" />
+            <output name="mutated" file="mutate_removed.tsv" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Csvtk - Mutate Help
+-------------------
+
+Info
+####
+Csvtk-mutate is a tool that uses Regular Expressions (Regex) to match data in the specified column. The matched data is
+then used to create a new column.
+
+The regex input for this tool is structured such that your regular expression **does not** need to start with quotes or brackets. You can
+start your expression with a `^` or just go straight into it.
+
+For example:
+
+::
+
+    Using `.+` as an input would be used in the code as '(.+)'
+
+    Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)'
+
+.. class:: warningmark
+
+    Single quotes are not allowed in text inputs!
+
+-----
+
+
+@HELP_INPUT_DATA@
+
+
+Usage
+#####
+You can use csvtk to mutate a new column with data matched through regular expressions (regex).
+
+A good regular expression cheat sheet to help you build regular expressions can be found at:
+https://regexr.com/
+
+**Mutate Examples**
+
+::
+
+    Mutate, filling cells with the original value when there is no regex match:
+
+    Suppose we have the following table and we want to pull out all of the exponents in the column "Colonies"
+    without pulling out any of the other characters to make a new column called "Exponent":
+
+    +-------------+----------+-----------+
+    | Colonies    | Catalase | Coagulase |
+    +=============+==========+===========+
+    | 1x10^15 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 1x10^14 cfu | No       | No        |
+    +-------------+----------+-----------+
+    | 1x10^18 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 100 cfu     | No       | Yes       |
+    +-------------+----------+-----------+
+
+    We would use a regular expression similar to (\^)(\d+) to get the "^" and the exponent into a new
+    column giving the following result:
+
+    +-------------+----------+-----------+----------+
+    | Colonies    | Catalase | Coagulase | Exponent |
+    +=============+==========+===========+==========+
+    | 1x10^15 cfu | Yes      | No        | ^15      |
+    +-------------+----------+-----------+----------+
+    | 1x10^14 cfu | No       | No        | ^14      |
+    +-------------+----------+-----------+----------+
+    | 1x10^18 cfu | Yes      | No        | ^18      |
+    +-------------+----------+-----------+----------+
+    | 100 cfu     | No       | Yes       | 100 cfu  |
+    +-------------+----------+-----------+----------+
+
+    As you can see, we still have "100 cfu" at the bottom even though it doesn't contain a "^": because we
+    did not specify that we wanted to make non-matches blank, the full value "100 cfu" was copied over.
+
+    ----------------------------------------------------------------------------------------------------------------
+
+    Mutate, leaving cells blank when there is no regex match:
+
+    Suppose we had the same chart as above:
+
+    +-------------+----------+-----------+
+    | Colonies    | Catalase | Coagulase |
+    +=============+==========+===========+
+    | 1x10^15 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 1x10^14 cfu | No       | No        |
+    +-------------+----------+-----------+
+    | 1x10^18 cfu | Yes      | No        |
+    +-------------+----------+-----------+
+    | 100 cfu     | No       | Yes       |
+    +-------------+----------+-----------+
+
+    Now, if we were to set "Fill Non-matches" to "No", then we would get the following table using the same inputs
+    other than the change to fill non-matches:
+
+    +-------------+----------+-----------+----------+
+    | Colonies    | Catalase | Coagulase | Exponent |
+    +=============+==========+===========+==========+
+    | 1x10^15 cfu | Yes      | No        | ^15      |
+    +-------------+----------+-----------+----------+
+    | 1x10^14 cfu | No       | No        | ^14      |
+    +-------------+----------+-----------+----------+
+    | 1x10^18 cfu | Yes      | No        | ^18      |
+    +-------------+----------+-----------+----------+
+    | 100 cfu     | No       | Yes       |          |
+    +-------------+----------+-----------+----------+
+
+----
+
+If you're having trouble with the regular expressions, please play around with a regex builder; there are many available online,
+and they are great resources to improve your regex statements or test them before use!
+
+----
+
+@HELP_END_STATEMENT@
+
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file