comparison mutate.xml @ 0:452fd1614f09 draft default tip

"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author nml
date Tue, 19 May 2020 17:11:54 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:452fd1614f09
1 <tool id="csvtk_mutate" name="csvtk-mutate" version="@VERSION@+@GALAXY_VERSION@">
2 <description>new column by regular expression</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <expand macro="version_cmd" />
8 <command detect_errors="exit_code"><![CDATA[
9
10 ###################
11 ## Start Command ##
12 ###################
13 csvtk mutate --num-cpus "\${GALAXY_SLOTS:-1}"
14
15 ## Optional flags: each variable below is written onto the command line as-is.       ##
16 ## ignore_case and global_param.* presumably come from macros.xml (not visible here) ##
17 $ignore_case
18 $global_param.illegal_rows
19 $global_param.empty_rows
20 $global_param.header
21 $global_param.lazy_quotes
22
23 ## Set Tabular input/output flag if first input is tabular ##
24 #############################################################
25 #if $in_1.is_of_type("tabular"):
26 -t -T
27 #end if
28
29 ## Set input files ##
30 #####################
31 '$in_1'
32
33 ## Specify fields to use ##
34 ## in_text holds a column name or a column number, depending on column_text.select ##
35 -f '${column_text.in_text}'
36
37 ## New column name (-n) and match pattern (-p), then the remove / fill_na flags ##
38 ## The pattern is wrapped in (...) here, so users do not supply the outer group ##
39 -n '$column_name_input'
40 -p '($pattern_input)'
41 $remove
42 $fill_na
43
44 ## To output ##
45 ## The file "mutated" is what the output's from_work_dir picks up ##
46 > mutated
47
48 ]]></command>
49 <inputs>
50 <expand macro="singular_input"/>
51 <conditional name="column_text" > <!-- Chooses how the source column is identified; both branches expose the value as column_text.in_text, which the command passes to -f -->
52 <param type="select" name="select" label="Select column based on" argument="-f">
53 <option value="string">Column Name</option>
54 <option value="column">Column Number</option>
55 </param>
56 <when value="column"> <!-- Column picker tied to the input dataset in_1 via data_ref; single selection only -->
57 <param type="data_column" name="in_text"
58 data_ref="in_1"
59 multiple="False" force_select="True"
60 label="Analyze column number"
61 help="Select column to mutate data from"
62 />
63 </when>
64 <when value="string"> <!-- Free-text column name; required (optional is False) -->
65 <param type="text" name="in_text"
66 optional="False"
67 label="Analyze column name"
68 help="Specify column name to pull data out of with Regex"
69 />
70 </when>
71 </conditional>
72 <param type="text" name="pattern_input"
73 value=".+"
74 argument="-p"
75 label="Set regex search pattern"
76 optional="false"
77 help="Use regex to match input column information. Example: ^(.+)$ will match all characters.
78 Regex help can be found below. The ' character is invalid"
79 > <!-- Required regex, default ".+"; the command wraps it as -p '(value)'. The text_sanitizer macro is defined outside this file, presumably in macros.xml -->
80 <expand macro="text_sanitizer" />
81 </param>
82 <param type="text" name="column_name_input"
83 value="new_column"
84 argument="-n"
85 label="Set new column name"
86 optional="false"
87 help="Specify output column name for the matched data"> <!-- Required header for the created column, default "new_column"; passed to csvtk as -n 'value' -->
88 <expand macro="text_sanitizer" />
89 </param>
90 <param type="boolean" name="fill_na"
91 falsevalue="--na" truevalue=""
92 checked="true"
93 label="Fill Non-matches"
94 help="If NO, mutate will create a blank cell where no regex matches (--na). If YES, the full cell value is copied into the new column for rows where the regex does not match"
95 /> <!-- Mapping is inverted on purpose: checked (the default) emits nothing, unchecked emits the falsevalue flag that blanks non-matching cells -->
96 <param name="remove" type="boolean" checked="false"
97 falsevalue=""
98 truevalue="-R"
99 argument="-R"
100 label="Remove input column"
101 /> <!-- Emits -R on the command line when checked and nothing otherwise; unchecked by default -->
102 <expand macro="ignore_case" />
103 <expand macro="global_parameters" />
104 </inputs>
105 <outputs> <!-- One dataset, collected from the "mutated" file the command redirects to; its datatype follows the input in_1 (format_source) -->
106 <data format_source="in_1" name="mutated" from_work_dir="mutated"
107 label='${in_1.name} mutated by column ${column_text.in_text} with pattern ${pattern_input}' />
108 </outputs>
109 <tests>
110 <test> <!-- Case 1: pattern ".*" on column "1" of blood_type.tsv; remove and fill_na are left at their defaults -->
111 <param name="in_1" value="blood_type.tsv" />
112 <conditional name="column_text">
113 <param name="select" value="string" />
114 <param name="in_text" value="1" />
115 </conditional>
116 <param name="column_name_input" value="new_column" />
117 <param name="pattern_input" value=".*" />
118 <output name="mutated" file="mutated.tsv" ftype="tabular" />
119 </test>
120 <test> <!-- Case 2: same input and column, pattern "Darian", with remove=true (emits -R) and fill_na=false (emits the falsevalue flag) -->
121 <param name="in_1" value="blood_type.tsv" />
122 <conditional name="column_text">
123 <param name="select" value="string" />
124 <param name="in_text" value="1" />
125 </conditional>
126 <param name="column_name_input" value="new_column" />
127 <param name="pattern_input" value="Darian" />
128 <param name="remove" value="true" />
129 <param name="fill_na" value="false" />
130 <output name="mutated" file="mutate_removed.tsv" ftype="tabular" />
131 </test>
132 </tests>
133 <help><![CDATA[
134
135 Csvtk - Mutate Help
136 -------------------
137
138 Info
139 ####
140 Csvtk-mutate is a tool that uses Regular Expressions (Regex) to match data in the specified column. A
141 new column is then created from that matched data.
142
143 The regex input for this tool is structured such that your regular expression **does not** need to start with quotes or brackets. You can
144 start your expression with a `^` or just go straight into it.
145
146 For example:
147
148 ::
149
150 Using `.+` as an input would be used in the code as '(.+)'
151
152 Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)'
153
154 .. class:: warningmark
155
156 Single quotes are not allowed in text inputs!
157
158 -----
159
160
161 @HELP_INPUT_DATA@
162
163
164 Usage
165 #####
166 You can use csvtk to mutate a new column with data matched through regular expressions (regex).
167
168 A good regular expression cheat sheet that can help you build regular expressions can be found at:
169 https://regexr.com/
170
171 **Mutate Examples**
172
173 ::
174
175 Mutate with Filling empty columns when no regex match:
176
177 Suppose we have the following table and we want to pull out all of the exponents in the column "Colonies"
178 without pulling out any of the other characters to make a new column called "Exponent":
179
180 +-------------+----------+-----------+
181 | Colonies | Catalase | Coagulase |
182 +=============+==========+===========+
183 | 1x10^15 cfu | Yes | No |
184 +-------------+----------+-----------+
185 | 1x10^14 cfu | No | No |
186 +-------------+----------+-----------+
187 | 1x10^18 cfu | Yes | No |
188 +-------------+----------+-----------+
189 | 100 cfu | No | Yes |
190 +-------------+----------+-----------+
191
192 We would use a regular expression similar to (\^)(\d+) to get the "^" and the exponent into a new
193 column giving the following result:
194
195 +-------------+----------+-----------+----------+
196 | Colonies | Catalase | Coagulase | Exponent |
197 +=============+==========+===========+==========+
198 | 1x10^15 cfu | Yes | No | ^15 |
199 +-------------+----------+-----------+----------+
200 | 1x10^14 cfu | No | No | ^14 |
201 +-------------+----------+-----------+----------+
202 | 1x10^18 cfu | Yes | No | ^18 |
203 +-------------+----------+-----------+----------+
204 | 100 cfu | No | Yes | 100 cfu |
205 +-------------+----------+-----------+----------+
206
207 As you can see, we still have "100 cfu" at the bottom even though it doesn't contain a "^". Because we
208 did not specify that non-matches should be left blank, the original value "100 cfu" was copied over.
209
210 ----------------------------------------------------------------------------------------------------------------
211
212 Mutate leaving columns blank with no regex match:
213
214 Suppose we had the same chart as above:
215
216 +-------------+----------+-----------+
217 | Colonies | Catalase | Coagulase |
218 +=============+==========+===========+
219 | 1x10^15 cfu | Yes | No |
220 +-------------+----------+-----------+
221 | 1x10^14 cfu | No | No |
222 +-------------+----------+-----------+
223 | 1x10^18 cfu | Yes | No |
224 +-------------+----------+-----------+
225 | 100 cfu | No | Yes |
226 +-------------+----------+-----------+
227
228 Now, if we were to set "Fill Non-matches" to "No", then we would get the following table using the same inputs
229 other than the change to fill non-matches:
230
231 +-------------+----------+-----------+----------+
232 | Colonies | Catalase | Coagulase | Exponent |
233 +=============+==========+===========+==========+
234 | 1x10^15 cfu | Yes | No | ^15 |
235 +-------------+----------+-----------+----------+
236 | 1x10^14 cfu | No | No | ^14 |
237 +-------------+----------+-----------+----------+
238 | 1x10^18 cfu | Yes | No | ^18 |
239 +-------------+----------+-----------+----------+
240 | 100 cfu | No | Yes | |
241 +-------------+----------+-----------+----------+
242
243 ----
244
245 If you're having trouble with the regular expressions, please play around with a regex builder; there are many online,
246 and they are great resources to improve your regex statements or test them before use!
247
248 ----
249
250 @HELP_END_STATEMENT@
251
252
253 ]]></help>
254 <expand macro="citations" />
255 </tool>