Mercurial > repos > nml > csvtk_mutate
comparison mutate.xml @ 0:452fd1614f09 draft default tip
"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author | nml |
---|---|
date | Tue, 19 May 2020 17:11:54 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:452fd1614f09 |
---|---|
1 <tool id="csvtk_mutate" name="csvtk-mutate" version="@VERSION@+@GALAXY_VERSION@"> | |
2 <description>new column by regular expression</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements" /> | |
7 <expand macro="version_cmd" /> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 | |
10 ################### | |
11 ## Start Command ## | |
12 ################### | |
13 csvtk mutate --num-cpus "\${GALAXY_SLOTS:-1}" | |
14 | |
15 ## Add additional flags as specified ## | |
16 ####################################### | |
17 $ignore_case | |
18 $global_param.illegal_rows | |
19 $global_param.empty_rows | |
20 $global_param.header | |
21 $global_param.lazy_quotes | |
22 | |
23 ## Set Tabular input/output flag if first input is tabular ## | |
24 ############################################################# | |
25 #if $in_1.is_of_type("tabular"): | |
26 -t -T | |
27 #end if | |
28 | |
29 ## Set input files ## | |
30 ##################### | |
31 '$in_1' | |
32 | |
33 ## Specify fields to use ## | |
34 ########################### | |
35 -f '${column_text.in_text}' | |
36 | |
37 ## Column Name and pattern ## | |
38 ############################ | |
39 -n '$column_name_input' | |
40 -p '($pattern_input)' | |
41 $remove | |
42 $fill_na | |
43 | |
44 ## To output ## | |
45 ############### | |
46 > mutated | |
47 | |
48 ]]></command> | |
49 <inputs> | |
50 <expand macro="singular_input"/> | |
51 <conditional name="column_text" > | |
52 <param type="select" name="select" label="Select column based on" argument="-f"> | |
53 <option value="string">Column Name</option> | |
54 <option value="column">Column Number</option> | |
55 </param> | |
56 <when value="column"> | |
57 <param type="data_column" name="in_text" | |
58 data_ref="in_1" | |
59 multiple="False" force_select="True" | |
60 label="Analyze column number" | |
61 help="Select column to mutate data from" | |
62 /> | |
63 </when> | |
64 <when value="string"> | |
65 <param type="text" name="in_text" | |
66 optional="False" | |
67 label="Analyze column name" | |
68 help="Specify column name to pull data out of with Regex" | |
69 /> | |
70 </when> | |
71 </conditional> | |
72 <param type="text" name="pattern_input" | |
73 value=".+" | |
74 argument="-p" | |
75 label="Set regex search pattern" | |
76 optional="false" | |
77 help="Use regex to match input column information. Example: ^(.+)$ will match all characters. | |
78 Regex help can be found below. The ' character is invalid" | |
79 > | |
80 <expand macro="text_sanitizer" /> | |
81 </param> | |
82 <param type="text" name="column_name_input" | |
83 value="new_column" | |
84 argument="-n" | |
85 label="Set new column name" | |
86 optional="false" | |
87 help="Specify output column name for the matched data"> | |
88 <expand macro="text_sanitizer" /> | |
89 </param> | |
90 <param type="boolean" name="fill_na" | |
91 falsevalue="--na" truevalue="" | |
92 checked="true" | |
93 label="Fill Non-matches" | |
94 help="If NO, mutate will create a blank cell where no regex matches (--na). If YES the full cell value will be copied of non-matched columns" | |
95 /> | |
96 <param name="remove" type="boolean" checked="false" | |
97 falsevalue="" | |
98 truevalue="-R" | |
99 argument="-R" | |
100 label="Remove input column" | |
101 /> | |
102 <expand macro="ignore_case" /> | |
103 <expand macro="global_parameters" /> | |
104 </inputs> | |
105 <outputs> | |
106 <data format_source="in_1" name="mutated" from_work_dir="mutated" | |
107 label='${in_1.name} mutated by column ${column_text.in_text} with pattern ${pattern_input}' /> | |
108 </outputs> | |
109 <tests> | |
110 <test> | |
111 <param name="in_1" value="blood_type.tsv" /> | |
112 <conditional name="column_text"> | |
113 <param name="select" value="string" /> | |
114 <param name="in_text" value="1" /> | |
115 </conditional> | |
116 <param name="column_name_input" value="new_column" /> | |
117 <param name="pattern_input" value=".*" /> | |
118 <output name="mutated" file="mutated.tsv" ftype="tabular" /> | |
119 </test> | |
120 <test> | |
121 <param name="in_1" value="blood_type.tsv" /> | |
122 <conditional name="column_text"> | |
123 <param name="select" value="string" /> | |
124 <param name="in_text" value="1" /> | |
125 </conditional> | |
126 <param name="column_name_input" value="new_column" /> | |
127 <param name="pattern_input" value="Darian" /> | |
128 <param name="remove" value="true" /> | |
129 <param name="fill_na" value="false" /> | |
130 <output name="mutated" file="mutate_removed.tsv" ftype="tabular" /> | |
131 </test> | |
132 </tests> | |
133 <help><![CDATA[ | |
134 | |
135 Csvtk - Mutate Help | |
136 ------------------- | |
137 | |
138 Info | |
139 #### | |
140 Csvtk-mutate is a tool that uses Regular Expressions (Regex) to match data in the specified column. A | |
141 new column is then created from the matched data. | |
142 | |
143 The regex input for this tool is structured such that your regular expression **does not** need to start with quotes or brackets. You can | |
144 start your expression with a `^` or just go straight into it. | |
145 | |
146 For example: | |
147 | |
148 :: | |
149 | |
150 Using `.+` as an input would be used in the code as '(.+)' | |
151 | |
152 Using ^(.+)$ as an input would yield an input in the code as '(^(.+)$)' | |
153 | |
154 .. class:: warningmark | |
155 | |
156 Single quotes are not allowed in text inputs! | |
157 | |
158 ----- | |
159 | |
160 | |
161 @HELP_INPUT_DATA@ | |
162 | |
163 | |
164 Usage | |
165 ##### | |
166 You can use csvtk to create a new column from data matched through regular expressions (regex). | |
167 | |
168 A good regular expression cheat sheet and builder that can help you write your own expressions can be found at: | |
169 https://regexr.com/ | |
170 | |
171 **Mutate Examples** | |
172 | |
173 :: | |
174 | |
175 Mutate with Filling empty columns when no regex match: | |
176 | |
177 Suppose we have the following table and we want to pull out the exponents in the column "Colonies" | |
178 without pulling out any of the other characters to make a new column called "Exponent": | |
179 | |
180 +-------------+----------+-----------+ | |
181 | Colonies | Catalase | Coagulase | | |
182 +=============+==========+===========+ | |
183 | 1x10^15 cfu | Yes | No | | |
184 +-------------+----------+-----------+ | |
185 | 1x10^14 cfu | No | No | | |
186 +-------------+----------+-----------+ | |
187 | 1x10^18 cfu | Yes | No | | |
188 +-------------+----------+-----------+ | |
189 | 100 cfu | No | Yes | | |
190 +-------------+----------+-----------+ | |
191 | |
192 We would use a regular expression similar to (\^)(\d+) to get the "^" and the exponent into a new | |
193 column, giving the following result: | |
194 | |
195 +-------------+----------+-----------+----------+ | |
196 | Colonies | Catalase | Coagulase | Exponent | | |
197 +=============+==========+===========+==========+ | |
198 | 1x10^15 cfu | Yes | No | ^15 | | |
199 +-------------+----------+-----------+----------+ | |
200 | 1x10^14 cfu | No | No | ^14 | | |
201 +-------------+----------+-----------+----------+ | |
202 | 1x10^18 cfu | Yes | No | ^18 | | |
203 +-------------+----------+-----------+----------+ | |
204 | 100 cfu | No | Yes | 100 cfu | | |
205 +-------------+----------+-----------+----------+ | |
206 | |
207 As you can see, we still have "100 cfu" at the bottom even though it doesn't contain a "^". Because we | |
208 did not specify that non-matches should be left blank, the full "100 cfu" value was copied over. | |
209 | |
210 ---------------------------------------------------------------------------------------------------------------- | |
211 | |
212 Mutate leaving columns blank with no regex match: | |
213 | |
214 Suppose we had the same chart as above: | |
215 | |
216 +-------------+----------+-----------+ | |
217 | Colonies | Catalase | Coagulase | | |
218 +=============+==========+===========+ | |
219 | 1x10^15 cfu | Yes | No | | |
220 +-------------+----------+-----------+ | |
221 | 1x10^14 cfu | No | No | | |
222 +-------------+----------+-----------+ | |
223 | 1x10^18 cfu | Yes | No | | |
224 +-------------+----------+-----------+ | |
225 | 100 cfu | No | Yes | | |
226 +-------------+----------+-----------+ | |
227 | |
228 Now, if we were to set "Fill Non-matches" to "No", then we would get the following table using the same inputs | |
229 other than the change to fill non-matches: | |
230 | |
231 +-------------+----------+-----------+----------+ | |
232 | Colonies | Catalase | Coagulase | Exponent | | |
233 +=============+==========+===========+==========+ | |
234 | 1x10^15 cfu | Yes | No | ^15 | | |
235 +-------------+----------+-----------+----------+ | |
236 | 1x10^14 cfu | No | No | ^14 | | |
237 +-------------+----------+-----------+----------+ | |
238 | 1x10^18 cfu | Yes | No | ^18 | | |
239 +-------------+----------+-----------+----------+ | |
240 | 100 cfu | No | Yes | | | |
241 +-------------+----------+-----------+----------+ | |
242 | |
243 ---- | |
244 | |
245 If you're having trouble with the regular expressions, please play around with a builder. There are many available online, | |
246 and they are great resources to improve your regex statements or test them before use! | |
247 | |
248 ---- | |
249 | |
250 @HELP_END_STATEMENT@ | |
251 | |
252 | |
253 ]]></help> | |
254 <expand macro="citations" /> | |
255 </tool> |