Mercurial > repos > proteore > proteore_filter_keywords_values
comparison filter_kw_val.xml @ 5:33ca9ba2495a draft
planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty
author | proteore |
---|---|
date | Tue, 05 Mar 2019 07:37:10 -0500 |
parents | 2080e2a4f209 |
children | b4641c0f8a82 |
comparison
equal
deleted
inserted
replaced
4:2080e2a4f209 | 5:33ca9ba2495a |
---|---|
1 <tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.02.05"> | 1 <tool id="MQoutputfilter" name="Filter by keywords and/or numerical value" version="2019.03.05"> |
2 <description></description> | 2 <description></description> |
3 <requirements> | 3 <requirements> |
4 </requirements> | 4 </requirements> |
5 <stdio> | 5 <stdio> |
6 <exit_code range="1:" /> | 6 <exit_code range="1:" /> |
7 </stdio> | 7 </stdio> |
8 <command><![CDATA[ | 8 <command><![CDATA[ |
9 python $__tool_directory__/filter_kw_val.py | 9 python $__tool_directory__/filter_kw_val.py |
10 -i "$input1,$header" | 10 -i "$input1,$header" |
11 -o "$output1" | 11 -o "$kept_lines" |
12 --filtered_file "$filtered_file" | 12 --discarded_lines "$discarded_lines" |
13 --operation "$operation" | |
13 --operator "$operator" | 14 --operator "$operator" |
14 | 15 |
15 ## Keywords | 16 ## Keywords |
16 #for $key in $keyword | 17 #for $key in $keyword |
17 #if $key.k.kw != "None" | 18 #if $key.k.kw != "None" |
23 #end if | 24 #end if |
24 #end for | 25 #end for |
25 | 26 |
26 ## value to filter | 27 ## value to filter |
27 #for $val in $value | 28 #for $val in $value |
28 #if $val.v.val != "None" | 29 #if $val.value != "None" |
29 --value | 30 --value $val.value $val.ncol $val.operator |
30 #if $val.v.val == "Equal" | |
31 $val.v.equal "$val.ncol" "=" | |
32 #else if $val.v.val == "Higher" | |
33 $val.v.higher "$val.ncol" ">" | |
34 #else if $val.v.val == "Equal or higher" | |
35 $val.v.equal_higher "$val.ncol" ">=" | |
36 #else if $val.v.val == "Lower" | |
37 $val.v.lower "$val.ncol" "<" | |
38 #else if $val.v.val == "Equal or lower" | |
39 $val.v.equal_lower "$val.ncol" "<=" | |
40 #else | |
41 $val.v.different "$val.ncol" "!=" | |
42 #end if | |
43 #end if | 31 #end if |
44 #end for | 32 #end for |
45 | 33 |
46 ##range of values to keep | 34 ##range of values to keep |
47 #for $vr in $values_range | 35 #for $vr in $values_range |
48 #if vr | 36 #if vr |
49 --values_range $vr.bottom_value $vr.top_value $vr.ncol $vr.inclusive | 37 --values_range $vr.bottom_value $vr.top_value $vr.ncol $vr.inclusive |
50 #end if | 38 #end if |
51 #end for | 39 #end for |
52 | 40 |
53 #if $sort_column != "" | 41 #if $sort.sort_bool == "true" |
54 --sort_col "$sort_column,$reversed_sort" | 42 --sort_col "$sort.sort_column,$sort.reversed_sort" |
55 #end if | 43 #end if |
56 | 44 |
57 ]]></command> | 45 ]]></command> |
58 <inputs> | 46 <inputs> |
59 <param type="data" name="input1" format="txt,tabular" label="Input file" /> | 47 <param type="data" name="input1" format="txt,tabular" label="Input file" /> |
60 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> | 48 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> |
49 <param name="operation" type="select" label="Operation" help="keep or discard word(s) or value(s) that match filters ?"> | |
50 <option value="keep">Keep</option> | |
51 <option value="discard">Discard</option> | |
52 </param> | |
61 <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" > | 53 <param name="operator" type="select" label="Select an operator to combine your filters (if more than one)" help="OR : only one filter must be satisfied to filter a row, AND : all your filters must be satisfied to filter a row" > |
62 <option value="OR" selected="True">OR</option> | 54 <option value="OR" selected="True">OR</option> |
63 <option value="AND">AND</option> | 55 <option value="AND">AND</option> |
64 </param> | 56 </param> |
65 | 57 |
66 <repeat name="keyword" title="Filter by keywords" > | 58 <repeat name="keyword" title="Filter by keywords" > |
67 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> | 59 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek keywords in the first column (and keep or discard them)'> |
60 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> | |
61 </param> | |
68 <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' /> | 62 <param type="boolean" name="match" truevalue="True" label="Search for exact match?" help='Choosing "Yes" will only filter out exact match (e.g. case sensitive), see help section' /> |
69 <conditional name="k" > | 63 <conditional name="k" > |
70 <param name="kw" type="select" label="Enter keywords" > | 64 <param name="kw" type="select" label="Enter keywords" > |
71 <option value="text" selected="true">copy/paste</option> | 65 <option value="text" selected="true">copy/paste</option> |
72 <option value="file">File containing keywords</option> | 66 <option value="file">File containing keywords</option> |
73 </param> | 67 </param> |
74 <when value="text" > | 68 <when value="text" > |
75 <param name="txt" type="text" label="Copy/paste keywords to be filtered out" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' > | 69 <param name="txt" type="text" label="Copy/paste keywords to find (keep or discard)" help='Keywords must be separated by tab, space or carriage return into the form field, for example: A8K2U0 Q5TA79 O43175' > |
76 <sanitizer> | 70 <sanitizer> |
77 <valid initial="string.printable"> | 71 <valid initial="string.printable"> |
78 <remove value="'"/> | 72 <remove value="'"/> |
79 </valid> | 73 </valid> |
80 <mapping initial="none"> | 74 <mapping initial="none"> |
89 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> | 83 <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> |
90 </when> | 84 </when> |
91 </conditional> | 85 </conditional> |
92 </repeat> | 86 </repeat> |
93 <repeat name="value" title="Filter by numerical value" > | 87 <repeat name="value" title="Filter by numerical value" > |
94 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> | 88 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'> |
95 <conditional name="v" > | 89 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> |
96 <param name="val" type="select" label="Select operator" > | 90 </param> |
97 <option value="Equal">=</option> | 91 <param name="operator" type="select" label="Select operator" > |
98 <option value="Higher">></option> | 92 <option value="Equal">=</option> |
99 <option value="Equal or higher">>=</option> | 93 <option value="Higher">></option> |
100 <option value="Lower"><</option> | 94 <option value="Equal-or-higher">>=</option> |
101 <option value="Equal or lower"><=</option> | 95 <option value="Lower"><</option> |
102 <option value="Different">!=</option> | 96 <option value="Equal-or-lower"><=</option> |
103 </param> | 97 <option value="Different">!=</option> |
104 <when value="Equal" > | 98 </param> |
105 <param name="equal" type="float" value="" label="Value" /> | 99 <param name="value" type="float" value="" label="Value"></param> |
106 </when> | |
107 <when value="Higher" > | |
108 <param type="float" name="higher" value="" label="Value" /> | |
109 </when> | |
110 <when value="Equal or higher" > | |
111 <param type="float" name="equal_higher" value="" label="Value" /> | |
112 </when> | |
113 <when value="Lower" > | |
114 <param type="float" name="lower" value="" label="Value" /> | |
115 </when> | |
116 <when value="Equal or lower" > | |
117 <param type="float" name="equal_lower" value="" label="Value" /> | |
118 </when> | |
119 <when value="Different"> | |
120 <param type="float" name="different" value="" label="Value"/> | |
121 </when> | |
122 </conditional> | |
123 </repeat> | 100 </repeat> |
124 <repeat name="values_range" title="Filter by range of numerical values"> | 101 <repeat name="values_range" title="Filter by range of numerical values"> |
125 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if the keywords you want to filter out are listed in the first column' /> | 102 <param name="ncol" type="text" value="c1" label="Column number on which to apply the filter" help='For example, fill in "c1" if you want to seek values in the first column (and keep or discard them)'> |
103 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator> | |
104 </param> | |
126 <param name="bottom_value" type="float" value="" label="Enter the bottom value" /> | 105 <param name="bottom_value" type="float" value="" label="Enter the bottom value" /> |
127 <param name="top_value" type="float" value="" label="Enter the top value" /> | 106 <param name="top_value" type="float" value="" label="Enter the top value" /> |
128 <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" /> | 107 <param name="inclusive" type="boolean" label="inclusive range ?" checked="false" truevalue="true" falsevalue="false" /> |
129 </repeat> | 108 </repeat> |
130 <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values" /> | 109 <conditional name="sort"> |
131 <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/> | 110 <param name="sort_bool" type="boolean" label="Sort by column ?" checked="false" truevalue="true" falsevalue="false" /> |
111 <when value="true"> | |
112 <param name="sort_column" type="text" value="" label="Sort result files by:" help="Fill in 'c1' if you want to sort your result file by the column 1 values"> | |
113 <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]</validator> | |
114 </param> | |
115 <param name="reversed_sort" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Sort in descending order ?"/> | |
116 </when> | |
117 <when value="false"/> | |
118 </conditional> | |
132 </inputs> | 119 </inputs> |
133 <outputs> | 120 <outputs> |
134 <data name="output1" format="tsv" label="${tool.name} on ${input1.name}" /> | 121 <data name="kept_lines" format="tsv" label="Filtered_${input1.name}" /> |
135 <data name="filtered_file" format="tsv" label="${tool.name} on ${input1.name} - Filtered lines" /> | 122 <data name="discarded_lines" format="tsv" label="Filtered_${input1.name} - discarded_lines" /> |
136 </outputs> | 123 </outputs> |
137 <tests> | 124 <tests> |
138 <test> | 125 <test> |
139 <param name="input1" value="Lacombe_et_al_2017_OK.txt" /> | 126 <param name="input1" value="Lacombe_et_al_2017_OK.txt" /> |
140 <param name="header" value="true" /> | 127 <param name="header" value="true" /> |
141 <param name="operator" value="OR"/> | 128 <param name="operator" value="OR"/> |
129 <param name="operation" value="discard"/> | |
142 <param name="sort_column" value="c1"/> | 130 <param name="sort_column" value="c1"/> |
143 <param name="reversed_sort" value="false"/> | 131 <conditional name="sort"> |
132 <param name="sort_bool" value="false"/> | |
133 <param name="reversed_sort" value="false"/> | |
134 </conditional> | |
144 <repeat name="keyword"> | 135 <repeat name="keyword"> |
145 <param name="ncol" value="c1" /> | 136 <param name="ncol" value="c1" /> |
146 <param name="match" value="True" /> | 137 <param name="match" value="True" /> |
147 <conditional name="k"> | 138 <conditional name="k"> |
148 <param name="kw" value="text" /> | 139 <param name="kw" value="text" /> |
149 <param name="txt" value="P04264 P35908 P13645 Q5D862 Q5T749 Q8IW75 P81605 P22531 P59666 P78386" /> | 140 <param name="txt" value="P04264 P35908 P13645 Q5D862 Q5T749 Q8IW75 P81605 P22531 P59666 P78386" /> |
150 </conditional> | 141 </conditional> |
151 </repeat> | 142 </repeat> |
152 <repeat name="value"> | 143 <repeat name="value"> |
153 <param name="ncol" value="c3"/> | 144 <param name="ncol" value="c3"/> |
154 <conditional name="v"> | 145 <param name="operator" value="Higher"/> |
155 <param name="val" value="Higher"/> | 146 <param name="value" value="20" /> |
156 <param name="higher" value="20" /> | |
157 </conditional> | |
158 </repeat> | 147 </repeat> |
159 <output name="output1" file="output.csv" /> | 148 <output name="kept_lines" file="output.tsv" /> |
160 <output name="filtered_file" file="filtered_output.csv" /> | 149 <output name="discarded_lines" file="discarded_lines.tsv" /> |
161 </test> | 150 </test> |
162 </tests> | 151 </tests> |
163 <help><![CDATA[ | 152 <help><![CDATA[ |
164 **Description** | 153 **Description** |
165 | 154 |
166 This tool allows you to filter out data according to different criteria such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold). | 155 This tool allows you to keep/discard rows from your dataset according to different filters such as keywords (e.g. a list of contaminants) or numerical values (e.g. intensity measurements below a given threshold). |
167 A boolean operator "OR/AND" allows you to combine different types of filters, making this tool very powerful. | 156 A boolean operator "OR/AND" allows you to combine different types of filters, making this tool very powerful. |
168 | 157 |
169 ----- | 158 ----- |
170 | 159 |
171 **Input** | 160 **Input** |
174 | 163 |
175 ----- | 164 ----- |
176 | 165 |
177 **Parameters** | 166 **Parameters** |
178 | 167 |
179 **AND/OR operator** | 168 **Operation** |
180 | 169 |
181 As many filters as needed can be combined, you can choose how filters apply on your data by using the following boolean operators: | 170 - **Keep**: only keep lines with keyword(s) and/or value(s) concerned by defined filter(s) |
182 | 171 - **Discard**: only keep lines with keyword(s) and/or value(s) NOT concerned by defined filter(s) |
183 - OR: only one filter must be satisfied to remove one row | 172 |
184 - AND: all filters must be satisfied to remove one row | 173 .. class:: infomark |
174 | |
175 Two output files are created, one with kept lines and the other one with discarded lines. | |
176 | |
177 **Select an operator to combine your filters (if more than one)** | |
178 | |
179 Many filters (criteria) can be combined in a single execution making this tool quite powerful; this can be achieved using the following boolean operators: | |
180 | |
| 181 - **OR**: only one of the filters must be satisfied to keep/discard one row | |
182 - **AND**: all filters must be satisfied to keep/discard one row | |
185 | 183 |
186 ----- | 184 ----- |
187 | 185 |
188 **Filter by keyword(s)** | 186 **Filter by keyword(s)** |
189 | 187 |
203 | 201 |
204 Lines that contain these keywords will be removed from the input file. | 202 Lines that contain these keywords will be removed from the input file. |
205 | 203 |
206 "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option: | 204 "Search for exact match?": Keywords search can be applied by performing either exact match or partial one by using the following option: |
207 | 205 |
208 - If you choose **Yes**, only the fields that contain exactly the same content will be removed. | 206 - If you choose **Yes**, only the fields that contain exactly the same content will be removed (i.e. using the "discard" mode). |
209 | 207 |
210 - If you choose **No**, all the fields containing the keyword will be removed. | 208 - If you choose **No**, all the fields containing the keyword will be removed. |
211 | 209 |
212 Example: | 210 Example: |
213 | 211 |
229 - <= (lower than or equal to) | 227 - <= (lower than or equal to) |
230 - > (greater than) | 228 - > (greater than) |
231 - >= (greater than or equal to) | 229 - >= (greater than or equal to) |
232 | 230 |
233 Then enter the numerical threshold to apply by filling the "Value" box. | 231 Then enter the numerical threshold to apply by filling the "Value" box. |
234 If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that corresponds to your settings will be filtered out. | 232 If you choose > 10, each row containing a numerical value (in the chosen column of your input file) that corresponds to your settings will be kept or discarded (based on operation parameter). |
235 | 233 |
236 ----- | 234 ----- |
237 | 235 |
238 **Filter by a range of values**: You can also set a range of values to filter your file. | 236 **Filter by a range of values**: You can also set a range of values to filter your file. |
239 Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be filtered out. | 237 Conversely to the numeric filter, rows with numerical values within the defined range will be kept while rows with values out of this range will be discarded (or the other way around based on operation parameter). |
240 | 238 |
241 ----- | 239 ----- |
242 | 240 |
243 **Sort results files** | 241 **Sort by column ?** |
244 | 242 Clicking on the "Yes" button allows you to "Sort result files by:" a column number. This can be done in ascending (default value) or descending order by entering the column number on which to sort the data. |
245 You can sort your results by column in ascending (default value) or descending by entering the column number on which to sort the data. | |
246 | 243 |
247 ----- | 244 ----- |
248 | 245 |
249 **Output** | 246 **Output** |
250 | 247 |
251 The tool returns two output files. | 248 The tool returns two output files. |
252 | 249 |
253 * A text file containing the results that pass your filters | 250 * A text file containing the results that satisfy your filters (i.e. "keep" mode). |
254 | 251 |
255 * A text file containing the rows removed from the input file (i.e. containing data that do not pass your filter(s)). | 252 * A text file containing the rows removed from the input file (i.e. "discard" mode). |
256 | 253 |
257 ----- | 254 ----- |
258 | 255 |
259 .. class:: infomark | 256 .. class:: infomark |
260 | 257 |
261 **Authors** | 258 **Authors** |
262 | 259 |
263 T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR | 260 David Christiany, T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR |
264 | 261 |
265 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR | 262 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR |
266 | 263 |
267 This work has been partially funded through the French National Agency for Research (ANR) IFB project. | 264 This work has been partially funded through the French National Agency for Research (ANR) IFB project. |
268 | 265 |