comparison flagRemove.xml @ 0:5d0461edc7fd draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author computational-metabolomics
date Wed, 27 Nov 2019 12:40:15 -0500
parents
children d2373ced0ded
comparison
equal deleted inserted replaced
-1:000000000000 0:5d0461edc7fd
1 <tool id="mspurity_flagremove" name="msPurity.flagRemove" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
2 <description>Tool to flag and remove XCMS grouped peaks from the xcmsSet object based on various thresholds
3 (e.g. RSD of intensity and retention time).
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
    ## Run the flagRemove R wrapper. Results are written to the job working
    ## directory (--out_dir=.).
    Rscript '$__tool_directory__/flagRemove.R'
        --xset_path='$xset_path'
        --out_dir=.

    ## Biological sample thresholds: only forwarded when the user chose to
    ## update them, otherwise the script's own defaults apply.
    #if $sample_flag.sample_flag == 'update'
        --rsd_i_sample='$sample_flag.rsd_i_sample'
        --minfrac_sample='$sample_flag.minfrac_sample'
        --rsd_rt_sample='$sample_flag.rsd_rt_sample'
        --ithres_sample='$sample_flag.ithres_sample'
    #end if

    ## Blank thresholds: every input of the "update" branch is forwarded,
    ## including rsd_rt_blank and s2b, which were previously collected in the
    ## form but never passed on.
    #if $blank_flag.blank_flag == 'update'
        --blank_class='$blank_flag.blank_class'
        --rsd_i_blank='$blank_flag.rsd_i_blank'
        --minfrac_blank='$blank_flag.minfrac_blank'
        --rsd_rt_blank='$blank_flag.rsd_rt_blank'
        --ithres_blank='$blank_flag.ithres_blank'
        --s2b='$blank_flag.s2b'
    #end if

    ## Peak removal: --remove_spectra switches the script from flag-only to
    ## remove-and-regroup; the remaining options configure the xcms re-grouping.
    #if $peak_removal.peak_removal == 'remove'
        --remove_spectra
        --minfrac_xcms='$peak_removal.minfrac_xcms'
        --mzwid='$peak_removal.mzwid'
        --bw='$peak_removal.bw'
    #end if

    #if $advanced.advanced == 'update'
        --egauss_thr='$advanced.egauss_thr'
        --polarity='$advanced.polarity'
        --grp_rm_ids='$advanced.grp_rm_ids'
        --xset_name='$advanced.xset_name'
        ## Boolean flag: expands to either "--temp_save" or nothing. It is left
        ## unquoted on purpose so that an unchecked box does not hand an empty
        ## string to Rscript as a stray positional argument.
        $advanced.temp_save
    #end if

    #if $choose_samp.choose_samp == 'yes'
        --samplelist='$choose_samp.samplelist'
    #end if
]]></command>
46 <inputs>
<!-- Input dataset holding the xcmsSet object; its path is handed to the script as --xset_path. -->
<param
    argument="--xset_path"
    type="data"
    format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata.xcms.fillpeaks,rdata"
    help="The path to the xcmsSet object saved as an RData file"/>
<conditional name="sample_flag">
    <!-- Selector only: it is tested in the command template and is not itself a
         command-line flag, so it is declared with name= rather than argument=. -->
    <param name="sample_flag" type="select" label="Change biological sample flag parameters?">
        <option value="update">Update biological sample flag parameters</option>
        <option value="" selected="true">Use default biological sample flag parameters</option>
    </param>
    <!-- Default branch: no extra inputs. -->
    <when value=""/>
    <when value="update">
        <!-- NOTE(review): the thresholds defaulting to "NA" are text inputs, presumably so
             that the literal NA can be entered; confirm against flagRemove.R. -->
        <param argument="--rsd_i_sample" type="text" label="rsd_i_sample" value="NA"
               help="Relative Standard Deviation threshold for the sample classes"/>
        <param argument="--minfrac_sample" type="float" label="minfrac_sample" value="0.5" min="0.0" max="1"
               help="minimum fraction of files for features needed for the sample classes"/>
        <param argument="--rsd_rt_sample" type="text" label="rsd_rt_sample" value="NA"
               help="Relative standard Deviation threshold for the retention time of the sample classes"/>
        <param argument="--ithres_sample" type="text" label="ithres_sample" value="NA"
               help="Intensity threshold for the sample"/>
    </when>
</conditional>
<conditional name="blank_flag">
    <!-- Selector only: it is tested in the command template and is not itself a
         command-line flag, so it is declared with name= rather than argument=. -->
    <param name="blank_flag" type="select" label="Change blank flag parameters?">
        <option value="update">Update blank flag parameters</option>
        <option value="" selected="true">Use default blank flag parameters</option>
    </param>
    <!-- Default branch: no extra inputs. -->
    <when value=""/>
    <when value="update">
        <param argument="--blank_class" type="text" label="blank_class" value="blank"
               help="A string representing the class that will be used for the blank"/>
        <param argument="--rsd_i_blank" type="text" label="rsd_i_blank" value="NA"
               help="RSD threshold for the blank"/>
        <param argument="--minfrac_blank" type="float" label="minfrac_blank" value="0.5" min="0.0" max="1"
               help="minimum fraction of files for features needed for the blank"/>
        <param argument="--rsd_rt_blank" type="text" label="rsd_rt_blank" value="NA"
               help="RSD threshold for the retention time of the blank"/>
        <param argument="--ithres_blank" type="text" label="ithres_blank" value="NA"
               help="Intensity threshold for the blank"/>
        <!-- The worked example uses sample = 1000 and blank = 10 so that the stated
             ratio (1000/10 = 100) matches the quoted intensities. -->
        <param argument="--s2b" type="float" label="s2b" value="10"
               help="Fold change (sample/blank) needed for a sample peak to be allowed, e.g. if s2b is set to 10 and the recorded sample 'intensity' value was 1000 and the blank was 10, then 1000/10 = 100, so the sample has a fold change higher than the threshold and the peak is not considered a blank"/>
    </when>
</conditional>
<conditional name="peak_removal">
    <!-- "remove" makes the command template add the removal flag together with the
         three xcms grouping options below; the empty value only flags peaks. -->
    <param name="peak_removal" type="select" label="Remove peaks from xcmsSet object?">
        <option value="remove">Remove peaks and re-group</option>
        <option value="" selected="true">Only flag peaks (do not remove and re-group)</option>
    </param>
    <when value=""/>
    <when value="remove">
        <param argument="--minfrac_xcms"
               type="float"
               label="minfrac_xcms"
               value="0.7"
               min="0.0"
               max="1"
               help="minfrac for xcms grouping"/>
        <param argument="--mzwid"
               type="float"
               label="mzwid"
               value="0.001"
               help="mzwid for xcms grouping"/>
        <param argument="--bw"
               type="float"
               label="bw"
               value="5"
               help="bw for xcms grouping"/>
    </when>
</conditional>
<conditional name="advanced">
    <!-- Advanced and testing options; nothing from this section reaches the command
         line unless "update" is selected. -->
    <param name="advanced" type="select" label="Advanced parameters">
        <option value="update">Update advanced and testing parameters</option>
        <option value="" selected="true">Use default advanced parameters</option>
    </param>
    <when value=""/>
    <when value="update">
        <param argument="--egauss_thr" type="text" label="egauss_thr" value="NA"
               help="Threshold for filtering out non gaussian shaped peaks. Note this only works if the 'verbose columns' and 'fit gauss' was used with xcms"/>
        <!-- truevalue is the literal flag inserted into the command line; falsevalue
             is empty so nothing is added when the box is unchecked. -->
        <param argument="--temp_save" type="boolean" label="temp_save" checked="false"
               truevalue="--temp_save" falsevalue=""
               help="Assign True if files for each step saved (for testing purposes)"/>
        <param argument="--polarity" type="select" label="polarity"
               help="polarity (just used for naming purpose when files are saved)">
            <option value="positive">Positive</option>
            <option value="negative">Negative</option>
            <option value="NA" selected="true">NA</option>
        </param>
        <param argument="--grp_rm_ids" type="text" label="grp_rm_ids" value="NA"
               help="comma separated list of grouped_xcms peak ids to remove (corresponds to the row from xcms::group output) e.g. '1,20,30,56'"/>
        <param argument="--xset_name" type="text" label="xset_name" value="xset"
               help="Name of the xcmsSet object within the RData file"/>
    </when>
</conditional>
<conditional name="choose_samp">
    <!-- Optional sample list; the dataset is only requested (and only passed as
         the samplelist option) when "yes" is selected. -->
    <param name="choose_samp" type="select" label="Samplelist">
        <option value="yes">Use samplelist</option>
        <option value="" selected="true">Don't use samplelist</option>
    </param>
    <when value=""/>
    <when value="yes">
        <param argument="--samplelist" type="data" label="samplelist" format="tsv,tabular"
               help="A samplelist can be provided to find an appropriate blank class (requires a column 'blank' where 'yes' indicates the class should be used as the blank)"/>
    </when>
</conditional>
150 </inputs>
<outputs>
    <!-- Each output is collected from a fixed file name in the job working
         directory. All labels follow the same
         "<tool name> on <inputs>: <content> (<type>)" pattern so the three
         datasets of one run are easy to tell apart in the history. -->
    <data name="peaklist_filtered" format="tsv"
          label="${tool.name} on ${on_string}: peaklist_filtered (tsv)"
          from_work_dir="peaklist_filtered.tsv"/>
    <data name="removed_peaks" format="tsv"
          label="${tool.name} on ${on_string}: removed_peaks (tsv)"
          from_work_dir="removed_peaks.tsv"/>
    <data name="xset_filtered" format="rdata"
          label="${tool.name} on ${on_string}: xset_filtered (rdata)"
          from_work_dir="xset_filtered.RData"/>
</outputs>
<tests>
    <!-- Uses class "KO" as the blank, removes the flagged peaks and re-groups with
         the default grouping settings, then compares the filtered peak list.
         Conditional values are set once, inside their <conditional> element; the
         duplicated flat "blank_flag.blank_flag" params were redundant. -->
    <test>
        <param name="xset_path" value="flagRemove_input.RData"/>
        <conditional name="blank_flag">
            <param name="blank_flag" value="update"/>
            <param name="blank_class" value="KO"/>
        </conditional>
        <conditional name="peak_removal">
            <param name="peak_removal" value="remove"/>
        </conditional>
        <output name="peaklist_filtered" file="flagRemove_output.tsv"/>
    </test>
</tests>
175 <help><![CDATA[
176
177 =======================================
178 Flag & remove peaks from xcmsSet object
179 =======================================
180 -----------
181 Description
182 -----------
183
184 Tool to flag XCMS grouped peaks based on various criteria (e.g. RSD, intensity). The flagged grouped peaks can then be removed
185 completely from the xcmsSet object (xset). This means removing the individual peaks associated with each file, which are located
186 in the **xset@peaks** slot of the xcmsSet object.
187
188 Additionally, a list of ids of the xcms grouped peaks can be supplied; all peaks associated with these ids can then
189 be removed.
190
191 **Note**: grouped peak refers to a peak that has been grouped together by xcms::group function
192
193 -----------------
194 Updated peaklist
195 -----------------
196 The calculated columns for the updated peaklist dataframe include:
197
198 * RSD of intensity for grouped peaks across each class
199 * RSD of retention time for grouped peaks across each class
200 * Coverage across all classes
201 * mzmin_full & mzmax_full: the full mzrange of each grouped peak
202 * rtmin_full & rtmax_full: the full rtrange of each grouped peak
203 * flag for criteria for the blank class (if 1 it means the blank is valid in at least 1 condition) and this grouped peak will be removed
204 * flag for criteria for the sample classes (if 1 it means that this grouped peak is valid for this class)
205 * all_sample_valid: flag for all samples (if 1 it means that at least 1 sample class is valid, this ignores the blank)
206
207 To filter out blank peaks, just filter out all peaks where blank_valid is equal to 1.
208 These columns are in addition to the standard output from the xcmsSet peaklist.
209
210 **flag example**
211
212 Dataset consists of 3 classes. Blank, cond1 and cond2. The classes cond1 and cond2 are biological sample classes.
213
214 ============= ============= ============= ================ ================
215 blank_valid cond1_valid cond2_valid all_sample_valid Keep peak?
216 ============= ============= ============= ================ ================
217 0 0 1 1 Yes
218 ------------- ------------- ------------- ---------------- ----------------
219 0 1 1 1 Yes
220 ------------- ------------- ------------- ---------------- ----------------
221 1 0 1 1 No
222 ------------- ------------- ------------- ---------------- ----------------
223 1 0 0 0 No
224 ------------- ------------- ------------- ---------------- ----------------
225 0 0 0 0 No
226 ============= ============= ============= ================ ================
227
228
229
230 -----------------
231 Filters for flags
232 -----------------
233
234 The following filters can be used to determine if a grouped peak is flagged to be kept. If the column all_sample_valid is 0 then
235 the grouped peak will be removed.
236
237
238 * RSD of intensity for each biological sample class
239 * minfrac for each biological sample class
240 * RSD of retention time for each biological sample class
241 * intensity threshold for each biological sample class
242 * Blank subtraction. If a blank peak is found where the intensity of any corresponding biological sample class is not greater than the s2b threshold (sample/blank), then this grouped peak will be flagged for removal
243
244 The blank grouped peaks also have their own filters. If the blank peak passes these criteria and the s2b threshold detailed above
245 then the peak will be removed.
246
247 * RSD of intensity
248 * minfrac
249 * intensity threshold
250
251 Additionally there is a filter for assessing how well the peaks fit the gaussian shape. Note that this can only be performed
252 when XCMS has fit_gauss option and verbose columns set to TRUE. Also, these peaks are just removed and not flagged.
253
254
255
256 -----------
257 Regrouping
258 -----------
259 The resulting xcmsSet object, from which the flagged peaks have been removed, needs to be regrouped, otherwise the individual peaks
260 associated with each file will not be correctly linked to the grouped peaks.
261
262 This tool will re-group the xcmsSet object and check the newly created re-grouped xcmsSet object to see if any peaks are
263 still being flagged. If so, the process will be repeated until the xcmsSet object only contains peaks that match
264 the peak criteria.
265
266 The output file is an xcmsSet.RData file.
267 ]]></help>
268
269 <expand macro="citations" />
270 </tool>