Mercurial > repos > galaxyp > msi_filtering
comparison msi_filtering.xml @ 0:f17d3f1a065f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_filtering commit 3363c40790b0d64a085f980980f4289165eed27f
author | galaxyp |
---|---|
date | Wed, 28 Feb 2018 14:02:21 -0500 |
parents | |
children | 98c101b19f3c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f17d3f1a065f |
---|---|
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.7.0"> | |
2 <description>tool for filtering mass spectrometry imaging data</description> | |
3 <requirements> | |
4 <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> | |
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> | |
6 </requirements> | |
7 <command detect_errors="exit_code"> | |
8 <![CDATA[ | |
9 ## Stage the input under a fixed file name in the working directory so the R script can open it by a known path | |
10 #if $infile.ext == 'imzml' | |
11 cp '${infile.extra_files_path}/imzml' infile.imzML && | |
12 cp '${infile.extra_files_path}/ibd' infile.ibd && | |
13 #elif $infile.ext == 'analyze75' | |
14 cp '${infile.extra_files_path}/hdr' infile.hdr && | |
15 cp '${infile.extra_files_path}/img' infile.img && | |
16 cp '${infile.extra_files_path}/t2m' infile.t2m && | |
17 #else | |
18 ln -s '$infile' infile.RData && | |
19 #end if | |
20 cat '${MSI_subsetting}' && | |
21 echo '${MSI_subsetting}' && | |
22 Rscript '${MSI_subsetting}' | |
23 ## cat and echo above only log the generated R script and its path; all paths are single-quoted so unusual characters cannot split the shell command | |
24 ]]> | |
25 </command> | |
26 <configfiles> | |
27 <configfile name="MSI_subsetting"><![CDATA[ | |
28 | |
29 | |
30 ################################# load libraries and read file ######################### | |
31 ## Cardinal supplies the MSI reading and accessor functions, gridExtra supplies grid.table for the QC table | |
32 | |
33 library(Cardinal) | |
34 library(gridExtra) | |
35 | |
36 ## Read the MSI dataset from the fixed file names staged by the command section | |
37 ## (for RData input the loaded file is expected to define the variable msidata, see the tool help) | |
38 #if $infile.ext == 'imzml' | |
39 msidata = readMSIData('infile.imzML') | |
40 #elif $infile.ext == 'analyze75' | |
41 msidata = readMSIData('infile.hdr') | |
42 #else | |
43 load('infile.RData') | |
44 #end if | |
45 | |
46 ###################################### inputfile properties in numbers ###################### | |
47 ## Describes the unfiltered input; only computed when the QC report is requested | |
48 #if $outputs.outputs_select == "quality_control" | |
49 ## Number of features (mz) | |
50 maxfeatures = length(features(msidata)) | |
51 ## Range of mz values, rounded to two digits | |
52 minmz = round(min(mz(msidata)), digits=2) | |
53 maxmz = round(max(mz(msidata)), digits=2) | |
54 ## Number of spectra (pixels) | |
55 pixelcount = length(pixels(msidata)) | |
56 ## Range x coordinates (first coordinate column) | |
57 minimumx = min(coord(msidata)[,1]) | |
58 maximumx = max(coord(msidata)[,1]) | |
59 ## Range y coordinates (second coordinate column) | |
60 minimumy = min(coord(msidata)[,2]) | |
61 maximumy = max(coord(msidata)[,2]) | |
62 ## Number of intensities > 0 | |
63 npeaks= sum(spectra(msidata)[]>0) | |
64 ## Total number of intensity values in the spectra matrix (rows x columns), i.e. the potential number of peaks | |
65 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
66 ## Percentage of intensities > 0 | |
67 percpeaks = round(npeaks/numpeaks*100, digits=2) | |
68 ## TIC = column sums of the spectra matrix; count how many of them are exactly zero | |
69 TICs = colSums(spectra(msidata)[]) | |
70 NumemptyTIC = sum(TICs == 0) | |
71 ## Median TIC, rounded to two digits | |
72 medint = round(median(TICs), digits=2) | |
73 ## Keep the input mz values for the before/after feature plot in the QC PDF | |
74 featuresinfile = mz(msidata) | |
75 #end if | |
76 | |
77 | |
78 ###################################### filtering of pixels ###################### | |
79 #if $inputpixels: | |
80 input_list = read.delim("$inputpixels", header = FALSE, | |
81 na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) | |
82 validpixels = input_list[,$pixel_column] %in% names(pixels(msidata)) | |
83 ## validpixels is a logical vector with one entry per input row; sum() reduces it to a single count so the if condition has length one | |
84 if (sum(validpixels) != 0) | |
85 { | |
86 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[validpixels,$pixel_column]] | |
87 msidata = msidata[,pixelsofinterest] | |
88 numberpixels = length(input_list[,$pixel_column]) | |
89 }else { | |
90 numberpixels = 0 | |
91 } | |
92 ## without any matching pixel name msidata is left unfiltered | |
93 | |
94 #else | |
95 input_list = data.frame(0, 0) | |
96 validpixels=0 | |
97 numberpixels = 0 | |
98 #end if | |
99 | |
100 | |
101 | |
102 ###################################### filtering of features ###################### | |
103 ## Keeps only the mz features whose names appear in the chosen column of the optional feature file | |
104 #if $inputfeatures: | |
105 input_features = read.delim("$inputfeatures", header = FALSE, | |
106 na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) | |
107 validfeatures = input_features[,$feature_column] %in% names(features(msidata)) | |
108 ## validfeatures is a logical vector with one entry per input row; sum() reduces it to a single count so the if condition has length one | |
109 if (sum(validfeatures) != 0) | |
110 { | |
111 featuresofinterest = features(msidata)[names(features(msidata)) %in% input_features[validfeatures,$feature_column]] | |
112 msidata = msidata[featuresofinterest,] | |
113 numberfeatures = length(input_features[,$feature_column]) | |
114 } else { | |
115 numberfeatures = 0 | |
116 } | |
117 ## without any matching feature name msidata is left unfiltered | |
118 | |
119 #else | |
120 input_features = data.frame(0, 0) | |
121 validfeatures = 0 | |
122 numberfeatures = 0 | |
123 #end if | |
124 | |
125 | |
126 | |
127 | |
128 | |
129 | |
130 # save the msidata object (filtered if any filter matched) as RData into the msidata_filtered output | |
131 save(msidata, file="$msidata_filtered") | |
132 | |
133 ###################################### outputfile properties in numbers ###################### | |
134 ## Same metrics as in the inputfile properties section, now computed on msidata after filtering (names carry the suffix 2) | |
135 #if $outputs.outputs_select == "quality_control" | |
136 | |
137 ## Number of features (mz) | |
138 maxfeatures2 = length(features(msidata)) | |
139 ## Range of mz values, rounded to two digits | |
140 minmz2 = round(min(mz(msidata)), digits=2) | |
141 maxmz2 = round(max(mz(msidata)), digits=2) | |
142 ## Number of spectra (pixels) | |
143 pixelcount2 = length(pixels(msidata)) | |
144 ## Range x coordinates (first coordinate column) | |
145 minimumx2 = min(coord(msidata)[,1]) | |
146 maximumx2 = max(coord(msidata)[,1]) | |
147 ## Range y coordinates (second coordinate column) | |
148 minimumy2 = min(coord(msidata)[,2]) | |
149 maximumy2 = max(coord(msidata)[,2]) | |
150 ## Number of intensities > 0 | |
151 npeaks2= sum(spectra(msidata)[]>0) | |
152 ## Total number of intensity values in the spectra matrix (rows x columns), i.e. the potential number of peaks | |
153 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | |
154 ## Percentage of intensities > 0 | |
155 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) | |
156 ## TIC = column sums of the spectra matrix; count how many of them are exactly zero | |
157 TICs2 = colSums(spectra(msidata)[]) | |
158 NumemptyTIC2 = sum(TICs2 == 0) | |
159 ## Median TIC, rounded to two digits | |
160 medint2 = round(median(TICs2), digits=2) | |
161 | |
162 ## Assemble the three columns of the QC table: property label, value before filtering, value after filtering | |
163 properties = c("Number of mz features", | |
164 "Range of mz values [Da]", | |
165 "Number of pixels", | |
166 "Range of x coordinates", | |
167 "Range of y coordinates", | |
168 "Intensities > 0", | |
169 "Median TIC per pixel", | |
170 "Number of zero TICs", | |
171 paste0("# pixels in ", "$inputpixels.display_name"), | |
172 paste0("# mz in ", "$inputfeatures.display_name")) | |
173 | |
174 before = c(paste0(maxfeatures), | |
175 paste0(minmz, " - ", maxmz), | |
176 paste0(pixelcount), | |
177 paste0(minimumx, " - ", maximumx), | |
178 paste0(minimumy, " - ", maximumy), | |
179 paste0(percpeaks, " %"), | |
180 paste0(medint), | |
181 paste0(NumemptyTIC), | |
182 paste0("input pixels: ", numberpixels), | |
183 paste0("input mz: ", numberfeatures)) | |
184 | |
185 filtered = c(paste0(maxfeatures2), | |
186 paste0(minmz2, " - ", maxmz2), | |
187 paste0(pixelcount2), | |
188 paste0(minimumx2, " - ", maximumx2), | |
189 paste0(minimumy2, " - ", maximumy2), | |
190 paste0(percpeaks2, " %"), | |
191 paste0(medint2), | |
192 paste0(NumemptyTIC2), | |
193 paste0("valid pixels: ", sum(validpixels)), | |
194 paste0("valid mz: ", sum(validfeatures))) | |
195 | |
196 | |
197 property_df = data.frame(properties, before, filtered) | |
198 | |
199 | |
200 | |
201 ######################################## PDF QC ############################################# | |
202 | |
203 ## filtertool_QC.pdf is picked up from the working directory as the filtering_qc output | |
204 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) | |
205 plot(0,type='n',axes=FALSE,ann=FALSE) | |
206 ## the empty plot above only provides a page for the title and the property table | |
207 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) | |
208 | |
209 | |
210 grid.table(property_df, rows= NULL) | |
211 | |
212 | |
213 ### heatmap image as visual pixel control | |
214 | |
215 ## ylim is passed as (max, min), each widened by 20 percent of its own value; presumably this draws the y axis top-down, TODO confirm | |
216 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", | |
217 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) | |
218 | |
219 ### control features which are left | |
220 ## upper panel: mz values of the input file, lower panel: mz values remaining after filtering | |
221 par(mfrow = c(2,1)) | |
222 plot(featuresinfile, ylab = "m/z in Dalton", xlab = "feature index") | |
223 plot(mz(msidata), ylab = "m/z in Dalton", xlab = "feature index") | |
224 | |
225 | |
226 dev.off() | |
227 | |
228 #end if | |
229 | |
230 ######################################## intensity matrix ################################## | |
231 | |
232 #if $output_matrix: | |
233 | |
234 if (length(features(msidata))> 0 && length(pixels(msidata)) > 0) | |
235 { | |
236 ## the pixel vector is bound on top presumably only to carry the pixel names over as column names; that first row is dropped again when writing | |
237 spectramatrix = spectra(msidata) | |
238 rownames(spectramatrix) = mz(msidata) | |
239 newmatrix = rbind(pixels(msidata), spectramatrix) | |
240 write.table(newmatrix[2:nrow(newmatrix),,drop=FALSE], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | |
241 ## drop=FALSE keeps the matrix shape (and the mz row name) when only a single feature is left | |
242 }else{ | |
243 print("file has no features or pixels left") | |
244 } | |
245 | |
246 #end if | |
247 | |
248 | |
249 ]]></configfile> | |
250 </configfiles> | |
251 <inputs> | |
252 <param name="infile" type="data" format="imzml, rdata, analyze75" | |
253 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" | |
254 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> | |
255 <param name="inputpixels" type="data" optional="true" format="tabular" label="pixels for filtering of MSI data" | |
256 help="tabular file with pixels of interest in the form x = 1, y = 1"/> | |
257 <param name="pixel_column" data_ref="inputpixels" optional="true" label="Column with pixels" type="data_column" /> | |
258 <param name="inputfeatures" type="data" optional="true" format="tabular" label="features for filtering of MSI data" | |
259 help="tabular file with masses of interest in the form mz = 800.05"/> | |
260 <param name="feature_column" data_ref="inputfeatures" optional="true" label="Column with features" type="data_column" /> | |
261 | |
262 <conditional name="outputs"> | |
263 <param name="outputs_select" type="select" label="Quality control output"> | |
264 <option value="quality_control" selected="True">yes</option> | |
265 <option value="no_quality_control" >no</option> | |
266 </param> | |
267 <when value="quality_control"> | |
268 <param name="inputmz" type="float" value="1296.7" label="Mass for which a heatmap image will be drawn" help="Use a mass which is still present in all pixels to control if the pixel filtering went well"/> | |
269 <param name="plusminus_dalton" value="0.25" type="float" label="mass range for mz value" help="plusminus mass window in Dalton"/> | |
270 </when> | |
271 </conditional> | |
272 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> | |
273 </inputs> | |
274 <outputs> | |
275 <data format="rdata" name="msidata_filtered" label="${tool.name} on $infile.display_name"/> | |
276 <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "QC ${tool.name} on $infile.display_name"> | |
277 <filter>outputs["outputs_select"] == "quality_control"</filter> | |
278 </data> | |
279 <data format="tabular" name="matrixasoutput" label="matrix ${tool.name} on $infile.display_name"> | |
280 <filter>output_matrix</filter> | |
281 </data> | |
282 </outputs> | |
283 | |
284 <tests> | |
285 <test expect_num_outputs="2"> | |
286 <param name="infile" value="" ftype="imzml"> | |
287 <composite_data value="Example_Continuous.imzML"/> | |
288 <composite_data value="Example_Continuous.ibd"/> | |
289 </param> | |
290 <param name="inputpixels" ftype="tabular" value = "inputpixels.tabular"/> | |
291 <param name="pixel_column" value="1"/> | |
292 <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> | |
293 <param name="feature_column" value="2"/> | |
294 | |
295 <conditional name="outputs"> | |
296 <param name="outputs_select" value="quality_control"/> | |
297 <param name="inputmz" value="328.9"/> | |
298 <param name="plusminus_dalton" value="0.25"/> | |
299 </conditional> | |
300 <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> | |
301 <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size" /> | |
302 </test> | |
303 <test expect_num_outputs="3"> | |
304 <param name="infile" value="" ftype="analyze75"> | |
305 <composite_data value="Analyze75.hdr"/> | |
306 <composite_data value="Analyze75.img"/> | |
307 <composite_data value="Analyze75.t2m"/> | |
308 </param> | |
309 <param name="inputpixels" ftype="tabular" value = "inputpixels2.tabular"/> | |
310 <param name="pixel_column" value="1"/> | |
311 <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> | |
312 <param name="feature_column" value="1"/> | |
313 <conditional name="outputs"> | |
314 <param name="outputs_select" value="quality_control"/> | |
315 <param name="inputmz" value="702"/> | |
316 <param name="plusminus_dalton" value="0.25"/> | |
317 </conditional> | |
318 <param name="output_matrix" value="True"/> | |
319 <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> | |
320 <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> | |
321 <output name="matrixasoutput" file="analyze_matrix.tabular"/> | |
322 </test> | |
323 <test expect_num_outputs="1"> | |
324 <param name="infile" value="" ftype="analyze75"> | |
325 <composite_data value="Analyze75.hdr"/> | |
326 <composite_data value="Analyze75.img"/> | |
327 <composite_data value="Analyze75.t2m"/> | |
328 </param> | |
329 <conditional name="outputs"> | |
330 <param name="outputs_select" value="no_quality_control"/> | |
331 </conditional> | |
332 <output name="msidata_filtered" file="analyze_originaloutput.RData" compare="sim_size" /> | |
333 </test> | |
334 <test expect_num_outputs="2"> | |
335 <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/> | |
336 <conditional name="outputs"> | |
337 <param name="outputs_select" value="no_quality_control"/> | |
338 </conditional> | |
339 <param name="output_matrix" value="True"/> | |
340 <output name="matrixasoutput" file="rdata_matrix.tabular"/> | |
341 </test> | |
342 </tests> | |
343 <help> | |
344 <![CDATA[ | |
345 | |
346 This tool can filter three types of mass-spectrometry imaging files (see below) for pixels and features of interest. This can be used to keep only pixels in a region of interest. | |
347 For filtering, at least one valid pixel/feature is needed; otherwise no filtering will be performed. | |
348 | |
349 Input data: 3 types of input data can be used: | |
350 | |
351 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_ | |
352 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) | |
353 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) | |
354 | |
355 The output of this tool is a subsetted Cardinal "MSImageSet" with the variable name "msidata" saved as .RData. | |
356 ]]> | |
357 </help> | |
358 <citations> | |
359 <citation type="doi">10.1093/bioinformatics/btv146</citation> | |
360 </citations> | |
361 </tool> |