comparison maldi_quant_preprocessing.xml @ 0:e2aa05746a69 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author galaxyp
date Wed, 22 Aug 2018 11:49:06 -0400
parents
children 0892a051eb17
comparison
equal deleted inserted replaced
-1:000000000000 0:e2aa05746a69
1 <tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="1.18.0.0">
2 <description>
3 Preprocessing of mass-spectrometry imaging data
4 </description>
5 <macros>
6 <import>maldi_macros.xml</import>
7 </macros>
8 <expand macro="requirements"/>
9 <command detect_errors="exit_code">
10 <![CDATA[
11 #if $infile.ext == 'imzml'
12 cp '${infile.extra_files_path}/imzml' infile.imzML &&
13 cp '${infile.extra_files_path}/ibd' infile.ibd &&
14 #elif $infile.ext == 'analyze75'
15 cp '${infile.extra_files_path}/hdr' infile.hdr &&
16 cp '${infile.extra_files_path}/img' infile.img &&
17 cp '${infile.extra_files_path}/t2m' infile.t2m &&
18 du infile.hdr &&
19 du infile.img &&
20 du -s -B1 infile.hdr &&
21 #else
22 ln -s $infile infile.RData &&
23 #end if
24 Rscript "${maldi_quant_preprocessing}" &&
25 mkdir $outfile_imzml.files_path &&
26 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
27 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
28 echo "imzML file:" > $outfile_imzml &&
29 ls -l "$outfile_imzml.files_path" >> $outfile_imzml
30 ]]>
31 </command>
32 <configfiles>
33 <configfile name="maldi_quant_preprocessing"><![CDATA[
34
35 @R_IMPORTS@
36
37 #if $restriction_conditional.restriction == 'restrict':
38
39 print('Reading mask region')
40 ## Import imzML file
41
42 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE))[,1:2]
43
44 maldi_data = importImzMl('infile.imzML',
45 coordinates = coordinate_matrix)
46 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2])
47
48 #else:
49
50 print('Reading entire file')
51 #if $infile.ext == 'imzml'
52 ## Import imzML file
53 maldi_data = import( 'infile.imzML', type="imzML" )
54 #elif $infile.ext == 'analyze75'
55 ## Import analyze7.5 file
56 maldi_data = import( 'infile.hdr' )
57 #else
58 loadRData <- function(fileName){
59 ## loads an RData file into the function environment and returns the object stored in it
60 load(fileName)
61 get(ls()[ls() != "fileName"])
62 }
63 msidata = loadRData('infile.RData')
64
65 ## save the first two coordinate columns; they are passed to exportImzMl for RData input
66 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
67 ## save mz values
68 cardinal_mzs = Cardinal::mz(msidata)
69 ## create one MALDIquant MassSpectrum object per column of the intensity matrix; seq_len() yields no iterations when there are no spectra
70 cardinal_intensities = iData(msidata)
71 maldi_data = lapply(seq_len(ncol(msidata)), function(number_spectra){
72 createMassSpectrum(mass = cardinal_mzs, intensity = cardinal_intensities[,number_spectra])
73 })
74
75 #end if
76
77 #end if
78
79 ## Quality control plots during preprocessing
80
81 pdf("prepro_qc_plot.pdf", fonts = "Times", pointsize = 12)
82 plot(0,type='n',axes=FALSE,ann=FALSE)
83
84 ## if no filename is given, name of file in Galaxy history is used
85 #set $filename = $infile.display_name
86 title(main=paste("$filename"))
87
88 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
89 print("use annotation file")
90 ## read and extract x,y,annotation information
91 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
92 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
93 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
94
95 ## merge with coordinate information of MSI data
96 coordinates_st = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
97 colnames(coordinates_st)[3] = "pixel_index"
98 merged_annotation = merge(coordinates_st, annotation_input, by=c("x", "y"), all.x=TRUE)
99 merged_annotation[is.na(merged_annotation)] = "NA"
100 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
101 samples = as.factor(merged_annotation\$annotation)
102
103 ## print annotation overview into PDF output
104
105 ## the more annotation groups a file has, the smaller the legend text; each threshold is the exclusive upper bound of its bin, so 20, 40 and 60 fall into the next smaller size instead of the smallest one
106 number_combined = length(levels(as.factor(merged_annotation\$annotation)))
107 if (number_combined<20){
108 legend_size = 10
109 }else if (number_combined<40){
110 legend_size = 9
111 }else if (number_combined<60){
112 legend_size = 8
113 }else if (number_combined<100){
114 legend_size = 7
115 }else{
116 legend_size = 6
117 }
118
119 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+
120 geom_tile() +
121 coord_fixed()+
122 ggtitle("Spatial orientation of annotated data")+
123 theme_bw()+
124 theme(plot.title = element_text(hjust = 0.5))+
125 theme(text=element_text(family="ArialMT", face="bold", size=12))+
126 theme(legend.position="bottom",legend.direction="vertical")+
127 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
128 guides(fill=guide_legend(ncol=5,byrow=TRUE))
129
130 print(combine_plot)
131
132 #end if
133
134 #################### Preprocessing methods #####################################
135
136 ## QC plot
137 avgSpectra = averageMassSpectra(maldi_data,method="mean")
138 plot(avgSpectra, main="Average spectrum for input file")
139
140 #for $method in $methods:
141
142 #if str( $method.methods_conditional.method ) == 'Transformation':
143
144 print('transforming')
145 ##transformation
146 maldi_data = transformIntensity(maldi_data, method="$method.methods_conditional.transform_method")
147 ## QC plot
148 avgSpectra = averageMassSpectra(maldi_data,method="mean")
149 plot(avgSpectra, main="Average spectrum after transformation")
150
151
152 #elif str( $method.methods_conditional.method ) == 'Smoothing':
153
154 print('smoothing')
155 ##smoothing
156
157 #if str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'SavitzkyGolay':
158 print('SavitzkyGolay')
159
160 maldi_data = smoothIntensity(maldi_data,
161 method="SavitzkyGolay", polynomialOrder=$method.methods_conditional.methods_for_smoothing.polynomial,
162 halfWindowSize=$method.methods_conditional.halfWindowSize)
163
164 #elif str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'MovingAverage':
165 print('MovingAverage')
166
167 maldi_data = smoothIntensity(maldi_data,
168 method="MovingAverage", weighted=$method.methods_conditional.methods_for_smoothing.weighted,
169 halfWindowSize=$method.methods_conditional.halfWindowSize)
170
171 #end if
172
173 ## QC plot
174 avgSpectra = averageMassSpectra(maldi_data,method="mean")
175 plot(avgSpectra, main="Average spectrum after smoothing")
176
177
178 #elif str( $method.methods_conditional.method ) == 'Baseline':
179
180 print('baseline removing')
181 ## Remove baseline
182
183 maldi_data = removeBaseline(maldi_data,
184 method="$method.methods_conditional.baseline_method",
185 iterations=$method.methods_conditional.iterations)
186 ## QC plot
187 avgSpectra = averageMassSpectra(maldi_data,method="mean")
188 plot(avgSpectra, main="Average spectrum after baseline removal")
189
190
191 #elif str( $method.methods_conditional.method ) == 'Calibrate':
192
193 print('calibrate')
194 ##calibrate
195
196 #if $method.methods_conditional.mass_start != 0 and $method.methods_conditional.mass_end != 0:
197 ## calibrate only given m/z range
198 maldi_data = calibrateIntensity(maldi_data,
199 method="$method.methods_conditional.calibrate_method",
200 range=c($method.methods_conditional.mass_start, $method.methods_conditional.mass_end))
201 #else:
202 maldi_data = calibrateIntensity(maldi_data,
203 method="$method.methods_conditional.calibrate_method")
204 #end if
205 ## QC plot
206 avgSpectra = averageMassSpectra(maldi_data,method="mean")
207 plot(avgSpectra, main="Average spectrum after normalization")
208
209
210 #elif str( $method.methods_conditional.method ) == 'Align':
211
212 print('align')
213 ##align spectra
214
215 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':
216
217 maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
218 SNR=$method.methods_conditional.snr,
219 tolerance=$method.methods_conditional.tolerance,
220 warpingMethod="$method.methods_conditional.warping_method")
221
222 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':
223
224 ## create reference mass_vector from tabular file
225 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = FALSE, stringsAsFactors = FALSE)[,1]
226 int_vector = rep(1,length(mass_vector))
227 mass_list = createMassPeaks(mass_vector, int_vector)
228
229 maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
230 SNR=$method.methods_conditional.snr,
231 tolerance=$method.methods_conditional.tolerance,
232 warpingMethod="$method.methods_conditional.warping_method",
233 reference = mass_list, allowNoMatches =$method.methods_conditional.reference_for_alignment.allow_nomatch, emptyNoMatches = $method.methods_conditional.reference_for_alignment.empty_nomatch)
234
235 #if $method.methods_conditional.reference_for_alignment.remove_empty:
236 kept_spectra = setdiff(seq_along(maldi_data), findEmptyMassObjects(maldi_data))
237 #if $infile.ext == 'rdata'
238 cardinal_coordinates = cardinal_coordinates[kept_spectra,,drop=FALSE] ## keep coordinates of non-empty spectra only (Cardinal RData input); positive indexing stays correct when no spectrum is empty, and drop=FALSE keeps a matrix even for a single row
239 #end if
240 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
241 merged_annotation = merged_annotation[kept_spectra,] ## keep coordinate annotations of non-empty spectra only
242 #end if
243 maldi_data = removeEmptyMassObjects(maldi_data)
244 #end if
245 #end if
246
247 ## QC plot
248
249 if (length(maldi_data)>0){
250 avgSpectra = averageMassSpectra(maldi_data,method="mean")
251 plot(avgSpectra, main="Average spectrum after alignment")
252 }else{"All spectra are empty"}
253
254 #end if
255 #end for
256
257 dev.off()
258
259 ## export imzML file
260 if (length(maldi_data)>0){
261 #if $infile.ext == 'rdata'
262 MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates)
263 #else
264 MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed)
265 #end if
266
267 ## export annotation tabular file
268 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
269 write.table(merged_annotation, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
270 #end if
271 }else{"All spectra are empty, outputfiles will be empty,too."}
272
273 ]]>
274 </configfile>
275 </configfiles>
276 <inputs>
277 <param name="infile" type="data" format="imzml,rdata" label="MS metadata" help="This file is in imzML format or Cardinal MSImageSet saved as RData"/>
278 <conditional name="restriction_conditional">
279 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files">
280 <option value="no_restriction" selected="True">Calculate on entire file</option>
281 <option value="restrict">Restrict to coordinates of interest</option>
282 </param>
283 <when value="restrict">
284 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates which should be read" help="x-values in first column, y-values in second column"/>
285 </when>
286 <when value="no_restriction"/>
287 </conditional>
288 <conditional name="tabular_annotation">
289 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file to have updated annotation information in case empty spectra will be removed">
290 <option value="no_annotation" selected="True">use no annotation</option>
291 <option value="yes_annotation">use pixel annotation from a tabular file</option>
292 </param>
293 <when value="yes_annotation">
294 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
295 help="Tabular file with three columns: x values, y values and pixel annotations"/>
296 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
297 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
298 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
299 <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
300 </when>
301 <when value="no_annotation"/>
302 </conditional>
303 <repeat name="methods" title="Method" min="1">
304 <conditional name="methods_conditional">
305 <param name="method" type="select" label="Select the method you want to apply">
306 <option value="Transformation" selected="True">Transformation</option>
307 <option value="Smoothing">Smoothing</option>
308 <option value="Baseline">Baseline removal</option>
309 <option value="Calibrate">Calibrate</option>
310 <option value="Align">Align Spectra (warping/phase correction)</option>
311 <validator type="empty_field" />
312 </param>
313 <when value="Transformation">
314 <param name="transform_method" type="select" label="Select your transformation method">
315 <option value="sqrt" selected="True">sqrt</option>
316 <option value="log">log</option>
317 <option value="log2">log2</option>
318 <option value="log10">log10</option>
319 <validator type="empty_field" />
320 </param>
321 </when>
322 <when value="Smoothing">
323 <conditional name="methods_for_smoothing">
324 <param name="smooth_method" type="select" label="This method smoothes the intensity values of a MassSpectrum object">
325 <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option>
326 <option value="MovingAverage">MovingAverage</option>
327 </param>
328 <when value="SavitzkyGolay">
329 <param name="polynomial" value="3" type="text" label="PolynomialOrder argument to control the order of the filter"/>
330 </when>
331 <when value="MovingAverage">
332 <param name="weighted" type="boolean" label="Weighted average" help = "indicates if the average should be equal weight or if it should have weights depending on the distance from the center, calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weights normalized to 1" truevalue="TRUE" falsevalue="FALSE"/>
333 </when>
334 </conditional>
335 <param name="halfWindowSize" type="integer" value="10"
336 label="Half window size"
337 help="The resulting window reaches from
338 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
339 (window size is 2*halfWindowSize+1).
340 The best size differs depending on the selected smoothing method."/>
341 </when>
342 <when value="Baseline">
343 <param name="baseline_method" type="select" label="Baseline removal method">
344 <option value="SNIP" selected="True">SNIP</option>
345 <option value="TopHat">TopHat</option>
346 <option value="ConvexHull">ConvexHull</option>
347 <option value="median">median</option>
348 <validator type="empty_field" />
349 </param>
350 <param name="iterations" type="integer" value="100"
351 label="Number of iterations"
352 help=""/>
353 </when>
354 <when value="Calibrate">
355 <param name="calibrate_method" type="select" label="Calibration method">
356 <option value="TIC" selected="True">TIC</option>
357 <option value="PQN">PQN</option>
358 <option value="median">median</option>
359 <validator type="empty_field" />
360 </param>
361 <param name="mass_start" type="integer" value="0"
362 label="Start of m/z range, has to be inside m/z range"
363 help="Scaling factor is calculated on the mass range and applied to the whole spectrum"/>
364 <param name="mass_end" type="integer" value="0"
365 label="End of m/z range, has to be inside m/z range"
366 help="Both the start and the end value need to be different from 0 to be taken into account."/>
367 </when>
368 <when value="Align">
369 <param name="warping_method" type="select" label="Warping methods">
370 <option value="lowess" selected="True">Lowess</option>
371 <option value="linear">Linear</option>
372 <option value="quadratic">Quadratic</option>
373 <option value="cubic">Cubic</option>
374 </param>
375
376 <param name="tolerance" type="float" value="0.002"
377 label="Tolerance"
378 help="Double, maximal relative deviation of a peak position (m/z) to be considered as identical" />
379
380 <param name="halfWindowSize" type="integer" value="20"
381 label="Half window size"
382 help="The resulting window reaches from
383 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
384 (window size is 2*halfWindowSize+1).
385 The best size differs depending on the selected smoothing method."/>
386
387 <param name="snr" type="integer" value="2"
388 label="Signal-to-noise-ratio"
389 help=""/>
390
391 <conditional name="reference_for_alignment">
392 <param name="align_ref" type="select" label="Reference to which the samples should be aligned" help="Use internal calibrants to perform m/z calibration">
393 <option value="no_reference" selected="True">no reference</option>
394 <option value="yes_reference">reference from tabular file</option>
395 </param>
396 <when value="no_reference"/>
397 <when value="yes_reference">
398 <param name="reference_file" type="data" format="tabular"
399 label="Tabular file with m/z of internal calibrants (MassPeaks) which should be used for spectra alignment"
400 help="calibration of m/z values to internal calibrants, at least 2 m/z per spectrum are needed"/>
401 <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
402 <param name="empty_nomatch" type="boolean" label="logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
403 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE"/>
404 </when>
405 </conditional>
406 </when>
407 </conditional>
408 </repeat>
409 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/>
410 </inputs>
411 <outputs>
412 <data format="imzml" name="outfile_imzml" label="$infile.display_name processed" />
413 <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="$infile.display_name preprocessed QC"/>
414 <data format="tabular" name="annotation_output" label="$infile.display_name annotations">
415 <filter>tabular_annotation["load_annotation"] == 'yes_annotation'</filter>
416 </data>
417 </outputs>
418 <tests>
419 <test>
420 <param name="infile" value="" ftype="imzml">
421 <composite_data value="Example_Continuous.imzML"/>
422 <composite_data value="Example_Continuous.ibd"/>
423 </param>
424 <conditional name="restriction_conditional">
425 <param name="restriction" value="restrict"/>
426 <param name="coordinates_file" value="restricted_pixels.tabular"/>
427 </conditional>
428 <conditional name="methods_conditional">
429 <param name="method" value="Transformation"/>
430 <param name="transform_method" value="log2"/>
431 <param name="method" value="Smoothing"/>
432 <param name="smooth_method" value="SavitzkyGolay"/>
433 <param name="method" value="Baseline"/> <!-- must match the "Baseline" option value of the method select -->
434 <param name="baseline_method" value ="TopHat"/>
435 </conditional>
436 <output name="outfile_imzml" file="outfile1.imzML" compare="sim_size"/>
437 <output name="outfile_imzml" file="outfile1.ibd" compare="sim_size"/>
438 <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/>
439 </test>
440 <test>
441 <param name="infile" value="msidata_1.RData" ftype="rdata"/>
442 <conditional name="methods_conditional">
443 <param name="method" value="Calibrate"/>
444 <param name="calibrate_method" value="PQN"/>
445 </conditional>
446 <output name="outfile_imzml" file="outfile2.imzML" compare="sim_size"/>
447 <output name="outfile_imzml" file="outfile2.ibd" compare="sim_size"/>
448 <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/>
449 </test>
450 <test>
451 <param name="infile" value="" ftype="imzml">
452 <composite_data value="Example_Continuous.imzML"/>
453 <composite_data value="Example_Continuous.ibd"/>
454 </param>
455 <conditional name="tabular_annotation">
456 <param name="load_annotation" value="yes_annotation"/>
457 <param name="annotation_file" value="pixel_annotations.tabular"/>
458 <param name="column_x" value="1"/>
459 <param name="column_y" value="2"/>
460 <param name="column_names" value="3"/>
461 <param name="tabular_header" value="TRUE"/>
462 </conditional>
463 <conditional name="methods_conditional">
464 <param name="method" value="Align"/>
465 <param name="warping_method" value="linear"/>
466 <param name="halfWindowSize" value="1"/>
467 <conditional name="reference_for_alignment">
468 <param name="align_ref" value="yes_reference"/>
469 <param name="reference_file" value="align_reference_test2.tabular" ftype="tabular"/>
470 <param name="allow_nomatch" value="TRUE"/>
471 <param name="remove_empty" value="TRUE"/>
472 <param name="empty_nomatch" value="TRUE"/>
473 </conditional>
474 </conditional>
475 <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/>
476 <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/>
477 <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/>
478 <output name="annotation_output" file="annotations_output3.tabular"/>
479 </test>
480 </tests>
481 <help><![CDATA[
482
483 MALDIquant_ provides a complete analysis pipeline for MALDI-TOF and other mass spectrometry data. So far we have only implemented the functionalities for mass spectrometry imaging data.
484
485 Input data:
486
487 - MSI data as imzML file (upload via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
488 - optional tabular file with pixel coordinates to restrict reading of imzML file to coordinates of interest
489
490 Options:
491
492 - Transformation: transformation of intensities with log, log2, log10 and squareroot
493 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are SavitzkyGolay and Moving Average
494 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). Available methods are SNIP, TopHat, ConvexHull and median.
495 - Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN)
496 - Spectra alignment (warping): alignment for (re)calibration of m/z values
497
498
499 Output:
500
501 - imzML file (imzML format can be continuous or processed)
502 - pdf with average mass spectra after each preprocessing step
503
504 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/
505
506 ]]>
507 </help>
508 <expand macro="citation"/>
509 </tool>