Mercurial > repos > galaxyp > maldi_quant_preprocessing
comparison maldi_quant_preprocessing.xml @ 3:71411ac28268 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f127be2141cf22e269c85282d226eb16fe14a9c1
author | galaxyp |
---|---|
date | Fri, 15 Feb 2019 10:26:00 -0500 |
parents | e754c2b545a9 |
children | 60ee8c592b13 |
comparison
equal
deleted
inserted
replaced
2:e754c2b545a9 | 3:71411ac28268 |
---|---|
1 <tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.2"> | 1 <tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.3"> |
2 <description> | 2 <description> |
3 Preprocessing of mass-spectrometry imaging data | 3 Preprocessing of mass-spectrometry imaging data |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>maldi_macros.xml</import> | 6 <import>maldi_macros.xml</import> |
40 | 40 |
41 print('Reading mask region') | 41 print('Reading mask region') |
42 ## Import imzML file | 42 ## Import imzML file |
43 | 43 |
44 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] | 44 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] |
45 coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)] | |
45 | 46 |
46 maldi_data = importImzMl('infile.imzML', | 47 maldi_data = importImzMl('infile.imzML', |
47 coordinates = coordinate_matrix) | 48 coordinates = coordinate_matrix) |
48 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) | 49 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) |
49 | 50 |
58 ## Import analyze7.5 file | 59 ## Import analyze7.5 file |
59 maldi_data = importAnalyze( 'infile.hdr' ) | 60 maldi_data = importAnalyze( 'infile.hdr' ) |
60 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) | 61 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) |
61 #else | 62 #else |
62 loadRData <- function(fileName){ | 63 loadRData <- function(fileName){ |
63 #loads an RData file, and returns it | 64 ##loads an RData file, and returns it |
64 load(fileName) | 65 load(fileName) |
65 get(ls()[ls() != "fileName"]) | 66 get(ls()[ls() != "fileName"]) |
66 } | 67 } |
67 msidata = loadRData('infile.RData') | 68 msidata = loadRData('infile.RData') |
68 ## save coordinates | 69 ## save coordinates |
69 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) | 70 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) |
70 ## save mz values | 71 ## save mz values |
71 cardinal_mzs = Cardinal::mz(msidata) | 72 cardinal_mzs = Cardinal::mz(msidata) |
72 ## create MALDIquant MassSpectrum object | 73 ## create MALDIquant MassSpectrum object, order of pixels in iData is same as in coord(msidata): |
73 maldi_data = list() | 74 maldi_data = list() |
74 for(number_spectra in 1:ncol(msidata)){ | 75 for(number_spectra in 1:ncol(msidata)){ |
75 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) | 76 maldi_data[[number_spectra]] = MALDIquant::createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) |
76 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data))) | |
77 } | 77 } |
78 | |
79 #end if | 78 #end if |
80 | 79 |
81 #end if | 80 #end if |
82 | 81 |
83 ## Quality control plots during preprocessing | 82 ## Quality control plots during preprocessing |
87 | 86 |
88 ## if no filename is given, name of file in Galaxy history is used | 87 ## if no filename is given, name of file in Galaxy history is used |
89 #set $filename = $infile.display_name | 88 #set $filename = $infile.display_name |
90 title(main=paste("$filename")) | 89 title(main=paste("$filename")) |
91 | 90 |
92 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | |
93 print("use annotation file") | |
94 | |
95 ## read and extract x,y,annotation information | |
96 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) | |
97 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] | |
98 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" | |
99 | |
100 ## merge with coordinate information (from above) of MSI data | |
101 colnames(coordinates_info)[3] = "pixel_index" | |
102 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) | |
103 merged_annotation[is.na(merged_annotation)] = "NA" | |
104 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] | |
105 samples = as.factor(merged_annotation\$annotation) | |
106 | |
107 ## print annotation overview into PDF output | |
108 | |
109 ## the more annotation groups a file has the smaller will be the legend | |
110 number_combined = length(levels(as.factor(merged_annotation\$annotation))) | |
111 if (number_combined<20){ | |
112 legend_size = 10 | |
113 }else if (number_combined>20 && number_combined<40){ | |
114 legend_size = 9 | |
115 }else if (number_combined>40 && number_combined<60){ | |
116 legend_size = 8 | |
117 }else if (number_combined>60 && number_combined<100){ | |
118 legend_size = 7 | |
119 }else{ | |
120 legend_size = 6 | |
121 } | |
122 | |
123 combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+ | |
124 geom_tile() + | |
125 coord_fixed()+ | |
126 ggtitle("Spatial orientation of annotated data")+ | |
127 theme_bw()+ | |
128 theme(plot.title = element_text(hjust = 0.5))+ | |
129 theme(text=element_text(family="ArialMT", face="bold", size=12))+ | |
130 theme(legend.position="bottom",legend.direction="vertical")+ | |
131 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ | |
132 guides(fill=guide_legend(ncol=5,byrow=TRUE)) | |
133 | |
134 print(combine_plot) | |
135 | |
136 #end if | |
137 | 91 |
138 #################### Preprocessing methods ##################################### | 92 #################### Preprocessing methods ##################################### |
139 | 93 |
140 ## QC plot on input file | 94 ## QC plot on input file |
141 avgSpectra = averageMassSpectra(maldi_data,method="mean") | 95 avgSpectra = averageMassSpectra(maldi_data,method="mean") |
145 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | 99 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) |
146 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | 100 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) |
147 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | 101 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) |
148 number_features = length(unique(unlist(lapply(maldi_data,mass)))) | 102 number_features = length(unique(unlist(lapply(maldi_data,mass)))) |
149 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | 103 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) |
150 inputdata = c(minmz, maxmz,number_features,mean_features, medint) | 104 inputdata = c(minmz, maxmz,number_features,mean_features,medint, pixel_number) |
151 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint)) | 105 QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint, pixel_number)) |
152 vectorofactions = "inputdata" | 106 vectorofactions = "inputdata" |
153 | 107 |
154 | 108 |
155 #for $method in $methods: | 109 #for $method in $methods: |
156 | 110 |
166 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | 120 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) |
167 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | 121 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) |
168 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | 122 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) |
169 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | 123 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) |
170 number_features = length(unique(unlist(lapply(maldi_data,mass)))) | 124 number_features = length(unique(unlist(lapply(maldi_data,mass)))) |
171 transformed = c(minmz, maxmz,number_features,mean_features, medint) | 125 transformed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) |
172 QC_numbers= cbind(QC_numbers, transformed) | 126 QC_numbers= cbind(QC_numbers, transformed) |
173 vectorofactions = append(vectorofactions, "transformed") | 127 vectorofactions = append(vectorofactions, "transformed") |
174 | 128 |
175 | 129 |
176 #elif str( $method.methods_conditional.method ) == 'Smoothing': | 130 #elif str( $method.methods_conditional.method ) == 'Smoothing': |
201 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | 155 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) |
202 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | 156 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) |
203 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | 157 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) |
204 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | 158 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) |
205 number_features = length(unique(unlist(lapply(maldi_data,mass)))) | 159 number_features = length(unique(unlist(lapply(maldi_data,mass)))) |
206 smoothed = c(minmz, maxmz,number_features,mean_features, medint) | 160 smoothed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) |
207 QC_numbers= cbind(QC_numbers, smoothed) | 161 QC_numbers= cbind(QC_numbers, smoothed) |
208 vectorofactions = append(vectorofactions, "smoothed") | 162 vectorofactions = append(vectorofactions, "smoothed") |
209 | 163 |
210 | 164 |
211 #elif str( $method.methods_conditional.method ) == 'Baseline': | 165 #elif str( $method.methods_conditional.method ) == 'Baseline': |
212 | 166 |
213 print('baseline removing') | 167 print('baseline removing') |
214 ## Remove baseline | 168 ## Remove baseline |
215 | 169 |
170 ## Choose random spectra for QC plots | |
171 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) | |
172 | |
216 #if str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'SNIP': | 173 #if str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'SNIP': |
217 print('SNIP') | 174 print('SNIP') |
218 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) | 175 |
219 par(mfrow = c(2,2)) | 176 par(mfrow = c(2,2)) |
220 for (random_sample in random_spectra){ | 177 for (random_sample in random_spectra){ |
221 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], | 178 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], |
222 method="SNIP", iterations=$method.methods_conditional.methods_for_baseline.iterations) | 179 method="SNIP", iterations=$method.methods_conditional.methods_for_baseline.iterations) |
223 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) | 180 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) |
225 | 182 |
226 maldi_data = removeBaseline(maldi_data, | 183 maldi_data = removeBaseline(maldi_data, |
227 method="SNIP", | 184 method="SNIP", |
228 iterations=$method.methods_conditional.methods_for_baseline.iterations) | 185 iterations=$method.methods_conditional.methods_for_baseline.iterations) |
229 | 186 |
187 | |
230 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'TopHat': | 188 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'TopHat': |
231 print('TopHat') | 189 print('TopHat') |
190 | |
191 par(mfrow = c(2,2)) | |
192 for (random_sample in random_spectra){ | |
193 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], | |
194 method="TopHat", halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) | |
195 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) | |
196 lines(maldi_data_baseline, col="blue", lwd=2)} | |
232 | 197 |
233 maldi_data = removeBaseline(maldi_data, | 198 maldi_data = removeBaseline(maldi_data, |
234 method="TopHat", | 199 method="TopHat", |
235 halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) | 200 halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) |
236 | 201 |
202 | |
237 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'ConvexHull': | 203 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'ConvexHull': |
238 print('ConvexHull') | 204 print('ConvexHull') |
239 | 205 |
206 par(mfrow = c(2,2)) | |
207 for (random_sample in random_spectra){ | |
208 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], | |
209 method="ConvexHull") | |
210 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) | |
211 lines(maldi_data_baseline, col="blue", lwd=2)} | |
212 | |
240 maldi_data = removeBaseline(maldi_data, | 213 maldi_data = removeBaseline(maldi_data, |
241 method="ConvecHull") | 214 method="ConvexHull") |
215 | |
242 | 216 |
243 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'median': | 217 #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'median': |
244 print('median') | 218 print('median') |
245 | 219 |
220 par(mfrow = c(2,2)) | |
221 for (random_sample in random_spectra){ | |
222 maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], | |
223 method="median", halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) | |
224 plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) | |
225 lines(maldi_data_baseline, col="blue", lwd=2)} | |
226 | |
246 maldi_data = removeBaseline(maldi_data, | 227 maldi_data = removeBaseline(maldi_data, |
247 method="TopHat", | 228 method="median", |
248 halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) | 229 halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) |
249 | 230 |
250 #end if | 231 #end if |
251 | 232 |
252 ## QC plot and numbers | 233 ## QC plot and numbers |
257 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | 238 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) |
258 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | 239 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) |
259 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | 240 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) |
260 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | 241 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) |
261 number_features = length(unique(unlist(lapply(maldi_data,mass)))) | 242 number_features = length(unique(unlist(lapply(maldi_data,mass)))) |
262 baseline_removed = c(minmz, maxmz,number_features,mean_features, medint) | 243 baseline_removed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) |
263 QC_numbers= cbind(QC_numbers, baseline_removed) | 244 QC_numbers= cbind(QC_numbers, baseline_removed) |
264 vectorofactions = append(vectorofactions, "baseline_removed") | 245 vectorofactions = append(vectorofactions, "bl_removed") |
265 | 246 |
266 | 247 |
267 #elif str( $method.methods_conditional.method ) == 'Calibrate': | 248 #elif str( $method.methods_conditional.method ) == 'Calibrate': |
268 | 249 |
269 print('calibrate') | 250 print('calibrate') |
285 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | 266 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) |
286 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | 267 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) |
287 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | 268 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) |
288 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | 269 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) |
289 number_features = length(unique(unlist(lapply(maldi_data,mass)))) | 270 number_features = length(unique(unlist(lapply(maldi_data,mass)))) |
290 intensity_calibrated = c(minmz, maxmz,number_features,mean_features, medint) | 271 intensity_calibrated = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) |
291 QC_numbers= cbind(QC_numbers, intensity_calibrated) | 272 QC_numbers= cbind(QC_numbers, intensity_calibrated) |
292 vectorofactions = append(vectorofactions, "intensity_calibrated ") | 273 vectorofactions = append(vectorofactions, "calibrated") |
293 | 274 |
294 | 275 |
295 #elif str( $method.methods_conditional.method ) == 'Align': | 276 #elif str( $method.methods_conditional.method ) == 'Align': |
296 | 277 |
297 print('align') | 278 print('align') |
298 ##align spectra | 279 ##align spectra with 3 separate functions |
280 | |
281 ## create reference if needed | |
282 | |
283 ## 1) detect peaks: | |
284 peaks <- detectPeaks(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, | |
285 method="$method.methods_conditional.peak_method", SNR=$method.methods_conditional.snr) | |
286 | |
299 | 287 |
300 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference': | 288 #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference': |
301 maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, | 289 |
302 SNR=$method.methods_conditional.snr, tolerance=$method.methods_conditional.tolerance, | 290 ## 2) calculate warping: |
303 allowNoMatches =$method.methods_conditional.allow_nomatch, emptyNoMatches = $method.methods_conditional.empty_nomatch, | 291 warping_function <- determineWarpingFunctions(peaks, |
304 warpingMethod="$method.methods_conditional.warping_method") | 292 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", |
293 allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency) | |
294 | |
295 ## 3) warp spectra: | |
296 maldi_data = warpMassSpectra(maldi_data, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) | |
297 | |
305 | 298 |
306 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference': | 299 #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference': |
300 | |
307 ## create reference mass_vector from tabular file | 301 ## create reference mass_vector from tabular file |
308 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,1] | 302 mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column] |
309 int_vector = rep(1,length(mass_vector)) | 303 int_vector = rep(1,length(mass_vector)) |
310 mass_list = createMassPeaks(mass_vector, int_vector) | 304 mass_list = createMassPeaks(mass_vector, int_vector) |
311 | 305 |
312 maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, | 306 #if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE" |
313 SNR=$method.methods_conditional.snr, | 307 print('default alignment') |
314 tolerance=$method.methods_conditional.tolerance, | 308 |
315 warpingMethod="$method.methods_conditional.warping_method", | 309 ## 2) calculate warping: |
316 reference = mass_list, allowNoMatches =$method.methods_conditional.allow_nomatch, emptyNoMatches = $method.methods_conditional.empty_nomatch) | 310 warping_function <- determineWarpingFunctions(peaks, |
317 | 311 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", |
312 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list) | |
313 | |
314 ## 3) warp spectra: | |
315 maldi_data = warpMassSpectra(maldi_data, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) | |
316 | |
317 #elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE" | |
318 print('spectra wise alignment') | |
319 | |
320 maldi_data_new_list =list() | |
321 | |
322 for (pixelnb in 1:length(peaks)) | |
323 { | |
324 ## 2) calculate warping: | |
325 warping_function <- determineWarpingFunctions(peaks[[pixelnb]], | |
326 tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", | |
327 allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list) | |
328 | |
329 ## 3) warp spectra: | |
330 maldi_data_new = warpMassSpectra(list(maldi_data[[pixelnb]]), warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) | |
331 maldi_data_new_list = c(maldi_data_new_list, maldi_data_new) | |
332 | |
333 } | |
334 maldi_data = maldi_data_new_list | |
335 #end if | |
318 #end if | 336 #end if |
319 | 337 |
338 | |
320 #if $method.methods_conditional.remove_empty: | 339 #if $method.methods_conditional.remove_empty: |
321 print("remove empty spectra") | 340 print(paste(length(findEmptyMassObjects(maldi_data)), " empty spectra were removed", sep=" ")) |
322 | 341 |
323 #if $infile.ext == 'rdata' | 342 ## only if there are empty spectra to remove |
324 cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(maldi_data),] ## remove coordinates of empty spectra for Cardinal RData input | 343 |
325 #end if | 344 if (length(findEmptyMassObjects(maldi_data))>0) |
326 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 345 |
327 merged_annotation = merged_annotation[-findEmptyMassObjects(maldi_data),] ## remove coordinate annotations for empty spectra | 346 { |
328 #end if | 347 #if $infile.ext == 'rdata' |
329 maldi_data = removeEmptyMassObjects(maldi_data) | 348 cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(maldi_data),,drop=FALSE] ## remove coordinates of empty spectra for Cardinal RData input |
349 #end if | |
350 | |
351 maldi_data = removeEmptyMassObjects(maldi_data) | |
352 } | |
330 #end if | 353 #end if |
331 | |
332 | 354 |
333 ## QC plot | 355 ## QC plot |
334 | 356 |
335 if (length(maldi_data)>0){ | 357 if (length(maldi_data)>0){ |
336 avgSpectra = averageMassSpectra(maldi_data,method="mean") | 358 avgSpectra = averageMassSpectra(maldi_data,method="mean") |
337 plot(avgSpectra, main="Average spectrum after alignment") | 359 plot(avgSpectra, main="Average spectrum after alignment") |
338 }else{"All spectra are empty"} | 360 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) |
361 }else{print("All spectra are empty")} | |
339 | 362 |
340 pixel_number = length(maldi_data) | 363 pixel_number = length(maldi_data) |
341 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | 364 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) |
342 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | 365 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) |
343 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | 366 mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) |
344 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | 367 |
345 number_features = length(unique(unlist(lapply(maldi_data,mass)))) | 368 number_features = length(unique(unlist(lapply(maldi_data,mass)))) |
346 spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint) | 369 spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint,pixel_number) |
347 QC_numbers= cbind(QC_numbers, spectra_aligned) | 370 QC_numbers= cbind(QC_numbers, spectra_aligned) |
348 vectorofactions = append(vectorofactions, "spectra_aligned") | 371 vectorofactions = append(vectorofactions, "aligned") |
372 | |
373 #elif str( $method.methods_conditional.method ) == 'skip_preprocessing': | |
374 ##for now as option to filter large files | |
375 | |
349 #end if | 376 #end if |
350 | 377 |
351 #end for | 378 #end for |
352 | 379 |
353 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity") | 380 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity", "pixel\nnumber") |
381 colnames(QC_numbers) = vectorofactions | |
354 plot(0,type='n',axes=FALSE,ann=FALSE) | 382 plot(0,type='n',axes=FALSE,ann=FALSE) |
355 grid.table(t(QC_numbers)) | 383 grid.table(t(QC_numbers)) |
356 | 384 |
357 dev.off() | 385 dev.off() |
358 | 386 |
368 | 396 |
369 ]]> | 397 ]]> |
370 </configfile> | 398 </configfile> |
371 </configfiles> | 399 </configfiles> |
372 <inputs> | 400 <inputs> |
373 <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML format or Cardinal MSImageSet saved as RData. The file must be in profile mode, not centroided"/> | 401 <param name="infile" type="data" format="imzml,rdata,analyze75" label="MSI data" help="Input file as imzML (composite upload), or Cardinal MSImageSet saved as RData (regular upload). The file must be in profile mode, not centroided."/> |
374 <conditional name="restriction_conditional"> | 402 <conditional name="restriction_conditional"> |
375 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> | 403 <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files"> |
376 <option value="no_restriction" selected="True">Calculate on entire file</option> | 404 <option value="no_restriction" selected="True">No, calculate on entire file</option> |
377 <option value="restrict">Restrict to coordinates of interest</option> | 405 <option value="restrict">Yes, restrict to spectra of interest</option> |
378 </param> | 406 </param> |
379 <when value="restrict"> | 407 <when value="restrict"> |
380 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> | 408 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/> |
409 <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/> | |
410 <param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/> | |
381 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | 411 <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> |
382 </when> | 412 </when> |
383 <when value="no_restriction"/> | 413 <when value="no_restriction"/> |
384 </conditional> | |
385 <conditional name="tabular_annotation"> | |
386 <param name="load_annotation" type="select" label="For Cardinal RData only: Use pixel annotation from tabular file to have updated annotation information in case empty spectra will be removed"> | |
387 <option value="no_annotation" selected="True">use no annotation</option> | |
388 <option value="yes_annotation">use pixel annotation from a tabular file</option> | |
389 </param> | |
390 <when value="yes_annotation"> | |
391 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" | |
392 help="Tabular file with three columns: x values, y values and pixel annotations"/> | |
393 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> | |
394 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> | |
395 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/> | |
396 <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
397 </when> | |
398 <when value="no_annotation"/> | |
399 </conditional> | 414 </conditional> |
400 <repeat name="methods" title="Method" min="1"> | 415 <repeat name="methods" title="Method" min="1"> |
401 <conditional name="methods_conditional"> | 416 <conditional name="methods_conditional"> |
402 <param name="method" type="select" label="Select a method"> | 417 <param name="method" type="select" label="Select a method"> |
403 <option value="Transformation" selected="True">Transformation</option> | 418 <option value="Transformation" selected="True">Transformation</option> |
404 <option value="Smoothing">Smoothing</option> | 419 <option value="Smoothing">Smoothing</option> |
405 <option value="Baseline">Baseline removal</option> | 420 <option value="Baseline">Baseline removal</option> |
406 <option value="Calibrate">Calibrate</option> | 421 <option value="Calibrate">Intensity calibration (normalization)</option> |
407 <option value="Align">Align Spectra (warping/phase correction)</option> | 422 <option value="Align">Align spectra (warping/phase correction)</option> |
423 <option value="skip_preprocessing">Skip preprocessing</option> | |
408 <validator type="empty_field" /> | 424 <validator type="empty_field" /> |
409 </param> | 425 </param> |
410 <when value="Transformation"> | 426 <when value="Transformation"> |
411 <param name="transform_method" type="select" label="Select a transfprormation method"> | 427 <param name="transform_method" type="select" label="Transformation method"> |
412 <option value="sqrt" selected="True">sqrt</option> | 428 <option value="sqrt" selected="True">sqrt</option> |
413 <option value="log">log</option> | 429 <option value="log">log</option> |
414 <option value="log2">log2</option> | 430 <option value="log2">log2</option> |
415 <option value="log10">log10</option> | 431 <option value="log10">log10</option> |
416 <validator type="empty_field" /> | 432 <validator type="empty_field" /> |
417 </param> | 433 </param> |
418 </when> | 434 </when> |
419 <when value="Smoothing"> | 435 <when value="Smoothing"> |
420 <conditional name="methods_for_smoothing"> | 436 <conditional name="methods_for_smoothing"> |
421 <param name="smooth_method" type="select" label="This method smoothes the intensity values of a MassSpectrum object"> | 437 <param name="smooth_method" type="select" label="Smoothing method" help="This method smoothes the intensity values of a MassSpectrum object."> |
422 <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option> | 438 <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option> |
423 <option value="MovingAverage">MovingAverage</option> | 439 <option value="MovingAverage">MovingAverage</option> |
424 </param> | 440 </param> |
425 <when value="SavitzkyGolay"> | 441 <when value="SavitzkyGolay"> |
426 <param name="polynomial" value="3" type="text" label="PolynomialOrder argument to control the order of the filter" | 442 <param name="polynomial" value="3" type="text" label="Polynomial order" |
427 help="should be smaller than the resulting window"/> | 443 help="Controls the order of the filter, should be smaller than the resulting window."/> |
428 </when> | 444 </when> |
429 <when value="MovingAverage"> | 445 <when value="MovingAverage"> |
430 <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weights normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> | 446 <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weights normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> |
431 </when> | 447 </when> |
432 </conditional> | 448 </conditional> |
433 <param name="halfWindowSize" type="integer" value="10" | 449 <param name="halfWindowSize" type="integer" value="10" |
434 label="Half window size (number of data points)" | 450 label="Half window size" |
435 help="The resulting window reaches from | 451 help="Number of data points, the resulting window reaches from |
436 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] | 452 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] |
437 (window size is 2*halfWindowSize+1)."/> | 453 (window size is 2*halfWindowSize+1)."/> |
438 </when> | 454 </when> |
439 <when value="Baseline"> | 455 <when value="Baseline"> |
440 <conditional name="methods_for_baseline"> | 456 <conditional name="methods_for_baseline"> |
449 <param name="iterations" type="integer" value="100" | 465 <param name="iterations" type="integer" value="100" |
450 label="Number of iterations" help="Corresponds to half window size: The resulting window reaches from mass[cur_index-iterations] to mass[cur_index+iterations]"/> | 466 label="Number of iterations" help="Corresponds to half window size: The resulting window reaches from mass[cur_index-iterations] to mass[cur_index+iterations]"/> |
451 </when> | 467 </when> |
452 <when value="TopHat"> | 468 <when value="TopHat"> |
453 <param name="tophat_halfWindowSize" type="integer" value="10" | 469 <param name="tophat_halfWindowSize" type="integer" value="10" |
454 label="Half window size (number of data points)" | 470 label="Half window size" |
455 help="The resulting window reaches from | 471 help="Number of data points, the resulting window reaches from |
456 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> | 472 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> |
457 </when> | 473 </when> |
458 <when value="ConvexHull"/> | 474 <when value="ConvexHull"/> |
459 <when value="median"> | 475 <when value="median"> |
460 <param name="median_halfWindowSize" type="integer" value="10" | 476 <param name="median_halfWindowSize" type="integer" value="10" |
461 label="Half window size (number of data points)" | 477 label="Half window size" |
462 help="The resulting window reaches from | 478 help="Number of data points, the resulting window reaches from |
463 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> | 479 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> |
464 </when> | 480 </when> |
465 </conditional> | 481 </conditional> |
466 </when> | 482 </when> |
467 <when value="Calibrate"> | 483 <when value="Calibrate"> |
468 <param name="calibrate_method" type="select" label="Intensity calibration (normalization) method"> | 484 <param name="calibrate_method" type="select" label="Intensity calibration method" help="Intensity normalization"> |
469 <option value="TIC" selected="True">TIC</option> | 485 <option value="TIC" selected="True">TIC</option> |
470 <option value="PQN">PQN</option> | 486 <option value="PQN">PQN</option> |
471 <option value="median">median</option> | 487 <option value="median">median</option> |
472 <validator type="empty_field" /> | 488 <validator type="empty_field" /> |
473 </param> | 489 </param> |
474 <conditional name="cond_calibration_range"> | 490 <conditional name="cond_calibration_range"> |
475 <param name="calibration_range" type="select" label="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor"> | 491 <param name="calibration_range" type="select" label="m/z range" help="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor."> |
476 <option value="no" selected="True">complete m/z range</option> | 492 <option value="no" selected="True">complete m/z range</option> |
477 <option value="yes">specify a m/z range</option> | 493 <option value="yes">specify a m/z range</option> |
478 </param> | 494 </param> |
479 <when value="no"/> | 495 <when value="no"/> |
480 <when value="yes"> | 496 <when value="yes"> |
485 label="End of m/z range, has to be inside m/z range"/> | 501 label="End of m/z range, has to be inside m/z range"/> |
486 </when> | 502 </when> |
487 </conditional> | 503 </conditional> |
488 </when> | 504 </when> |
489 <when value="Align"> | 505 <when value="Align"> |
490 <param name="warping_method" type="select" label="Warping methods"> | 506 <param name="warping_method" type="select" label="Alignment method"> |
491 <option value="lowess" selected="True">Lowess</option> | 507 <option value="lowess" selected="True">Lowess</option> |
492 <option value="linear">Linear</option> | 508 <option value="linear">Linear</option> |
493 <option value="quadratic">Quadratic</option> | 509 <option value="quadratic">Quadratic</option> |
494 <option value="cubic">Cubic</option> | 510 <option value="cubic">Cubic</option> |
495 </param> | 511 </param> |
496 | 512 |
497 <param name="tolerance" type="float" value="0.00005" | 513 <param name="tolerance" type="float" value="0.00005" |
498 label="Tolerance = abs(mz1 - mz2)/mz2" | 514 label="Tolerance" |
499 help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 50e-6" /> | 515 help="abs(mz1 - mz2)/mz2, maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" /> |
500 | 516 |
501 <param name="halfWindowSize" type="integer" value="20" | 517 <param name="halfWindowSize" type="integer" value="20" |
502 label="Half window size (number of data points)" | 518 label="Half window size" |
503 help="The resulting window reaches from | 519 help="Number of data points, the resulting window reaches from |
504 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] | 520 mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] |
505 (window size is 2*halfWindowSize+1). | 521 (window size is 2*halfWindowSize+1). |
506 The best size differs depending on the selected smoothing method."/> | 522 The best size differs depending on the selected smoothing method."/> |
507 | 523 |
524 <param name="peak_method" type="select" label="Noise estimation function"> | |
525 <option value="MAD" selected="True">MAD</option> | |
526 <option value="SuperSmoother">SuperSmoother</option> | |
527 </param> | |
528 | |
508 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/> | 529 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/> |
509 <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> | 530 <param name="allow_nomatch" type="boolean" label="Allow no matches" help="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> |
510 <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> | 531 <param name="empty_nomatch" type="boolean" label="Empty no matches" help="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> |
511 <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> | 532 <param name="remove_empty" type="boolean" label="Remove empty spectra" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> |
512 | 533 |
513 <conditional name="reference_for_alignment"> | 534 <conditional name="reference_for_alignment"> |
514 <param name="align_ref" type="select" label="Reference to which the samples should be aligned" help="Use internal calibrants to perform m/z calibration"> | 535 <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration"> |
515 <option value="no_reference" selected="True">no reference</option> | 536 <option value="no_reference" selected="True">no reference</option> |
516 <option value="yes_reference">reference from tabular file</option> | 537 <option value="yes_reference">reference from tabular file</option> |
517 </param> | 538 </param> |
518 <when value="no_reference"/> | 539 <when value="no_reference"> |
540 <param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Removal of all peaks which occur in less than minFrequency spectra to generate the reference m/z"/> | |
541 </when> | |
519 <when value="yes_reference"> | 542 <when value="yes_reference"> |
520 <param name="reference_file" type="data" format="tabular" | 543 <param name="reference_file" type="data" format="tabular" |
521 label="Tabular file with m/z (MassPeaks) which should be used for spectra alignment" | 544 label="Reference m/z values" |
522 help="At least 2 reference m/z per spectrum are needed"/> | 545 help="Tabular file"/> |
546 <param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/> | |
523 <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | 547 <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> |
548 <param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/> | |
524 </when> | 549 </when> |
525 </conditional> | 550 </conditional> |
526 </when> | 551 </when> |
552 <when value="skip_preprocessing"/> | |
527 </conditional> | 553 </conditional> |
528 </repeat> | 554 </repeat> |
529 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/> | 555 <param name="export_processed" type="boolean" label="Export processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/> |
530 </inputs> | 556 </inputs> |
531 <outputs> | 557 <outputs> |
532 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" /> | 558 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" /> |
533 <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/> | 559 <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/> |
534 </outputs> | 560 </outputs> |
539 <composite_data value="Example_Continuous.ibd"/> | 565 <composite_data value="Example_Continuous.ibd"/> |
540 </param> | 566 </param> |
541 <conditional name="restriction_conditional"> | 567 <conditional name="restriction_conditional"> |
542 <param name="restriction" value="restrict"/> | 568 <param name="restriction" value="restrict"/> |
543 <param name="coordinates_file" value="restricted_pixels.tabular"/> | 569 <param name="coordinates_file" value="restricted_pixels.tabular"/> |
570 <param name="column_x" value="1"/> | |
571 <param name="column_y" value="2"/> | |
544 </conditional> | 572 </conditional> |
545 <conditional name="methods_conditional"> | 573 <conditional name="methods_conditional"> |
546 <param name="method" value="Transformation"/> | 574 <param name="method" value="Transformation"/> |
547 <param name="transform_method" value="log2"/> | 575 <param name="transform_method" value="log2"/> |
548 <param name="method" value="Smoothing"/> | 576 <param name="method" value="Smoothing"/> |
549 <param name="smooth_method" value="SavitzkyGolay"/> | 577 <param name="smooth_method" value="SavitzkyGolay"/> |
550 <param name="method" value="Basline"/> | 578 <param name="method" value="Basline"/> |
551 <param name="baseline_method" value ="TopHat"/> | 579 <param name="baseline_method" value ="TopHat"/> |
552 </conditional> | 580 </conditional> |
553 <output name="outfile_imzml" file="outfile1.imzML" compare="sim_size"/> | 581 <output name="outfile_imzml" ftype="imzml" file="preprocessing1.imzml.txt" lines_diff="4"> |
554 <output name="outfile_imzml" file="outfile1.ibd" compare="sim_size"/> | 582 <extra_files type="file" file="outfile1.imzml" name="imzml" lines_diff="6"/> |
583 <extra_files type="file" file="outfile1.ibd" name="ibd" compare="sim_size"/> | |
584 </output> | |
555 <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/> | 585 <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/> |
556 </test> | 586 </test> |
557 <test> | 587 <test> |
558 <param name="infile" value="msidata_1.RData" ftype="rdata"/> | 588 <param name="infile" value="msidata_1.RData" ftype="rdata"/> |
559 <conditional name="methods_conditional"> | 589 <conditional name="methods_conditional"> |
560 <param name="method" value="Calibrate"/> | 590 <param name="method" value="Align"/> |
561 <param name="calibrate_method" value="PQN"/> | 591 <param name="warping_method" value="lowess"/> |
592 <param name="halfWindowSize" value="5"/> | |
593 <param name="tolerance" value="0.001"/> | |
594 <param name="allow_nomatch" value="TRUE"/> | |
595 <param name="remove_empty" value="TRUE"/> | |
596 <param name="empty_nomatch" value="TRUE"/> | |
597 <conditional name="reference_for_alignment"> | |
598 <param name="align_ref" value="yes_reference"/> | |
599 <param name="reference_file" value="inputpeptides.tabular" ftype="tabular"/> | |
600 </conditional> | |
562 </conditional> | 601 </conditional> |
563 <output name="outfile_imzml" file="outfile2.imzML" compare="sim_size"/> | 602 <output name="outfile_imzml" ftype="imzml" file="preprocessing2.imzml.txt" lines_diff="4"> |
564 <output name="outfile_imzml" file="outfile2.ibd" compare="sim_size"/> | 603 <extra_files type="file" file="outfile2.imzml" name="imzml" lines_diff="6"/> |
604 <extra_files type="file" file="outfile2.ibd" name="ibd" compare="sim_size"/> | |
605 </output> | |
565 <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/> | 606 <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/> |
566 </test> | 607 </test> |
567 <test> | 608 <test> |
568 <param name="infile" value="" ftype="imzml"> | 609 <param name="infile" value="" ftype="imzml"> |
569 <composite_data value="Example_Continuous.imzML"/> | 610 <composite_data value="Example_Continuous.imzML"/> |
570 <composite_data value="Example_Continuous.ibd"/> | 611 <composite_data value="Example_Continuous.ibd"/> |
571 </param> | 612 </param> |
572 <conditional name="tabular_annotation"> | 613 <conditional name="methods_conditional"> |
573 <param name="load_annotation" value="yes_annotation"/> | 614 <param name="method" value="Calibrate"/> |
574 <param name="annotation_file" value="pixel_annotations.tabular"/> | 615 <param name="calibrate_method" value="median"/> |
575 <param name="column_x" value="1"/> | |
576 <param name="column_y" value="2"/> | |
577 <param name="column_names" value="3"/> | |
578 <param name="tabular_header" value="TRUE"/> | |
579 </conditional> | 616 </conditional> |
580 <conditional name="methods_conditional"> | 617 <output name="outfile_imzml" ftype="imzml" file="preprocessing3.imzml.txt" lines_diff="4"> |
581 <param name="method" value="Align"/> | 618 <extra_files type="file" file="outfile3.imzml" name="imzml" lines_diff="6"/> |
582 <param name="warping_method" value="linear"/> | 619 <extra_files type="file" file="outfile3.ibd" name="ibd" compare="sim_size"/> |
583 <param name="halfWindowSize" value="1"/> | 620 </output> |
584 <param name="tolerance" value="0.002"/> | |
585 <param name="allow_nomatch" value="TRUE"/> | |
586 <param name="remove_empty" value="TRUE"/> | |
587 <param name="empty_nomatch" value="TRUE"/> | |
588 <conditional name="reference_for_alignment"> | |
589 <param name="align_ref" value="yes_reference"/> | |
590 <param name="reference_file" value="align_reference_test2.tabular" ftype="tabular"/> | |
591 </conditional> | |
592 </conditional> | |
593 <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/> | |
594 <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/> | |
595 <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/> | 621 <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/> |
596 </test> | 622 </test> |
597 </tests> | 623 </tests> |
598 <help><![CDATA[ | 624 <help><![CDATA[ |
599 | 625 |
605 | 631 |
606 - MSI data: 2 types of input data can be used: | 632 - MSI data: 2 types of input data can be used: |
607 | 633 |
608 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | 634 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ |
609 - Cardinal "MSImageSet" data saved as .RData | 635 - Cardinal "MSImageSet" data saved as .RData |
610 - Only for Cardinal RData files and when remove empty spectra is chosen: Tabular file with coordinates annotations. Separate columns for x and y coordinates and a third column with pixel annotations. Tabular files with any header name or no header at all are supported | 636 |
611 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second columns. Further columns are allowed. Tabular files with any header name or no header at all are supported. | 637 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported. |
612 | 638 |
613 :: | 639 :: |
614 | 640 |
615 x_coord y_coord | 641 x_coord y_coord |
616 1 1 | 642 1 1 |
636 **Options** | 662 **Options** |
637 | 663 |
638 - Transformation: Variance stabilization through intensity transformation: 'log', 'log2', 'log10' and 'squareroot' (sqrt) are available | 664 - Transformation: Variance stabilization through intensity transformation: 'log', 'log2', 'log10' and 'squareroot' (sqrt) are available |
639 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are 'SavitzkyGolay' and 'Moving Average' | 665 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are 'SavitzkyGolay' and 'Moving Average' |
640 | 666 |
641 - For all smoothing methods: The larger the 'Half window size'f, the stronger the smoothing. The resulting window should be smaller than the FWHM (full width at half maximum) of the typical peaks. Moving average needs smaller window size than SavitzkyGolay. | 667 - For all smoothing methods: The larger the 'Half window size', the stronger the smoothing. The resulting window should be smaller than the FWHM (full width at half maximum) of the typical peaks. Moving average needs smaller window size than SavitzkyGolay. |
642 - Moving average: Recommended for broader peaks/high m/z range spectra. Weighted moving average: Points in the center get larger weight factors than points away from the center. | 668 - Moving average: Recommended for broader peaks/high m/z range spectra. Weighted moving average: Points in the center get larger weight factors than points away from the center. |
643 - SavitzkyGolay: Recommended for sharp peaks/low m/z range, preserves the shape of the local maxima. The PolynomialOrder should be smaller than the resulting window. Negative values will be replaced with 0. | 669 - SavitzkyGolay: Recommended for sharp peaks/low m/z range, preserves the shape of the local maxima. The PolynomialOrder should be smaller than the resulting window. Negative values will be replaced with 0. |
644 | 670 |
645 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). | 671 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). |
646 | 672 |
653 - Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN) | 679 - Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN) |
654 | 680 |
655 - TIC and median are local calibration methods: each spectrum is normalized on its own (each peak is divided by the TIC or median of the spectrum) | 681 - TIC and median are local calibration methods: each spectrum is normalized on its own (each peak is divided by the TIC or median of the spectrum) |
656 - PQN is a global calibration method: In PQN all spectra are calibrated using the TIC calibration first. Subsequently, a median reference spectrum is created and the intensities in all spectra are standardized using the reference spectrum and a spectrum-specific median is calculated for each spectrum. Finally, each spectrum is rescaled by the median of the ratios of its intensity values and that of the reference spectrum | 682 - PQN is a global calibration method: In PQN all spectra are calibrated using the TIC calibration first. Subsequently, a median reference spectrum is created and the intensities in all spectra are standardized using the reference spectrum and a spectrum-specific median is calculated for each spectrum. Finally, each spectrum is rescaled by the median of the ratios of its intensity values and that of the reference spectrum |
657 | 683 |
658 - Spectra alignment (warping): alignment for (re)calibration of m/z values, at least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. If the not aligned spectra should be set to zero select yes in "logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes. | 684 - Spectra alignment (warping): alignment for (re)calibration of m/z values. |
685 | |
686 - peak detection is performed, the reference peaks will be matched to those detected peaks | |
687 - without external reference m/z: internal reference is obtained by filtering and binning the picked peaks to find landmark peaks and their average m/z | |
688 - with external reference m/z: the given m/z are used as a reference, at least 10 reference values are recommended | |
689 - non-linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the present (picked) peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. To prevent an error when this criterion is not fulfilled, set "Allow no matches" to yes. To set the intensities of spectra that could not be aligned to zero, set "Empty no matches" to yes. To remove such empty spectra, set "Remove empty spectra" to yes. | |
659 | 690 |
660 | 691 |
661 **Output** | 692 **Output** |
662 | 693 |
663 - imzML file (imzML format can be continuous or processed) | 694 - imzML file (imzML format can be continuous or processed) |
667 | 698 |
668 ]]> | 699 ]]> |
669 </help> | 700 </help> |
670 <expand macro="citation"/> | 701 <expand macro="citation"/> |
671 </tool> | 702 </tool> |
703 |