comparison filtering.xml @ 1:aac805a9d2ae draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit d2f311f7fff24e54c565127c40414de708e31b3c
author galaxyp
date Thu, 25 Oct 2018 07:25:13 -0400
parents a2988d8d4b77
children 0c4579390f73
comparison
equal deleted inserted replaced
0:a2988d8d4b77 1:aac805a9d2ae
1 <tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.0"> 1 <tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.1">
2 <description>tool for filtering mass spectrometry imaging data</description> 2 <description>tool for filtering mass spectrometry imaging data</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"> 6 <expand macro="requirements">
7 <requirement type="package" version="2.2.1">r-gridextra</requirement> 7 <requirement type="package" version="2.3">r-gridextra</requirement>
8 <requirement type="package" version="2.2.1">r-ggplot2</requirement> 8 <requirement type="package" version="3.0">r-ggplot2</requirement>
9 </expand> 9 </expand>
10 <expand macro="print_version"/>
10 <command detect_errors="exit_code"> 11 <command detect_errors="exit_code">
11 <![CDATA[ 12 <![CDATA[
12 13
13 @INPUT_LINKING@ 14 @INPUT_LINKING@
14 cat '${MSI_subsetting}' && 15 cat '${MSI_subsetting}' &&
15 Rscript '${MSI_subsetting}' 16 Rscript '${MSI_subsetting}' &&
17
18 #if $imzml_output:
19 mkdir $outfile_imzml.files_path &&
20 ls -l &&
21 mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
22 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
23 #end if
24 echo "imzML file:" > $outfile_imzml &&
25 ls -l "$outfile_imzml.files_path" >> $outfile_imzml
26
16 27
17 ]]> 28 ]]>
18 </command> 29 </command>
30
31
19 <configfiles> 32 <configfiles>
20 <configfile name="MSI_subsetting"><![CDATA[ 33 <configfile name="MSI_subsetting"><![CDATA[
21 34
22 35
23 ################################# load libraries and read file ################# 36 ################################# load libraries and read file #################
110 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ 123 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){
111 124
112 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] 125 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range]
113 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] 126 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range]
114 }else{ 127 }else{
115 msidata = msidata[,0] 128
116 print("no valid pixel found")} 129 print("no valid pixel found")
130 msidata = msidata[,0]}
117 131
118 ## update position_df for filtered pixels 132 ## update position_df for filtered pixels
119 position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) 133 position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata)))
120 colnames(position_df)[3] = "annotation" 134 colnames(position_df)[3] = "annotation"
121 position_df\$annotation = factor(position_df\$annotation) 135 position_df\$annotation = factor(position_df\$annotation)
136 ############################################################################## 150 ##############################################################################
137 151
138 ####################### Keep m/z from tabular file ######################### 152 ####################### Keep m/z from tabular file #########################
139 153
140 ## feature filtering only when pixels/features/intensities are left 154 ## feature filtering only when pixels/features/intensities are left
155
156 if (ncol(msidata) > 0){
141 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) 157 npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE)
142 158 if (npeaks_before_filtering > 0)
143 159 {
144 if (npeaks_before_filtering > 0) 160
145 161 #if str($features_cond.features_filtering) == "features_list":
146 { 162 print("feature list")
147 163
148 #if str($features_cond.features_filtering) == "features_list": 164 ## read tabular file, define starting row, extract and count valid features
149 print("feature list") 165 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE)
150 166 extracted_features = input_features[,$features_cond.feature_column]
151 ## read tabular file, define starting row, extract and count valid features 167 numberfeatures = length(extracted_features)
152 input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) 168 if (class(extracted_features) == "numeric"){
153 extracted_features = input_features[,$features_cond.feature_column] 169 ### max digits given in the input file will be used to match m/z but the maximum is 4
154 numberfeatures = length(extracted_features) 170 max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE)
155 if (class(extracted_features) == "numeric"){ 171
156 ### max digits given in the input file will be used to match m/z but the maximum is 4 172 if (max_digits >4)
157 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) 173 {
158 if (max_digits >4) 174 max_digits = 4
159 { 175 }
160 max_digits = 4 176
161 } 177 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits)
162 178 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)]
163 validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) 179 validmz = length(unique(featuresofinterest))
164 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] 180 }else{
165 validmz = length(unique(featuresofinterest)) 181 validmz = 0
166 }else{ 182 featuresofinterest = 0}
167 validmz = 0 183
168 featuresofinterest = 0} 184 ### filter msidata for valid features
169 185 msidata = msidata[featuresofinterest,]
170 ### filter msidata for valid features 186
171 msidata = msidata[featuresofinterest,] 187 ############### features within a given range are kept #####################
172 188
173 ############### features within a given range are kept ##################### 189 #elif str($features_cond.features_filtering) == "features_range":
174 190 print("feature range")
175 #elif str($features_cond.features_filtering) == "features_range": 191
176 print("feature range") 192 numberfeatures = "range"
177 193 validmz = "range"
178 numberfeatures = "range" 194
179 validmz = "range" 195 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){
180 196 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,]
181 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ 197 }else{
182 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] 198 msidata = msidata[0,]
183 }else{ 199 print("no valid mz range")}
184 msidata = msidata[0,] 200
185 print("no valid mz range")} 201 ############### Remove m/z from tabular file #########################
186 202
187 ############### Remove m/z from tabular file ######################### 203 #elif str($features_cond.features_filtering) == "remove_features":
188 204 print("remove features")
189 #elif str($features_cond.features_filtering) == "remove_features": 205
190 print("remove features") 206 ## read tabular file, define starting row, extract and count valid features
191 207 input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE)
192 ## read tabular file, define starting row, extract and count valid features 208 extracted_features = input_features[,$features_cond.removal_column]
193 input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE) 209 numberfeatures = length(extracted_features)
194 extracted_features = input_features[,$features_cond.removal_column] 210 if (class(extracted_features) == "numeric"){
195 numberfeatures = length(extracted_features) 211 print("input is numeric")
196 if (class(extracted_features) == "numeric"){ 212 featuresofinterest = extracted_features
197 print("input is numeric") 213 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata)))
198 featuresofinterest = extracted_features 214 }else{featuresofinterest = 0
199 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) 215 validmz = 0}
200 }else{featuresofinterest = 0 216
201 validmz = 0} 217 ### Here starts removal of features:
202 218 plusminus = $features_cond.removal_plusminus
203 ### Here starts removal of features: 219
204 plusminus = $features_cond.removal_plusminus 220 mass_to_remove = numeric()
205 221 if (sum(featuresofinterest) > 0){
206 mass_to_remove = numeric() 222 for (masses in featuresofinterest){
207 if (sum(featuresofinterest) > 0){ 223 #if str($features_cond.units_removal) == "ppm":
208 for (masses in featuresofinterest){ 224 plusminus = masses * $features_cond.removal_plusminus/1000000
209 #if str($features_cond.units_removal) == "ppm": 225 #end if
210 plusminus = masses * $features_cond.removal_plusminus/1000000 226 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus))
211 #end if 227 mass_to_remove = append(mass_to_remove, current_mass)}
212 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) 228 msidata= msidata[-mass_to_remove, ]
213 mass_to_remove = append(mass_to_remove, current_mass)} 229 }else{print("No features were removed as they were not fitting to m/z values and/or range")}
214 msidata= msidata[-mass_to_remove, ] 230
215 }else{print("No features were removed as they were not fitting to m/z values and/or range")} 231
216 232 #elif str($features_cond.features_filtering) == "none":
217 233
218 #elif str($features_cond.features_filtering) == "none": 234 print("no feature filtering")
219 235 validmz = 0
220 print("no feature filtering") 236 numberfeatures = 0
221 validmz = 0 237
222 numberfeatures = 0 238 #end if
223 239
224 #end if 240 ## save msidata as Rfile
225 241 save(msidata, file="$msidata_filtered")
226 ## save msidata as Rfile 242
227 save(msidata, file="$msidata_filtered")
228 ## Number of empty TICs 243 ## Number of empty TICs
229 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) 244 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE)
245 ## Number of intensities > 0
246 npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE)
247 ## Spectra multiplied with m/z (potential number of peaks)
248 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
249
250
251
252 }else{
253 print("Inputfile or file filtered for pixels has no intensities > 0")
254 numberfeatures = NA
255 validmz = NA
256 ## Number of empty TICs
257 TICs2 = 0
258 npeaks2 = 0
259 numpeaks2 = 0
260 }
230 }else{ 261 }else{
231 print("Inputfile or file filtered for pixels has no intensities > 0") 262 print("Inputfile or file filtered for pixels has no pixels left")
232 numberfeatures = NA 263 numberfeatures = NA
233 validmz = NA 264 validmz = NA
234 ## Number of empty TICs 265 ## Number of empty TICs
235 TICs2 = NA 266 TICs2 = 0
267 npeaks2 = 0
268 numpeaks2 = 0
236 } 269 }
237
238 #################### QC numbers ####################### 270 #################### QC numbers #######################
239 271
240 272
241 ## Number of features (m/z) 273 ## Number of features (m/z)
242 maxfeatures2 = length(features(msidata)) 274 maxfeatures2 = length(features(msidata))
249 minimumx2 = min(coord(msidata)[,1]) 281 minimumx2 = min(coord(msidata)[,1])
250 maximumx2 = max(coord(msidata)[,1]) 282 maximumx2 = max(coord(msidata)[,1])
251 ## Range y coordinates 283 ## Range y coordinates
252 minimumy2 = min(coord(msidata)[,2]) 284 minimumy2 = min(coord(msidata)[,2])
253 maximumy2 = max(coord(msidata)[,2]) 285 maximumy2 = max(coord(msidata)[,2])
254 ## Number of intensities > 0 286
255 npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE)
256 ## Spectra multiplied with m/z (potential number of peaks)
257 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
258 ## Percentage of intensities > 0 287 ## Percentage of intensities > 0
259 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) 288 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2)
260 ## Number of empty TICs 289 ## Number of empty TICs
261 NumemptyTIC2 = sum(TICs2 == 0) 290 NumemptyTIC2 = sum(TICs2 == 0)
262 ## median TIC 291 ## median TIC
294 paste0(NumemptyTIC2), 323 paste0(NumemptyTIC2),
295 paste0("valid pixels: ", validpixels), 324 paste0("valid pixels: ", validpixels),
296 paste0("valid mz: ", validmz)) 325 paste0("valid mz: ", validmz))
297 326
298 property_df = data.frame(properties, before, filtered) 327 property_df = data.frame(properties, before, filtered)
299 328 print(property_df)
300 ############################### PDF QC ################################ 329
301 330 ########################### PDF QC and imzml output ###########################
302 331
303 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) 332 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
304 plot(0,type='n',axes=FALSE,ann=FALSE) 333 plot(0,type='n',axes=FALSE,ann=FALSE)
305 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) 334 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
306 grid.table(property_df, rows= NULL) 335 grid.table(property_df, rows= NULL)
307 336
308 ## QC report with more than value-table: only when pixels/features/intensities are left 337 ## QC report with more than value-table: only when pixels/features/intensities are left
309 if (npeaks2 > 0) 338 if (npeaks2 > 0)
310 { 339 {
340
341 ## save msidata as imzML file, will only work if there is at least 1 m/z left
342 #if $imzml_output:
343 if (maxfeatures2 > 0){
344 writeImzML(msidata, "out")}
345 #end if
346
347
311 ### visual pixel control 348 ### visual pixel control
312 349
313 levels(position_df\$annotation) = factor(paste(1:length(levels(position_df\$annotation)), levels(position_df\$annotation), sep="_")) 350 levels(position_df\$annotation) = factor(paste(1:length(levels(position_df\$annotation)), levels(position_df\$annotation), sep="_"))
314 351
315 pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+ 352 pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation))+
343 }else{ 380 }else{
344 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")} 381 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")}
345 #end if 382 #end if
346 383
347 dev.off() 384 dev.off()
385
348 386
349 }else{ 387 }else{
350 print("Inputfile or filtered file has no intensities > 0") 388 print("Inputfile or filtered file has no intensities > 0")
351 dev.off() 389 dev.off()
352 } 390 }
361 <option value="pixel_range">ranges for x and y (manually)</option> 399 <option value="pixel_range">ranges for x and y (manually)</option>
362 </param> 400 </param>
363 <when value="none"/> 401 <when value="none"/>
364 <when value="two_columns"> 402 <when value="two_columns">
365 <expand macro="reading_pixel_annotations"/> 403 <expand macro="reading_pixel_annotations"/>
366
367
368 <param name="two_columns_pixel" type="data" format="tabular" label="Tabular file with pixel coordinates"
369 help="Column with x values, another with y values, another with pixel annotations"/>
370 <param name="pixel_column_x" data_ref="two_columns_pixel" label="Column with x values" type="data_column"/>
371 <param name="pixel_column_y" data_ref="two_columns_pixel" label="Column with y values" type="data_column"/>
372 <param name="annotation_column_xy" data_ref="two_columns_pixel" label="Column with annotations" type="data_column"/>
373 <param name="pixel_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
374
375
376
377 404
378 </when> 405 </when>
379 <when value="pixel_range"> 406 <when value="pixel_range">
380 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/> 407 <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/>
381 <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/> 408 <param name="max_x_range" type="integer" value="100" label="Maximum value for x"/>
406 <option value="ppm" selected="True">ppm</option> 433 <option value="ppm" selected="True">ppm</option>
407 <option value="Da">Da</option> 434 <option value="Da">Da</option>
408 </param> 435 </param>
409 </when> 436 </when>
410 </conditional> 437 </conditional>
438 <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/>
439
411 </inputs> 440 </inputs>
412 441
413 <outputs> 442 <outputs>
414 <data format="rdata" name="msidata_filtered" label="${tool.name} on ${on_string}"/> 443 <data format="rdata" name="msidata_filtered" label="${tool.name} on ${on_string}"/>
415 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/> 444 <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/>
445 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
446 <filter>imzml_output</filter>
447 </data>
416 </outputs> 448 </outputs>
417 <tests> 449 <tests>
418 <test>
419 <expand macro="infile_imzml"/>
420 <param name="pixel_filtering" value="pixel_range"/>
421 <param name="min_x_range" value="10"/>
422 <param name="max_x_range" value="20"/>
423 <param name="min_y_range" value="2"/>
424 <param name="max_y_range" value="2"/>
425 <output name="QC_overview" file="imzml_filtered2.pdf" compare="sim_size"/>
426 <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size"/>
427 </test>
428 <test> 450 <test>
429 <expand macro="infile_imzml"/> 451 <expand macro="infile_imzml"/>
430 <param name="pixel_filtering" value="pixel_range"/> 452 <param name="pixel_filtering" value="pixel_range"/>
431 <param name="min_x_range" value="1"/> 453 <param name="min_x_range" value="1"/>
432 <param name="max_x_range" value="20"/> 454 <param name="max_x_range" value="20"/>
445 <param name="column_x" value="1"/> 467 <param name="column_x" value="1"/>
446 <param name="column_y" value="3"/> 468 <param name="column_y" value="3"/>
447 <param name="column_names" value="2"/> 469 <param name="column_names" value="2"/>
448 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/> 470 <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/>
449 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/> 471 <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size"/>
472 <!--imzml output test not yet working: output name="outfile_imzml" file="filtering_imzmls/summary" compare="sim_size" delta="10000">
473 <extra_files type="file" name="imzml" value="filtering_imzmls/out4.imzML" compare="sim_size" delta="10000"/>
474 <extra_files type="file" name="ibd" value="filtering_imzmls/out4.ibd" compare="sim_size" delta="10000"/>
475 </output-->
450 </test> 476 </test>
451 <test> 477 <test>
452 <expand macro="infile_imzml"/> 478 <expand macro="infile_imzml"/>
453 <param name="pixel_filtering" value="pixel_range"/> 479 <param name="pixel_filtering" value="pixel_range"/>
454 <param name="min_x_range" value="0"/> 480 <param name="min_x_range" value="0"/>
491 517
492 @MZ_TABULAR_INPUT_DESCRIPTION@ 518 @MZ_TABULAR_INPUT_DESCRIPTION@
493 519
494 **Options** 520 **Options**
495 521
496 - pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixels that are not present in the dataset are ignored. In case all pixels are not present in the dataset the output file will be empty and no further mz filtering will be performed. 522 - pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixels that are not present in the dataset are ignored. It is not possible to filter only for pixels that are not present in the dataset.
497 - m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. If all given m/z values or the m/z range is outside the dataset, the output file will be empty. 523 - m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. It is not possible to filter only for m/z that are not present in the dataset.
498 - m/z feature removing: perturbing m/z features such as matrix contaminants can be removed by specifying their m/z in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed. 524 - m/z feature removing: perturbing m/z features such as matrix contaminants can be removed by specifying their m/z in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed.
499 525
500 526
501 **Tips** 527 **Tips**
502 528
504 - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy must be tabular otherwise file will not appear in selection window (if Galaxy auto-detection was wrong, datatype can be changed by pressing the pen button (edit attributes)) 530 - In case tabular file cannot be selected in drop-down menu: Datatype in Galaxy must be tabular otherwise file will not appear in selection window (if Galaxy auto-detection was wrong, datatype can be changed by pressing the pen button (edit attributes))
505 531
506 532
507 **Output** 533 **Output**
508 534
509 - imzML file filtered for pixels and/or m/z 535 - MSI data as .RData output (can be read with the Cardinal package in R)
536 - optional: MSI data as imzML file
510 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z 537 - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z
511 538
512 539
513 ]]> 540 ]]>
514 </help> 541 </help>