comparison msi_qualitycontrol.xml @ 14:7c7c39b9ec4a draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_qualitycontrol commit 5bceedc3a11c950790692a4c64bbb83d46897bee
author galaxyp
date Tue, 24 Jul 2018 04:53:25 -0400
parents 88e12d270e35
children 2d69460669ae
comparison
equal deleted inserted replaced
13:88e12d270e35 14:7c7c39b9ec4a
1 <tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.10.0.4"> 1 <tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.10.0.5">
2 <description> 2 <description>
3 mass spectrometry imaging QC 3 mass spectrometry imaging QC
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
49 msidata = readAnalyze('infile') 49 msidata = readAnalyze('infile')
50 #else 50 #else
51 load('infile.RData') 51 load('infile.RData')
52 #end if 52 #end if
53 53
54 ## create full matrix to make processed imzML files compatible with segmentation 54 ## create full matrix to make processed imzML files compatible with segmentation and other steps
55 iData(msidata) <- iData(msidata)[] 55 iData(msidata) <- iData(msidata)[]
56 56
57 ## remove duplicated coordinates 57 ## remove duplicated coordinates
58 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) 58 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
59 msidata <- msidata[,!duplicated(coord(msidata))] 59 msidata <- msidata[,!duplicated(coord(msidata))]
60
61 ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample)
62
63 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
64
65 ## read and extract x,y,annotation information
66 input_tabular = read.delim("$tabular_annotation.annotation_file", header = TRUE, stringsAsFactors = FALSE)
67 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
68 annotation_name = colnames(annotation_input)[3] ##extract header for annotations to later export tabular with same name
69 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
70
71 ## merge with coordinate information of msidata
72 msidata_coordinates = cbind(coord(msidata)[,1:2], c(1:ncol(msidata)))
73 colnames(msidata_coordinates)[3] = "pixel_index"
74 merged_annotation = merge(msidata_coordinates, annotation_input, by=c("x", "y"), all.x=TRUE)
75 merged_annotation[is.na(merged_annotation)] = "NA"
76 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
77 msidata\$annotation = as.factor(merged_annotation[,4])
78
79 #end if
60 80
61 ###################################### file properties in numbers ###################### 81 ###################################### file properties in numbers ######################
62 82
63 ## Number of features (m/z) 83 ## Number of features (m/z)
64 maxfeatures = length(features(msidata)) 84 maxfeatures = length(features(msidata))
228 cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256) 248 cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
229 df\$col = cols[df\$dens] 249 df\$col = cols[df\$dens]
230 plot(x2~x1, data=df[order(df\$dens),], ylim=ylim,xlim=xlim,pch=20,col=col, 250 plot(x2~x1, data=df[order(df\$dens),], ylim=ylim,xlim=xlim,pch=20,col=col,
231 cex=1,xlab=xlab,ylab=ylab,las=1, main=main)} 251 cex=1,xlab=xlab,ylab=ylab,las=1, main=main)}
232 252
233 abline_vector= -100000 ## will be filled for samples in case data is combined
234
235 ## start list for optional spectrum values output 253 ## start list for optional spectrum values output
236 spectrum_list = list() 254 spectrum_list = list()
237 list_count = 1 255 list_count = 1
238 256
239 ################### 0) overview for combined data ########################### 257 ################### 0) overview for combined data ###########################
240 258
241 ### only for previously combined data, same plot as in combine QC pdf 259 ### only for previously combined data, same plot as in combine QC pdf
242 260
243 if (!is.null(levels(msidata\$combined_sample))){ 261 if (!is.null(levels(msidata\$annotation))){
244 number_combined = length(levels(msidata\$combined_sample)) 262 number_combined = length(levels(msidata\$annotation))
245 263
246 ## the more combined_samples a file has the smaller will be the legend 264 ## the more annotation groups a file has the smaller will be the legend
247 if (number_combined<20){ 265 if (number_combined<20){
248 legend_size = 10 266 legend_size = 10
249 cex_boxplot = 1 267 cex_boxplot = 1
250 }else if (number_combined>20 && number_combined<40){ 268 }else if (number_combined>20 && number_combined<40){
251 legend_size = 9 269 legend_size = 9
259 }else{ 277 }else{
260 legend_size = 6 278 legend_size = 6
261 cex_boxplot = 0.3 279 cex_boxplot = 0.3
262 } 280 }
263 281
264 position_df = cbind(coord(msidata)[,1:2], msidata\$combined_sample) 282 position_df = cbind(coord(msidata)[,1:2], msidata\$annotation)
265 colnames(position_df)[3] = "sample_name" 283 colnames(position_df)[3] = annotation_name
266 284
267 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ 285 ## append list for optional tabular output with spectrum values
286 spectrum_list[[list_count]] = position_df
287 list_count = list_count+1
288
289 colnames(position_df)[3] = "Annotation"
290 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=Annotation))+
268 geom_tile() + 291 geom_tile() +
269 coord_fixed()+ 292 coord_fixed()+
270 ggtitle("Spatial orientation of combined data")+ 293 ggtitle("Spatial orientation of combined data")+
271 theme_bw()+ 294 theme_bw()+
272 theme(plot.title = element_text(hjust = 0.5))+ 295 theme(plot.title = element_text(hjust = 0.5))+
273 theme(text=element_text(family="ArialMT", face="bold", size=12))+ 296 theme(text=element_text(family="ArialMT", face="bold", size=12))+
274 theme(legend.position="bottom",legend.direction="vertical")+ 297 theme(legend.position="bottom",legend.direction="vertical")+
275 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ 298 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
276 guides(fill=guide_legend(ncol=5,byrow=TRUE)) 299 guides(fill=guide_legend(ncol=5,byrow=TRUE))
277 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
278 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
279 for(file_count in 1:nrow(coord_labels))
280 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
281 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
282 300
283 print(combine_plot) 301 print(combine_plot)
284 302
285 ### find max pixelnumber per subsample to later draw ablines 303 ### order pixels according to annotation - this gives a new pixel/spectra index order according to the annotation groups
286 pixel_name_df = data.frame(pixels(msidata), msidata\$combined_sample) 304 pixel_name_df = data.frame(pixels(msidata), msidata\$annotation)
287 colnames(pixel_name_df) = c("pixel_number", "pixel_name") 305 colnames(pixel_name_df) = c("pixel_number", "pixel_name")
288 last_pixel = aggregate(pixel_number~pixel_name, data = pixel_name_df, max) 306
307 pixel_name_df_ordered = pixel_name_df[order(pixel_name_df\$pixel_name),]
308 pixel_name_df_ordered\$annotated_order = 1:ncol(msidata)
309 last_pixel = aggregate(annotated_order~pixel_name, data = pixel_name_df_ordered, max)
310
289 pixel_vector = last_pixel[,2] 311 pixel_vector = last_pixel[,2]
290 abline_vector = pixel_vector[1:number_combined-1] 312 abline_vector = pixel_vector[1:number_combined-1]
291 print(abline_vector) 313 print(abline_vector)
292 } 314 }
293 315
294 ################### 1) Pixel order image ################################### 316 ################### 1) Pixel order image ###################################
295 317
296 pixelnumber = 1:pixelcount 318 pixelnumber = 1:pixelcount
297 pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber) 319 pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber)
320 gg_title = "Pixel order"
321
322 ## order pixels according to annotation groups if annotations are provided
323 if (!is.null(levels(msidata\$annotation))){
324 pixelxyarray = pixelxyarray[match(pixel_name_df_ordered\$pixel_number, pixelxyarray\$pixelnumber),]
325 pixelxyarray\$pixelnumber = 1:ncol(msidata)
326 gg_title = "Pixel ordered for annotation groups"
327 }
298 328
299 print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))+ 329 print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))+
300 geom_tile() + coord_fixed()+ 330 geom_tile() + coord_fixed()+
301 ggtitle("Pixel order") + theme_bw()+ 331 ggtitle(gg_title) + theme_bw()+
302 theme(plot.title = element_text(hjust = 0.5))+ 332 theme(plot.title = element_text(hjust = 0.5))+
303 theme(text=element_text(family="ArialMT", face="bold", size=12))+ 333 theme(text=element_text(family="ArialMT", face="bold", size=12))+
304 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), 334 scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
305 space = "Lab", na.value = "black", name = "Pixel\nnumber")) 335 space = "Lab", na.value = "black", name = "Pixel\nnumber"))
306 336
307 ################ 2) Number of calibrants per spectrum ###################### 337 ################ 2) Number of calibrants per spectrum ######################
308 338
309 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not 339 ## matrix with calibrants in columns and in rows if there is peak intensity in range or not
310 pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0) 340 pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0)
311 341
312 ## plot only possible when there is at least one valid calibrant 342 ## plot only possible when there is at least one valid calibrant
313 if (length(inputcalibrantmasses) != 0){ 343 if (length(inputcalibrantmasses) != 0){
314 344
315 ## calculate plusminus values in m/z for each calibrant 345 ## calculate plusminus values in m/z for each calibrant
316 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses 346 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses
317 347
318 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 348 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0
319 for (mass in 1:length(inputcalibrantmasses)){ 349
320 350 for (mass in 1:length(inputcalibrantmasses)){
321 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] 351
322 352 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
323 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ 353
324 354 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){
325 ## intensity of all m/z > 0 355
326 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0 356 ## intensity of all m/z > 0
327 357 intensity_sum = colSums(spectra(filtered_data)[], na.rm=TRUE) > 0
328 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ 358
329 359 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){
330 ## intensity of only m/z > 0 360
331 intensity_sum = spectra(filtered_data)[] > 0 361 ## intensity of only m/z > 0
332 362 intensity_sum = spectra(filtered_data)[] > 0
333 }else{ 363
334 intensity_sum = rep(FALSE, ncol(filtered_data))} 364 }else{
335 365
336 ## for each pixel add sum of intensity in the given m/z range 366 intensity_sum = rep(FALSE, ncol(filtered_data))}
337 pixelmatrix = rbind(pixelmatrix, intensity_sum) 367
338 } 368 ## for each pixel add sum of intensities > 0 in the given m/z range
369 pixelmatrix = rbind(pixelmatrix, intensity_sum)
370 }
339 371
340 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) 372 ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE)
341 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) 373 countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE))
342 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts 374 countdf= cbind(coord(msidata)[,1:2], countvector) ## add pixel coordinates to counts
343 mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen") 375 mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen")
498 530
499 ## append list for optional tabular output with spectrum values 531 ## append list for optional tabular output with spectrum values
500 colnames(highestmz_matrix)[3] = "Most abundant m/z" 532 colnames(highestmz_matrix)[3] = "Most abundant m/z"
501 spectrum_list[[list_count]] = highestmz_matrix 533 spectrum_list[[list_count]] = highestmz_matrix
502 534
535
503 ## tabular output of spectra values 536 ## tabular output of spectra values
504 537
505 #if $pixel_output: 538 #if $pixel_output:
506 print("pixel list") 539 print("pixel list")
507 pixel_df = Reduce(function(...) merge(..., by=c("x", "y"), all=T), spectrum_list) 540 pixel_df = Reduce(function(...) merge(..., by=c("x", "y"), all=TRUE), spectrum_list)
508 write.table(pixel_df, file="$pixel_tabular_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 541 write.table(pixel_df, file="$pixel_tabular_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
509 #end if 542 #end if
510 543
511 ########################## 8) optional pca image for two components ################# 544 ########################## 8) optional pca image for two components #################
512 545
513 #if $do_pca: 546 #if $do_pca:
514 547
515 pca = PCA(msidata, ncomp=2) 548 pca = PCA(msidata, ncomp=2)
516 par(mfrow = c(2,1)) 549 par(mfrow = c(2,1))
517 plot(pca, col=c("black", "darkgrey"), main="PCA for two components") 550 plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
518 image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy)) 551 image(pca, col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy))
519 552
520 #end if 553 #end if
521 554
522 ################## III) properties over spectra index ########## 555 ################## III) properties over spectra index ######################
523 ############################################################################## 556 ############################################################################
524 print("properties over pixels") 557 print("properties over pixels")
525 par(mfrow = c(2,1), mar=c(5,6,4,2)) 558 par(mfrow = c(2,1), mar=c(5,6,4,2))
526 559
527 ########################## 9) number of peaks per spectrum ################# 560 ########################## 9) number of peaks per spectrum #################
528 ## 9a) scatterplot 561 ## 9a) scatterplot
529 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum") 562
530 title(xlab="Spectra index", line=3) 563 ## order pixels according to annotation groups if annotations are provided
531 title(ylab="Number of peaks", line=4) 564 if (!is.null(levels(msidata\$annotation))){
532 abline(v=abline_vector, lty = 3) 565
566 pixel_peaks_df = cbind(pixel_name_df, peaksperpixel)
567 pixel_ordered = pixel_peaks_df[order(pixel_peaks_df\$pixel_name),] ## order pixels according to annotation group names
568 pixel_ordered\$annotation_order = 1:ncol(msidata)
569 plot_colorByDensity(pixel_ordered\$annotation_order, pixel_ordered\$peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum")
570 title(xlab="Spectra index ordered for annotation groups", line=3)
571 title(ylab="Number of peaks", line=4)
572 abline(v=abline_vector, lty = 3)
573 }else{
574 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum")
575 title(xlab="Spectra index", line=3)
576 title(ylab="Number of peaks", line=4)
577 }
533 578
534 ## 9b) histogram 579 ## 9b) histogram
535 580
536 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") 581 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
537 title(main="Number of peaks per spectrum", line=2) 582 title(main="Number of peaks per spectrum", line=2)
538 title(ylab="Frequency = # spectra", line=4) 583 title(ylab="Frequency = # spectra", line=4)
539 abline(v=median(peaksperpixel), col="blue") 584 abline(v=median(peaksperpixel), col="blue")
540 585
541 ## 9c) additional histogram to show subsample contributions 586 ## 9c) additional histogram to show contribution of annotation groups
542 ## only when samples were combined before (combined_sample) 587 ## only when pixel annotations were loaded
543 if (!is.null(levels(msidata\$combined_sample))){ 588 if (!is.null(levels(msidata\$annotation))){
544 589
545 df_9 = data.frame(peaksperpixel, msidata\$combined_sample) 590 df_9 = data.frame(peaksperpixel, msidata\$annotation)
546 colnames(df_9) = c("Npeaks", "sample_name") 591 colnames(df_9) = c("Npeaks", "annotation")
547 592
548 hist_9 = ggplot(df_9, aes(x=Npeaks, fill=sample_name)) + 593 hist_9 = ggplot(df_9, aes(x=Npeaks, fill=annotation)) +
549 geom_histogram()+ theme_bw()+ 594 geom_histogram()+ theme_bw()+
550 theme(text=element_text(family="ArialMT", face="bold", size=12))+ 595 theme(text=element_text(family="ArialMT", face="bold", size=12))+
551 theme(plot.title = element_text(hjust = 0.5))+ 596 theme(plot.title = element_text(hjust = 0.5))+
552 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ 597 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
553 theme(legend.position="bottom",legend.direction="vertical")+ 598 theme(legend.position="bottom",legend.direction="vertical")+
554 labs(title="Number of peaks per spectrum and sample", x="Number of peaks per spectrum", y = "Frequency = # spectra") + 599 labs(title="Number of peaks per spectrum and annotation group", x="Number of peaks per spectrum", y = "Frequency = # spectra") +
555 guides(fill=guide_legend(ncol=5,byrow=TRUE))+ 600 guides(fill=guide_legend(ncol=5,byrow=TRUE))+
556 geom_vline(xintercept = median(peaksperpixel), size = 1, colour = "black",linetype = "dashed") 601 geom_vline(xintercept = median(peaksperpixel), size = 1, colour = "black",linetype = "dashed")
557 print(hist_9)} 602 print(hist_9)}
558 603
559 ########################## 10) TIC per spectrum ########################### 604 ########################## 10) TIC per spectrum ###########################
560 605
561 ## 10a)density scatterplot 606 ## 10a)density scatterplot
562 par(mfrow = c(2,1), mar=c(5,6,4,2)) 607 par(mfrow = c(2,1), mar=c(5,6,4,2))
563 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="TIC per spectrum") 608
564 title(xlab="Spectra index", line=3) 609 ## order pixels according to annotation groups if annotations are provided
565 title(ylab = "Total ion chromatogram intensity", line=4) 610 if (!is.null(levels(msidata\$annotation))){
566 abline(v=abline_vector, lty = 3) 611
612 pixel_TIC_df = cbind(pixel_name_df, TICs)
613 pixel_ordered = pixel_TIC_df[order(pixel_TIC_df\$pixel_name),] ## order pixels according to annotation group names
614 pixel_ordered\$annotation_order = 1:ncol(msidata)
615
616 plot_colorByDensity(pixel_ordered\$annotation_order, pixel_ordered\$TICs, ylab = "", xlab = "", main="TIC per spectrum")
617 title(xlab="Spectra index ordered for annotation groups", line=3)
618 title(ylab="Total ion chromatogram intensity", line=4)
619 abline(v=abline_vector, lty = 3)
620 }else{
621 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="TIC per spectrum")
622 title(xlab="Spectra index", line=3)
623 title(ylab = "Total ion chromatogram intensity", line=4)
624 }
567 625
568 ## 10b) histogram 626 ## 10b) histogram
569 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="") 627 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
570 title(main= "TIC per spectrum", line=2) 628 title(main= "TIC per spectrum", line=2)
571 title(ylab="Frequency = # spectra", line=4) 629 title(ylab="Frequency = # spectra", line=4)
572 abline(v=median(log(TICs[TICs>0])), col="blue") 630 abline(v=median(log(TICs[TICs>0])), col="blue")
573 631
574 ## 10c) additional histogram to show subsample contributions 632 ## 10c) additional histogram to show annotation contributions
575 ## only when samples were combined before (combined_sample) 633 ## only when pixel annotations were loaded
576 if (!is.null(levels(msidata\$combined_sample))){ 634 if (!is.null(levels(msidata\$annotation))){
577 df_10 = data.frame(log(TICs), msidata\$combined_sample) 635 df_10 = data.frame(log(TICs), msidata\$annotation)
578 colnames(df_10) = c("TICs", "sample_name") 636 colnames(df_10) = c("TICs", "annotation")
579 637
580 hist_10 = ggplot(df_10, aes(x=TICs, fill=sample_name)) + 638 hist_10 = ggplot(df_10, aes(x=TICs, fill=annotation)) +
581 geom_histogram()+ theme_bw()+ 639 geom_histogram()+ theme_bw()+
582 theme(text=element_text(family="ArialMT", face="bold", size=12))+ 640 theme(text=element_text(family="ArialMT", face="bold", size=12))+
583 theme(plot.title = element_text(hjust = 0.5))+ 641 theme(plot.title = element_text(hjust = 0.5))+
584 theme(legend.position="bottom",legend.direction="vertical")+ 642 theme(legend.position="bottom",legend.direction="vertical")+
585 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ 643 theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
586 labs(title="TIC per spectrum and sample", x="log(TIC per spectrum)", y = "Frequency = # spectra") + 644 labs(title="TIC per spectrum and annotation group", x="log(TIC per spectrum)", y = "Frequency = # spectra") +
587 guides(fill=guide_legend(ncol=5,byrow=TRUE))+ 645 guides(fill=guide_legend(ncol=5,byrow=TRUE))+
588 geom_vline(xintercept = median(log(TICs[TICs>0])), size = 1, colour = "black",linetype = "dashed") 646 geom_vline(xintercept = median(log(TICs[TICs>0])), size = 1, colour = "black",linetype = "dashed")
589 print(hist_10)} 647 print(hist_10)}
590 648
591 ################################## IV) properties over m/z #################### 649 ################################## IV) properties over m/z ####################
592 ############################################################################ 650 ############################################################################
593 print("propverties over m/z") 651 print("properties over m/z")
594 652
595 ########################## 11) Histogram on m/z values ##################### 653 ########################## 11) Histogram of m/z values #####################
596 654
597 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1)) 655 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
598 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values") 656 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values")
599 657
600 ########################## 12) Number of peaks per m/z ##################### 658 ########################## 12) Number of peaks per m/z #####################
640 698
641 par(mfrow = c(2,1), mar=c(5,6,4,2)) 699 par(mfrow = c(2,1), mar=c(5,6,4,2))
642 700
643 ## 14a) Median intensity over spectra 701 ## 14a) Median intensity over spectra
644 medianint_spectra = apply(spectra(msidata), 2, median) 702 medianint_spectra = apply(spectra(msidata), 2, median)
645 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="") 703
646 title(ylab="Median spectrum intensity", line=4) 704 ## order pixels according to annotation groups if annotations are provided
647 abline(v=abline_vector, lty = 3) 705 if (!is.null(levels(msidata\$annotation))){
706
707 pixel_median_df = cbind(pixel_name_df, medianint_spectra)
708 pixel_ordered = pixel_median_df[order(pixel_median_df\$pixel_name),] ## order pixels according to annotation group names
709 plot(pixel_ordered\$medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index ordered for annotation groups", ylab="")
710 title(ylab="Median spectrum intensity", line=4)
711 abline(v=abline_vector, lty = 3)
712 }else{
713 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="")
714 title(ylab="Median spectrum intensity", line=4)
715 }
648 716
649 ## 14b) histogram: 717 ## 14b) histogram:
650 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1) 718 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
651 title(main="Log2-transformed intensities", line=2) 719 title(main="Log2-transformed intensities", line=2)
652 title(xlab="log2 intensities") 720 title(xlab="log2 intensities")
653 title(ylab="Frequency", line=4) 721 title(ylab="Frequency", line=4)
654 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue") 722 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
655 723
656 ## 14c) histogram to show subsample contribution 724 ## 14c) histogram to show contribution of annotation groups
657 ## only for previously combined samples 725 ## only when annotation tabular was provided
658 if (!is.null(levels(msidata\$combined_sample))){ 726 if (!is.null(levels(msidata\$annotation))){
659 727
660 df_13 = data.frame(matrix(,ncol=2, nrow=0)) 728 df_13 = data.frame(matrix(,ncol=2, nrow=0))
661 for (subsample in levels(msidata\$combined_sample)){ 729 for (subsample in levels(msidata\$annotation)){
662 log2_int_subsample = log2(spectra(msidata)[,msidata\$combined_sample==subsample]) 730 log2_int_subsample = log2(spectra(msidata)[,msidata\$annotation==subsample])
663 df_subsample = data.frame(as.numeric(log2_int_subsample)) 731 df_subsample = data.frame(as.numeric(log2_int_subsample))
664 df_subsample\$sample_name = subsample 732 df_subsample\$annotation = subsample
665 df_13 = rbind(df_13, df_subsample)} 733 df_13 = rbind(df_13, df_subsample)}
666 df_13\$sample_name = as.factor(df_13\$sample_name) 734 df_13\$annotation = as.factor(df_13\$annotation)
667 colnames(df_13) = c("logint", "sample_name") 735 colnames(df_13) = c("logint", "annotation")
668 736
669 hist_13 = ggplot(df_13, aes(x=logint, fill=sample_name)) + 737 hist_13 = ggplot(df_13, aes(x=logint, fill=annotation)) +
670 geom_histogram()+ theme_bw()+ 738 geom_histogram()+ theme_bw()+
671 theme(text=element_text(family="ArialMT", face="bold", size=12))+ 739 theme(text=element_text(family="ArialMT", face="bold", size=12))+
672 labs(title="Log2-transformed intensities per sample", x="log2 intensities", y = "Frequency") + 740 labs(title="Log2-transformed intensities per sample", x="log2 intensities", y = "Frequency") +
673 theme(plot.title = element_text(hjust = 0.5))+ 741 theme(plot.title = element_text(hjust = 0.5))+
674 theme(legend.position="bottom",legend.direction="vertical")+ 742 theme(legend.position="bottom",legend.direction="vertical")+
679 747
680 ## 14d) boxplots to visualize in a different way the intensity distributions 748 ## 14d) boxplots to visualize in a different way the intensity distributions
681 par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1)) 749 par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1))
682 750
683 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata)) 751 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
684 for (subsample in levels(msidata\$combined_sample)){ 752 for (subsample in levels(msidata\$annotation)){
685 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$combined_sample==subsample]) 753 mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample])
686 mean_matrix = cbind(mean_matrix, mean_mz_sample)} 754 mean_matrix = cbind(mean_matrix, mean_mz_sample)}
687 755
688 boxplot(log2(mean_matrix), ylab = "log2 mean intensity per m/z", main="Mean intensities per m/z and sample", xaxt = "n") 756 boxplot(log2(mean_matrix), ylab = "log2 mean intensity per m/z", main="Mean intensities per m/z and annotation group", xaxt = "n")
689 (axis(1, at = c(1:number_combined), labels=levels(msidata\$combined_sample), cex.axis=cex_boxplot, las=2)) 757 (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), cex.axis=cex_boxplot, las=2))
690 } 758 }
691 759
692 ################################## VI) Mass spectra and m/z accuracy ######################## 760 ################################## VI) Mass spectra and m/z accuracy ########################
693 ############################################################################ 761 ############################################################################
694 print("Mass spectra and m/z accuracy") 762 print("Mass spectra and m/z accuracy")
758 abline(v=c(mzvalue), col="green2", lty=4) 826 abline(v=c(mzvalue), col="green2", lty=4)
759 title(paste0("theor. m/z: ", inputcalibrants[count,1]), col.main="blue", outer=TRUE, line=0, adj=0.074) 827 title(paste0("theor. m/z: ", inputcalibrants[count,1]), col.main="blue", outer=TRUE, line=0, adj=0.074)
760 title(paste0("most abundant m/z: ", round(maxvalue, digits=4)), col.main="red", outer=TRUE, line=0, adj=0.49) 828 title(paste0("most abundant m/z: ", round(maxvalue, digits=4)), col.main="red", outer=TRUE, line=0, adj=0.49)
761 title(paste0("closest m/z: ", round(mzvalue, digits=4)), col.main="green2", outer=TRUE, line=0, adj=0.93) 829 title(paste0("closest m/z: ", round(mzvalue, digits=4)), col.main="green2", outer=TRUE, line=0, adj=0.93)
762 830
763 ### 16b) one large extra plot with different colours for different samples (for combined_sample only) 831 ### 16b) one large extra plot with different colours for different pixel annotation groups
764 832
765 if (!is.null(levels(msidata\$combined_sample))){ 833 if (!is.null(levels(msidata\$annotation))){
766 if (number_combined < 10){ 834 if (number_combined < 10){
767 key_zoomed = TRUE 835 key_zoomed = TRUE
768 }else{key_zoomed = FALSE} 836 }else{key_zoomed = FALSE}
769 par(mfrow = c(1, 1)) 837 par(mfrow = c(1, 1))
770 plot(msidata[minmasspixel:maxmasspixel,], pixel=1:ncol(msidata),main="Average spectrum per sample", 838 plot(msidata[minmasspixel:maxmasspixel,], pixel=1:ncol(msidata),main="Average spectrum per annotation group",
771 pixel.groups=msidata\$combined_sample, key=key_zoomed, col=hue_pal()(number_combined),superpose=TRUE) 839 pixel.groups=msidata\$annotation, key=key_zoomed, col=hue_pal()(number_combined),superpose=TRUE)
772 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="black", lty=c(3,1,3)) 840 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="black", lty=c(3,1,3))
773 } 841 }
774 count=count+1 842 count=count+1
775 } 843 }
776 844
809 geom_text(aes(label=differencevector2), vjust=-0.3, size=5.5, col="blue")+ 877 geom_text(aes(label=differencevector2), vjust=-0.3, size=5.5, col="blue")+
810 theme(axis.text.x = element_text(angle = 90, hjust = 1, size=16)) 878 theme(axis.text.x = element_text(angle = 90, hjust = 1, size=16))
811 879
812 print(diff_plot2) 880 print(diff_plot2)
813 881
814 882 #################### 19) ppm difference over pixels #####################
815 #################### 19) ppm difference over pixels ##################### 883
816 884 par(mfrow = c(1,1))
817 par(mfrow = c(1,1)) 885 mycolours = c("darkgrey", "darkblue", "blue", "green" , "red", "orange", "yellow", "magenta", "olivedrab1", "lightseagreen")
818 mycolours = c("darkgrey", "darkblue", "blue", "green" , "red", "orange", "yellow", "magenta", "olivedrab1", "lightseagreen") 886 count = 1
819 count = 1 887 ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata)))
820 ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata))) 888 for (calibrant in inputcalibrantmasses){
821 for (calibrant in inputcalibrantmasses){ 889 ### find m/z with the highest mean intensity in m/z range, if no m/z in the range, ppm differences for this calibrant will be NA
822 ### find m/z with the highest mean intensity in m/z range, if no m/z in the range, all ppm differences will be NA 890 filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],]
823 filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],] 891
824 892 if (nrow(filtered_data) > 0){
825 if (nrow(filtered_data) > 0){ 893 ### filtered for m/z range, find max peak in each spectrum (pixel)(
826 ### filtered for m/z range, now go through it pixel by pixel to find max peak in each spectrum 894 ppm_vector = numeric()
827 ppm_vector = numeric() 895 for (pixel_count in 1:ncol(filtered_data)){
828 for (pixel_count in 1:ncol(filtered_data)){ 896 mz_max = mz(filtered_data)[which.max(spectra(filtered_data)[,pixel_count])]
829 mz_max = mz(filtered_data)[which.max(spectra(filtered_data)[,pixel_count])] 897
830 898 mzdiff = mz_max - calibrant
831 mzdiff = mz_max - calibrant 899 ppmdiff = mzdiff/calibrant*1000000
832 ppmdiff = mzdiff/calibrant*1000000 900
833 901 ### if maximum intensity in m/z range was 0 set ppm diff to NA (not shown in plot)
834 ### if maximum intensity in m/z range was 0 set ppm diff to NA (not shown in plot) 902 if (max(spectra(filtered_data)[,pixel_count]) == 0){
835 if (max(spectra(filtered_data)[,pixel_count]) == 0){ 903 ppmdiff = NA}
836 ppmdiff = NA} 904 ppm_vector[pixel_count] = ppmdiff}
837 ppm_vector[pixel_count] = ppmdiff} 905
838 }else{ppm_vector = rep(NA, ncol(msidata))} 906 }else{
839 907 ppm_vector = rep(NA, ncol(msidata))
840 ppm_df = cbind(ppm_df, ppm_vector) 908 }
841 count=count+1} 909
910 ppm_df = cbind(ppm_df, ppm_vector)
911 count=count+1
912 }
842 913
843 if (sum(is.na(ppm_df)) == ncol(ppm_df)*nrow(ppm_df)){ 914 if (sum(is.na(ppm_df)) == ncol(ppm_df)*nrow(ppm_df)){
844 plot(0,type='n',axes=FALSE,ann=FALSE) 915 plot(0,type='n',axes=FALSE,ann=FALSE)
845 title(main=paste("plot 19: no peaks in the chosen region, repeat with higher ppm range")) 916 title(main=paste("plot 19: no peaks in the chosen region, repeat with higher ppm range"))
846 }else{ 917 }else{
847 918
848 ### plot ppm differences over pixels (spectra index) 919 ### plot ppm differences over pixels (spectra index)
849
850 par(mar=c(4.1, 4.1, 4.1, 7.5)) 920 par(mar=c(4.1, 4.1, 4.1, 7.5))
851 plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,ncol(filtered_data)),xlab = "Spectra index", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theor. m/z\n(per spectrum)") 921
852 922 ## if annotations are provided, pixel index is ordered according to annotation groups
853 for (each_cal in 1:ncol(ppm_df)){ 923 if (!is.null(levels(msidata\$annotation))){
854 lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")} 924 ppm_df_pixels =cbind(pixel_name_df, ppm_df)
855 legend("topright", inset=c(-0.25,0), xpd = TRUE, bty="n", legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1) 925 pixel_ordered = ppm_df_pixels[order(ppm_df_pixels\$pixel_name),] ## order pixels according to annotation group names
856 abline(v=abline_vector, lty = 3)} 926
927 plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,ncol(filtered_data)),xlab = "Spectra index ordered for annotation groups", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theor. m/z\n(per spectrum)")
928
929 for (each_cal in 1:ncol(ppm_df)){
930 lines(pixel_ordered[,each_cal+2], col=mycolours[each_cal], type="p")}
931 legend("topright", inset=c(-0.25,0), xpd = TRUE, bty="n", legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1)
932 abline(v=abline_vector, lty = 3)
933 }else{
934
935 plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,ncol(filtered_data)),xlab = "Spectra index", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theor. m/z\n(per spectrum)")
936
937 for (each_cal in 1:ncol(ppm_df)){
938 lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")}
939 legend("topright", inset=c(-0.25,0), xpd = TRUE, bty="n", legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1)}
940 }
857 941
858 }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")} 942 }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")}
859 }else{ 943 }else{
860 print("inputfile has no intensities > 0") 944 print("inputfile has no intensities > 0")
861 } 945 }
862 dev.off() 946 dev.off()
863
864 947
865 948
866 ]]></configfile> 949 ]]></configfile>
867 </configfiles> 950 </configfiles>
868 <inputs> 951 <inputs>
879 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm"> 962 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm">
880 <option value="mz" >mz</option> 963 <option value="mz" >mz</option>
881 <option value="ppm" selected="True" >ppm</option> 964 <option value="ppm" selected="True" >ppm</option>
882 </param> 965 </param>
883 </when> 966 </when>
967 </conditional>
968 <conditional name="tabular_annotation">
969 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file for QC plots">
970 <option value="no_annotation" selected="True">pixels belong into one group only</option>
971 <option value="yes_annotation">use pixel annotation from a tabular file</option>
972 </param>
973 <when value="yes_annotation">
974 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
975 help="Tabular file with three columns: x values, y values and pixel annotations"/>
976 <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
977 <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
978 <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
979 </when>
980 <when value="no_annotation"/>
884 </conditional> 981 </conditional>
885 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear as header in the quality report, if nothing given input dataset name is used"/> 982 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear as header in the quality report, if nothing given input dataset name is used"/>
886 <param name="calibrant_file" type="data" optional="true" format="tabular" 983 <param name="calibrant_file" type="data" optional="true" format="tabular"
887 label="File with internal calibrants" help="first column: m/z, second column: name (optional), tabular file"/> 984 label="File with internal calibrants" help="first column: m/z, second column: name (optional), tabular file"/>
888 <param name="plusminus_ppm" value="50" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/> 985 <param name="plusminus_ppm" value="50" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/>
910 <conditional name="processed_cond"> 1007 <conditional name="processed_cond">
911 <param name="processed_file" value="processed"/> 1008 <param name="processed_file" value="processed"/>
912 <param name="accuracy" value="200"/> 1009 <param name="accuracy" value="200"/>
913 <param name="units" value="ppm"/> 1010 <param name="units" value="ppm"/>
914 </conditional> 1011 </conditional>
1012 <conditional name="tabular_annotation">
1013 <param name="load_annotation" value="no_annotation"/>
1014 </conditional>
915 <param name="calibrant_file" value="inputcalibrantfile1.txt"/> 1015 <param name="calibrant_file" value="inputcalibrantfile1.txt"/>
916 <param name="plusminus_ppm" value="100"/> 1016 <param name="plusminus_ppm" value="100"/>
917 <param name="filename" value="Testfile_imzml"/> 1017 <param name="filename" value="Testfile_imzml"/>
918 <param name="do_pca" value="True"/> 1018 <param name="do_pca" value="True"/>
919 <repeat name="calibrantratio"> 1019 <repeat name="calibrantratio">
924 </repeat> 1024 </repeat>
925 <param name="pixel_output" value="True"/> 1025 <param name="pixel_output" value="True"/>
926 <output name="pixel_tabular_output" file="spectra_info_imzml.txt"/> 1026 <output name="pixel_tabular_output" file="spectra_info_imzml.txt"/>
927 <output name="plots" file="QC_imzml.pdf" compare="sim_size" delta="20000"/> 1027 <output name="plots" file="QC_imzml.pdf" compare="sim_size" delta="20000"/>
928 </test> 1028 </test>
1029
929 <test expect_num_outputs="1"> 1030 <test expect_num_outputs="1">
930 <param name="infile" value="" ftype="analyze75"> 1031 <param name="infile" value="" ftype="analyze75">
931 <composite_data value="Analyze75.hdr"/> 1032 <composite_data value="Analyze75.hdr"/>
932 <composite_data value="Analyze75.img"/> 1033 <composite_data value="Analyze75.img"/>
933 <composite_data value="Analyze75.t2m"/> 1034 <composite_data value="Analyze75.t2m"/>
934 </param> 1035 </param>
1036 <conditional name="tabular_annotation">
1037 <param name="load_annotation" value="no_annotation"/>
1038 </conditional>
935 <param name="calibrant_file" value="inputcalibrantfile2.txt"/> 1039 <param name="calibrant_file" value="inputcalibrantfile2.txt"/>
936 <param name="filename" value="Testfile_analyze75"/> 1040 <param name="filename" value="Testfile_analyze75"/>
937 <param name="do_pca" value="True"/> 1041 <param name="do_pca" value="True"/>
938 <output name="plots" file="QC_analyze75.pdf" compare="sim_size" delta="20000"/> 1042 <output name="plots" file="QC_analyze75.pdf" compare="sim_size" delta="20000"/>
939 </test> 1043 </test>
1044
940 <test expect_num_outputs="2"> 1045 <test expect_num_outputs="2">
941 <param name="infile" value="123_combined.RData" ftype="rdata"/> 1046 <param name="infile" value="123_combined.RData" ftype="rdata"/>
1047
1048 <conditional name="tabular_annotation">
1049 <param name="load_annotation" value="yes_annotation"/>
1050 <param name="annotation_file" value="annotations_rdata.tabular"/>
1051 <param name="column_x" value="1"/>
1052 <param name="column_y" value="2"/>
1053 <param name="column_names" value="3"/>
1054 </conditional>
1055 <param name="calibrant_file" value="inputcalibrantfile1.txt"/>
1056 <param name="plusminus_ppm" value="100"/>
942 <param name="filename" value="Testfile_rdata"/> 1057 <param name="filename" value="Testfile_rdata"/>
943 <param name="do_pca" value="True"/> 1058 <param name="do_pca" value="True"/>
944 <param name="pixel_output" value="True"/> 1059 <param name="pixel_output" value="True"/>
945 <output name="pixel_tabular_output" file="spectra_info_123_combi.txt"/> 1060 <output name="pixel_tabular_output" file="spectra_info_123_combi.txt"/>
946 <output name="plots" file="QC_rdata.pdf" compare="sim_size" delta="20000"/> 1061 <output name="plots" file="QC_rdata.pdf" compare="sim_size" delta="20000"/>
947 </test> 1062 </test>
948 <test expect_num_outputs="1"> 1063 <test expect_num_outputs="1">
949 <param name="infile" value="empty_spectra.rdata" ftype="rdata"/> 1064 <param name="infile" value="empty_spectra.rdata" ftype="rdata"/>
1065 <conditional name="tabular_annotation">
1066 <param name="load_annotation" value="no_annotation"/>
1067 </conditional>
950 <param name="calibrant_file" value="inputcalibrantfile2.txt"/> 1068 <param name="calibrant_file" value="inputcalibrantfile2.txt"/>
951 <param name="filename" value="Testfile_rdata"/> 1069 <param name="filename" value="Testfile_rdata"/>
952 <param name="do_pca" value="False"/> 1070 <param name="do_pca" value="False"/>
953 <output name="plots" file="QC_empty_spectra.pdf" compare="sim_size" delta="20000"/> 1071 <output name="plots" file="QC_empty_spectra.pdf" compare="sim_size" delta="20000"/>
954 </test> 1072 </test>
962 Input data: 3 types of input data can be used: 1080 Input data: 3 types of input data can be used:
963 1081
964 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 1082 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
965 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 1083 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
966 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 1084 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
1085 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column
967 1086
968 Options: 1087 Options:
969 1088
970 - internal calibrants are used for m/z heatmaps (x-y grid), heatmap of number of calibrants per spectrum (x-y grid), zoomed in mass spectra, m/z accuracy 1089 - internal calibrants are used for m/z heatmaps (x-y grid), heatmap of number of calibrants per spectrum (x-y grid), zoomed in mass spectra, m/z accuracy
971 - optional fold change plot: draws a heatmap (x-y grid) for the fold change of two m/z (log2(intensity ratio)) 1090 - optional fold change plot: draws a heatmap (x-y grid) for the fold change of two m/z (log2(intensity ratio))
972 1091
973 Output: 1092 Output:
974 1093
975 - quality control report as pdf with key numbers and descriptive plots describing the mass spectrometry imaging data 1094 - quality control report as pdf with key numbers and descriptive plots describing the mass spectrometry imaging data
976 - optional spectra information as tabular file with numbers of calibrants (needs valid calibrants), numbers of peaks, TIC and most abundant m/z in each spectrum 1095 - optional spectra information as tabular file with annotation (if provided), numbers of calibrants (needs valid calibrants), numbers of peaks, TIC and most abundant m/z in each spectrum
977 1096
978 Tip: 1097 Tip:
979 1098
980 - For additional m/z heatmaps use the MSI ion images tool and to plot more mass spectra use the MSI mass spectra tool. 1099 - For additional m/z heatmaps use the MSI ion images tool and to plot more mass spectra use the MSI mass spectra tool.
981 1100
982 ---------------------------------------------------------------------------------------------------------------------------------------------------- 1101 ----------------------------------------------------------------------------------------------------------------------------------------------------
983 1102
984 Overview of the QC report plots: 1103 Overview of the QC report plots:
985 1104
986 - (comb): these plots will only be drawn if several files were combined into one file with the msi_combine tool 1105 - (annot): these plots will only be drawn if pixel annotations are loaded via a tabular file
987 - (cal): these plots will only be drawn if a tabular file with at least one valid calibrant m/z is provided 1106 - (cal): these plots will only be drawn if a tabular file with at least one valid calibrant m/z is provided
988 - (FC): these plots will only be drawn if the optional fold change image is selected 1107 - (FC): these plots will only be drawn if the optional fold change image is selected
989 - Vertical lines in histograms represent median values. In density scatter plots the colour changes from blue to green, yellow and red the more points are overlayed. 1108 - Vertical lines in histograms represent median values. In density scatter plots the colour changes from blue to green, yellow and red the more points are overlayed.
990 1109
991 - Overview of file properties: Numbers and ranges for m/z features and pixels are given. Median and range across all intensity values are provided. Intensities > 0 gives the percentage of m/z-pixel pairs with an intensity above zero. The number of empty spectra (TIC = 0), the median number of peaks (intensities > 0) per spectrum as well as the median TIC (total ion chromatogram) are given. The processing status of the file is provided as well as the number of valid calibrants from the provided tabular file. 1110 - Overview of file properties: Numbers and ranges for m/z features and pixels are given. Median and range across all intensity values are provided. Intensities > 0 gives the percentage of m/z-pixel pairs with an intensity above zero. The number of empty spectra (TIC = 0), the median number of peaks (intensities > 0) per spectrum as well as the median TIC (total ion chromatogram) are given. The processing status of the file is provided as well as the number of valid calibrants from the provided tabular file.
992 1111
993 x-y images (pixel/spectra information): 1112 x-y images (pixel/spectra information):
994 1113
995 - (comb) Spatial orientation of combined data: All pixels of a combined file have the same colour. 1114 - (annot) Spatial orientation of annotated pixels: All pixels of one annotation group have the same colour.
996 - Pixel order: Shows the order of the pixels in the provided file. Depending on the instrument this can represent the acquisition order. 1115 - Pixel order: Shows the order of the pixels in the provided file. Depending on the instrument this can represent the acquisition order. If an annotation file is provided, pixels are ordered according to annotation groups.
997 - (cal) Number of calibrants per pixel: In every spectrum the calibrant m/z window (calibrant m/z plusminus 'ppm range') is searched for peaks (intensity > 0). Calibrants are considered present in a spectrum when they have at least one peak in their m/z window. 1116 - (cal) Number of calibrants per pixel: In every spectrum the calibrant m/z window (calibrant m/z plusminus 'ppm range') is searched for peaks (intensity > 0). Calibrants are considered present in a spectrum when they have at least one peak in their m/z window.
998 - (FC) Control of fold change plot: For both input m/z a zoomed in average spectrum is drawn with the input m/z as blue dashed line, the m/z range as blue dotted lines and the maximum intensity in the m/z window with a red line. 1117 - (FC) Control of fold change plot: For both input m/z a zoomed in average spectrum is drawn with the input m/z as blue dashed line, the m/z range as blue dotted lines and the maximum intensity in the m/z window with a red line.
999 - (FC) Fold change image: For each spectrum the intensities of the two optimal m/z features (red lines in control plots) are divided and log2 transformed to obtain the fold change, which is then plotted as a heatmap. 1118 - (FC) Fold change image: For each spectrum the intensities of the two optimal m/z features (red lines in control plots) are divided and log2 transformed to obtain the fold change, which is then plotted as a heatmap.
1000 - (cal) Intensity heatmaps for the m/z value that is closest to the calibrant m/z. The intensities are averaged within the calibrant m/z window (ppm range). 1119 - (cal) Intensity heatmaps for the m/z value that is closest to the calibrant m/z. The intensities are averaged within the calibrant m/z window (ppm range).
1001 - Number of peaks per spectrum: For each spectrum the number of m/z values with intensity > 0 is calculated and plotted as heatmap. 1120 - Number of peaks per spectrum: For each spectrum the number of m/z values with intensity > 0 is calculated and plotted as heatmap.
1003 - Most abundant m/z in each spectrum: For each spectrum the m/z value with the highest intensity is plotted. 1122 - Most abundant m/z in each spectrum: For each spectrum the m/z value with the highest intensity is plotted.
1004 - PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels. 1123 - PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels.
1005 1124
1006 Properties over spectra/pixels: 1125 Properties over spectra/pixels:
1007 1126
1008 - Number of peaks per spectrum: Scatter plot and histogram showing the number of intensities > 0 for each spectrum. Dotted lines in the scatter plot separate spectra of combined samples. 1127 - Number of peaks per spectrum: Scatter plot and histogram showing the number of intensities > 0 for each spectrum. If annotation tabular file is provided, the pixels are sorted according to annotation groups and the dotted lines in the scatter plot separate spectra of different annotation groups.
1009 - (comb) Number of peaks per spectrum and sample: Same histogram as in plot before but with colours to show the contribution of each combined sample. 1128 - (annot) Number of peaks per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group.
1010 - TIC per spectrum: Scatter plot and histogram showing the sum of all intensities per spectrum (TIC). Dotted lines in the scatter plot separate spectra of combined samples. 1129 - TIC per spectrum: Scatter plot and histogram showing the sum of all intensities per spectrum (TIC). Dotted lines in the scatter plot separate spectra of different annotation groups.
1011 - (comb) TIC per spectrum and sample: Same histogram as in plot before but with colours to show the contribution of each combined sample. 1130 - (annot) TIC per spectrum and annotation group: Same histogram as in plot before but with colours to show the contribution of each pixel annotation group.
1012 1131
1013 Properties over m/z features: 1132 Properties over m/z features:
1014 1133
1015 - Histogram of m/z values: Histogram of all m/z values (complete m/z axis) 1134 - Histogram of m/z values: Histogram of all m/z values (complete m/z axis)
1016 - Number of peaks per m/z: Scatter plot and histogram giving the number of intensities > 0 for each m/z. 1135 - Number of peaks per m/z: Scatter plot and histogram giving the number of intensities > 0 for each m/z.
1017 - Sum of intensities per m/z: Scatter plot and histogram of the sum of all intensities per m/z. 1136 - Sum of intensities per m/z: Scatter plot and histogram of the sum of all intensities per m/z.
1018 1137
1019 Intensity plots: 1138 Intensity plots:
1020 1139
1021 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of combined samples. 1140 - Median intensity per spectrum: Scatter plot in which each point represents the median intensity for one spectrum. Dotted lines in the scatter plot separate spectra of different annotation groups.
1022 - Log2-transformed intensities: Histogram of log2-transformed intensities. 1141 - Log2-transformed intensities: Histogram of log2-transformed intensities.
1023 - (comb) log2-transformed intensities per sample: Same histogram as before but with colours to show the contribution of each combined sample. 1142 - (annot) log2-transformed intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group.
1024 - (comb) Mean intensities per m/z and sample: For all pixels of a sample the mean intensity for each m/z is calculated and shown as boxplot. 1143 - (annot) Mean intensities per m/z and annotation group: For all pixels of an annotation group the mean intensity for each m/z is calculated and shown as boxplot.
1025 1144
1026 Mass spectra and m/z accuracy: 1145 Mass spectra and m/z accuracy:
1027 1146
1028 - Mass spectra over the full m/z range: First plot shows the average intensities over all spectra. The other three mass spectra are from single pixels (spectra). 1147 - Mass spectra over the full m/z range: First plot shows the average intensities over all spectra. The other three mass spectra are from single pixels (spectra).
1029 - (cal) For each calibrant four zoomed in mass spectra are drawn: The first shows the average intensities over all spectra and the other three are single mass spectra. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. 1148 - (cal) For each calibrant four zoomed in mass spectra are drawn: The first shows the average intensities over all spectra and the other three are single mass spectra. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window.
1030 - (comb) Average spectrum per sample: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each combined sample separately. 1149 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately.
1031 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra. 1150 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra.
1032 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z values are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra. 1151 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z values are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra.
1033 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest average intensity and the theoretical m/z are plotted. The calibrants have different plotting colours. Dotted lines separate spectra of combined samples. 1152 - (cal) Difference m/z with max. average intensity vs. theor. m/z (per spectrum): For each spectrum the ppm difference between the m/z with the highest average intensity and the theoretical m/z are plotted. The calibrants have different plotting colours. Dotted lines separate spectra of different annotation groups.
1034 1153
1035 1154
1036 ]]> 1155 ]]>
1037 </help> 1156 </help>
1038 <citations> 1157 <citations>