comparison preprocessing.xml @ 17:611d80c0e29d draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit eeeb69463a2037a6ee620b9223cb152fcc39f1b0
author galaxyp
date Wed, 19 Apr 2023 22:46:23 +0000
parents accf9fb6ea01
children 83aac7741200
comparison
equal deleted inserted replaced
16:798da6bdff3d 17:611d80c0e29d
1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0"> 1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1">
2 <description> 2 <description>
3 mass spectrometry imaging preprocessing 3 mass spectrometry imaging preprocessing
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>macros.xml</import> 6 <import>macros.xml</import>
7 </macros> 7 </macros>
8 <expand macro="requirements"> 8 <expand macro="requirements">
9 <requirement type="package" version="2.3">r-gridextra</requirement> 9 <requirement type="package" version="2.3">r-gridextra</requirement>
10 <requirement type="package" version="3.3.5">r-ggplot2</requirement> 10 <requirement type="package" version="3.4.0">r-ggplot2</requirement>
11 <requirement type="package" version="3.40.0">bioconductor-sva</requirement>
12 <requirement type="package" version="1.1.0.1">r-randomcolor</requirement>
11 </expand> 13 </expand>
12 <command detect_errors="exit_code"> 14 <command detect_errors="exit_code">
13 <![CDATA[ 15 <![CDATA[
14 16
15 @INPUT_LINKING@ 17 @INPUT_LINKING@
39 } 41 }
40 42
41 library(Cardinal) 43 library(Cardinal)
42 library(gridExtra) 44 library(gridExtra)
43 library(ggplot2) 45 library(ggplot2)
46 library(sva)
47 library(randomcoloR)
44 48
45 49
46 @READING_MSIDATA_FULLY_COMPATIBLE@ 50 @READING_MSIDATA_FULLY_COMPATIBLE@
47 51
48 52
446 QC_numbers= cbind(QC_numbers, transformed) 450 QC_numbers= cbind(QC_numbers, transformed)
447 vectorofactions = append(vectorofactions, "transformed") 451 vectorofactions = append(vectorofactions, "transformed")
448 print(plot(msidata, pixel=random_spectra, col="black")) 452 print(plot(msidata, pixel=random_spectra, col="black"))
449 title("Spectra after transformation", outer=TRUE, line=0) 453 title("Spectra after transformation", outer=TRUE, line=0)
450 454
455
456
457
458 ############################### ComBat batch correction ###########################
459
460 #elif str( $method.methods_conditional.preprocessing_method) == 'ComBat_batch_correction':
461 print('ComBat batch correction of centroided data')
462
463 ## load annotation tabular (tab-separated, header presence as set by the user) and extract the x/y pixel coordinate, batch and condition columns chosen in the tool form
464 annotation = read.delim("$method.methods_conditional.annotation_file", header=$method.methods_conditional.feature_header, sep="\t")
465 annotation_x = annotation[,$method.methods_conditional.x_column]
466 annotation_y = annotation[,$method.methods_conditional.y_column]
467 batch = annotation[,$method.methods_conditional.batch_column]
468 condition = annotation[,$method.methods_conditional.condition_column]
469
470 ### stop if fewer than two distinct batch levels are provided (ComBat needs at least 2 batches)
471 tryCatch(
472 {
473 ## count the distinct batch labels; this works for character as well as numeric batch identifiers and always gives a single TRUE/FALSE condition
474 if (length(unique(batch)) < 2)
475 {
476 stop(call.=FALSE)
477 }
478 },
479 error=function(cond) {
480 ## in case user provided an annotation tabular with less than two batches
481 message("Error in annotation tabular")
482 message("Possible problems: Annotation tabular file has not enough batch levels - to perform ComBat at least 2 batches and 2 pixels per batch are necessary)")
483 stop(call.=FALSE)
484 }
485 )
486
487 ## get intensity matrix from the loaded MSI data; rows are labelled mz_<m/z> and columns xy_<x>_<y>
488 intensity_matrix = as.matrix(iData(msidata))
489 mz_names = paste0("mz_", mz(msidata))
490 pixel_names = paste0("xy_", msidata@elementMetadata@coord@listData[["x"]], "_", msidata@elementMetadata@coord@listData[["y"]])
491 rownames(intensity_matrix) = mz_names
492 colnames(intensity_matrix) = pixel_names
493
494 ## build the same xy_<x>_<y> names from the annotation coordinates; the annotation row order defines the target column order of the intensity matrix
495 rownames(annotation) = paste0("xy_", annotation_x, "_", annotation_y)
496 col_order = rownames(annotation)
497
498 ### stop if pixel/sample names (columns) in intensity matrix from imzml file don't match samples names (rows) in annotation tabular file
499 tryCatch(
500 {
501 ## both name sets must be identical: a pixel missing from the annotation or an annotated pixel missing from the imzml file would break the reordering below or silently drop pixels
502 if (!setequal(colnames(intensity_matrix), col_order))
503 {
504 stop(call.=FALSE)
505 }
506 },
507 error=function(cond) {
508 ## in case pixel names (columns) from the imzml file don't match the pixel names in the annotation tabular file
509 message("Error in annotation tabular")
510 message("Possible problems: Annotation tabular file does not contain the correct pixel names (columns) from the imzml file)")
511 stop(call.=FALSE)
512 }
513 )
514 ## bring the intensity matrix columns into annotation row order so that they line up with the batch vector
515 intensity_matrix = intensity_matrix[, col_order]
516 print("columns have been ordered to annotation row order")
517
518 ## execution of ComBat algorithm from sva package (batch only, no covariate model, parametric priors)
519 combat_data = ComBat(dat = intensity_matrix, batch = batch, mod = NULL, par.prior = TRUE, prior.plots = FALSE)
520 print("Combat has been executed")
521 ## combat_data columns follow the annotation row order; restore the pixel order of msidata before writing back, otherwise intensities are assigned to the wrong pixels
522 ## change intensity data of loaded imzml file after combat has been performed
523 iData(msidata) = as.matrix(combat_data[, pixel_names, drop = FALSE])
524
525 ############################### QC ###########################
526 ## record m/z range, number of features and number of pixels after batch correction as a new QC_numbers column, then plot the selected spectra
527 maxfeatures =nrow(msidata)
528 pixelcount = ncol(msidata)
529 minmz = round(min(mz(msidata)), digits=2)
530 maxmz = round(max(mz(msidata)), digits=2)
531 batch_corrected = c(minmz, maxmz, maxfeatures, pixelcount)
532 QC_numbers= cbind(QC_numbers, batch_corrected)
533 vectorofactions = append(vectorofactions, "batch_corrected")
534 print(plot(msidata, pixel=random_spectra, col="black"))
535 title("Spectra after ComBat batch correction", outer=TRUE, line=0)
536
537
538 ## PCA plots of the data before and after batch correction: both matrices are converted to data frames first
539 combat_data = as.data.frame(combat_data)
540 intensity_data = as.data.frame(intensity_matrix)
541
542 ## PCA function: runs prcomp on the transposed input (columns of input_data become the observations) and returns a ggplot of PC1 vs PC2, colored by batch and shaped by condition, with one ellipse per batch; the axis labels carry the variance share of each component in percent
543 plot_PCA = function(input_data, condition, batch, title, color){
544 data <- input_data
545 pca_data <- prcomp(t(data[, seq_len(ncol(input_data))]))
546 pca_sdev <- pca_data[["sdev"]]
547 pca_data_perc <- round(100 * pca_sdev^2 / sum(pca_sdev^2), 1)
548 pca_components <- pca_data[["x"]]
549 df_pca_data <- data.frame(PC1 = pca_components[, 1], PC2 = pca_components[, 2], sample = colnames(input_data), condition = condition)
550 ggplot(df_pca_data, aes(PC1, PC2, color = as.factor(batch), shape = as.factor(condition))) +
551 ggtitle(title) +
552 geom_point(size = 4) +
553 stat_ellipse(aes(PC1, PC2, color = as.factor(batch), group = as.factor(batch)), type = "norm")+
554 scale_color_manual(values=color) +
555 theme_bw() +
556 theme(legend.position = "bottom", legend.box="vertical", plot.title = element_text(size = 12, hjust = 0.5), axis.title = element_text(size = 12), axis.text = element_text(size = 12, color = "black")) +
557 labs(x=paste0("PC1 (",pca_data_perc[1],")"), y=paste0("PC2 (",pca_data_perc[2],")")) +
558 labs(color = "Batches", shape = "Conditions")}
559
560 ## define colors: one palette entry per batch level
561 color_pal = distinctColorPalette(length(levels(as.factor(batch))))
562
563 ## execution of PCA plots: build the plots before and after correction with the same palette, then print both
564 PCA_bc = plot_PCA(intensity_data, condition, batch, "before batch correction", color_pal)
565 PCA_ac = plot_PCA(combat_data, condition, batch, "batch corrected", color_pal)
566 print(PCA_bc)
567 print(PCA_ac)
568
451 #end if 569 #end if
452 #end for 570 #end for
453 571
454 ############# Outputs: RData, imzml and QC report ############# 572 ############# Outputs: RData, imzml and QC report #############
455 ################################################################################ 573 ################################################################################
490 <option value="Peak_alignment">Peak alignment</option> 608 <option value="Peak_alignment">Peak alignment</option>
491 <option value="Peak_filtering">Peak filtering</option> 609 <option value="Peak_filtering">Peak filtering</option>
492 <option value="Peak_binning">Peak binning to reference peaks</option> 610 <option value="Peak_binning">Peak binning to reference peaks</option>
493 <option value="Mass_binning">m/z binning</option> 611 <option value="Mass_binning">m/z binning</option>
494 <option value="Transformation">Transformation</option> 612 <option value="Transformation">Transformation</option>
613 <option value="ComBat_batch_correction">ComBat batch correction of centroided data</option>
495 </param> 614 </param>
496 <when value="Normalization"> 615 <when value="Normalization">
497 <conditional name="methods_for_normalization"> 616 <conditional name="methods_for_normalization">
498 <param name="normalization_method" type="select" label="Normalization method"> 617 <param name="normalization_method" type="select" label="Normalization method">
499 <option value="tic" selected="True">TIC</option> 618 <option value="tic" selected="True">TIC</option>
688 <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/> 807 <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/>
689 </when> 808 </when>
690 <when value="sqrt"/> 809 <when value="sqrt"/>
691 </conditional> 810 </conditional>
692 </when> 811 </when>
812 <when value="ComBat_batch_correction">
813 <param name="annotation_file" type="data" format="tabular" label="Annotation file that contains the pixel x and y coordinates, the batch identifier, and the condition annotation for each spectrum." help="Annotation tabular file that contains the batch identifier for each spectrum in one column."/>
814 <param name="x_column" type="data_column" data_ref="annotation_file" label="X coordinates" help="Column with x coordinates of pixels."/>
815 <param name="y_column" type="data_column" data_ref="annotation_file" label="Y coordinates" help="Column with y coordinates of pixels."/>
816 <param name="batch_column" type="data_column" data_ref="annotation_file" label="Batch column" help="The column that contains the batch identifier for each spectrum."/>
817 <param name="condition_column" type="data_column" data_ref="annotation_file" label="Condition column" help="The column that contains the condition annotation for each spectrum. Typically these are the groups you want to compare. If not applicable, the batch column can be selected again as this information is only used for the QC plot."/>
818 <param name="feature_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
819 </when>
693 </conditional> 820 </conditional>
694 </repeat> 821 </repeat>
695 </inputs> 822 </inputs>
696 <outputs> 823 <outputs>
697 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> 824 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/>
868 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size"> 995 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size">
869 <extra_files type="file" file="preprocessing_results5.imzml" name="imzml" lines_diff="6"/> 996 <extra_files type="file" file="preprocessing_results5.imzml" name="imzml" lines_diff="6"/>
870 <extra_files type="file" file="preprocessing_results5.ibd" name="ibd" compare="sim_size"/> 997 <extra_files type="file" file="preprocessing_results5.ibd" name="ibd" compare="sim_size"/>
871 </output> 998 </output>
872 </test> 999 </test>
1000 <test>
1001 <param name="infile" value="" ftype="imzml">
1002 <composite_data value="Combat_40pixel.imzML" />
1003 <composite_data value="Combat_40pixel.ibd"/>
1004 </param>
1005 <repeat name="methods">
1006 <conditional name="methods_conditional">
1007 <param name="preprocessing_method" value="ComBat_batch_correction"/>
1008 <param name="annotation_file" value="annotation_40pixel.tabular" ftype="tabular"/>
1009 <param name="feature_header" value="TRUE"/>
1010 <param name="x_column" value="2"/>
1011 <param name="y_column" value="3"/>
1012 <param name="batch_column" value="4"/>
1013 <param name="condition_column" value="6"/>
1014 </conditional>
1015 </repeat>
1016 <output name="QC_overview" file="preprocessing_results_combat_40pixel.pdf" compare="sim_size"/>
1017 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results_combat_40pixel.imzml.txt" compare="sim_size">
1018 <extra_files type="file" file="preprocessing_results_combat_40pixel.imzml" name="imzml" lines_diff="6"/>
1019 <extra_files type="file" file="preprocessing_results_combat_40pixel.ibd" name="ibd" compare="sim_size"/>
1020 </output>
1021 </test>
1022 <test>
1023 <expand macro="processed_infile_imzml"/>
1024 <conditional name="processed_cond">
1025 <param name="processed_file" value="processed"/>
1026 <param name="accuracy" value="50"/>
1027 <param name="units" value="ppm"/>
1028 </conditional>
1029 <repeat name="methods">
1030 <conditional name="methods_conditional">
1031 <param name="preprocessing_method" value="ComBat_batch_correction"/>
1032 <param name="annotation_file" value="Example_processed_ComBat_annotation.tabular" ftype="tabular"/>
1033 <param name="feature_header" value="TRUE"/>
1034 <param name="x_column" value="2"/>
1035 <param name="y_column" value="3"/>
1036 <param name="batch_column" value="4"/>
1037 <param name="condition_column" value="5"/>
1038 </conditional>
1039 </repeat>
1040 <output name="QC_overview" file="ComBat_results_Example_processed_file_preprocessing.pdf" compare="sim_size"/>
1041 <output name="outfile_imzml" ftype="imzml" file="ComBat_results_Example_processed_file.imzml.txt" compare="sim_size">
1042 <extra_files type="file" file="ComBat_results_Example_processed_file.imzml" name="imzml" lines_diff="6"/>
1043 <extra_files type="file" file="ComBat_results_Example_processed_file.ibd" name="ibd" compare="sim_size"/>
1044 </output>
1045 </test>
873 </tests> 1046 </tests>
874 <help> 1047 <help>
875 <![CDATA[ 1048 <![CDATA[
876 1049
877 @CARDINAL_DESCRIPTION@ 1050 @CARDINAL_DESCRIPTION@
894 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept 1067 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept
895 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot 1068 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot
896 - Peak binning: extracts peaks intensities, either peak height or area under curve (from a profile dataset) for a list of m/z (reference) values 1069 - Peak binning: extracts peaks intensities, either peak height or area under curve (from a profile dataset) for a list of m/z (reference) values
897 - m/z binning: generates new m/z bins 1070 - m/z binning: generates new m/z bins
898 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. 1071 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems.
1072 - ComBat batch correction: corrects the intensity values of picked m/z features according to batches given in an annotation table. For now, it can only be applied to m/z features after peak picking (=centroided data). The annotation table needs to contain the x and y coordinates for each pixel and a batch identifier (e.g. TMA_1, TMA_2, TMA_3). Additionally a condition column can be provided, which is only used for the PCA plots in the pdf file. Example of annotation file for ComBat batch correction:
1073
1074 ::
1075
1076 x_coord y_coord batch_identifier condition
1077 10 29 TMA_1 A
1078 22 14 TMA_1 B
1079 22 27 TMA_2 A
1080 23 7 TMA_2 B
1081 29 45 TMA_3 A
1082 33 41 TMA_3 B
1083 ...
1084 ...
1085
899 1086
900 1087
901 **Output** 1088 **Output**
902 1089
903 - MSI data as continuous imzML file 1090 - MSI data as continuous imzML file