Mercurial > repos > galaxyp > cardinal_preprocessing
comparison preprocessing.xml @ 17:611d80c0e29d draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit eeeb69463a2037a6ee620b9223cb152fcc39f1b0
author | galaxyp |
---|---|
date | Wed, 19 Apr 2023 22:46:23 +0000 |
parents | accf9fb6ea01 |
children | 83aac7741200 |
comparison
equal
deleted
inserted
replaced
16:798da6bdff3d | 17:611d80c0e29d |
---|---|
1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0"> | 1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> |
2 <description> | 2 <description> |
3 mass spectrometry imaging preprocessing | 3 mass spectrometry imaging preprocessing |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>macros.xml</import> | 6 <import>macros.xml</import> |
7 </macros> | 7 </macros> |
8 <expand macro="requirements"> | 8 <expand macro="requirements"> |
9 <requirement type="package" version="2.3">r-gridextra</requirement> | 9 <requirement type="package" version="2.3">r-gridextra</requirement> |
10 <requirement type="package" version="3.3.5">r-ggplot2</requirement> | 10 <requirement type="package" version="3.4.0">r-ggplot2</requirement> |
11 <requirement type="package" version="3.40.0">bioconductor-sva</requirement> | |
12 <requirement type="package" version="1.1.0.1">r-randomcolor</requirement> | |
11 </expand> | 13 </expand> |
12 <command detect_errors="exit_code"> | 14 <command detect_errors="exit_code"> |
13 <![CDATA[ | 15 <![CDATA[ |
14 | 16 |
15 @INPUT_LINKING@ | 17 @INPUT_LINKING@ |
39 } | 41 } |
40 | 42 |
41 library(Cardinal) | 43 library(Cardinal) |
42 library(gridExtra) | 44 library(gridExtra) |
43 library(ggplot2) | 45 library(ggplot2) |
46 library(sva) | |
47 library(randomcoloR) | |
44 | 48 |
45 | 49 |
46 @READING_MSIDATA_FULLY_COMPATIBLE@ | 50 @READING_MSIDATA_FULLY_COMPATIBLE@ |
47 | 51 |
48 | 52 |
446 QC_numbers= cbind(QC_numbers, transformed) | 450 QC_numbers= cbind(QC_numbers, transformed) |
447 vectorofactions = append(vectorofactions, "transformed") | 451 vectorofactions = append(vectorofactions, "transformed") |
448 print(plot(msidata, pixel=random_spectra, col="black")) | 452 print(plot(msidata, pixel=random_spectra, col="black")) |
449 title("Spectra after transformation", outer=TRUE, line=0) | 453 title("Spectra after transformation", outer=TRUE, line=0) |
450 | 454 |
455 | |
456 | |
457 | |
458 ############################### ComBat batch correction ########################### | |
459 | |
460 #elif str( $method.methods_conditional.preprocessing_method) == 'ComBat_batch_correction': | |
461 print('ComBat batch correction of centroided data') | |
462 | |
463 ## load annotation tabular and define batch and condition column | |
464 annotation = read.delim("$method.methods_conditional.annotation_file", header=$method.methods_conditional.feature_header, sep="\t") | |
465 annotation_x = annotation[,$method.methods_conditional.x_column] | |
466 annotation_y = annotation[,$method.methods_conditional.y_column] | |
467 batch = annotation[,$method.methods_conditional.batch_column] | |
468 condition = annotation[,$method.methods_conditional.condition_column] | |
469 | |
470 ### stop if fewer than two distinct batches are provided | |
471 tryCatch( | |
472 { | |
473 ## count the distinct batch labels; comparing the labels themselves with 2 never measured the number of batches | |
474 if (length(unique(batch)) < 2) | |
475 { | |
476 stop(call.=FALSE) | |
477 } | |
478 }, | |
479 error=function(cond) { | |
480 ## in case user provided an annotation tabular with less than two batches | |
481 message("Error in annotation tabular") | |
482 message("Possible problems: Annotation tabular file has not enough batch levels - to perform ComBat at least 2 batches and 2 pixels per batch are necessary)") | |
483 stop(call.=FALSE) | |
484 } | |
485 ) | |
486 | |
487 ## get intensity matrix from imzml file | |
488 intensity_matrix = as.matrix(iData(msidata)) | |
489 mz_names = paste0("mz_", mz(msidata)) | |
490 pixel_names = paste0("xy_", msidata@elementMetadata@coord@listData[["x"]], "_", msidata@elementMetadata@coord@listData[["y"]]) | |
491 rownames(intensity_matrix) = mz_names | |
492 colnames(intensity_matrix) = pixel_names | |
493 | |
494 ## reorder columns of intensity matrix to row order of batch column | |
495 rownames(annotation) = paste0("xy_", annotation_x, "_", annotation_y) | |
496 col_order = rownames(annotation) | |
497 | |
498 ### stop if pixel/sample names (columns) in intensity matrix from imzml file don't match samples names (rows) in annotation tabular file | |
499 tryCatch( | |
500 { | |
501 ## both name sets must be identical: a missing or surplus annotation row would otherwise only fail later during reordering or when writing the intensities back | |
502 if (!setequal(colnames(intensity_matrix), col_order)) | |
503 { | |
504 stop(call.=FALSE) | |
505 } | |
506 }, | |
507 error=function(cond) { | |
508 ## in case pixel names (columns) from the imzml file don't match the pixel names in the annotation tabular file | |
509 message("Error in annotation tabular") | |
510 message("Possible problems: Annotation tabular file does not contain the correct pixel names (columns) from the imzml file)") | |
511 stop(call.=FALSE) | |
512 } | |
513 ) | |
514 | |
515 intensity_matrix = intensity_matrix[, col_order] | |
516 print("columns have been ordered to annotation row order") | |
517 | |
518 ## execution of ComBat algorithm from sva package | |
519 combat_data = ComBat(dat = intensity_matrix, batch = batch, mod = NULL, par.prior = TRUE, prior.plots = FALSE) | |
520 print("Combat has been executed") | |
521 | |
522 ## change intensity data of loaded imzml file after combat has been performed; the matrix was reordered to the annotation row order above, so put the columns back into the pixel order of msidata before writing them back | |
523 iData(msidata) = as.matrix(combat_data)[, pixel_names, drop = FALSE] | |
524 | |
525 ############################### QC ########################### | |
526 | |
527 maxfeatures =nrow(msidata) | |
528 pixelcount = ncol(msidata) | |
529 minmz = round(min(mz(msidata)), digits=2) | |
530 maxmz = round(max(mz(msidata)), digits=2) | |
531 batch_corrected = c(minmz, maxmz, maxfeatures, pixelcount) | |
532 QC_numbers= cbind(QC_numbers, batch_corrected) | |
533 vectorofactions = append(vectorofactions, "batch_corrected") | |
534 print(plot(msidata, pixel=random_spectra, col="black")) | |
535 title("Spectra after ComBat batch correction", outer=TRUE, line=0) | |
536 | |
537 | |
538 ## PCA plot function and execution | |
539 combat_data = as.data.frame(combat_data) | |
540 intensity_data = as.data.frame(intensity_matrix) | |
541 | |
542 ## PCA function: runs prcomp on the transposed input table (one observation per pixel column) and returns a ggplot of PC1 vs PC2, colored by batch and shaped by condition, with one normal ellipse per batch; the axis labels give the percentage of variance explained by each component | |
543 plot_PCA = function(input_data, condition, batch, title, color){ | |
544 data <- input_data | |
545 pca_data <- prcomp(t(data[, seq_len(ncol(input_data))])) | |
546 pca_sdev <- pca_data[["sdev"]] | |
547 pca_data_perc <- round(100 * pca_sdev^2 / sum(pca_sdev^2), 1) | |
548 pca_components <- pca_data[["x"]] | |
549 df_pca_data <- data.frame(PC1 = pca_components[, 1], PC2 = pca_components[, 2], sample = colnames(input_data), condition = condition) | |
550 ggplot(df_pca_data, aes(PC1, PC2, color = as.factor(batch), shape = as.factor(condition))) + | |
551 ggtitle(title) + | |
552 geom_point(size = 4) + | |
553 stat_ellipse(aes(PC1, PC2, color = as.factor(batch), group = as.factor(batch)), type = "norm")+ | |
554 scale_color_manual(values=color) + | |
555 theme_bw() + | |
556 theme(legend.position = "bottom", legend.box="vertical", plot.title = element_text(size = 12, hjust = 0.5), axis.title = element_text(size = 12), axis.text = element_text(size = 12, color = "black")) + | |
557 labs(x=paste0("PC1 (",pca_data_perc[1],"%)"), y=paste0("PC2 (",pca_data_perc[2],"%)")) + | |
558 labs(color = "Batches", shape = "Conditions")} | |
559 | |
560 ## define colors | |
561 color_pal = distinctColorPalette(length(levels(as.factor(batch)))) | |
562 | |
563 ## execution of PCA plots | |
564 PCA_bc = plot_PCA(intensity_data, condition, batch, "before batch correction", color_pal) | |
565 PCA_ac = plot_PCA(combat_data, condition, batch, "batch corrected", color_pal) | |
566 print(PCA_bc) | |
567 print(PCA_ac) | |
568 | |
451 #end if | 569 #end if |
452 #end for | 570 #end for |
453 | 571 |
454 ############# Outputs: RData, imzml and QC report ############# | 572 ############# Outputs: RData, imzml and QC report ############# |
455 ################################################################################ | 573 ################################################################################ |
490 <option value="Peak_alignment">Peak alignment</option> | 608 <option value="Peak_alignment">Peak alignment</option> |
491 <option value="Peak_filtering">Peak filtering</option> | 609 <option value="Peak_filtering">Peak filtering</option> |
492 <option value="Peak_binning">Peak binning to reference peaks</option> | 610 <option value="Peak_binning">Peak binning to reference peaks</option> |
493 <option value="Mass_binning">m/z binning</option> | 611 <option value="Mass_binning">m/z binning</option> |
494 <option value="Transformation">Transformation</option> | 612 <option value="Transformation">Transformation</option> |
613 <option value="ComBat_batch_correction">ComBat batch correction of centroided data</option> | |
495 </param> | 614 </param> |
496 <when value="Normalization"> | 615 <when value="Normalization"> |
497 <conditional name="methods_for_normalization"> | 616 <conditional name="methods_for_normalization"> |
498 <param name="normalization_method" type="select" label="Normalization method"> | 617 <param name="normalization_method" type="select" label="Normalization method"> |
499 <option value="tic" selected="True">TIC</option> | 618 <option value="tic" selected="True">TIC</option> |
688 <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/> | 807 <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/> |
689 </when> | 808 </when> |
690 <when value="sqrt"/> | 809 <when value="sqrt"/> |
691 </conditional> | 810 </conditional> |
692 </when> | 811 </when> |
812 <when value="ComBat_batch_correction"> | |
813 <param name="annotation_file" type="data" format="tabular" label="Annotation file that contains the pixel x and y coordinates, the batch identifier, and the condition annotation for each spectrum." help="Annotation tabular file that contains the batch identifier for each spectrum in one column."/> | |
814 <param name="x_column" type="data_column" data_ref="annotation_file" label="X coordinates" help="Column with x coordinates of pixels."/> | |
815 <param name="y_column" type="data_column" data_ref="annotation_file" label="Y coordinates" help="Column with y coordinates of pixels."/> | |
816 <param name="batch_column" type="data_column" data_ref="annotation_file" label="Batch column" help="The column that contains the batch identifier for each spectrum."/> | |
817 <param name="condition_column" type="data_column" data_ref="annotation_file" label="Condition column" help="The column that contains the condition annotation for each spectrum. Typically these are the groups you want to compare. If not applicable, the batch column can be selected again as this information is only used for the QC plot."/> | |
818 <param name="feature_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
819 </when> | |
693 </conditional> | 820 </conditional> |
694 </repeat> | 821 </repeat> |
695 </inputs> | 822 </inputs> |
696 <outputs> | 823 <outputs> |
697 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> | 824 <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> |
868 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size"> | 995 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results5.imzml.txt" compare="sim_size"> |
869 <extra_files type="file" file="preprocessing_results5.imzml" name="imzml" lines_diff="6"/> | 996 <extra_files type="file" file="preprocessing_results5.imzml" name="imzml" lines_diff="6"/> |
870 <extra_files type="file" file="preprocessing_results5.ibd" name="ibd" compare="sim_size"/> | 997 <extra_files type="file" file="preprocessing_results5.ibd" name="ibd" compare="sim_size"/> |
871 </output> | 998 </output> |
872 </test> | 999 </test> |
1000 <test> | |
1001 <param name="infile" value="" ftype="imzml"> | |
1002 <composite_data value="Combat_40pixel.imzML" /> | |
1003 <composite_data value="Combat_40pixel.ibd"/> | |
1004 </param> | |
1005 <repeat name="methods"> | |
1006 <conditional name="methods_conditional"> | |
1007 <param name="preprocessing_method" value="ComBat_batch_correction"/> | |
1008 <param name="annotation_file" value="annotation_40pixel.tabular" ftype="tabular"/> | |
1009 <param name="feature_header" value="TRUE"/> | |
1010 <param name="x_column" value="2"/> | |
1011 <param name="y_column" value="3"/> | |
1012 <param name="batch_column" value="4"/> | |
1013 <param name="condition_column" value="6"/> | |
1014 </conditional> | |
1015 </repeat> | |
1016 <output name="QC_overview" file="preprocessing_results_combat_40pixel.pdf" compare="sim_size"/> | |
1017 <output name="outfile_imzml" ftype="imzml" file="preprocessing_results_combat_40pixel.imzml.txt" compare="sim_size"> | |
1018 <extra_files type="file" file="preprocessing_results_combat_40pixel.imzml" name="imzml" lines_diff="6"/> | |
1019 <extra_files type="file" file="preprocessing_results_combat_40pixel.ibd" name="ibd" compare="sim_size"/> | |
1020 </output> | |
1021 </test> | |
1022 <test> | |
1023 <expand macro="processed_infile_imzml"/> | |
1024 <conditional name="processed_cond"> | |
1025 <param name="processed_file" value="processed"/> | |
1026 <param name="accuracy" value="50"/> | |
1027 <param name="units" value="ppm"/> | |
1028 </conditional> | |
1029 <repeat name="methods"> | |
1030 <conditional name="methods_conditional"> | |
1031 <param name="preprocessing_method" value="ComBat_batch_correction"/> | |
1032 <param name="annotation_file" value="Example_processed_ComBat_annotation.tabular" ftype="tabular"/> | |
1033 <param name="feature_header" value="TRUE"/> | |
1034 <param name="x_column" value="2"/> | |
1035 <param name="y_column" value="3"/> | |
1036 <param name="batch_column" value="4"/> | |
1037 <param name="condition_column" value="5"/> | |
1038 </conditional> | |
1039 </repeat> | |
1040 <output name="QC_overview" file="ComBat_results_Example_processed_file_preprocessing.pdf" compare="sim_size"/> | |
1041 <output name="outfile_imzml" ftype="imzml" file="ComBat_results_Example_processed_file.imzml.txt" compare="sim_size"> | |
1042 <extra_files type="file" file="ComBat_results_Example_processed_file.imzml" name="imzml" lines_diff="6"/> | |
1043 <extra_files type="file" file="ComBat_results_Example_processed_file.ibd" name="ibd" compare="sim_size"/> | |
1044 </output> | |
1045 </test> | |
873 </tests> | 1046 </tests> |
874 <help> | 1047 <help> |
875 <![CDATA[ | 1048 <![CDATA[ |
876 | 1049 |
877 @CARDINAL_DESCRIPTION@ | 1050 @CARDINAL_DESCRIPTION@ |
894 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept | 1067 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept |
895 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot | 1068 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot |
896 - Peak binning: extracts peaks intensities, either peak height or area under curve (from a profile dataset) for a list of m/z (reference) values | 1069 - Peak binning: extracts peaks intensities, either peak height or area under curve (from a profile dataset) for a list of m/z (reference) values |
897 - m/z binning: generates new m/z bins | 1070 - m/z binning: generates new m/z bins |
898 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. | 1071 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. |
1072 - ComBat batch correction: corrects the intensity values of picked m/z features according to batches given in an annotation table. For now, it can only be applied to m/z features after peak picking (=centroided data). The annotation table needs to contain the x and y coordinates for each pixel and a batch identifier (e.g. TMA_1, TMA_2, TMA_3). Additionally a condition column can be provided, which is only used for the PCA plots in the pdf file. Example of annotation file for ComBat batch correction: | |
1073 | |
1074 :: | |
1075 | |
1076 x_coord y_coord batch_identifier condition | |
1077 10 29 TMA_1 A | |
1078 22 14 TMA_1 B | |
1079 22 27 TMA_2 A | |
1080 23 7 TMA_2 B | |
1081 29 45 TMA_3 A | |
1082 33 41 TMA_3 B | |
1083 ... | |
1084 ... | |
1085 | |
899 | 1086 |
900 | 1087 |
901 **Output** | 1088 **Output** |
902 | 1089 |
903 - MSI data as continuous imzML file | 1090 - MSI data as continuous imzML file |