Mercurial > repos > galaxyp > cardinal_quality_report
diff quality_report.xml @ 18:d426a9107a6c draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 91e77c139cb3b7c6d67727dc39140dd79355fa0c
author | galaxyp |
---|---|
date | Thu, 04 Jul 2024 13:35:30 +0000 |
parents | 23d0394b5908 |
children |
line wrap: on
line diff
--- a/quality_report.xml Wed Apr 19 22:49:12 2023 +0000 +++ b/quality_report.xml Thu Jul 04 13:35:30 2024 +0000 @@ -1,18 +1,11 @@ -<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0"> +<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> <description> mass spectrometry imaging QC </description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements"> - <requirement type="package" version="2.3">r-gridextra</requirement> - <requirement type="package" version="3.3.5">r-ggplot2</requirement> - <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> - <requirement type="package" version="2.23_20">r-kernsmooth</requirement> - <requirement type="package" version="1.1.1">r-scales</requirement> - <requirement type="package" version="1.0.12">r-pheatmap</requirement> - </expand> + <expand macro="requirements"/> <command detect_errors="exit_code"> <![CDATA[ @INPUT_LINKING@ @@ -41,9 +34,11 @@ run(msidata) = "infile" } + ## remove duplicated coordinates msidata <- msidata[,!duplicated(coord(msidata))] + ## optional annotation from tabular file to obtain pixel groups (otherwise all pixels are considered to be one sample) #if str($tabular_annotation.load_annotation) == 'yes_annotation': @@ -80,13 +75,14 @@ medTIC = round(median(TICs), digits=1) sdTIC = round(sd(TICs), digits=0) ## Median and sd # peaks per spectrum -medpeaks = round(median(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0) -sdpeaks = round(sd(colSums(spectra(msidata)>0, na.rm=TRUE), na.rm=TRUE), digits=0) -##max window size +medpeaks = round(median(colSums(as.matrix(spectra(msidata))>0, na.rm=TRUE), na.rm=TRUE), digits=0) +sdpeaks = round(sd(colSums(as.matrix(spectra(msidata))>0, na.rm=TRUE), na.rm=TRUE), digits=0) +## max window size max_window = round(mz(msidata)[nrow(msidata)]-mz(msidata)[nrow(msidata)-1], digits=2) ## Processing informations centroidedinfo = centroided(msidata) + ############## Read and filter tabular file with m/z ########################### ### reading m/z input (calibrant) file: @@ -95,6 +91,7 @@ calibrant_list = read.delim("$calibrant_file", header = $calibrant_header, na.strings=c(" ","","NA"), stringsAsFactors = FALSE) calibrant_list = calibrant_list[,c($mz_column, $name_column)] + calibrant_list = calibrant_list[order(calibrant_list[,1]),] ### calculate how many input calibrant m/z are valid: @@ -244,6 +241,7 @@ pixelmatrix = matrix(ncol=ncol(msidata), nrow = 0) ## plot only possible when there is at least one valid calibrant + if (length(inputcalibrantmasses) != 0){ ## calculate plusminus values in m/z for each calibrant @@ -253,26 +251,26 @@ for (mass in 1:length(inputcalibrantmasses)){ - filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] + filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] - if (nrow(filtered_data) > 0 & sum(spectra(filtered_data),na.rm=TRUE) > 0){ + if (nrow(filtered_data) > 0 & sum(as.matrix(spectra(filtered_data)),na.rm=TRUE) > 0){ - ## intensity of all m/z > 0 - intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0 + ## intensity of all m/z > 0 + intensity_sum = colSums(as.matrix(spectra(filtered_data)[1,]), na.rm=TRUE) > 0 - ###}else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){ + ###}else if(nrow(filtered_data) == 1 & sum(as.matrix(spectra(filtered_data)), na.rm=TRUE) > 0){ - ## intensity of only m/z > 0 - intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0 - - }else{ + ## intensity of only m/z > 0 + intensity_sum = colSums(as.matrix(spectra(filtered_data)[1,]), na.rm=TRUE) > 0 - intensity_sum = rep(FALSE, ncol(filtered_data))} + }else{ + intensity_sum = rep(FALSE, ncol(filtered_data))} - ## for each pixel add sum of intensities > 0 in the given m/z range - pixelmatrix = rbind(pixelmatrix, intensity_sum) + ## for each pixel add sum of intensities > 0 in the given m/z range + pixelmatrix = rbind(pixelmatrix, intensity_sum) } + ## for each pixel count TRUE (each calibrant m/z range with intensity > 0 is TRUE) countvector= as.factor(colSums(pixelmatrix, na.rm=TRUE)) countdf= data.frame(coord(msidata)\$x, coord(msidata)\$y, countvector) ## add pixel coordinates to counts @@ -292,7 +290,9 @@ rm(countdf) gc() - }else{print("2) The inputcalibrant m/z were not provided or outside the m/z range")} + }else{plot.new() + text(0.5, 0.5, "The input calibrant m/z were not provided \n or outside the m/z range.", cex = 1.5) + print("2) The input calibrant m/z were not provided or outside the m/z range")} ########################## 3) fold change image ########################### @@ -379,14 +379,16 @@ } ) } - } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")} + } else {plot.new() + text(0.5, 0.5, "The input peptide and calibrant m/z were not \n provided or outside the m/z range.", cex = 1.5) + print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")} #end if #################### 5) Number of peaks per pixel - image ################## ## here every intensity value > 0 counts as peak - peaksperpixel = colSums(spectra(msidata)> 0, na.rm=TRUE) + peaksperpixel = colSums(int_matrix> 0, na.rm=TRUE) peakscoordarray=data.frame(coord(msidata)\$x, coord(msidata)\$y, peaksperpixel) colnames(peakscoordarray) = c("x", "y", "peaksperpixel") @@ -495,24 +497,24 @@ #end if ########################## 8) optional pca image for two components ################# - - #if $do_pca: + ### PCA removed until next Update + ####if $do_pca: set.seed(1) - pca = PCA(msidata, ncomp=2) + ##pca = PCA(msidata, ncomp=2) ## plot overview image and plot and PC1 and 2 images - print(plot(pca, col=c("black", "darkgrey"), main="PCA for two components", layout=c(2,1), strip=FALSE)) - print(image(pca, run="infile", col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1), layout=FALSE)) + ##print(plot(pca, col=c("black", "darkgrey"), main="PCA for two components", layout=c(2,1), strip=FALSE)) + ##print(image(pca, run="infile", col=c("black", "white"), strip=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1), layout=FALSE)) - par(oma=c(0,0,0,1))## margin for image legend - print(image(pca, column = "PC1" , strip=FALSE, superpose = FALSE, main="PC1", col.regions = risk.colors(100), layout=c(2,1), ylim= c(maximumy+0.2*maximumy,minimumy-1))) - print(image(pca, column = "PC2" , strip=FALSE, superpose = FALSE, main="PC2", col.regions = risk.colors(100), layout=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))) + ##par(oma=c(0,0,0,1))## margin for image legend + ##print(image(pca, column = "PC1" , strip=FALSE, superpose = FALSE, main="PC1", col.regions = risk.colors(100), layout=c(2,1), ylim= c(maximumy+0.2*maximumy,minimumy-1))) + ##print(image(pca, column = "PC2" , strip=FALSE, superpose = FALSE, main="PC2", col.regions = risk.colors(100), layout=FALSE, ylim= c(maximumy+0.2*maximumy,minimumy-1))) ## remove pca to clean up space - rm(pca) - gc() + ##rm(pca) + ##gc() - #end if + ####end if ################## III) properties over spectra index ###################### ############################################################################ @@ -612,7 +614,7 @@ #if $report_depth: - peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) + peakspermz = rowSums(int_matrix > 0, na.rm=TRUE) par(mfrow = c(2,1), mar=c(5,6,4,4.5)) ## 12a) scatterplot @@ -665,7 +667,7 @@ title(main="Intensity histogram", line=2) title(xlab="intensities") title(ylab="Frequency", line=4) - abline(v=median(int_matrix)[(as.matrix(spectra(msidata))>0)], col="blue") + abline(v=median(int_matrix)[(int_matrix>0)], col="blue") #end if @@ -675,7 +677,7 @@ df_13 = data.frame(matrix(,ncol=2, nrow=0)) for (subsample in unique(msidata\$annotation)){ - log2_int_subsample = spectra(msidata)[,msidata\$annotation==subsample] + log2_int_subsample = int_matrix[,msidata\$annotation==subsample] df_subsample = data.frame(as.numeric(log2_int_subsample)) df_subsample\$annotation = subsample df_13 = rbind(df_13, df_subsample)} @@ -698,7 +700,7 @@ mean_matrix = matrix(,ncol=0, nrow = nrow(msidata)) for (subsample in unique(msidata\$annotation)){ - mean_mz_sample = rowMeans(spectra(msidata)[,msidata\$annotation==subsample],na.rm=TRUE) + mean_mz_sample = rowMeans(int_matrix[,msidata\$annotation==subsample],na.rm=TRUE) mean_matrix = cbind(mean_matrix, mean_mz_sample)} boxplot(log10(as.data.frame(mean_matrix)), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n") @@ -707,6 +709,8 @@ ## 14e) Heatmap of mean intensities of annotation groups colnames(mean_matrix) = unique(msidata\$annotation) + print(mean_matrix) + mean_matrix[is.na(mean_matrix)] = 0 heatmap.parameters <- list(mean_matrix, show_rownames = T, show_colnames = T, @@ -807,7 +811,7 @@ ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17 filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] - if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){ + if (nrow(filtered_data) > 0 & sum(as.matrix(spectra(filtered_data))) > 0){ maxmassrow = featureApply(filtered_data, mean) ## for each m/z average intensity is calculated maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: theoretical value - closest m/z value @@ -834,7 +838,7 @@ abline(v=c(mzvalue), col="green2", lty=4) ## average plot including points per data point print(plot(msidata_no_NA[minmasspixel1:maxmasspixel1,], run="infile", layout=FALSE, strip=FALSE, main="Average spectrum with data points", col="black")) - points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(spectra(msidata_no_NA)[minmasspixel1:maxmasspixel1,,drop=FALSE]), col="blue", pch=20) + points(mz(msidata_no_NA[minmasspixel1:maxmasspixel1,]), rowMeans(as.matrix(spectra(msidata_no_NA))[minmasspixel1:maxmasspixel1,,drop=FALSE]), col="blue", pch=20) ## plot of third average plot print(plot(msidata_no_NA[minmasspixel2:maxmasspixel2,], run="infile", layout=FALSE, strip=FALSE, main= "Average spectrum", col="black")) abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3)) @@ -912,10 +916,12 @@ #end if #################### 19) ppm difference over pixels ##################### + print("ppm difference over pixels") par(mfrow = c(1,1)) count = 1 ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata))) + for (calibrant in inputcalibrantmasses){ ### find m/z with the highest mean intensity in m/z range, if no m/z in the range, ppm differences for this calibrant will be NA filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],] @@ -978,9 +984,14 @@ #end if - }else{print("plot 16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")} + }else{ + plot.new() + text(0.5, 0.5, "plot 16+17+18+19) The input calibrant m/z were not provided \n or outside the m/z range", cex = 1.5) + print("plot 16+17+18+19) The input calibrant m/z were not provided or outside the m/z range")} }else{ - print("inputfile has no intensities > 0") + plot.new() + text(0.5, 0.5, "The input file has no intensities > 0", cex = 1.5) + print("input file has no intensities > 0") } dev.off() @@ -1002,7 +1013,7 @@ <expand macro="pdf_filename"/> <expand macro="reading_2_column_mz_tabular" optional="true"/> <param name="plusminus_ppm" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/> - <param name="do_pca" type="boolean" label="PCA with 2 components"/> + <!--param name="do_pca" type="boolean" label="PCA with 2 components"/--> <param name="report_depth" type="boolean" label="Generate full QC report" truevalue="TRUE" falsevalue="FALSE" checked="True" help="No: does not generate all plots but only the most informatives"/> <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10"> <param name="mass1" value="1111" type="float" label="M/z 1" help="First m/z"/> @@ -1040,7 +1051,7 @@ <param name="name_column" value="1"/> <param name="plusminus_ppm" value="100"/> <param name="filename" value="Testfile_imzml"/> - <param name="do_pca" value="True"/> + <!--param name="do_pca" value="True"/--> <repeat name="calibrantratio"> <param name="mass1" value="328.9"/> <param name="mass2" value="398.8"/> @@ -1055,7 +1066,7 @@ <param name="load_annotation" value="no_annotation"/> </conditional> <param name="filename" value="Testfile_analyze75"/> - <param name="do_pca" value="True"/> + <!--param name="do_pca" value="True"/--> <output name="QC_report" file="QC_analyze75.pdf" compare="sim_size"/> </test> <test> @@ -1073,8 +1084,12 @@ <param name="name_column" value="1"/> <param name="plusminus_ppm" value="100"/> <param name="filename" value="Testfile_rdata"/> - <param name="do_pca" value="True"/> - <output name="QC_report" file="QC_rdata.pdf" compare="sim_size"/> + <!--param name="do_pca" value="True"/--> + <output name="QC_report" ftype="pdf"> + <assert_contents> + <has_size value="1276311" delta="100"/> + </assert_contents> + </output> </test> <test> <param name="infile" value="empty_spectra.rdata" ftype="rdata"/> @@ -1085,7 +1100,7 @@ <param name="mz_column" value="1"/> <param name="name_column" value="2"/> <param name="filename" value="Testfile_rdata"/> - <param name="do_pca" value="False"/> + <!--param name="do_pca" value="False"/--> <output name="QC_report" file="QC_empty_spectra.pdf" compare="sim_size"/> </test> <test>