Mercurial > repos > galaxyp > cardinal_preprocessing
diff preprocessing.xml @ 2:1b875f0b8024 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit f127be2141cf22e269c85282d226eb16fe14a9c1
author | galaxyp |
---|---|
date | Fri, 15 Feb 2019 10:22:14 -0500 |
parents | 1b22c1e7bfe7 |
children | f172efe92629 |
line wrap: on
line diff
--- a/preprocessing.xml Thu Oct 25 07:29:29 2018 -0400 +++ b/preprocessing.xml Fri Feb 15 10:22:14 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> +<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.2"> <description> mass spectrometry imaging preprocessing </description> @@ -17,15 +17,13 @@ cat '${cardinal_preprocessing}' && Rscript '${cardinal_preprocessing}' && - #if $imzml_output: + #if str($imzml_output) == "imzml_format": mkdir $outfile_imzml.files_path && - ls -l && mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && #end if - echo "imzML file:" > $outfile_imzml && - ls -l "$outfile_imzml.files_path" >> $outfile_imzml - + echo "imzML file:" > $outfile_imzml && + ls -l "$outfile_imzml.files_path" >> $outfile_imzml ]]> </command> @@ -41,24 +39,28 @@ @READING_MSIDATA@ - ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail -print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) -msidata <- msidata[,!duplicated(coord(msidata))] - -print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) +print(paste0(sum(duplicated(coord(msidata)[,1:2])), " duplicated coordinates were removed")) +msidata <- msidata[,!duplicated(coord(msidata)[,1:2])] -if (sum(spectra(msidata)[]>0, na.rm=TRUE)> 0){ +if (ncol(msidata)>0 & nrow(msidata) >0){ + + ## start QC report + + pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) + plot(0,type='n',axes=FALSE,ann=FALSE) + title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) + ######################### preparations for QC report ################# - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint)) - vectorofactions = "inputdata" + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, pixelcount)) + vectorofactions = "inputdata" + plot(msidata, pixel = 1:pixelcount, main="Average spectrum of input file") ############################### Preprocessing steps ########################### ############################################################################### @@ -75,14 +77,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, normalized) - vectorofactions = append(vectorofactions, "normalized") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + normalized = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, normalized) + vectorofactions = append(vectorofactions, "normalized") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after normalization") ############################### Baseline reduction ########################### @@ -94,14 +96,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, baseline) - vectorofactions = append(vectorofactions, "baseline red.") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + baseline = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, baseline) + vectorofactions = append(vectorofactions, "baseline red.") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after baseline reduction") ############################### Smoothing ########################### @@ -127,14 +129,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, smoothed) - vectorofactions = append(vectorofactions, "smoothed") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + smoothed = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, smoothed) + vectorofactions = append(vectorofactions, "smoothed") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after smoothing") ############################### Peak picking ########################### @@ -161,14 +163,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, picked) - vectorofactions = append(vectorofactions, "picked") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + picked = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, picked) + vectorofactions = append(vectorofactions, "picked") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after peak picking") ############################### Peak alignment ########################### @@ -208,14 +210,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, aligned) - vectorofactions = append(vectorofactions, "aligned") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + aligned = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, aligned) + vectorofactions = append(vectorofactions, "aligned") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after alignment") ############################### Peak filtering ########################### @@ -226,14 +228,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, filtered) - vectorofactions = append(vectorofactions, "filtered") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + filtered = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, filtered) + vectorofactions = append(vectorofactions, "filtered") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after filtering") ############################### Data reduction ########################### @@ -247,8 +249,11 @@ ## optional: replace NA with 0 #if $method.methods_conditional.methods_for_reduction.replace_NA_bin: + ## binning seems to create normal R matrix but to be sure: + iData(msidata) <- iData(msidata)[] + ## count and replace NAs print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)[])))) - spectra(msidata)[][is.na(spectra(msidata)[])] = 0 + spectra(msidata)[is.na(spectra(msidata))] = 0 #end if #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample': @@ -275,24 +280,27 @@ #end if ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, reduced) - vectorofactions = append(vectorofactions, "reduced") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + reduced = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, reduced) + vectorofactions = append(vectorofactions, "reduced") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after data reduction") ############################### Transformation ########################### #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': print('Transformation') + ## convert data into R matrix what brings it automatically into memory and can take some take but next steps need R matrix + iData(msidata) <- iData(msidata)[] + #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': print('log2 transformation') - ## replace 0 with NA + ## replace 0 with NA to prevent Inf spectra_df = spectra(msidata)[] spectra_df[spectra_df ==0] = NA print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) @@ -313,14 +321,14 @@ ############################### QC ########################### - maxfeatures = length(features(msidata)) - medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) - medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) - minmz = round(min(mz(msidata)), digits=2) - maxmz = round(max(mz(msidata)), digits=2) - transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) - QC_numbers= cbind(QC_numbers, transformed) - vectorofactions = append(vectorofactions, "transformed") + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + transformed = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, transformed) + vectorofactions = append(vectorofactions, "transformed") + plot(msidata, pixel = 1:pixelcount, main="Average spectrum after transformation") #end if #end for @@ -328,24 +336,19 @@ ############# Outputs: RData, imzml and QC report ############# ################################################################################ - print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[])))) - - ## save as (.RData) - save(msidata, file="$msidata_preprocessed") + ## save msidata as imzML file, will only work if there is at least 1 m/z left - ## save msidata as imzML file, will only work if there is at least 1 m/z left - #if $imzml_output: + #if str($imzml_output) == "imzml_format": if (nrow(msidata) > 0){ -print("write outputfile") writeImzML(msidata, "out")} + #elif str($imzml_output) == "rdata_format": + ## save as (.RData) + iData(msidata) = iData(msidata)[] + save(msidata, file="$outfile_rdata") #end if - ## save QC report - - pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) - title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) - rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity") + rownames(QC_numbers) = c("min m/z", "max mz", "# features", "# spectra") grid.table(t(QC_numbers)) dev.off() @@ -361,7 +364,7 @@ <repeat name="methods" title="Preprocessing" min="1" max="50"> <conditional name="methods_conditional"> <param name="preprocessing_method" type="select" label="Preprocessing methods"> - <option value="Normalization" selected="True">Normalization</option> + <option value="Normalization" selected="True">Intensity Normalization (TIC)</option> <option value="Baseline_reduction">Baseline Reduction</option> <option value="Smoothing">Peak smoothing</option> <option value="Peak_picking">Peak picking</option> @@ -528,14 +531,20 @@ </when> </conditional> </repeat> - <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/> + <param name="imzml_output" type="select" display = "radio" optional = "False" + label="Output format" help= "Choose the output format"> + <option value="imzml_format" selected="True">imzML</option> + <option value="rdata_format">RData</option> + </param> </inputs> <outputs> - <data format="rdata" name="msidata_preprocessed" label="${tool.name} on ${on_string}"/> + <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"> + <filter>imzml_output=='imzml_format'</filter> + </data> + <data format="rdata" name="outfile_rdata" label="${tool.name} on ${on_string}: RData"> + <filter>imzml_output == 'rdata_format'</filter> + </data> <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> - <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"> - <filter>imzml_output</filter> - </data> </outputs> <tests> <test> @@ -591,8 +600,12 @@ </conditional> </conditional> </repeat> - <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> + <param name="imzml_output" value="imzml_format"/> <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> + <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> + <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="4"/> + <extra_files type="file" file="preprocessing_results1.ibd" name="ibd" compare="sim_size"/> + </output> </test> <test> <param name="infile" value="3_files_combined.RData" ftype="rdata"/> @@ -615,8 +628,12 @@ </conditional> </conditional> </repeat> - <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> + <param name="imzml_output" value="imzml_format"/> <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> + <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> + <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="4"/> + <extra_files type="file" file="preprocessing_results2.ibd" name="ibd" compare="sim_size"/> + </output> </test> <test> <expand macro="infile_analyze75"/> @@ -645,8 +662,12 @@ </conditional> </conditional> </repeat> - <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> + <param name="imzml_output" value="imzml_format"/> <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> + <output name="outfile_imzml" ftype="imzml" file="preprocessing_results3.imzml.txt" compare="sim_size"> + <extra_files type="file" file="preprocessing_results3.imzml" name="imzml" lines_diff="4"/> + <extra_files type="file" file="preprocessing_results3.ibd" name="ibd" compare="sim_size"/> + </output> </test> <test> <expand macro="infile_analyze75"/> @@ -661,8 +682,12 @@ <param name="bin_width" value="0.1"/> </conditional> </repeat> - <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> + <param name="imzml_output" value="imzml_format"/> <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> + <output name="outfile_imzml" ftype="imzml" file="preprocessing_results4.imzml.txt" compare="sim_size"> + <extra_files type="file" file="preprocessing_results4.imzml" name="imzml" lines_diff="4"/> + <extra_files type="file" file="preprocessing_results4.ibd" name="ibd" compare="sim_size"/> + </output> </test> <test> <expand macro="infile_imzml"/> @@ -675,7 +700,8 @@ </conditional> </conditional> </repeat> - <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> + <param name="imzml_output" value="rdata_format"/> + <output name="outfile_rdata" file="preprocessing_results5.RData" compare="sim_size"/> <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/> </test> </tests> @@ -710,11 +736,11 @@ **Output** -- MSI data as .RData output (can be read with the Cardinal package in R) -- optional: MSI data as imzML file -- pdf with key values after each processing step +- MSI data as imzML file or .RData (can be read with the Cardinal package in R) +- pdf with key values and average mass spectra after each processing step ]]> </help> <expand macro="citations"/> </tool> +