Mercurial > repos > galaxyp > msi_filtering
diff msi_filtering.xml @ 0:f17d3f1a065f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_filtering commit 3363c40790b0d64a085f980980f4289165eed27f
author | galaxyp |
---|---|
date | Wed, 28 Feb 2018 14:02:21 -0500 |
parents | |
children | 98c101b19f3c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/msi_filtering.xml Wed Feb 28 14:02:21 2018 -0500 @@ -0,0 +1,361 @@ +<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.7.0"> + <description>tool for filtering mass spectrometry imaging data</description> + <requirements> + <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> + <requirement type="package" version="2.2.1">r-gridextra</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + + #if $infile.ext == 'imzml' + cp '${infile.extra_files_path}/imzml' infile.imzML && + cp '${infile.extra_files_path}/ibd' infile.ibd && + #elif $infile.ext == 'analyze75' + cp '${infile.extra_files_path}/hdr' infile.hdr && + cp '${infile.extra_files_path}/img' infile.img && + cp '${infile.extra_files_path}/t2m' infile.t2m && + #else + ln -s $infile infile.RData && + #end if + cat '${MSI_subsetting}' && + echo ${MSI_subsetting} && + Rscript '${MSI_subsetting}' + + ]]> + </command> + <configfiles> + <configfile name="MSI_subsetting"><![CDATA[ + + +################################# load libraries and read file ######################### + + +library(Cardinal) +library(gridExtra) + +## Read MALDI Imaging dataset + +#if $infile.ext == 'imzml' + msidata = readMSIData('infile.imzML') +#elif $infile.ext == 'analyze75' + msidata = readMSIData('infile.hdr') +#else + load('infile.RData') +#end if + +###################################### inputfile properties in numbers ###################### + +#if $outputs.outputs_select == "quality_control" + ## Number of features (mz) + maxfeatures = length(features(msidata)) + ## Range mz + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + ## Number of spectra (pixels) + pixelcount = length(pixels(msidata)) + ## Range x coordinates + minimumx = min(coord(msidata)[,1]) + maximumx = max(coord(msidata)[,1]) + ## Range y coordinates + minimumy = min(coord(msidata)[,2]) + maximumy = max(coord(msidata)[,2]) + ## Number of intensities > 0 + npeaks= sum(spectra(msidata)[]>0) + ## Spectra multiplied with mz (potential number of peaks) + numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) + ## Percentage of intensities > 0 + percpeaks = round(npeaks/numpeaks*100, digits=2) + ## Number of empty TICs + TICs = colSums(spectra(msidata)[]) + NumemptyTIC = sum(TICs == 0) + ## median TIC + medint = round(median(TICs), digits=2) + ## Store features for QC plot + featuresinfile = mz(msidata) +#end if + + +###################################### filtering of pixels ###################### +#if $inputpixels: + input_list = read.delim("$inputpixels", header = FALSE, + na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) + validpixels = input_list[,$pixel_column] %in% names(pixels(msidata)) + + if (validpixels != 0) + { + pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[validpixels,$pixel_column]] + msidata = msidata[,pixelsofinterest] + numberpixels = length(input_list[,$pixel_column]) + }else { + numberpixels = 0 + } + + +#else + input_list = data.frame(0, 0) + validpixels=0 + numberpixels = 0 +#end if + + + +###################################### filtering of features ###################### + +#if $inputfeatures: + input_features = read.delim("$inputfeatures", header = FALSE, + na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) + validfeatures = input_features[,$feature_column] %in% names(features(msidata)) + + if (validfeatures != 0) + { + featuresofinterest = features(msidata)[names(features(msidata)) %in% input_features[validfeatures,$feature_column]] + msidata = msidata[featuresofinterest,] + numberfeatures = length(input_features[,$feature_column]) + } else { + numberfeatures = 0 + } + + +#else + input_features = data.frame(0, 0) + validfeatures = 0 + numberfeatures = 0 +#end if + + + + + + +# save msidata as Rfile +save(msidata, file="$msidata_filtered") + +###################################### outputfile properties in numbers ###################### + +#if $outputs.outputs_select == "quality_control" + +## Number of features (mz) +maxfeatures2 = length(features(msidata)) +## Range mz +minmz2 = round(min(mz(msidata)), digits=2) +maxmz2 = round(max(mz(msidata)), digits=2) +## Number of spectra (pixels) +pixelcount2 = length(pixels(msidata)) +## Range x coordinates +minimumx2 = min(coord(msidata)[,1]) +maximumx2 = max(coord(msidata)[,1]) +## Range y coordinates +minimumy2 = min(coord(msidata)[,2]) +maximumy2 = max(coord(msidata)[,2]) +## Number of intensities > 0 +npeaks2= sum(spectra(msidata)[]>0) +## Spectra multiplied with mz (potential number of peaks) +numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) +## Percentage of intensities > 0 +percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) +## Number of empty TICs +TICs2 = colSums(spectra(msidata)[]) +NumemptyTIC2 = sum(TICs2 == 0) +## median TIC +medint2 = round(median(TICs2), digits=2) + + +properties = c("Number of mz features", + "Range of mz values [Da]", + "Number of pixels", + "Range of x coordinates", + "Range of y coordinates", + "Intensities > 0", + "Median TIC per pixel", + "Number of zero TICs", + paste0("# pixels in ", "$inputpixels.display_name"), + paste0("# mz in ", "$inputfeatures.display_name")) + +before = c(paste0(maxfeatures), + paste0(minmz, " - ", maxmz), + paste0(pixelcount), + paste0(minimumx, " - ", maximumx), + paste0(minimumy, " - ", maximumy), + paste0(percpeaks, " %"), + paste0(medint), + paste0(NumemptyTIC), + paste0("input pixels: ", numberpixels), + paste0("input mz: ", numberfeatures)) + +filtered = c(paste0(maxfeatures2), + paste0(minmz2, " - ", maxmz2), + paste0(pixelcount2), + paste0(minimumx2, " - ", maximumx2), + paste0(minimumy2, " - ", maximumy2), + paste0(percpeaks2, " %"), + paste0(medint2), + paste0(NumemptyTIC2), + paste0("valid pixels: ", sum(validpixels)), + paste0("valid mz: ", sum(validfeatures))) + + +property_df = data.frame(properties, before, filtered) + + + +######################################## PDF QC ############################################# + + + pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) + plot(0,type='n',axes=FALSE,ann=FALSE) + + title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) + + + grid.table(property_df, rows= NULL) + + + ### heatmap image as visual pixel control + + + image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", + main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) + + ### control features which are left + + par(mfrow = c(2,1)) + plot(featuresinfile, ylab = "m/z in Dalton", xlab = "feature index") + plot(mz(msidata), ylab = "m/z in Dalton", xlab = "feature index") + + + dev.off() + +#end if + +######################################## intensity matrix ################################## + +#if $output_matrix: + +if (length(features(msidata))> 0 & length(pixels(msidata)) > 0) +{ + + spectramatrix = spectra(msidata) + rownames(spectramatrix) = mz(msidata) + newmatrix = rbind(pixels(msidata), spectramatrix) + write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + +}else{ + print("file has no features or pixels left") +} + +#end if + + + ]]></configfile> + </configfiles> + <inputs> + <param name="infile" type="data" format="imzml, rdata, analyze75" + label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" + help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> + <param name="inputpixels" type="data" optional="true" format="tabular" label="pixels for filtering of MSI data" + help="tabular file with pixels of interest in the form x = 1, y = 1"/> + <param name="pixel_column" data_ref="inputpixels" optional="true" label="Column with pixels" type="data_column" /> + <param name="inputfeatures" type="data" optional="true" format="tabular" label="features for filtering of MSI data" + help="tabular file with masses of interest in the form mz = 800.05"/> + <param name="feature_column" data_ref="inputfeatures" optional="true" label="Column with features" type="data_column" /> + + <conditional name="outputs"> + <param name="outputs_select" type="select" label="Quality control output"> + <option value="quality_control" selected="True">yes</option> + <option value="no_quality_control" >no</option> + </param> + <when value="quality_control"> + <param name="inputmz" type="float" value="1296.7" label="Mass for which a heatmap image will be drawn" help="Use a mass which is still present in all pixels to control if the pixel filtering went well"/> + <param name="plusminus_dalton" value="0.25" type="float" label="mass range for mz value" help="plusminus mass window in Dalton"/> + </when> + </conditional> + <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> + </inputs> + <outputs> + <data format="rdata" name="msidata_filtered" label="${tool.name} on $infile.display_name"/> + <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "QC ${tool.name} on $infile.display_name"> + <filter>outputs["outputs_select"] == "quality_control"</filter> + </data> + <data format="tabular" name="matrixasoutput" label="matrix ${tool.name} on $infile.display_name"> + <filter>output_matrix</filter> + </data> + </outputs> + + <tests> + <test expect_num_outputs="2"> + <param name="infile" value="" ftype="imzml"> + <composite_data value="Example_Continuous.imzML"/> + <composite_data value="Example_Continuous.ibd"/> + </param> + <param name="inputpixels" ftype="tabular" value = "inputpixels.tabular"/> + <param name="pixel_column" value="1"/> + <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> + <param name="feature_column" value="2"/> + + <conditional name="outputs"> + <param name="outputs_select" value="quality_control"/> + <param name="inputmz" value="328.9"/> + <param name="plusminus_dalton" value="0.25"/> + </conditional> + <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> + <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size" /> + </test> + <test expect_num_outputs="3"> + <param name="infile" value="" ftype="analyze75"> + <composite_data value="Analyze75.hdr"/> + <composite_data value="Analyze75.img"/> + <composite_data value="Analyze75.t2m"/> + </param> + <param name="inputpixels" ftype="tabular" value = "inputpixels2.tabular"/> + <param name="pixel_column" value="1"/> + <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> + <param name="feature_column" value="1"/> + <conditional name="outputs"> + <param name="outputs_select" value="quality_control"/> + <param name="inputmz" value="702"/> + <param name="plusminus_dalton" value="0.25"/> + </conditional> + <param name="output_matrix" value="True"/> + <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> + <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> + <output name="matrixasoutput" file="analyze_matrix.tabular"/> + </test> + <test expect_num_outputs="1"> + <param name="infile" value="" ftype="analyze75"> + <composite_data value="Analyze75.hdr"/> + <composite_data value="Analyze75.img"/> + <composite_data value="Analyze75.t2m"/> + </param> + <conditional name="outputs"> + <param name="outputs_select" value="no_quality_control"/> + </conditional> + <output name="msidata_filtered" file="analyze_originaloutput.RData" compare="sim_size" /> + </test> + <test expect_num_outputs="2"> + <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/> + <conditional name="outputs"> + <param name="outputs_select" value="no_quality_control"/> + </conditional> + <param name="output_matrix" value="True"/> + <output name="matrixasoutput" file="rdata_matrix.tabular"/> + </test> + </tests> + <help> + <![CDATA[ + +This tool can filter three types of mass-spectrometry imaging files (see below) for pixels and features of interest. This can be used to keep only pixels in a regions of interest. +For filtering at least one valid pixel/feature is needed otherwise no filtering will be performed. + +Input data: 3 types of input data can be used: + +- imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <http://ms-imaging.org/wp/introduction/>`_ +- Analyze7.5 (upload hdr, img and t2m file via the "composite" function) +- Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) + +The output of this tool is a subsetted Cardinal "MSImageSet" with the variable name "msidata" saved as .RData. + ]]> + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btv146</citation> + </citations> +</tool>