Mercurial > repos > galaxyp > msi_filtering
changeset 6:bab12ded74a5 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit 5bceedc3a11c950790692a4c64bbb83d46897bee
author | galaxyp |
---|---|
date | Tue, 24 Jul 2018 04:52:54 -0400 |
parents | 3d5ac78fb2b0 |
children | 73b5a754f35c |
files | msi_filtering.xml test-data/analyze75_filtered2.pdf test-data/analyze_filtered.RData test-data/analyze_filtered.pdf test-data/analyze_filteredoutside.RData test-data/imzml_filtered.RData test-data/imzml_filtered.pdf test-data/imzml_filtered2.pdf test-data/imzml_filtered3.RData test-data/imzml_filtered3.pdf test-data/imzml_filtered4.RData test-data/imzml_filtered4.pdf test-data/imzml_filtered5.RData test-data/imzml_filtered5.pdf test-data/inputpixels.tabular test-data/inputpixels_2column_challenge.tabular test-data/rdata_notfiltered.pdf |
diffstat | 17 files changed, 102 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- a/msi_filtering.xml Fri Jul 06 14:13:22 2018 -0400 +++ b/msi_filtering.xml Tue Jul 24 04:52:54 2018 -0400 @@ -1,8 +1,9 @@ -<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.3"> +<tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.4"> <description>tool for filtering mass spectrometry imaging data</description> <requirements> <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> <requirement type="package" version="2.2.1">r-gridextra</requirement> + <requirement type="package" version="2.2.1">r-ggplot2</requirement> </requirements> <command detect_errors="exit_code"> <![CDATA[ @@ -32,6 +33,7 @@ library(Cardinal) library(gridExtra) +library(ggplot2) #if $infile.ext == 'imzml' @@ -47,11 +49,7 @@ #end if -########################### optional QC numbers ######################## - -if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0) -{ - #if $outputs.outputs_select == "quality_control": +########################### QC numbers ######################## ## Number of features (m/z) maxfeatures = length(features(msidata)) @@ -80,7 +78,15 @@ ## Store features for QC plot featuresinfile = mz(msidata) - #end if +## Next steps will only run if there are more than 0 intensities/pixels/features in the file + +if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0) +{ + + + ## prepare dataframe for QC of pixel distribution (will be overwritten in filtering of pixels condition) + position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) + ###################################### Filtering of pixels ##################### ################################################################################ @@ -90,14 +96,22 @@ #if str($pixels_cond.pixel_filtering) == "single_column": print("single column") + ## read tabular file, count number of rows (= number of pixels), count how many pixels are valid input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) - numberpixels = length(input_list[,$pixels_cond.pixel_column]) - valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) + startingrow = $pixels_cond.pixel_header+1 + numberpixels = length(startingrow:nrow(input_list)) + valid_entries = input_list[startingrow:nrow(input_list),$pixels_cond.pixel_column] %in% names(pixels(msidata)) validpixels = sum(valid_entries) + valid_annotations = input_list[valid_entries,c($pixels_cond.pixel_column, $pixels_cond.annotation_column)] + ## for valid pixels: filter file for pixels and create dataframe with x,y,annotation for QC if (validpixels != 0){ - pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] + ## filter file for pixels + pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% valid_annotations[,1]] msidata = msidata[,pixelsofinterest] + ## position_df for QC + pixel_coords = coord(msidata)[names(pixels(msidata)) %in% valid_annotations[,1],1:2] + position_df = cbind(pixel_coords, valid_annotations[,2]) }else{ msidata = msidata[,0] validpixels=0} @@ -107,21 +121,22 @@ #elif str($pixels_cond.pixel_filtering) == "two_columns": print("two columns") + ## read tabular file, count number of rows (= number of pixels), extract dataframe with x,y,annotation (for QC), count number of valid pixels input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, stringsAsFactors = FALSE) - numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) + startingrow = $pixels_cond.pixel_header+1 + numberpixels = length(startingrow:nrow(input_list)) + inputpixels = input_list[startingrow:nrow(input_list),c($pixels_cond.pixel_column_x, $pixels_cond.pixel_column_y, $pixels_cond.annotation_column_xy)] + colnames(inputpixels) = c("x", "y", "annotation") + position_df = merge(coord(msidata)[,1:2], inputpixels, by=c("x", "y"), all.x=TRUE) - inputpixel_x = input_list[,$pixels_cond.pixel_column_x] - inputpixel_y = input_list[,$pixels_cond.pixel_column_y] - inputpixels = cbind(inputpixel_x, inputpixel_y) - colnames(inputpixels) = c("x", "y") - valid_rows = merge(inputpixels, coord(msidata)[,1:2]) - validpixels = nrow(valid_rows) + validpixels = nrow(position_df) + ## for valid pixels: filter file for pixels if (validpixels != 0){ pixelvector = character() - for (pixel in 1:nrow(valid_rows)){ - pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2])} + for (pixel in 1:nrow(position_df)){ + pixelvector[pixel] = paste0("x = ", position_df[pixel,1],", ", "y = ", position_df[pixel,2])} pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] msidata = msidata[,pixelsofinterest] }else{ @@ -135,8 +150,7 @@ numberpixels = "range" validpixels = "range" - ## only filter pixels if at least one pixel will be left - + ## only filter pixels if at least one pixel will be left if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] @@ -153,35 +167,42 @@ #end if - }else{ print("Inputfile has no intensities > 0") - } -###################################### filtering of features ###################### -################################################################################## + ################################# filtering of features ###################### + ############################################################################## -######################## Keep m/z from tabular file ######################### + ####################### Keep m/z from tabular file ######################### -if (sum(spectra(msidata)[], na.rm=TRUE) > 0){ +## feature filtering only when pixels/features/intensities are left +if (sum(spectra(msidata)[], na.rm=TRUE) > 0) +{ #if str($features_cond.features_filtering) == "features_list": print("feature list") + ## read tabular file, define starting row, extract and count valid features input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) startingrow = $features_cond.feature_header+1 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] numberfeatures = length(extracted_features) + ## find out type of tabular file (numeric or character format) if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE){ ### if input is in numeric format if (class(extracted_features) == "numeric"){ - ### max digits given in the input file will be used to match m/z + ### max digits given in the input file will be used to match m/z but the maximum is 4 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) - validfeatures = extracted_features %in% round(mz(msidata),max_digits) - featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% extracted_features[validfeatures]] + if (max_digits >4) + { + max_digits = 4 + } + + validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) + featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] validmz = length(unique(featuresofinterest)) }else{ validmz = 0 @@ -198,7 +219,7 @@ msidata = msidata[featuresofinterest,] - ############### features within a given range are kept ######################### + ############### features within a given range are kept ##################### #elif str($features_cond.features_filtering) == "features_range": print("feature range") @@ -270,9 +291,14 @@ ## save msidata as Rfile save(msidata, file="$msidata_filtered") - #################### optional QC numbers ####################### +}else{ + print("Inputfile or file filtered for pixels has no intensities > 0") + numberfeatures = NA + validmz = NA +} - #if $outputs.outputs_select == "quality_control": + #################### QC numbers ####################### + ## Number of features (m/z) maxfeatures2 = length(features(msidata)) @@ -334,22 +360,37 @@ property_df = data.frame(properties, before, filtered) - ############################### optional PDF QC ################################ + ############################### PDF QC ################################ + pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) grid.table(property_df, rows= NULL) - ### heatmap image as visual pixel control - if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ - image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", - main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) +## QC report with more than value-table: only when pixels/features/intensities are left +if (sum(spectra(msidata)[], na.rm=TRUE) > 0) +{ + ### visual pixel control + + colnames(position_df)[3] = "annotation_name" + pixel_image = ggplot(position_df, aes(x=x, y=y, fill=annotation_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=4,byrow=TRUE)) + coord_labels = aggregate(cbind(x,y)~annotation_name, data=position_df, mean, na.rm=TRUE, na.action="na.pass") + coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$annotation_name) + + print(pixel_image) ### control features which are removed hist(mz(msidata), xlab="m/z", main="Kept m/z values") #if str($features_cond.features_filtering) == "none": - print("no difference histogram as no m/z filtering took place") + print("no difference histogram as no m/z filtering took place") #else: if (isTRUE(all.equal(featuresinfile, mz(msidata)))){ @@ -357,13 +398,9 @@ }else{ hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")} #end if - }else{ - print("file has no features or pixels left")} dev.off() - #end if - ############################### optional intensity matrix ###################### #if $output_matrix: @@ -376,7 +413,8 @@ #end if }else{ - print("Inputfile or file filtered for pixels has no intensities > 0") + print("Inputfile or filtered file has no intensities > 0") + dev.off() } ]]></configfile> </configfiles> @@ -398,6 +436,7 @@ </param> </when> </conditional> + <conditional name="pixels_cond"> <param name="pixel_filtering" type="select" label="Select pixel filtering option"> <option value="none" selected="True">none</option> @@ -410,12 +449,16 @@ <param name="single_pixels" type="data" format="tabular" label="Pixels in single column for filtering of MSI data" help="tabular file with pixels of interest in the form x = 1, y = 1"/> <param name="pixel_column" data_ref="single_pixels" label="Column with pixels" type="data_column"/> + <param name="annotation_column" data_ref="single_pixels" label="Column with annotations for each pixel" type="data_column"/> + <param name="pixel_header" label="Number of header lines to skip" value="0" type="integer"/> </when> <when value="two_columns"> <param name="two_columns_pixel" type="data" format="tabular" label="Pixels in two columns for filtering of MSI data" help="tabular file with pixels of interest in two separate columns"/> <param name="pixel_column_x" data_ref="two_columns_pixel" label="Column with x values" type="data_column"/> <param name="pixel_column_y" data_ref="two_columns_pixel" label="Column with y values" type="data_column"/> + <param name="annotation_column_xy" data_ref="two_columns_pixel" label="Column with annotations" type="data_column"/> + <param name="pixel_header" label="Number of header lines to skip" value="0" type="integer"/> </when> <when value="pixel_range"> <param name="min_x_range" type="integer" value="0" label="Minimum value for x"/> @@ -424,6 +467,7 @@ <param name="max_y_range" type="integer" value="100" label="Maximum value for y"/> </when> </conditional> + <conditional name="features_cond"> <param name="features_filtering" type="select" label="Select feature filtering option"> <option value="none" selected="True">none</option> @@ -450,26 +494,14 @@ <option value="ppm" selected="True">ppm</option> <option value="Da">Da</option> </param> - </when> + </when> </conditional> - <conditional name="outputs"> - <param name="outputs_select" type="select" label="Quality control output"> - <option value="quality_control" selected="True">yes</option> - <option value="no_quality_control">no</option> - </param> - <when value="quality_control"> - <param name="inputmz" type="float" value="1296.7" label="M/z for which a heatmap image will be drawn" help="Use a m/z which is still present in all pixels to control if the pixel filtering went well"/> - <param name="plusminus_dalton" value="0.25" type="float" label="Range for m/z value" help="plusminus m/z window"/> - </when> - <when value="no_quality_control"/> - </conditional> <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> </inputs> + <outputs> <data format="rdata" name="msidata_filtered" label="$infile.display_name filtered"/> - <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"> - <filter>outputs["outputs_select"] == "quality_control"</filter> - </data> + <data format="pdf" name="filtering_qc" from_work_dir="filtertool_QC.pdf" label = "$infile.display_name filtered_QC"/> <data format="tabular" name="matrixasoutput" label="$infile.display_name filtered_matrix"> <filter>output_matrix</filter> </data> @@ -487,9 +519,6 @@ <param name="inputfeatures" ftype="tabular" value = "inputfeatures.tabular"/> <param name="feature_column" value="2"/> <param name="feature_header" value="1"/> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="328.9"/> - <param name="plusminus_dalton" value="0.25"/> <output name="filtering_qc" file="imzml_filtered.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered.RData" compare="sim_size" /> </test> @@ -503,9 +532,6 @@ <param name="max_x_range" value="20"/> <param name="min_y_range" value="2"/> <param name="max_y_range" value="2"/> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="328.9"/> - <param name="plusminus_dalton" value="0.25"/> <output name="filtering_qc" file="imzml_filtered2.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered2.RData" compare="sim_size" /> </test> @@ -522,9 +548,6 @@ <param name="features_filtering" value="features_range"/> <param name="min_mz" value="350" /> <param name="max_mz" value="500"/> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="328.9"/> - <param name="plusminus_dalton" value="0.25"/> <param name="output_matrix" value="True"/> <output name="filtering_qc" file="imzml_filtered3.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered3.RData" compare="sim_size" /> @@ -539,13 +562,11 @@ <param name="two_columns_pixel" ftype="tabular" value = "inputpixels_2column.tabular"/> <param name="pixel_column_x" value="1"/> <param name="pixel_column_y" value="3"/> + <param name="annotation_column_xy" value="2"/> <param name="features_filtering" value="features_list"/> <param name="inputfeatures" ftype="tabular" value = "inputcalibrantfile2.txt"/> <param name="feature_column" value="1"/> <param name="feature_header" value="0"/> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="328.9"/> - <param name="plusminus_dalton" value="0.25"/> <output name="filtering_qc" file="imzml_filtered4.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered4.RData" compare="sim_size" /> </test> @@ -563,9 +584,6 @@ <param name="inputfeatures" ftype="tabular" value = "featuresofinterest5.tabular"/> <param name="feature_column" value="1"/> <param name="feature_header" value="0"/> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="328.9"/> - <param name="plusminus_dalton" value="0.25"/> <output name="filtering_qc" file="imzml_filtered5.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="imzml_filtered5.RData" compare="sim_size" /> </test> @@ -581,11 +599,6 @@ <param name="features_filtering" value="features_list"/> <param name="inputfeatures" ftype="tabular" value = "featuresofinterest2.tabular"/> <param name="feature_column" value="1"/> - <conditional name="outputs"> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="1200"/> - <param name="plusminus_dalton" value="0.25"/> - </conditional> <param name="output_matrix" value="True"/> <output name="filtering_qc" file="analyze_filtered.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="analyze_filtered.RData" compare="sim_size" /> @@ -597,15 +610,10 @@ <composite_data value="Analyze75.img"/> <composite_data value="Analyze75.t2m"/> </param> - <conditional name="outputs"> - <param name="outputs_select" value="quality_control"/> - <param name="inputmz" value="1200"/> - <param name="plusminus_dalton" value="0.25"/> - </conditional> <output name="filtering_qc" file="analyze75_filtered2.pdf" compare="sim_size" delta="20000"/> <output name="msidata_filtered" file="analyze_filteredoutside.RData" compare="sim_size" /> </test> - <test expect_num_outputs="2"> + <test expect_num_outputs="3"> <param name="infile" value="preprocessed.RData" ftype="rdata"/> <conditional name="outputs"> <param name="outputs_select" value="no_quality_control"/> @@ -613,6 +621,7 @@ <param name="output_matrix" value="True"/> <output name="matrixasoutput" file="rdata_matrix.tabular"/> <output name="msidata_filtered" file="rdata_notfiltered.RData" compare="sim_size" /> + <output name="filtering_qc" file="rdata_notfiltered.pdf" compare="sim_size" /> </test> </tests> <help> @@ -631,9 +640,9 @@ Options: -- pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand +- pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible) - m/z feature filtering: can use a tabular file containing m/z of interest or by defining a range for the m/z values (! numeric input will be rounded to 2 digits before matching to m/z!) -- m/z feature removing: infering m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally set a window (window in ppm or m/z in which peaks should be removed) +- m/z feature removing: perturbing m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally set a window (window in ppm or m/z in which peaks should be removed) Output:
--- a/test-data/inputpixels.tabular Fri Jul 06 14:13:22 2018 -0400 +++ b/test-data/inputpixels.tabular Tue Jul 24 04:52:54 2018 -0400 @@ -1,6 +1,6 @@ -x = 1, y = 1 -x = 1, y = 2 -x = 1, y = 3 -x = 3, y = 1 -x = 3, y = 2 -x = 3, y = 3 +x = 1, y = 1 ROI1 +x = 1, y = 2 ROI1 +x = 1, y = 3 ROI1 +x = 3, y = 1 ROI1 +x = 3, y = 2 ROI1 +x = 3, y = 3 ROI1