# HG changeset patch # User galaxyp # Date 1540466713 14400 # Node ID aac805a9d2ae3416ffcd6cc69113964ddff14ef6 # Parent a2988d8d4b77f8f321eca87f6fe10b26c78854e3 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit d2f311f7fff24e54c565127c40414de708e31b3c diff -r a2988d8d4b77 -r aac805a9d2ae filtering.xml --- a/filtering.xml Mon Oct 01 01:04:17 2018 -0400 +++ b/filtering.xml Thu Oct 25 07:25:13 2018 -0400 @@ -1,21 +1,34 @@ - + tool for filtering mass spectrometry imaging data macros.xml - r-gridextra - r-ggplot2 + r-gridextra + r-ggplot2 + $outfile_imzml && + ls -l "$outfile_imzml.files_path" >> $outfile_imzml + ]]> + + = $pixels_cond.min_x_range] msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] }else{ - msidata = msidata[,0] - print("no valid pixel found")} + + print("no valid pixel found") + msidata = msidata[,0]} ## update position_df for filtered pixels position_df = cbind(coord(msidata)[,1:2], rep("$infile.element_identifier", times=ncol(msidata))) @@ -138,103 +152,121 @@ ####################### Keep m/z from tabular file ######################### ## feature filtering only when pixels/features/intensities are left + +if (ncol(msidata) > 0){ npeaks_before_filtering= sum(spectra(msidata)[]>0, na.rm=TRUE) + if (npeaks_before_filtering > 0) + { + + #if str($features_cond.features_filtering) == "features_list": + print("feature list") + + ## read tabular file, define starting row, extract and count valid features + input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) + extracted_features = input_features[,$features_cond.feature_column] + numberfeatures = length(extracted_features) + if (class(extracted_features) == "numeric"){ + ### max digits given in the input file will be used to match m/z but the maximum is 4 + max_digits = max(nchar(sapply(strsplit(as.character(extracted_features), "\\."),`[`,2)), na.rm=TRUE) + + if (max_digits >4) + { + max_digits = 4 + } + + validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) + featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] + validmz = length(unique(featuresofinterest)) + }else{ + validmz = 0 + featuresofinterest = 0} + + ### filter msidata for valid features + msidata = msidata[featuresofinterest,] + + ############### features within a given range are kept ##################### + + #elif str($features_cond.features_filtering) == "features_range": + print("feature range") + + numberfeatures = "range" + validmz = "range" + + if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ + msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] + }else{ + msidata = msidata[0,] + print("no valid mz range")} + + ############### Remove m/z from tabular file ######################### + + #elif str($features_cond.features_filtering) == "remove_features": + print("remove features") + + ## read tabular file, define starting row, extract and count valid features + input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE) + extracted_features = input_features[,$features_cond.removal_column] + numberfeatures = length(extracted_features) + if (class(extracted_features) == "numeric"){ + print("input is numeric") + featuresofinterest = extracted_features + validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) + }else{featuresofinterest = 0 + validmz = 0} + + ### Here starts removal of features: + plusminus = $features_cond.removal_plusminus + + mass_to_remove = numeric() + if (sum(featuresofinterest) > 0){ + for (masses in featuresofinterest){ + #if str($features_cond.units_removal) == "ppm": + plusminus = masses * $features_cond.removal_plusminus/1000000 + #end if + current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) + mass_to_remove = append(mass_to_remove, current_mass)} + msidata= msidata[-mass_to_remove, ] + }else{print("No features were removed as they were not fitting to m/z values and/or range")} -if (npeaks_before_filtering > 0) - -{ - - #if str($features_cond.features_filtering) == "features_list": - print("feature list") + #elif str($features_cond.features_filtering) == "none": - ## read tabular file, define starting row, extract and count valid features - input_features = read.delim("$mz_tabular", header = $features_cond.feature_header, stringsAsFactors = FALSE) - extracted_features = input_features[,$features_cond.feature_column] - numberfeatures = length(extracted_features) - if (class(extracted_features) == "numeric"){ - ### max digits given in the input file will be used to match m/z but the maximum is 4 - max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) - if (max_digits >4) - { - max_digits = 4 - } + print("no feature filtering") + validmz = 0 + numberfeatures = 0 - validfeatures = round(extracted_features, max_digits) %in% round(mz(msidata),max_digits) - featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% round(extracted_features[validfeatures], max_digits)] - validmz = length(unique(featuresofinterest)) - }else{ - validmz = 0 - featuresofinterest = 0} - - ### filter msidata for valid features - msidata = msidata[featuresofinterest,] - - ############### features within a given range are kept ##################### - - #elif str($features_cond.features_filtering) == "features_range": - print("feature range") + #end if - numberfeatures = "range" - validmz = "range" - - if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ - msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] - }else{ - msidata = msidata[0,] - print("no valid mz range")} - - ############### Remove m/z from tabular file ######################### - - #elif str($features_cond.features_filtering) == "remove_features": - print("remove features") + ## save msidata as Rfile + save(msidata, file="$msidata_filtered") - ## read tabular file, define starting row, extract and count valid features - input_features = read.delim("$mz_tabular", header = $features_cond.removal_header, stringsAsFactors = FALSE) - extracted_features = input_features[,$features_cond.removal_column] - numberfeatures = length(extracted_features) - if (class(extracted_features) == "numeric"){ - print("input is numeric") - featuresofinterest = extracted_features - validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) - }else{featuresofinterest = 0 - validmz = 0} - - ### Here starts removal of features: - plusminus = $features_cond.removal_plusminus - - mass_to_remove = numeric() - if (sum(featuresofinterest) > 0){ - for (masses in featuresofinterest){ - #if str($features_cond.units_removal) == "ppm": - plusminus = masses * $features_cond.removal_plusminus/1000000 - #end if - current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) - mass_to_remove = append(mass_to_remove, current_mass)} - msidata= msidata[-mass_to_remove, ] - }else{print("No features were removed as they were not fitting to m/z values and/or range")} + ## Number of empty TICs + TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) + ## Number of intensities > 0 + npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE) + ## Spectra multiplied with m/z (potential number of peaks) + numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) - #elif str($features_cond.features_filtering) == "none": - print("no feature filtering") - validmz = 0 - numberfeatures = 0 - - #end if - - ## save msidata as Rfile - save(msidata, file="$msidata_filtered") + }else{ + print("Inputfile or file filtered for pixels has no intensities > 0") + numberfeatures = NA + validmz = NA ## Number of empty TICs - TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) + TICs2 = 0 + npeaks2 = 0 + numpeaks2 = 0 + } }else{ - print("Inputfile or file filtered for pixels has no intensities > 0") - numberfeatures = NA - validmz = NA - ## Number of empty TICs - TICs2 = NA + print("Inputfile or file filtered for pixels has no pixels left") + numberfeatures = NA + validmz = NA + ## Number of empty TICs + TICs2 = 0 + npeaks2 = 0 + numpeaks2 = 0 } - #################### QC numbers ####################### @@ -251,10 +283,7 @@ ## Range y coordinates minimumy2 = min(coord(msidata)[,2]) maximumy2 = max(coord(msidata)[,2]) - ## Number of intensities > 0 - npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE) - ## Spectra multiplied with m/z (potential number of peaks) - numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) + ## Percentage of intensities > 0 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) ## Number of empty TICs @@ -296,9 +325,9 @@ paste0("valid mz: ", validmz)) property_df = data.frame(properties, before, filtered) +print(property_df) - ############################### PDF QC ################################ - + ########################### PDF QC and imzml output ########################### pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) plot(0,type='n',axes=FALSE,ann=FALSE) @@ -308,6 +337,14 @@ ## QC report with more than value-table: only when pixels/features/intensities are left if (npeaks2 > 0) { + + ## save msidata as imzML file, will only work if there is at least 1 m/z left + #if $imzml_output: + if (maxfeatures2 > 0){ + writeImzML(msidata, "out")} + #end if + + ### visual pixel control levels(position_df\$annotation) = factor(paste(1:length(levels(position_df\$annotation)), levels(position_df\$annotation), sep="_")) @@ -346,6 +383,7 @@ dev.off() + }else{ print("Inputfile or filtered file has no intensities > 0") dev.off() @@ -364,17 +402,6 @@ - - - - - - - - - - @@ -408,26 +435,21 @@ + + + + imzml_output + - - - - - - - - - - @@ -447,6 +469,10 @@ + @@ -493,8 +519,8 @@ **Options** -- pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixel that are not present in the dataset are ignored. In case all pixels are not present in the dataset the output file will be empty and no further mz filtering will be performed. -- m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. If all given m/z values or the m/z range is outside the dataset, the output file will be empty. +- pixel filtering/annotation: either with a tabular file containing x and y coordinates and pixel annotations or by defining a range for x and y by hand (for the latter no annotation is possible). Pixel that are not present in the dataset are ignored. It is not possible to filter only for pixels that are not present in the dataset. +- m/z feature filtering: m/z values for filtering should be either imported as a tabular file containing containing m/z of interest or by defining a range for the m/z values. m/z that are not present in the dataset are ignored. It is not possible to filter only for m/z that are not present in the dataset. - m/z feature removing: perturbing m/z features such as matrix contaminants can be removed by specifying their m/z in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed. @@ -506,7 +532,8 @@ **Output** -- imzML file filtered for pixels and/or m/z +- MSI data as .RData output (can be read with the Cardinal package in R) +- optional: MSI data as imzML file - pdf with heatmap showing the pixels that are left after filtering and histograms of kept and removed m/z diff -r a2988d8d4b77 -r aac805a9d2ae macros.xml --- a/macros.xml Mon Oct 01 01:04:17 2018 -0400 +++ b/macros.xml Thu Oct 25 07:25:13 2018 -0400 @@ -4,10 +4,17 @@ bioconductor-cardinal + r-base + + /dev/null | grep -v -i "WARNING: ") + ]]> + + - + + @@ -198,12 +206,12 @@ - - + - - - + + + diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Heatmaps_LM8_file16.pdf Binary file test-data/Heatmaps_LM8_file16.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Heatmaps_analyze75.pdf Binary file test-data/Heatmaps_analyze75.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Heatmaps_imzml.pdf Binary file test-data/Heatmaps_imzml.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Heatmaps_rdata.pdf Binary file test-data/Heatmaps_rdata.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Plot_analyze75.pdf Binary file test-data/Plot_analyze75.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Plot_analyze75_allpixels.pdf Binary file test-data/Plot_analyze75_allpixels.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Plot_empty_spectra.pdf Binary file test-data/Plot_empty_spectra.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Plot_imzml.pdf Binary file test-data/Plot_imzml.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/Plot_rdata.pdf Binary file test-data/Plot_rdata.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/QC_analyze75.pdf Binary file test-data/QC_analyze75.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/QC_empty_spectra.pdf Binary file test-data/QC_empty_spectra.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/QC_imzml.pdf Binary file test-data/QC_imzml.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/QC_rdata.pdf Binary file test-data/QC_rdata.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/analyze75.svg --- a/test-data/analyze75.svg Mon Oct 01 01:04:17 2018 -0400 +++ b/test-data/analyze75.svg Thu Oct 25 07:25:13 2018 -0400 @@ -1,15 +1,15 @@ - + - - - - - - - - - - + + + + + + + + + + diff -r a2988d8d4b77 -r aac805a9d2ae test-data/analyze75_filtered2.pdf Binary file test-data/analyze75_filtered2.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/analyze_filteredoutside.RData Binary file test-data/analyze_filteredoutside.RData has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/centroids_rdata.pdf Binary file test-data/centroids_rdata.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/cluster_skm.RData Binary file test-data/cluster_skm.RData has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered2.RData diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered2.pdf Binary file test-data/imzml_filtered2.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered3.RData Binary file test-data/imzml_filtered3.RData has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered3.pdf Binary file test-data/imzml_filtered3.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered4.RData Binary file test-data/imzml_filtered4.RData has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered4.pdf Binary file test-data/imzml_filtered4.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered5.RData Binary file test-data/imzml_filtered5.RData has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/imzml_filtered5.pdf Binary file test-data/imzml_filtered5.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/kmeans_analyze.pdf Binary file test-data/kmeans_analyze.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/pca_imzml.pdf Binary file test-data/pca_imzml.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/rdata_notfiltered.RData Binary file test-data/rdata_notfiltered.RData has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/rdata_notfiltered.pdf Binary file test-data/rdata_notfiltered.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test1.pdf Binary file test-data/test1.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test2.pdf Binary file test-data/test2.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test2.rdata Binary file test-data/test2.rdata has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test3.pdf Binary file test-data/test3.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test4.pdf Binary file test-data/test4.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test4.rdata Binary file test-data/test4.rdata has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test5.pdf Binary file test-data/test5.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test6.pdf Binary file test-data/test6.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test6.rdata Binary file test-data/test6.rdata has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test7.pdf Binary file test-data/test7.pdf has changed diff -r a2988d8d4b77 -r aac805a9d2ae test-data/test7.rdata Binary file test-data/test7.rdata has changed