Mercurial > repos > galaxyp > msi_filtering
comparison msi_filtering.xml @ 5:3d5ac78fb2b0 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_filtering commit 8087490eb4dcaf4ead0f03eae4126780d21e5503
author | galaxyp |
---|---|
date | Fri, 06 Jul 2018 14:13:22 -0400 |
parents | bf61fc662615 |
children | bab12ded74a5 |
comparison
equal
deleted
inserted
replaced
4:bf61fc662615 | 5:3d5ac78fb2b0 |
---|---|
1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.2"> | 1 <tool id="mass_spectrometry_imaging_filtering" name="MSI filtering" version="1.10.0.3"> |
2 <description>tool for filtering mass spectrometry imaging data</description> | 2 <description>tool for filtering mass spectrometry imaging data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | 4 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> |
5 <requirement type="package" version="2.2.1">r-gridextra</requirement> | 5 <requirement type="package" version="2.2.1">r-gridextra</requirement> |
6 </requirements> | 6 </requirements> |
31 | 31 |
32 | 32 |
33 library(Cardinal) | 33 library(Cardinal) |
34 library(gridExtra) | 34 library(gridExtra) |
35 | 35 |
36 | |
36 #if $infile.ext == 'imzml' | 37 #if $infile.ext == 'imzml' |
37 msidata <- readImzML('infile', mass.accuracy=$accuracy, units.accuracy = "$units") | 38 #if str($processed_cond.processed_file) == "processed": |
39 msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units") | |
40 #else | |
41 msidata <- readImzML('infile') | |
42 #end if | |
38 #elif $infile.ext == 'analyze75' | 43 #elif $infile.ext == 'analyze75' |
39 msidata = readAnalyze('infile') | 44 msidata = readAnalyze('infile') |
40 #else | 45 #else |
41 load('infile.RData') | 46 load('infile.RData') |
42 #end if | 47 #end if |
43 | 48 |
49 | |
44 ########################### optional QC numbers ######################## | 50 ########################### optional QC numbers ######################## |
45 | 51 |
46 #if $outputs.outputs_select == "quality_control": | 52 if (sum(spectra(msidata)[]>0, na.rm=TRUE) > 0) |
47 | 53 { |
48 ## Number of features (m/z) | 54 #if $outputs.outputs_select == "quality_control": |
49 maxfeatures = length(features(msidata)) | 55 |
50 ## Range m/z | 56 ## Number of features (m/z) |
51 minmz = round(min(mz(msidata)), digits=2) | 57 maxfeatures = length(features(msidata)) |
52 maxmz = round(max(mz(msidata)), digits=2) | 58 ## Range m/z |
53 ## Number of spectra (pixels) | 59 minmz = round(min(mz(msidata)), digits=2) |
54 pixelcount = length(pixels(msidata)) | 60 maxmz = round(max(mz(msidata)), digits=2) |
55 ## Range x coordinates | 61 ## Number of spectra (pixels) |
56 minimumx = min(coord(msidata)[,1]) | 62 pixelcount = length(pixels(msidata)) |
57 maximumx = max(coord(msidata)[,1]) | 63 ## Range x coordinates |
58 ## Range y coordinates | 64 minimumx = min(coord(msidata)[,1]) |
59 minimumy = min(coord(msidata)[,2]) | 65 maximumx = max(coord(msidata)[,1]) |
60 maximumy = max(coord(msidata)[,2]) | 66 ## Range y coordinates |
61 ## Number of intensities > 0 | 67 minimumy = min(coord(msidata)[,2]) |
62 npeaks= sum(spectra(msidata)[]>0) | 68 maximumy = max(coord(msidata)[,2]) |
63 ## Spectra multiplied with m/z (potential number of peaks) | 69 ## Number of intensities > 0 |
64 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | 70 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) |
65 ## Percentage of intensities > 0 | 71 ## Spectra multiplied with m/z (potential number of peaks) |
66 percpeaks = round(npeaks/numpeaks*100, digits=2) | 72 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) |
67 ## Number of empty TICs | 73 ## Percentage of intensities > 0 |
68 TICs = colSums(spectra(msidata)[]) | 74 percpeaks = round(npeaks/numpeaks*100, digits=2) |
69 NumemptyTIC = sum(TICs == 0) | 75 ## Number of empty TICs |
70 ## median TIC | 76 TICs = colSums(spectra(msidata)[], na.rm=TRUE) |
71 medint = round(median(TICs), digits=2) | 77 NumemptyTIC = sum(TICs == 0) |
72 ## Store features for QC plot | 78 ## median TIC |
73 featuresinfile = mz(msidata) | 79 medint = round(median(TICs), digits=2) |
74 | 80 ## Store features for QC plot |
75 #end if | 81 featuresinfile = mz(msidata) |
76 | 82 |
77 ###################################### Filtering of pixels ##################### | 83 #end if |
78 ################################################################################ | 84 |
79 | 85 ###################################### Filtering of pixels ##################### |
80 #################### Pixels in the one column format "x=,y=" ##################### | 86 ################################################################################ |
81 | 87 |
82 #if str($pixels_cond.pixel_filtering) == "single_column": | 88 #################### Pixels in the one column format "x=,y=" ##################### |
83 print("single column") | 89 |
84 | 90 #if str($pixels_cond.pixel_filtering) == "single_column": |
85 input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) | 91 print("single column") |
86 numberpixels = length(input_list[,$pixels_cond.pixel_column]) | 92 |
87 valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) | 93 input_list = read.delim("$pixels_cond.single_pixels", header = FALSE, stringsAsFactors = FALSE) |
88 validpixels = sum(valid_entries) | 94 numberpixels = length(input_list[,$pixels_cond.pixel_column]) |
89 | 95 valid_entries = input_list[,$pixels_cond.pixel_column] %in% names(pixels(msidata)) |
90 if (validpixels != 0){ | 96 validpixels = sum(valid_entries) |
91 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] | 97 |
92 msidata = msidata[,pixelsofinterest] | 98 if (validpixels != 0){ |
93 }else{ | 99 pixelsofinterest = pixels(msidata)[names(pixels(msidata)) %in% input_list[valid_entries,$pixels_cond.pixel_column]] |
94 msidata = msidata[,0] | 100 msidata = msidata[,pixelsofinterest] |
95 validpixels=0} | 101 }else{ |
96 | 102 msidata = msidata[,0] |
97 ############ Pixels in two columns format: x and y in different columns ############# | 103 validpixels=0} |
98 | 104 |
99 #elif str($pixels_cond.pixel_filtering) == "two_columns": | 105 ############ Pixels in two columns format: x and y in different columns ############# |
100 print("two columns") | 106 |
101 | 107 #elif str($pixels_cond.pixel_filtering) == "two_columns": |
102 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, | 108 print("two columns") |
103 stringsAsFactors = FALSE) | 109 |
104 numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) | 110 input_list = read.delim("$pixels_cond.two_columns_pixel", header = FALSE, |
105 | 111 stringsAsFactors = FALSE) |
106 inputpixel_x = input_list[,$pixels_cond.pixel_column_x] | 112 numberpixels = length(input_list[,$pixels_cond.pixel_column_x]) |
107 inputpixel_y = input_list[,$pixels_cond.pixel_column_y] | 113 |
108 inputpixels = cbind(inputpixel_x, inputpixel_y) | 114 inputpixel_x = input_list[,$pixels_cond.pixel_column_x] |
109 colnames(inputpixels) = c("x", "y") | 115 inputpixel_y = input_list[,$pixels_cond.pixel_column_y] |
110 valid_rows = merge(inputpixels, coord(msidata)[,1:2]) | 116 inputpixels = cbind(inputpixel_x, inputpixel_y) |
111 validpixels = nrow(valid_rows) | 117 colnames(inputpixels) = c("x", "y") |
112 | 118 valid_rows = merge(inputpixels, coord(msidata)[,1:2]) |
113 if (validpixels != 0){ | 119 validpixels = nrow(valid_rows) |
114 pixelvector = character() | 120 |
115 for (pixel in 1:nrow(valid_rows)){ | 121 if (validpixels != 0){ |
116 pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2])} | 122 pixelvector = character() |
117 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] | 123 for (pixel in 1:nrow(valid_rows)){ |
118 msidata = msidata[,pixelsofinterest] | 124 pixelvector[pixel] = paste0("x = ", valid_rows[pixel,1],", ", "y = ", valid_rows[pixel,2])} |
119 }else{ | 125 pixelsofinterest= pixels(msidata)[names(pixels(msidata)) %in% pixelvector] |
120 validpixels=0} | 126 msidata = msidata[,pixelsofinterest] |
121 | 127 }else{ |
122 ########### Pixels within x and y minima and maxima are kept ################### | 128 validpixels=0} |
123 | 129 |
124 #elif str($pixels_cond.pixel_filtering) == "pixel_range": | 130 ########### Pixels within x and y minima and maxima are kept ################### |
125 print("pixel range") | 131 |
126 | 132 #elif str($pixels_cond.pixel_filtering) == "pixel_range": |
127 numberpixels = "range" | 133 print("pixel range") |
128 validpixels = "range" | 134 |
129 | 135 numberpixels = "range" |
130 ## only filter pixels if at least one pixel will be left | 136 validpixels = "range" |
131 | 137 |
132 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ | 138 ## only filter pixels if at least one pixel will be left |
133 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] | 139 |
134 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] | 140 if (sum(coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range) > 0 & sum(coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range) > 0){ |
135 }else{ | 141 |
136 msidata = msidata[,0] | 142 msidata = msidata[, coord(msidata)\$x <= $pixels_cond.max_x_range & coord(msidata)\$x >= $pixels_cond.min_x_range] |
137 print("no valid pixel found")} | 143 msidata = msidata[, coord(msidata)\$y <= $pixels_cond.max_y_range & coord(msidata)\$y >= $pixels_cond.min_y_range] |
138 | 144 }else{ |
139 #elif str($pixels_cond.pixel_filtering) == "none": | 145 msidata = msidata[,0] |
140 print("no pixel filtering") | 146 print("no valid pixel found")} |
141 | 147 |
142 numberpixels = 0 | 148 #elif str($pixels_cond.pixel_filtering) == "none": |
143 validpixels = 0 | 149 print("no pixel filtering") |
144 | 150 |
145 #end if | 151 numberpixels = 0 |
146 | 152 validpixels = 0 |
153 | |
154 #end if | |
155 | |
156 | |
157 }else{ | |
158 print("Inputfile has no intensities > 0") | |
159 | |
160 } | |
147 | 161 |
148 ###################################### filtering of features ###################### | 162 ###################################### filtering of features ###################### |
149 ################################################################################## | 163 ################################################################################## |
150 | 164 |
151 ######################## Keep m/z from tabular file ######################### | 165 ######################## Keep m/z from tabular file ######################### |
152 | 166 |
153 #if str($features_cond.features_filtering) == "features_list": | 167 if (sum(spectra(msidata)[], na.rm=TRUE) > 0){ |
154 print("feature list") | 168 |
155 | 169 #if str($features_cond.features_filtering) == "features_list": |
156 input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) | 170 print("feature list") |
157 startingrow = $features_cond.feature_header+1 | 171 |
158 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] | 172 input_features = read.delim("$inputfeatures", header = FALSE, stringsAsFactors = FALSE) |
159 numberfeatures = length(extracted_features) | 173 startingrow = $features_cond.feature_header+1 |
160 | 174 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.feature_column] |
161 if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE){ | 175 numberfeatures = length(extracted_features) |
162 | 176 |
163 ### if input is in numeric format | 177 if (grepl("m/z = ", input_features[startingrow,$features_cond.feature_column])==FALSE){ |
164 if (class(extracted_features) == "numeric"){ | 178 |
165 ### max digits given in the input file will be used to match m/z | 179 ### if input is in numeric format |
166 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) | 180 if (class(extracted_features) == "numeric"){ |
167 validfeatures = extracted_features %in% round(mz(msidata),max_digits) | 181 ### max digits given in the input file will be used to match m/z |
168 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% extracted_features[validfeatures]] | 182 max_digits = max(nchar(matrix(unlist(strsplit(as.character(extracted_features), "\\.")), ncol=2, byrow=TRUE)[,2])) |
169 validmz = length(unique(featuresofinterest)) | 183 validfeatures = extracted_features %in% round(mz(msidata),max_digits) |
184 featuresofinterest = features(msidata)[round(mz(msidata), digits = max_digits) %in% extracted_features[validfeatures]] | |
185 validmz = length(unique(featuresofinterest)) | |
186 }else{ | |
187 validmz = 0 | |
188 featuresofinterest = 0} | |
189 | |
190 ### if input is already in character format (m/z = 800.01) | |
191 | |
170 }else{ | 192 }else{ |
171 validmz = 0 | 193 validfeatures = extracted_features %in% names(features(msidata)) |
172 featuresofinterest = 0} | 194 featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] |
173 | 195 validmz = sum(validfeatures)} |
174 ### if input is already in character format (m/z = 800.01) | 196 |
175 | 197 ### filter msidata for valid features |
176 }else{ | 198 |
177 validfeatures = extracted_features %in% names(features(msidata)) | 199 msidata = msidata[featuresofinterest,] |
178 featuresofinterest = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] | 200 |
179 validmz = sum(validfeatures)} | 201 ############### features within a given range are kept ######################### |
180 | 202 |
181 ### filter msidata for valid features | 203 #elif str($features_cond.features_filtering) == "features_range": |
182 | 204 print("feature range") |
183 msidata = msidata[featuresofinterest,] | 205 |
184 | 206 numberfeatures = "range" |
185 ############### features within a given range are kept ######################### | 207 validmz = "range" |
186 | 208 |
187 #elif str($features_cond.features_filtering) == "features_range": | 209 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ |
188 print("feature range") | 210 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] |
189 | 211 }else{ |
190 numberfeatures = "range" | 212 msidata = msidata[0,] |
191 validmz = "range" | 213 print("no valid mz range")} |
192 | 214 |
193 if (sum(mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz)> 0){ | 215 ############### Remove m/z from tabular file ######################### |
194 msidata = msidata[mz(msidata) >= $features_cond.min_mz & mz(msidata) <= $features_cond.max_mz,] | 216 |
195 }else{ | 217 #elif str($features_cond.features_filtering) == "remove_features": |
196 msidata = msidata[0,] | 218 print("remove features") |
197 print("no valid mz range")} | 219 |
198 | 220 ### Tabular file contains mz either as numbers or in the format mz = 800.01 |
199 ############### Remove m/z from tabular file ######################### | 221 |
200 | 222 input_features = read.delim("$inputfeatures_removal", header = FALSE, stringsAsFactors = FALSE) |
201 #elif str($features_cond.features_filtering) == "remove_features": | 223 startingrow = $features_cond.removal_header+1 |
202 print("remove features") | 224 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.removal_column] |
203 | 225 numberfeatures = length(extracted_features) |
204 ### Tabular file contains mz either as numbers or in the format mz = 800.01 | 226 |
205 | 227 if (grepl("m/z = ", input_features[startingrow,$features_cond.removal_column])==TRUE){ |
206 input_features = read.delim("$inputfeatures_removal", header = FALSE, stringsAsFactors = FALSE) | 228 |
207 startingrow = $features_cond.removal_header+1 | 229 ### if input is mz = 800 character format |
208 extracted_features = input_features[startingrow:nrow(input_features),$features_cond.removal_column] | 230 print("input is in format mz = 400") |
209 numberfeatures = length(extracted_features) | 231 validfeatures = extracted_features %in% names(features(msidata)) |
210 | 232 validmz = sum(validfeatures) |
211 if (grepl("m/z = ", input_features[startingrow,$features_cond.removal_column])==TRUE){ | 233 filtered_features = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] |
212 | 234 featuresofinterest = mz(msidata)[filtered_features] |
213 ### if input is mz = 800 character format | 235 |
214 print("input is in format mz = 400") | 236 ### if input is numeric: |
215 validfeatures = extracted_features %in% names(features(msidata)) | 237 }else{ |
216 validmz = sum(validfeatures) | 238 if (class(extracted_features) == "numeric"){ |
217 filtered_features = features(msidata)[names(features(msidata)) %in% extracted_features[validfeatures]] | 239 print("input is numeric") |
218 featuresofinterest = mz(msidata)[filtered_features] | 240 featuresofinterest = extracted_features |
219 | 241 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) |
220 ### if input is numeric: | 242 }else{featuresofinterest = 0 |
221 }else{ | 243 validmz = 0} |
222 if (class(extracted_features) == "numeric"){ | 244 } |
223 print("input is numeric") | 245 |
224 featuresofinterest = extracted_features | 246 ### Here starts removal of features: |
225 validmz = sum(featuresofinterest <= max(mz(msidata))& featuresofinterest >= min(mz(msidata))) | 247 |
226 }else{featuresofinterest = 0 | 248 plusminus = $features_cond.removal_plusminus |
227 validmz = 0} | 249 |
228 } | 250 mass_to_remove = numeric() |
229 | 251 if (sum(featuresofinterest) > 0){ |
230 ### Here starts removal of features: | 252 for (masses in featuresofinterest){ |
231 | 253 #if str($features_cond.units_removal) == "ppm": |
232 plusminus = $features_cond.removal_plusminus | 254 plusminus = masses * $features_cond.removal_plusminus/1000000 |
233 | 255 #end if |
234 mass_to_remove = numeric() | 256 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) |
235 if (sum(featuresofinterest) > 0){ | 257 mass_to_remove = append(mass_to_remove, current_mass)} |
236 for (masses in featuresofinterest){ | 258 msidata= msidata[-mass_to_remove, ] |
237 #if str($features_cond.units_removal) == "ppm": | 259 }else{print("No features were removed as they were not fitting to m/z values and/or range")} |
238 plusminus = masses * $features_cond.removal_plusminus/1000000 | 260 |
239 #end if | 261 |
240 current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus)) | 262 #elif str($features_cond.features_filtering) == "none": |
241 mass_to_remove = append(mass_to_remove, current_mass)} | 263 |
242 msidata= msidata[-mass_to_remove, ] | 264 print("no feature filtering") |
243 }else{print("No features were removed as they were not fitting to m/z values and/or range")} | 265 validmz = 0 |
244 | 266 numberfeatures = 0 |
245 | 267 |
246 #elif str($features_cond.features_filtering) == "none": | 268 #end if |
247 | 269 |
248 print("no feature filtering") | 270 ## save msidata as Rfile |
249 validmz = 0 | 271 save(msidata, file="$msidata_filtered") |
250 numberfeatures = 0 | 272 |
251 | 273 #################### optional QC numbers ####################### |
252 #end if | 274 |
253 | 275 #if $outputs.outputs_select == "quality_control": |
254 ## save msidata as Rfile | 276 |
255 save(msidata, file="$msidata_filtered") | 277 ## Number of features (m/z) |
256 | 278 maxfeatures2 = length(features(msidata)) |
257 #################### optional QC numbers ####################### | 279 ## Range m/z |
258 | 280 minmz2 = round(min(mz(msidata)), digits=2) |
259 #if $outputs.outputs_select == "quality_control": | 281 maxmz2 = round(max(mz(msidata)), digits=2) |
260 | 282 ## Number of spectra (pixels) |
261 ## Number of features (m/z) | 283 pixelcount2 = length(pixels(msidata)) |
262 maxfeatures2 = length(features(msidata)) | 284 ## Range x coordinates |
263 ## Range m/z | 285 minimumx2 = min(coord(msidata)[,1]) |
264 minmz2 = round(min(mz(msidata)), digits=2) | 286 maximumx2 = max(coord(msidata)[,1]) |
265 maxmz2 = round(max(mz(msidata)), digits=2) | 287 ## Range y coordinates |
266 ## Number of spectra (pixels) | 288 minimumy2 = min(coord(msidata)[,2]) |
267 pixelcount2 = length(pixels(msidata)) | 289 maximumy2 = max(coord(msidata)[,2]) |
268 ## Range x coordinates | 290 ## Number of intensities > 0 |
269 minimumx2 = min(coord(msidata)[,1]) | 291 npeaks2= sum(spectra(msidata)[]>0, na.rm=TRUE) |
270 maximumx2 = max(coord(msidata)[,1]) | 292 ## Spectra multiplied with m/z (potential number of peaks) |
271 ## Range y coordinates | 293 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) |
272 minimumy2 = min(coord(msidata)[,2]) | 294 ## Percentage of intensities > 0 |
273 maximumy2 = max(coord(msidata)[,2]) | 295 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) |
274 ## Number of intensities > 0 | 296 ## Number of empty TICs |
275 npeaks2= sum(spectra(msidata)[]>0) | 297 TICs2 = colSums(spectra(msidata)[], na.rm=TRUE) |
276 ## Spectra multiplied with m/z (potential number of peaks) | 298 NumemptyTIC2 = sum(TICs2 == 0) |
277 numpeaks2 = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) | 299 ## median TIC |
278 ## Percentage of intensities > 0 | 300 medint2 = round(median(TICs2), digits=2) |
279 percpeaks2 = round(npeaks2/numpeaks2*100, digits=2) | 301 |
280 ## Number of empty TICs | 302 properties = c("Number of m/z features", |
281 TICs2 = colSums(spectra(msidata)[]) | 303 "Range of m/z values", |
282 NumemptyTIC2 = sum(TICs2 == 0) | 304 "Number of pixels", |
283 ## median TIC | 305 "Range of x coordinates", |
284 medint2 = round(median(TICs2), digits=2) | 306 "Range of y coordinates", |
285 | 307 "Intensities > 0", |
286 properties = c("Number of m/z features", | 308 "Median TIC per pixel", |
287 "Range of m/z values", | 309 "Number of zero TICs", |
288 "Number of pixels", | 310 "pixel overview", |
289 "Range of x coordinates", | 311 "feature overview") |
290 "Range of y coordinates", | 312 |
291 "Intensities > 0", | 313 before = c(paste0(maxfeatures), |
292 "Median TIC per pixel", | 314 paste0(minmz, " - ", maxmz), |
293 "Number of zero TICs", | 315 paste0(pixelcount), |
294 "pixel overview", | 316 paste0(minimumx, " - ", maximumx), |
295 "feature overview") | 317 paste0(minimumy, " - ", maximumy), |
296 | 318 paste0(percpeaks, " %"), |
297 before = c(paste0(maxfeatures), | 319 paste0(medint), |
298 paste0(minmz, " - ", maxmz), | 320 paste0(NumemptyTIC), |
299 paste0(pixelcount), | 321 paste0("input pixels: ", numberpixels), |
300 paste0(minimumx, " - ", maximumx), | 322 paste0("input mz: ", numberfeatures)) |
301 paste0(minimumy, " - ", maximumy), | 323 |
302 paste0(percpeaks, " %"), | 324 filtered = c(paste0(maxfeatures2), |
303 paste0(medint), | 325 paste0(minmz2, " - ", maxmz2), |
304 paste0(NumemptyTIC), | 326 paste0(pixelcount2), |
305 paste0("input pixels: ", numberpixels), | 327 paste0(minimumx2, " - ", maximumx2), |
306 paste0("input mz: ", numberfeatures)) | 328 paste0(minimumy2, " - ", maximumy2), |
307 | 329 paste0(percpeaks2, " %"), |
308 filtered = c(paste0(maxfeatures2), | 330 paste0(medint2), |
309 paste0(minmz2, " - ", maxmz2), | 331 paste0(NumemptyTIC2), |
310 paste0(pixelcount2), | 332 paste0("valid pixels: ", validpixels), |
311 paste0(minimumx2, " - ", maximumx2), | 333 paste0("valid mz: ", validmz)) |
312 paste0(minimumy2, " - ", maximumy2), | 334 |
313 paste0(percpeaks2, " %"), | 335 property_df = data.frame(properties, before, filtered) |
314 paste0(medint2), | 336 |
315 paste0(NumemptyTIC2), | 337 ############################### optional PDF QC ################################ |
316 paste0("valid pixels: ", validpixels), | 338 |
317 paste0("valid mz: ", validmz)) | 339 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) |
318 | 340 plot(0,type='n',axes=FALSE,ann=FALSE) |
319 property_df = data.frame(properties, before, filtered) | 341 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) |
320 | 342 grid.table(property_df, rows= NULL) |
321 ############################### optional PDF QC ################################ | 343 |
322 | 344 ### heatmap image as visual pixel control |
323 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12) | 345 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ |
324 plot(0,type='n',axes=FALSE,ann=FALSE) | 346 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", |
325 title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name")) | 347 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) |
326 grid.table(property_df, rows= NULL) | 348 |
327 | 349 ### control features which are removed |
328 ### heatmap image as visual pixel control | 350 hist(mz(msidata), xlab="m/z", main="Kept m/z values") |
329 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ | 351 #if str($features_cond.features_filtering) == "none": |
330 image(msidata, mz=$outputs.inputmz, plusminus = $outputs.plusminus_dalton, contrast.enhance = "none", | 352 print("no difference histogram as no m/z filtering took place") |
331 main= paste0($outputs.inputmz," ± ", $outputs.plusminus_dalton, " Da"), ylim = c(maximumy2+0.2*maximumy2,minimumy2-0.2*minimumy2)) | 353 #else: |
332 | 354 |
333 ### control features which are removed | 355 if (isTRUE(all.equal(featuresinfile, mz(msidata)))){ |
334 hist(mz(msidata), xlab="m/z", main="Kept m/z values") | 356 print("No difference in m/z values before and after filtering, no histogram drawn") |
335 #if str($features_cond.features_filtering) == "none": | 357 }else{ |
336 print("no difference histogram as no m/z filtering took place") | 358 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")} |
337 #else: | 359 #end if |
338 hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values") | 360 }else{ |
339 #end if | 361 print("file has no features or pixels left")} |
340 }else{ | 362 |
341 print("file has no features or pixels left")} | 363 dev.off() |
342 | 364 |
343 dev.off() | 365 #end if |
344 | 366 |
345 #end if | 367 ############################### optional intensity matrix ###################### |
346 | 368 |
347 ############################### optional intensity matrix ###################### | 369 #if $output_matrix: |
348 | 370 |
349 #if $output_matrix: | 371 spectramatrix = spectra(msidata)[] |
350 | 372 spectramatrix = cbind(mz(msidata),spectramatrix) |
351 if (length(features(msidata))> 0 & length(pixels(msidata)) > 0){ | 373 newmatrix = rbind(c("mz | spectra", names(pixels(msidata))), spectramatrix) |
352 spectramatrix = spectra(msidata) | 374 write.table(newmatrix, file="$matrixasoutput", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
353 rownames(spectramatrix) = mz(msidata) | 375 |
354 newmatrix = rbind(pixels(msidata), spectramatrix) | 376 #end if |
355 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") | 377 |
356 }else{ | 378 }else{ |
357 print("file has no features or pixels left")} | 379 print("Inputfile or file filtered for pixels has no intensities > 0") |
358 | 380 } |
359 #end if | |
360 | |
361 | |
362 ]]></configfile> | 381 ]]></configfile> |
363 </configfiles> | 382 </configfiles> |
364 <inputs> | 383 <inputs> |
365 <param name="infile" type="data" format="imzml,rdata,analyze75" | 384 <param name="infile" type="data" format="imzml,rdata,analyze75" |
366 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" | 385 label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" |
367 help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> | 386 help="Upload composite datatype imzML (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> |
368 <param name="accuracy" type="float" value="50" label="Only for processed imzML files: enter mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/> | 387 <conditional name="processed_cond"> |
369 <param name="units" display="radio" type="select" label="Only for processed imzML files: unit of the mass accuracy" help="either m/z or ppm"> | 388 <param name="processed_file" type="select" label="Is the input file a processed imzML file "> |
370 <option value="mz" >mz</option> | 389 <option value="no_processed" selected="True">not a processed imzML</option> |
371 <option value="ppm" selected="True" >ppm</option> | 390 <option value="processed">processed imzML</option> |
372 </param> | 391 </param> |
392 <when value="no_processed"/> | |
393 <when value="processed"> | |
394 <param name="accuracy" type="float" value="50" label="Mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/> | |
395 <param name="units" display="radio" type="select" label="Unit of the mass accuracy" help="either m/z or ppm"> | |
396 <option value="mz" >mz</option> | |
397 <option value="ppm" selected="True" >ppm</option> | |
398 </param> | |
399 </when> | |
400 </conditional> | |
373 <conditional name="pixels_cond"> | 401 <conditional name="pixels_cond"> |
374 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> | 402 <param name="pixel_filtering" type="select" label="Select pixel filtering option"> |
375 <option value="none" selected="True">none</option> | 403 <option value="none" selected="True">none</option> |
376 <option value="single_column">tabular file with single column (x = 1, y = 1)</option> | 404 <option value="single_column">tabular file with single column (x = 1, y = 1)</option> |
377 <option value="two_columns">tabular file with separate columns for x and y values</option> | 405 <option value="two_columns">tabular file with separate columns for x and y values</option> |
603 | 631 |
604 Options: | 632 Options: |
605 | 633 |
606 - pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand | 634 - pixel filtering: can use a tabular file containing x and y coordinates or by defining a range for x and y by hand |
607 - m/z feature filtering: can use a tabular file containing m/z of interest or by defining a range for the m/z values (! numeric input will be rounded to 2 digits before matching to m/z!) | 635 - m/z feature filtering: can use a tabular file containing m/z of interest or by defining a range for the m/z values (! numeric input will be rounded to 2 digits before matching to m/z!) |
608 - m/z feature removing: interfering m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally setting a window (window in ppm or Da in which peaks should be removed) | 636 - m/z feature removing: interfering m/z such as matrix contaminants can be removed by specifying their m/z in a tabular file and optionally setting a window (window in ppm or m/z in which peaks should be removed) |
609 | 637 |
610 | 638 |
611 Output: | 639 Output: |
612 | 640 |
613 - imzML file filtered for pixels and/or m/z | 641 - imzML file filtered for pixels and/or m/z |