comparison msi_qualitycontrol.xml @ 9:963c7ec00141 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_qualitycontrol commit a7be47698f53eb4f00961192327d93e8989276a7
author galaxyp
date Mon, 11 Jun 2018 17:34:19 -0400
parents 52ef77866de8
children 3eee933c27cf
comparison
equal deleted inserted replaced
8:52ef77866de8 9:963c7ec00141
1 <tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.10.0.0"> 1 <tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.10.0.1">
2 <description> 2 <description>
3 mass spectrometry imaging QC 3 mass spectrometry imaging QC
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
26 ]]> 26 ]]>
27 </command> 27 </command>
28 <configfiles> 28 <configfiles>
29 <configfile name="cardinal_qualitycontrol_script"><![CDATA[ 29 <configfile name="cardinal_qualitycontrol_script"><![CDATA[
30 30
31 ################################# load libraries and read file #################
32
31 library(Cardinal) 33 library(Cardinal)
32 library(ggplot2) 34 library(ggplot2)
33 library(RColorBrewer) 35 library(RColorBrewer)
34 library(gridExtra) 36 library(gridExtra)
35 library(KernSmooth) 37 library(KernSmooth)
36
37 ## Read MALDI Imaging dataset
38 38
39 #if $infile.ext == 'imzml' 39 #if $infile.ext == 'imzml'
40 msidata = readImzML('infile') 40 msidata = readImzML('infile')
41 #elif $infile.ext == 'analyze75' 41 #elif $infile.ext == 'analyze75'
42 msidata = readAnalyze('infile') 42 msidata = readAnalyze('infile')
43 #else 43 #else
44 load('infile.RData') 44 load('infile.RData')
45 #end if 45 #end if
46 46
47
48
49 ###################################### file properties in numbers ###################### 47 ###################################### file properties in numbers ######################
50 48
51 ## Number of features (mz) 49 ## Number of features (m/z)
52 maxfeatures = length(features(msidata)) 50 maxfeatures = length(features(msidata))
53 ## Range mz 51 ## Range m/z
54 minmz = round(min(mz(msidata)), digits=2) 52 minmz = round(min(mz(msidata)), digits=2)
55 maxmz = round(max(mz(msidata)), digits=2) 53 maxmz = round(max(mz(msidata)), digits=2)
56 ## Number of spectra (pixels) 54 ## Number of spectra (pixels)
57 pixelcount = length(pixels(msidata)) 55 pixelcount = length(pixels(msidata))
58 ## Range x coordinates 56 ## Range x coordinates
65 minint = round(min(spectra(msidata)[]), digits=2) 63 minint = round(min(spectra(msidata)[]), digits=2)
66 maxint = round(max(spectra(msidata)[]), digits=2) 64 maxint = round(max(spectra(msidata)[]), digits=2)
67 medint = round(median(spectra(msidata)[]), digits=2) 65 medint = round(median(spectra(msidata)[]), digits=2)
68 ## Number of intensities > 0 66 ## Number of intensities > 0
69 npeaks= sum(spectra(msidata)[]>0) 67 npeaks= sum(spectra(msidata)[]>0)
70 ## Spectra multiplied with mz (potential number of peaks) 68 ## Spectra multiplied with m/z (potential number of peaks)
71 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[]) 69 numpeaks = ncol(spectra(msidata)[])*nrow(spectra(msidata)[])
72 ## Percentage of intensities > 0 70 ## Percentage of intensities > 0
73 percpeaks = round(npeaks/numpeaks*100, digits=2) 71 percpeaks = round(npeaks/numpeaks*100, digits=2)
74 ## Number of empty TICs 72 ## Number of empty TICs
75 TICs = colSums(spectra(msidata)[]) 73 TICs = colSums(spectra(msidata)[])
76 NumemptyTIC = sum(TICs == 0) 74 NumemptyTIC = sum(TICs == 0)
75 ## Median TIC
76 medTIC = median(TICs)
77 ## Median peaks per spectrum
78 medpeaks = median(colSums(spectra(msidata)[]>0))
79 print(cor(TICs,colSums(spectra(msidata)[]>0), method="pearson"))
77 80
78 ## Processing information 81 ## Processing information
79 processinginfo = processingData(msidata) 82 processinginfo = processingData(msidata)
80 centroidedinfo = processinginfo@centroided # TRUE or FALSE 83 centroidedinfo = processinginfo@centroided
81 84
82 ## if TRUE write processinginfo if no write FALSE 85 ## if TRUE write processinginfo if FALSE write FALSE
83 86
84 ## normalization 87 ## normalization
85 if (length(processinginfo@normalization) == 0) { 88 if (length(processinginfo@normalization) == 0) {
86 normalizationinfo='FALSE' 89 normalizationinfo='FALSE'
87 } else { 90 } else {
104 peakpickinginfo='FALSE' 107 peakpickinginfo='FALSE'
105 } else { 108 } else {
106 peakpickinginfo=processinginfo@peakPicking 109 peakpickinginfo=processinginfo@peakPicking
107 } 110 }
108 111
109 ### Read tabular file with masses for plots and heatmap images: 112
113 ############## Read and filter tabular file with m/z ###########################
114
115 ### reading peptide file:
110 116
111 #if $peptide_file: 117 #if $peptide_file:
112 118
113 input_list = read.delim("$peptide_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) 119 input_list = read.delim("$peptide_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
114 if (ncol(input_list) == 1) 120 if (ncol(input_list) == 1)
115 { 121 {input_list = cbind(input_list, input_list)} ## if there is just one column, duplicate it to have a names column
116 input_list = cbind(input_list, input_list) 122
117 } 123 ### calculate how many input peptide m/z are valid:
118 124
119 ### calculate how many input peptide masses are valid: 125 inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,]
120 inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,] 126 number_peptides_in = length(input_list[,1])
121 number_peptides_in = length(input_list[,1]) 127 number_peptides_valid = length(inputpeptides[,1])
122 number_peptides_valid = length(inputpeptides[,1])
123 128
124 #else 129 #else
125 ###inputpeptides = data.frame(0,0) 130
126 inputpeptides = as.data.frame(matrix(, nrow = 0, ncol = 2)) 131 inputpeptides = as.data.frame(matrix(, nrow = 0, ncol = 2))
127 number_peptides_in = 0 132 number_peptides_in = 0
128 number_peptides_valid = 0 133 number_peptides_valid = 0
134
129 #end if 135 #end if
130 136
131 colnames(inputpeptides) = c("mz", "name") 137 colnames(inputpeptides) = c("m/z", "name")
138
139 ### reading calibrant file:
132 140
133 #if $calibrant_file: 141 #if $calibrant_file:
134 ### Read tabular file with calibrant masses: 142
135 calibrant_list = read.delim("$calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) 143 calibrant_list = read.delim("$calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE)
136 if (ncol(calibrant_list) == 1) 144 if (ncol(calibrant_list) == 1)
137 { 145 {calibrant_list = cbind(calibrant_list, calibrant_list)} ## if there is just one column, duplicate it to have a names column
138 calibrant_list = cbind(calibrant_list, calibrant_list) 146
139 } 147 ### calculate how many input calibrant m/z are valid:
140 ### calculate how many input calibrant masses are valid: 148
141 inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,] 149 inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,]
142 number_calibrants_in = length(calibrant_list[,1]) 150 number_calibrants_in = length(calibrant_list[,1])
143 number_calibrants_valid = length(inputcalibrants[,1]) 151 number_calibrants_valid = length(inputcalibrants[,1])
152
144 #else 153 #else
145 154
146 inputcalibrants = as.data.frame(matrix(, nrow = 0, ncol = 2)) 155 inputcalibrants = as.data.frame(matrix(, nrow = 0, ncol = 2))
147 number_calibrants_in = 0 156 number_calibrants_in = 0
148 number_calibrants_valid = 0 157 number_calibrants_valid = 0
158
149 #end if 159 #end if
150 160
151 colnames(inputcalibrants) = c("mz", "name") 161 colnames(inputcalibrants) = c("m/z", "name")
152 162
153 ### bind inputcalibrants and inputpeptides together, to make heatmap on both lists 163 ### bind inputcalibrants and inputpeptides together, to make m/z heatmaps on both
154 164
155 inputs_all = rbind(inputcalibrants[,1:2], inputpeptides[,1:2]) 165 inputs_all = rbind(inputcalibrants[,1:2], inputpeptides[,1:2])
156 inputmasses = inputs_all[,1] 166 inputmasses = inputs_all[,1]
157 inputnames = inputs_all[,2] 167 inputnames = inputs_all[,2]
158 168
159 169 ######################################## PDF #############################################
160 170 ##########################################################################################
161 properties = c("Number of mz features", 171 ##########################################################################################
162 "Range of mz values [Da]", 172
173 pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
174 plot(0,type='n',axes=FALSE,ann=FALSE)
175
176 ## if no filename is given, name of file in Galaxy history is used
177
178 #if not $filename:
179 #set $filename = $infile.display_name
180 #end if
181
182 title(main=paste("$filename"))
183
184 ################# I) file properties in numbers ################################
185 ################################################################################
186 print("properties in numbers")
187
188 properties = c("Number of m/z features",
189 "Range of m/z values [Da]",
163 "Number of pixels", 190 "Number of pixels",
164 "Range of x coordinates", 191 "Range of x coordinates",
165 "Range of y coordinates", 192 "Range of y coordinates",
166 "Range of intensities", 193 "Range of intensities",
167 "Median of intensities", 194 "Median of intensities",
168 "Intensities > 0", 195 "Intensities > 0",
169 "Number of zero TICs", 196 "Number of zero TICs",
170 "Preprocessing", 197 "Median TIC",
198 "Median # peaks per spectrum",
171 "Normalization", 199 "Normalization",
172 "Smoothing", 200 "Smoothing",
173 "Baseline reduction", 201 "Baseline reduction",
174 "Peak picking", 202 "Peak picking",
175 "Centroided", 203 "Centroided",
183 paste0(minimumy, " - ", maximumy), 211 paste0(minimumy, " - ", maximumy),
184 paste0(minint, " - ", maxint), 212 paste0(minint, " - ", maxint),
185 paste0(medint), 213 paste0(medint),
186 paste0(percpeaks, " %"), 214 paste0(percpeaks, " %"),
187 paste0(NumemptyTIC), 215 paste0(NumemptyTIC),
188 paste0(" "), 216 paste0(medTIC),
217 paste0(medpeaks),
189 paste0(normalizationinfo), 218 paste0(normalizationinfo),
190 paste0(smoothinginfo), 219 paste0(smoothinginfo),
191 paste0(baselinereductioninfo), 220 paste0(baselinereductioninfo),
192 paste0(peakpickinginfo), 221 paste0(peakpickinginfo),
193 paste0(centroidedinfo), 222 paste0(centroidedinfo),
195 paste0(number_calibrants_valid, " / ", number_calibrants_in)) 224 paste0(number_calibrants_valid, " / ", number_calibrants_in))
196 225
197 226
198 property_df = data.frame(properties, values) 227 property_df = data.frame(properties, values)
199 228
200 ######################################## PDF #############################################
201 ##########################################################################################
202 ##########################################################################################
203
204 pdf("qualitycontrol.pdf", fonts = "Times", pointsize = 12)
205 plot(0,type='n',axes=FALSE,ann=FALSE)
206 #if not $filename:
207 #set $filename = $infile.display_name
208 #end if
209 title(main=paste("Quality control of MSI data\n\n", "Filename:", "$filename"))
210
211 ############################# I) numbers ####################################
212 #############################################################################
213 grid.table(property_df, rows= NULL) 229 grid.table(property_df, rows= NULL)
214 230
215 if (npeaks > 0) 231 ####################### II) images in x-y grid ###############################
216 {
217 ############################# II) ion images #################################
218 ############################################################################## 232 ##############################################################################
219 233 print("x-y images")
220 ## function without xaxt for plots with automatic x axis 234 if (npeaks > 0){
235
236 ## function for density plots
221 plot_colorByDensity = function(x1,x2, 237 plot_colorByDensity = function(x1,x2,
222 ylim=c(min(x2),max(x2)), 238 ylim=c(min(x2),max(x2)),
223 xlim=c(min(x1),max(x1)), 239 xlim=c(min(x1),max(x1)),
224 xlab="",ylab="",main=""){ 240 xlab="",ylab="",main=""){
225
226 df = data.frame(x1,x2) 241 df = data.frame(x1,x2)
227 x = densCols(x1,x2, colramp=colorRampPalette(c("black", "white"))) 242 x = densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
228 df\$dens = col2rgb(x)[1,] + 1L 243 df\$dens = col2rgb(x)[1,] + 1L
229 cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256) 244 cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256)
230 df\$col = cols[df\$dens] 245 df\$col = cols[df\$dens]
231 plot(x2~x1, data=df[order(df\$dens),], 246 plot(x2~x1, data=df[order(df\$dens),], ylim=ylim,xlim=xlim,pch=20,col=col,
232 ylim=ylim,xlim=xlim,pch=20,col=col, 247 cex=1,xlab=xlab,ylab=ylab,las=1, main=main)}
233 cex=1,xlab=xlab,ylab=ylab,las=1, 248
234 main=main) 249 abline_vector= -100000 ## will be filled for samples in case data is combined
235 } 250
236 251 ################### 0) overview for combined data ###########################
237 252
238 ############################################################################ 253 ### only for previously combined data, same plot as in combine QC pdf
239 254 if (!is.null(levels(msidata\$combined_sample))){
240 ## 1) Acquisition image 255 position_df = cbind(coord(msidata)[,1:2], msidata\$combined_sample)
256 colnames(position_df)[3] = "sample_name"
257
258 combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+
259 geom_tile() +
260 coord_fixed()+
261 ggtitle("Spatial orientation of combined data")+
262 theme_bw()+
263 theme(text=element_text(family="ArialMT", face="bold", size=15))+
264 theme(legend.position="bottom",legend.direction="vertical")+
265 guides(fill=guide_legend(ncol=4,byrow=TRUE))
266 coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean)
267 coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name)
268 for(file_count in 1:nrow(coord_labels))
269 {combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],
270 y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4]))}
271 print(combine_plot)
272
273 ### find max pixelnumber per subsample to later draw ablines
274 pixel_name_df = data.frame(pixels(msidata), msidata\$combined_sample)
275 colnames(pixel_name_df) = c("pixel_number", "pixel_name")
276 last_pixel = aggregate(pixel_number~pixel_name, data = pixel_name_df, max)
277 pixel_vector = last_pixel[,2]
278 abline_vector = pixel_vector[1:length(levels(msidata\$combined_sample))-1]
279 print(abline_vector)
280 }
281
282
283 ################### 1) Pixel order image ###################################
241 284
242 pixelnumber = 1:pixelcount 285 pixelnumber = 1:pixelcount
243 pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber) 286 pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber)
244 287
245 print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber)) 288 print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
246 + geom_tile() + coord_fixed() 289 + geom_tile() + coord_fixed()
247 + ggtitle("Order of Acquisition") 290 + ggtitle("Pixel order")
248 +theme_bw() 291 +theme_bw()
249 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), 292 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"),
250 space = "Lab", na.value = "black", name = "Acq")) 293 space = "Lab", na.value = "black", name = "Acq"))
251 294
252 ## 2) Number of calibrants per spectrum 295 ################ 2) Number of calibrants per spectrum ######################
253 296
254 pixelmatrix = matrix(ncol=ncol(msidata), nrow=0) 297 pixelmatrix = matrix(ncol=ncol(msidata), nrow=0)
255 inputcalibrantmasses = inputcalibrants[,1] 298 inputcalibrantmasses = inputcalibrants[,1]
256 299
257 if (length(inputcalibrantmasses) != 0) 300 ### find m/z range (ppm) for each calibrant and extract intensity matrix for this range
258 { for (calibrantnr in 1:length(inputcalibrantmasses)) 301 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses
259 { 302
260 calibrantmz = inputcalibrantmasses[calibrantnr] 303 if (length(inputcalibrantmasses) != 0){
261 calibrantfeaturemin = features(msidata, mz=calibrantmz-$plusminus_dalton) 304 for (calibrantnr in 1:length(inputcalibrantmasses)){
262 calibrantfeaturemax = features(msidata, mz=calibrantmz+$plusminus_dalton) 305 calibrantmz = inputcalibrantmasses[calibrantnr]
263 306 calibrantfeaturemin = features(msidata, mz=calibrantmz-plusminusvalues[calibrantnr])
264 if (calibrantfeaturemin == calibrantfeaturemax) 307 calibrantfeaturemax = features(msidata, mz=calibrantmz+plusminusvalues[calibrantnr])
265 { 308
266 309 ## in case m/z range includes only 1 m/z:
267 calibrantintensity = spectra(msidata)[calibrantfeaturemin,] 310 if (calibrantfeaturemin == calibrantfeaturemax){
268 311 calibrantintensity = spectra(msidata)[calibrantfeaturemin,]
269 }else{ 312 }else{
270 313 ## if m/z range includes more than 1 m/z take sum of intensities
271 calibrantintensity = colSums(spectra(msidata)[calibrantfeaturemin:calibrantfeaturemax,] ) 314 calibrantintensity = colSums(spectra(msidata)[calibrantfeaturemin:calibrantfeaturemax,])
272
273 } 315 }
274 pixelmatrix = rbind(pixelmatrix, calibrantintensity) 316 ## for each pixel add sum of intensity in the given m/z range
317 pixelmatrix = rbind(pixelmatrix, calibrantintensity)
275 } 318 }
276 319
277 countvector= as.factor(colSums(pixelmatrix>0)) 320 countvector= as.factor(colSums(pixelmatrix>0))
278 countdf= cbind(coord(msidata)[,1:2], countvector) 321 countdf= cbind(coord(msidata)[,1:2], countvector)
279 mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen") 322 mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen")
280 323
281 print(ggplot(countdf, aes(x=x, y=y, fill=countvector)) 324 print(ggplot(countdf, aes(x=x, y=y, fill=countvector))
282 + geom_tile() + coord_fixed() 325 + geom_tile() + coord_fixed()
283 + ggtitle("Number of calibrants per pixel") 326 + ggtitle("Number of calibrants per pixel")
284 + theme_bw() 327 + theme_bw()
285 + theme(text=element_text(family="ArialMT", face="bold", size=12)) 328 + theme(text=element_text(family="ArialMT", face="bold", size=12))
286 + scale_fill_manual(values = mycolours[1:length(countvector)], 329 + scale_fill_manual(values = mycolours[1:length(countvector)],
287 na.value = "black", name = "# calibrants")) 330 na.value = "black", name = "# calibrants"))
288 }else{print("2) The inputcalibrant masses were not provided or outside the mass range")} 331 }else{print("2) The inputcalibrant m/z were not provided or outside the m/z range")}
289 332
290 333 ########################## 3) fold change image ###########################
291 ############# new 2b) image of foldchanges (log2 intensity ratios) between two masses in the same spectrum
292 334
293 #if $calibrantratio: 335 #if $calibrantratio:
294 #for $foldchanges in $calibrantratio: 336 #for $foldchanges in $calibrantratio:
295 mass1 = $foldchanges.mass1 337 mass1 = $foldchanges.mass1
296 mass2 = $foldchanges.mass2 338 mass2 = $foldchanges.mass2
297 distance = $foldchanges.distance 339 distance = $foldchanges.distance
298 340
341 ### if user did not write a label use input m/z as label
299 #if not str($foldchanges.filenameratioplot).strip(): 342 #if not str($foldchanges.filenameratioplot).strip():
300 #set $label = "Fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2) 343 #set $label = "Fold change %s Da / %s Da" % ($foldchanges.mass1, $foldchanges.mass2)
301 #else: 344 #else:
302 #set $label = $foldchanges.filenameratioplot 345 #set $label = $foldchanges.filenameratioplot
303 #end if 346 #end if
304 347
305 ### find rows which contain masses: 348 ### filter msidata for given m/z range (for both input m/z)
306 349 filtered_data1 = msidata[mz(msidata) >= mass1-distance & mz(msidata) <= mass1+distance,]
307 mzrowdown1 = features(msidata, mz = mass1-distance) 350 filtered_data2 = msidata[mz(msidata) >= mass2-distance & mz(msidata) <= mass2+distance,]
308 mzrowup1 = features(msidata, mz = mass1+distance) 351
309 mzrowdown2 = features(msidata, mz = mass2-distance) 352 ### find m/z in the two given ranges with the highest mean intensity
310 mzrowup2 = features(msidata, mz = mass2+distance) 353 ### these two m/z will be used to calculate the fold change (red line in plot)
311 354 maxmassrow1 = rowMeans(spectra(filtered_data1))
312 ### lower and upperlimit for the plot 355 maxmass1 = mz(filtered_data1)[which.max(maxmassrow1)]
356 maxmassrow2 = rowMeans(spectra(filtered_data2))
357 maxmass2 = mz(filtered_data2)[which.max(maxmassrow2)]
358
359 ### plot legend: chosen value in blue, distance in blue, max m/z in red
360 ### m/z range for each plot (fixed range of 5 Da)
361 ### xlim does not work because it does not adjust for the max. intensities within the range
313 mzdown1 = features(msidata, mz = mass1-2) 362 mzdown1 = features(msidata, mz = mass1-2)
314 mzup1 = features(msidata, mz = mass1+3) 363 mzup1 = features(msidata, mz = mass1+3)
315 mzdown2 = features(msidata, mz = mass2-2) 364 mzdown2 = features(msidata, mz = mass2-2)
316 mzup2 = features(msidata, mz = mass2+3) 365 mzup2 = features(msidata, mz = mass2+3)
317 366
318 ### find mass in the given range with the highest intensity (will be plotted in red) 367 ### plot for first m/z
319
320 if (mzrowdown1 == mzrowup1)
321 {
322 maxmass1 = mz(msidata)[ mzrowdown1]
323 }else{ ### for all masses in the massrange calculate mean intensity over all pixels and take mass which has highest mean
324 maxmassrow1 = rowMeans(spectra(msidata)[mzrowdown1:mzrowup1,])
325 maxmass1 = mz(msidata)[mzrowdown1:mzrowup1][which.max(maxmassrow1)]
326 }
327 if (mzrowdown2 == mzrowup2)
328 {
329 maxmass2 = mz(msidata)[mzrowup2]
330 }else{
331 maxmassrow2 = rowMeans(spectra(msidata)[mzrowdown2:mzrowup2,])
332 maxmass2 = mz(msidata)[mzrowdown2:mzrowup2][which.max(maxmassrow2)]
333 }
334
335 ### plot the part which was chosen, with chosen value in blue, distance in blue, maxmass in red, xlim fixed to 5 Da window
336 par(mfrow=c(2,1), oma=c(0,0,2,0)) 368 par(mfrow=c(2,1), oma=c(0,0,2,0))
337 plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("average spectrum ", mass1, " Da")) 369 plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("average spectrum ", mass1, " Da"))
338 abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,5,3)) 370 abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,6,3))
339 abline(v=maxmass1, col="red", lty=5) 371 abline(v=maxmass1, col="red", lty=5)
340 372
373 ### plot for second m/z
341 plot(msidata[mzdown2:mzup2,], pixel = 1:pixelcount, main= paste0("average spectrum ", mass2, " Da")) 374 plot(msidata[mzdown2:mzup2,], pixel = 1:pixelcount, main= paste0("average spectrum ", mass2, " Da"))
342 abline(v=c(mass2-distance, mass2, mass2+distance), col="blue", lty=c(3,5,3)) 375 abline(v=c(mass2-distance, mass2, mass2+distance), col="blue", lty=c(3,6,3))
343 abline(v=maxmass2, col="red", lty=5) 376 abline(v=maxmass2, col="red", lty=5)
344 title("Control of fold change plot", outer=TRUE) 377 title("Control of fold change plot", outer=TRUE)
345 378
346 ### filter spectra for maxmass to have two vectors, which can be divided 379 ### filter spectra for max m/z to have two vectors, which can be divided
347 380 ### plot spatial distribution of fold change
348 mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),] 381 ### only possible when there are intensities > 0 in both given m/z ranges
349 mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),] 382
350 foldchange = log2(mass1vector/mass2vector) 383 if (length(maxmass1)>0&length(maxmass2)>0){
351 384 mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),]
352 ratiomatrix = cbind(foldchange, coord(msidata)[,1:2]) 385 mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),]
353 386 foldchange= log2(mass1vector/mass2vector)
354 print(ggplot(ratiomatrix, aes(x=x, y=y, fill=foldchange), colour=colo) 387 fcmatrix = cbind(foldchange, coord(msidata)[,1:2])
355 + geom_tile() + coord_fixed() 388
356 + ggtitle("$label") 389 print(ggplot(fcmatrix, aes(x=x, y=y, fill=foldchange), colour=colo)
357 + theme_bw() 390 + geom_tile() + coord_fixed()
358 + theme(text=element_text(family="ArialMT", face="bold", size=12)) 391 + ggtitle("$label")
359 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 392 + theme_bw()
360 ,space = "Lab", na.value = "black", name ="FC")) 393 + theme(text=element_text(family="ArialMT", face="bold", size=12))
394 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
395 ,space = "Lab", na.value = "black", name ="FC"))
396 }else{
397 plot(0,type='n',axes=FALSE,ann=FALSE)
398 title(main=paste("At least one m/z range did not contain any intensity value > 0,\n therefore no foldchange plot could be drawn"))}
399
361 #end for 400 #end for
362 #end if 401 #end if
363 402
364 ## 3) Calibrant images: 403 #################### 4) m/z heatmaps #######################################
365 404
366 par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0) 405 par(mfrow=c(1,1), mar=c(5.1, 4.1, 4.1, 2.1), mgp=c(3, 1, 0), las=0)
367 if (length(inputmasses) != 0) 406 if (length(inputmasses) != 0){
368 { for (mass in 1:length(inputmasses)) 407 for (mass in 1:length(inputmasses)){
369 { 408 image(msidata, mz=inputmasses[mass], plusminus=$plusminus_dalton,
370 image(msidata, mz=inputmasses[mass], plusminus=$plusminus_dalton, 409 main= paste0(inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"),
371 main= paste0(inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"), 410 contrast.enhance = "histogram")
372 contrast.enhance = "histogram", ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy))
373 } 411 }
374 } else {print("3) The inputpeptide masses were not provided or outside the mass range")} 412 } else {print("4) The input peptide and calibrant m/z were not provided or outside the m/z range")}
375 413
376 414 #################### 5) Number of peaks per pixel - image ##################
377 ## 4) Number of peaks per pixel - image 415
378 416 ## here every intensity value > 0 counts as a peak
379 peaksperpixel = colSums(spectra(msidata)[]> 0) 417 peaksperpixel = colSums(spectra(msidata)[]> 0)
380 peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel) 418 peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)
381 419
382 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo) 420 print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo)
383 + geom_tile() + coord_fixed() 421 + geom_tile() + coord_fixed()
384 + ggtitle("Number of peaks per pixel") 422 + ggtitle("Number of peaks per spectrum")
385 + theme_bw() 423 + theme_bw()
386 + theme(text=element_text(family="ArialMT", face="bold", size=12)) 424 + theme(text=element_text(family="ArialMT", face="bold", size=12))
387 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 425 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
388 ,space = "Lab", na.value = "black", name = "# peaks")) 426 ,space = "Lab", na.value = "black", name = "# peaks"))
389 427
390 ## 5) TIC image 428 ############################### 6) TIC image ###############################
429
391 TICcoordarray=cbind(coord(msidata)[,1:2], TICs) 430 TICcoordarray=cbind(coord(msidata)[,1:2], TICs)
392 colo = colorRampPalette( 431 colo = colorRampPalette(
393 c("blue", "cyan", "green", "yellow","red")) 432 c("blue", "cyan", "green", "yellow","red"))
394 print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo) 433 print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
395 + geom_tile() + coord_fixed() 434 + geom_tile() + coord_fixed()
397 + theme_bw() 436 + theme_bw()
398 + theme(text=element_text(family="ArialMT", face="bold", size=12)) 437 + theme(text=element_text(family="ArialMT", face="bold", size=12))
399 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 438 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
400 ,space = "Lab", na.value = "black", name = "TIC")) 439 ,space = "Lab", na.value = "black", name = "TIC"))
401 440
402 ## 6) Most abundant mass image 441 ############################### 7) Most abundant m/z image #################
403 442
404 highestmz = apply(spectra(msidata)[],2,which.max) 443 highestmz = apply(spectra(msidata)[],2,which.max)
405 highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz]) 444 highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz])
406 colnames(highestmz_matrix)[3] = "highestmzinDa" 445 colnames(highestmz_matrix)[3] = "highestmzinDa"
407 446
408 print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa)) 447 print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
409 + geom_tile() + coord_fixed() 448 + geom_tile() + coord_fixed()
410 + ggtitle("Most abundant m/z in each pixel") 449 + ggtitle("Most abundant m/z in each spectrum")
411 + theme_bw() 450 + theme_bw()
412 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", 451 + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z",
413 labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]), 452 labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
414 breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa))) 453 breaks = pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)], limits=c(min(highestmz_matrix\$highestmzinDa), max(highestmz_matrix\$highestmzinDa)))
415 + theme(text=element_text(family="ArialMT", face="bold", size=12))) 454 + theme(text=element_text(family="ArialMT", face="bold", size=12)))
416 455
417 ## which mz are highest 456 ## which m/z are highest
418 highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1]) 457 highestmz_peptides = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[1])
419 highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1] 458 highestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == highestmz_peptides)[1]
420 459
421 secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2]) 460 secondhighestmz = names(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)[2])
422 secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1] 461 secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1]
423 462
424 ## 7) pca image for two components 463 print(head(sort(table(round(highestmz_matrix\$highestmzinDa, digits=0)), decreasing=TRUE)))
464
465 ########################## 8) pca image for two components #################
466
425 pca = PCA(msidata, ncomp=2) 467 pca = PCA(msidata, ncomp=2)
426 par(mfrow = c(2,1)) 468 par(mfrow = c(2,1))
427 plot(pca, col=c("black", "darkgrey"), main="PCA for two components") 469 plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
428 image(pca, col=c("black", "white"),ylim= c(maximumy+0.2*maximumy,minimumy-0.2*minimumy), strip=FALSE) 470 image(pca, col=c("black", "white"), strip=FALSE)
429 471
430 472 ################## III) properties over spectra index ##########
431 ############################# III) properties over acquisition (spectra index)##########
432 ############################################################################## 473 ##############################################################################
433 474 print("properties over pixels")
434 par(mfrow = c(2,1), mar=c(5,6,4,2)) 475 par(mfrow = c(2,1), mar=c(5,6,4,2))
435 476
436 ## 8a) number of peaks per spectrum - scatterplot 477 ########################## 9) number of peaks per spectrum #################
478 ## 9a) scatterplot
437 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum") 479 plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum")
438 title(xlab="Spectra index \n (= Acquisition time)", line=3) 480 title(xlab="Spectra index", line=3)
439 title(ylab="Number of peaks", line=4) 481 title(ylab="Number of peaks", line=4)
440 482 abline(v=abline_vector, lty = 3)
441 ## 8b) number of peaks per spectrum - histogram 483
484 ## 9b) histogram
485
442 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") 486 hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="")
443 title(main="Number of peaks per spectrum", line=2) 487 title(main="Number of peaks per spectrum", line=2)
444 title(ylab="Frequency = # spectra", line=4) 488 title(ylab="Frequency = # spectra", line=4)
445 abline(v=median(peaksperpixel), col="blue") 489 abline(v=median(peaksperpixel), col="blue")
446 490
447 ## 9a) TIC per spectrum - density scatterplot 491 ## 9c) additional histogram to show subsample contributions
448 zero=0 492 ## only when samples were combined before (combined_sample)
493 if (!is.null(levels(msidata\$combined_sample))){
494
495 df_9 = data.frame(peaksperpixel, msidata\$combined_sample)
496 colnames(df_9) = c("Npeaks", "sample_name")
497
498 hist_9 = ggplot(df_9, aes(x=Npeaks, fill=sample_name)) +
499 geom_histogram()+ theme_bw()+
500 theme(text=element_text(family="ArialMT", face="bold", size=12))+
501 labs(title="Number of peaks per spectrum and sample", x="Number of peaks per spectrum", y = "Frequency = # spectra") +
502 geom_vline(xintercept = median(peaksperpixel), size = 1, colour = "black",linetype = "dashed")
503 print(hist_9)}
504
505 ########################## 10) TIC per spectrum ###########################
506
507 ## 10a)density scatterplot
449 par(mfrow = c(2,1), mar=c(5,6,4,2)) 508 par(mfrow = c(2,1), mar=c(5,6,4,2))
450 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="TIC per spectrum") 509 plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="TIC per spectrum")
451 title(xlab="Spectra index \n (= Acquisition time)", line=3) 510 title(xlab="Spectra index", line=3)
452 title(ylab = "Total ion chromatogram intensity", line=4) 511 title(ylab = "Total ion chromatogram intensity", line=4)
453 512 abline(v=abline_vector, lty = 3)
454 ## 9b) TIC per spectrum - histogram 513
514 ## 10b) histogram
455 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="") 515 hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
456 title(main= "TIC per spectrum", line=2) 516 title(main= "TIC per spectrum", line=2)
457 title(ylab="Frequency = # spectra", line=4) 517 title(ylab="Frequency = # spectra", line=4)
458 abline(v=median(log(TICs[TICs>0])), col="blue") 518 abline(v=median(log(TICs[TICs>0])), col="blue")
459 519
460 520 ## 10c) additional histogram to show subsample contributions
461 ################################## IV) changes over mz ############################ 521 ## only when samples were combined before (combined_sample)
462 ################################################################################### 522 if (!is.null(levels(msidata\$combined_sample))){
463 523 df_10 = data.frame(log(TICs), msidata\$combined_sample)
464 ## 11) Number of peaks per mz 524 colnames(df_10) = c("TICs", "sample_name")
465 ## Number of peaks per mz - number across all pixel 525
526 hist_10 = ggplot(df_10, aes(x=TICs, fill=sample_name)) +
527 geom_histogram()+ theme_bw()+
528 theme(text=element_text(family="ArialMT", face="bold", size=12))+
529 labs(title="TIC per spectrum and sample", x="log(TIC per spectrum)", y = "Frequency = # spectra") +
530 geom_vline(xintercept = median(log(TICs[TICs>0])), size = 1, colour = "black",linetype = "dashed")
531 print(hist_10)}
532
533 ################################## IV) changes over m/z ####################
534 ############################################################################
535 print("changes over m/z")
536 ########################## 11) Number of peaks per m/z #####################
537
466 peakspermz = rowSums(spectra(msidata)[] > 0 ) 538 peakspermz = rowSums(spectra(msidata)[] > 0 )
467 539
468 par(mfrow = c(2,1), mar=c(5,6,4,4.5)) 540 par(mfrow = c(2,1), mar=c(5,6,4,4.5))
469 ## Number of peaks per mz - scatterplot 541 ## 11a) scatterplot
470 plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per mz", ylab ="") 542 plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per m/z", ylab ="")
471 title(xlab="mz in Dalton", line=2.5) 543 title(xlab="m/z", line=2.5)
472 title(ylab = "Number of peaks", line=4) 544 title(ylab = "Number of peaks", line=4)
473 axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1) 545 axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
474 mtext("Coverage of spectra [%]", 4, line=3, adj=1) 546 mtext("Coverage of spectra [%]", 4, line=3, adj=1)
475 547
476 # make plot smaller to fit axis and labels, add second y axis with % 548 ## 11b) histogram
477 ## Number of peaks per mz - histogram
478 hist(peakspermz, main="", las=1, ylab="", xlab="") 549 hist(peakspermz, main="", las=1, ylab="", xlab="")
479 title(ylab = "Frequency", line=4) 550 title(ylab = "Frequency", line=4)
480 title(main="Number of peaks per mz", xlab = "Number of peaks per mz", line=2) 551 title(main="Number of peaks per m/z", xlab = "Number of peaks per m/z", line=2)
481 abline(v=median(peakspermz), col="blue") 552 abline(v=median(peakspermz), col="blue")
482 553
483 554 ########################## 12) Sum of intensities per m/z ##################
484 ## 12) Sum of intensities per mz 555
485 556 ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
486 ## Sum of all intensities for each mz (like TIC, but for mz instead of pixel) 557 mzTIC = rowSums(spectra(msidata)[]) ## calculate intensity sum for each m/z
487 mzTIC = rowSums(spectra(msidata)[]) # calculate intensity sum for each mz
488 558
489 par(mfrow = c(2,1), mar=c(5,6,4,2)) 559 par(mfrow = c(2,1), mar=c(5,6,4,2))
490 # 12a) sum of intensities per mz - scatterplot 560 ## 12a) scatterplot
491 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per mz", ylab ="") 561 plot_colorByDensity(mz(msidata),mzTIC, main= "Sum of intensities per m/z", ylab ="")
492 title(xlab="mz in Dalton", line=2.5) 562 title(xlab="m/z", line=2.5)
493 title(ylab="Intensity sum", line=4) 563 title(ylab="Intensity sum", line=4)
494 # 12b) sum of intensities per mz - histogram 564
565 ## 12b) histogram
495 hist(log(mzTIC), main="", xlab = "", las=1, ylab="") 566 hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
496 title(main="Sum of intensities per mz", line=2, ylab="") 567 title(main="Sum of intensities per m/z", line=2, ylab="")
497 title(xlab = "log (sum of intensities per mz)") 568 title(xlab = "log (sum of intensities per m/z)")
498 title(ylab = "Frequency", line=4) 569 title(ylab = "Frequency", line=4)
499 abline(v=median(log(mzTIC[mzTIC>0])), col="blue") 570 abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
500 571
501 ################################## V) general plots ############################ 572 ################################## V) general plots ########################
502 ################################################################################### 573 ############################################################################
503 574 print("general plots")
504 ## 13) Intensity distribution 575 ########################## 13) Intensity distribution ######################
505 576
506 par(mfrow = c(2,1), mar=c(5,6,4,2)) 577 par(mfrow = c(2,1), mar=c(5,6,4,2))
507 578
508 ## 13a) Intensity histogram: 579 ## 13a) Median intensity over spectra
580 medianint_spectra = apply(spectra(msidata), 2, median)
581 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="")
582 title(ylab="Median spectrum intensity", line=4)
583 abline(v=abline_vector, lty = 3)
584
585 ## 13b) histogram:
509 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1) 586 hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
510 title(main="Log2-transformed intensities", line=2) 587 title(main="Log2-transformed intensities", line=2)
511 title(xlab="log2 intensities") 588 title(xlab="log2 intensities")
512 title(ylab="Frequency", line=4) 589 title(ylab="Frequency", line=4)
513 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue") 590 abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
514 591
515 ## 13b) Median intensity over spectra 592 ## 13c) histogram to show subsample contribution
516 medianint_spectra = apply(spectra(msidata), 2, median) 593 ## only for previously combined samples
517 plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="") 594 if (!is.null(levels(msidata\$combined_sample))){
518 title(ylab="Median spectrum intensity", line=4) 595
519 596 df_13 = data.frame(matrix(,ncol=2, nrow=0))
520 ## 13c) Histogram on mz values 597 for (subsample in levels(msidata\$combined_sample)){
521 par(mfrow = c(1, 1)) 598 log2_int_subsample = log2(spectra(msidata)[,msidata\$combined_sample==subsample])
522 hist(mz(msidata), xlab = "mz in Dalton", main="Histogram of mz values") 599 df_subsample = data.frame(as.numeric(log2_int_subsample))
523 600 df_subsample\$sample_name = subsample
524 601 df_13 = rbind(df_13, df_subsample)}
525 ## 14) Mass spectra 602 df_13\$sample_name = as.factor(df_13\$sample_name)
603 colnames(df_13) = c("logint", "sample_name")
604
605 hist_13 = ggplot(df_13, aes(x=logint, fill=sample_name)) +
606 geom_histogram()+ theme_bw()+
607 theme(text=element_text(family="ArialMT", face="bold", size=12))+
608 labs(title="Log2-transformed intensities per sample", x="log2 intensities", y = "Frequency") +
609 geom_vline(xintercept = median(log2(spectra(msidata)[(spectra(msidata)>0)])), size = 1, colour = "black",linetype = "dashed")
610 print(hist_13)
611
612 ## 13d) boxplots to visualize in a different way the intensity distributions
613 par(mfrow = c(1,1), cex.axis=1.3, cex.lab=1.3, mar=c(13.1,4.1,5.1,2.1))
614
615 mean_matrix = matrix(,ncol=0, nrow = nrow(msidata))
616 for (subsample in levels(msidata\$combined_sample)){
617 mean_mz_sample = colMeans(spectra(msidata)[,msidata\$combined_sample==subsample])
618 mean_matrix = cbind(mean_matrix, mean_mz_sample)}
619 boxplot(mean_matrix, ylab = "mean intensity per m/z", names=levels(msidata\$combined_sample), main="Mean intensities per m/z and sample", las=2)
620 }
621
622 ########################## 14) Histogram on m/z values #####################
623
624 par(mfrow = c(1, 1), cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
625 hist(mz(msidata), xlab = "m/z", main="Histogram of m/z values")
626
627 ############################ 15) Mass spectra ##############################
526 628
527 par(mfrow = c(2, 2)) 629 par(mfrow = c(2, 2))
630 pixels_for_plot = c(round(length(pixelnumber)/2, , digits=0), round(length(pixelnumber)/4, , digits=0), round(length(pixelnumber)/4*3, , digits=0))
528 plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum") 631 plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
529 plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition") 632 plot(msidata, pixel = pixels_for_plot[1], main=paste0("Spectrum at ", rownames(coord(msidata)[pixels_for_plot[1],1:2])))
530 plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2]))) 633 plot(msidata, pixel = pixels_for_plot[2], main= paste0("Spectrum at ", rownames(coord(msidata)[pixels_for_plot[2],1:2])))
531 plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2]))) 634 plot(msidata, pixel = pixels_for_plot[3], main= paste0("Spectrum at ", rownames(coord(msidata)[pixels_for_plot[3],1:2])))
532 635
533 ## 15) Zoomed in mass spectra for calibrants 636 #################### 16) Zoomed in mass spectra for calibrants##############
534 plusminusvalue = $plusminus_dalton 637
535 x = 1 638 count = 1
536 if (length(inputcalibrantmasses) != 0) 639 differencevector = numeric()
537 { 640 differencevector2 = vector()
538 641
539 for (calibrant in inputcalibrantmasses) 642 if (length(inputcalibrantmasses) != 0){
540 { 643
541 minmasspixel = features(msidata, mz=calibrant-1) 644 ### calculate plusminus values in m/z for each calibrant, this is used for all following plots
542 maxmasspixel = features(msidata, mz=calibrant+3) 645 plusminusvalues = rep($plusminus_ppm/1000000, length(inputcalibrantmasses))*inputcalibrantmasses
543 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 646
544 plot(msidata[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "average spectrum") 647 for (mass in 1:length(inputcalibrantmasses)){
545 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) 648
546 plot(msidata[minmasspixel:maxmasspixel,], pixel =round(length(pixelnumber)/2, digits=0), main="pixel in middle of acquisition") 649 ### define the plot window with xmin and xmax
547 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) 650 minmasspixel = features(msidata, mz=inputcalibrantmasses[mass]-1)
548 plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2]))) 651 maxmasspixel = features(msidata, mz=inputcalibrantmasses[mass]+3)
549 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) 652
550 plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2]))) 653 ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17
551 abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) 654 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
552 title(paste0(inputcalibrants[x,1]), outer=TRUE) 655 if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){
553 x=x+1 656 maxmassrow = rowMeans(spectra(filtered_data))
657 maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range
658 mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: m/z with highest average intensity - theoretical value
659 ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement
660 }else{
661 ppmdifference = NA
662 maxvalue = NA}
663 differencevector[mass] = round(ppmdifference, digits=2)
664
665 ### find m/z closest to inputcalibrant and calculate ppm difference for plot 18
666 mznumber = features(msidata, mz = inputcalibrantmasses[mass]) ### gives the feature number closest to the given m/z
667 mzvalue = mz(msidata)[mznumber] ### gives the closest m/z
668 mzdifference2 = mzvalue - inputcalibrantmasses[mass]
669 ppmdifference2 = mzdifference2/inputcalibrantmasses[mass]*1000000
670 differencevector2[mass] = round(ppmdifference2, digits=2)
671
672 par(mfrow = c(2, 2), oma=c(0,0,2,0))
673 plot(msidata[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "average spectrum")
674 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,6,3))
675 abline(v=c(maxvalue), col="red", lty=5)
676 abline(v=c(mzvalue), col="green2", lty=5)
677 plot(msidata[minmasspixel:maxmasspixel,], pixel = pixels_for_plot[1], main=paste0("Spectrum at ", rownames(coord(msidata)[pixels_for_plot[1],1:2])))
678 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,6,3))
679 abline(v=c(maxvalue), col="red", lty=5)
680 abline(v=c(mzvalue), col="green2", lty=5)
681 plot(msidata[minmasspixel:maxmasspixel,], pixel = pixels_for_plot[2], main= paste0("Spectrum at ", rownames(coord(msidata)[pixels_for_plot[2],1:2])))
682 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,6,3))
683 abline(v=c(maxvalue), col="red", lty=5)
684 abline(v=c(mzvalue), col="green2", lty=5)
685 plot(msidata[minmasspixel:maxmasspixel,], pixel = pixels_for_plot[3], main= paste0("Spectrum at ", rownames(coord(msidata)[pixels_for_plot[3],1:2])))
686 abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,6,3))
687 abline(v=c(maxvalue), col="red", lty=5)
688 abline(v=c(mzvalue), col="green2", lty=5)
689 title(paste0("theor. m/z: ", inputcalibrants[count,1]), col.main="blue", outer=TRUE, line=0, adj=0.074)
690 title(paste0("most abundant m/z: ", round(maxvalue, digits=4)), col.main="red", outer=TRUE, line=0, adj=0.49)
691 title(paste0("closest m/z: ", round(mzvalue, digits=4)), col.main="green2", outer=TRUE, line=0, adj=0.93)
692 count=count+1
554 } 693 }
555 694
556 }else{print("15) The inputcalibrant masses were not provided or outside the mass range")} 695 ######### 17) ppm difference input calibrant m/z and m/z with max intensity in given m/z range#########
557 696
558 ## 16) ppm accuracy measured vs. theoretical calibrant mass
559
560 if (length(inputcalibrantmasses) != 0)
561 {
562 par(mfrow = c(1, 1)) 697 par(mfrow = c(1, 1))
563 698
564 differencevector = vector() 699 ### plot the ppm difference calculated above: theor. m/z value to m/z with highest average intensity:
565
566 for (mass in 1:length(inputcalibrantmasses))
567 {mznumber = features(msidata, mz = inputcalibrantmasses[mass]) ### this gives the featurenumber which is closest to given mz
568 mzvalue = mz(msidata)[mznumber] ### gives the mz in Da which is closest to the given mz (using the featurenumber)
569 mzdifference = inputcalibrantmasses[mass] - mzvalue ### difference in Da: theoretical value - closest mz value
570 ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement
571 differencevector[mass] = ppmdifference }
572 differencevector = round(differencevector, digits=2)
573
574 ### plot the ppm difference theor. mz value to closest mz value:
575 700
576 calibrant_names = as.character(inputcalibrants[,2]) 701 calibrant_names = as.character(inputcalibrants[,2])
577 diff_df = data.frame(differencevector, calibrant_names) 702 diff_df = data.frame(differencevector, calibrant_names)
578 diff_plot<-ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector)) + geom_col() + theme_minimal() + 703
579 labs(title="Theoretical calibrant mz vs. closest measured mz", x="calibrants", y = "Difference in ppm")+ 704 if (sum(is.na(diff_df[,1])) == nrow(diff_df)){
705 print("plot 17: no peaks in the chosen region, repeat with higher ppm range")
706 }else{
707
708 diff_plot=ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector)) + geom_bar(stat="identity", fill = "darkgray") + theme_minimal() +
709 labs(title="Difference m/z with max. average intensity vs. theoretical calibrant m/z", x="calibrants", y = "Difference in ppm")+
580 geom_text(aes(label=differencevector), vjust=-0.3, size=3.5, col="blue") 710 geom_text(aes(label=differencevector), vjust=-0.3, size=3.5, col="blue")
581 711
712 print(diff_plot)}
713
714 ######### 18) ppm difference input calibrant m/z and closest m/z ###########
715
716 ### plot the ppm difference calculated above theor. m/z value to closest m/z value:
717
718 differencevector2 = round(differencevector2, digits=2)
719 calibrant_names = as.character(inputcalibrants[,2])
720 diff_df = data.frame(differencevector2, calibrant_names)
721
722 diff_plot=ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector2)) + geom_bar(stat="identity", fill = "darkgray") + theme_minimal() +
723 labs(title="Difference closest measured m/z vs. theoretical calibrant m/z", x="calibrants", y = "Difference in ppm")+
724 geom_text(aes(label=differencevector2), vjust=-0.3, size=3.5, col="blue")
725
582 print(diff_plot) 726 print(diff_plot)
583 727
584 }else{print("16) The inputcalibrant masses were not provided or outside the mass range")} 728 #################### 19) ppm difference over pixels #####################
585 729
730 mycolours = c("darkgrey", "darkblue", "blue", "green" , "red", "orange", "yellow", "magenta", "olivedrab1", "lightseagreen")
731 count = 1
732 ppm_df = as.data.frame(matrix(,ncol=0, nrow = ncol(msidata)))
733 for (calibrant in inputcalibrantmasses){
734 ### restrict the data to the m/z window around the calibrant; if no m/z falls into the window, all ppm differences will be NA
735 filtered_data = msidata[mz(msidata) >= calibrant-plusminusvalues[count] & mz(msidata) <= calibrant+plusminusvalues[count],]
736
737 if (nrow(filtered_data) > 0){
738 ### filtered for m/z range, now go through it pixel by pixel to find max peak in each spectrum
739 ppm_vector = numeric()
740 for (pixel_count in 1:ncol(filtered_data)){
741 mz_max = mz(filtered_data)[which.max(spectra(filtered_data)[,pixel_count])]
742
743 mzdiff = mz_max - calibrant
744 ppmdiff = mzdiff/calibrant*1000000
745
746 ### if maximum intensity in m/z range was 0 set ppm diff to NA (not shown in plot)
747 if (max(spectra(filtered_data)[,pixel_count]) == 0){
748 ppmdiff = NA}
749 ppm_vector[pixel_count] = ppmdiff}
750 }else{ppm_vector = rep(NA, ncol(msidata))}
751
752 ppm_df = cbind(ppm_df, ppm_vector)
753 count=count+1}
754
755 if (sum(is.na(ppm_df)) == ncol(ppm_df)*nrow(ppm_df)){
756 print("plot 19: no peaks in the chosen region, repeat with higher ppm range")
757 }else{
758
759 ### plot ppm differences over pixels (spectra index)
760
761 par(mar=c(4.1, 4.1, 4.1, 7.5))
762 plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,ncol(filtered_data)),xlab = "Spectra index", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theoretical m/z\n(per spectrum)")
763
764 for (each_cal in 1:ncol(ppm_df)){
765 lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")}
766 legend("topright", inset=c(-0.25,0), xpd = TRUE, bty="n", legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1)
767 abline(v=abline_vector, lty = 3)}
768
769 }else{print("16+17+18+19) The inputcalibrant m/z were not provided or outside the m/z range")}
586 770
587 dev.off() 771 dev.off()
588 772
589 }else{ 773 }else{
590 print("inputfile has no intensities > 0") 774 print("inputfile has no intensities > 0")
594 ]]></configfile> 778 ]]></configfile>
595 </configfiles> 779 </configfiles>
596 <inputs> 780 <inputs>
597 <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" 781 <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData"
598 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> 782 help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/>
599 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/> 783 <param name="filename" type="text" value="" optional="true" label="Title" help="will appear as header in the quality report, if nothing given input dataset name is used"/>
600 <param name="peptide_file" type="data" optional="true" format="tabular" label="Text file with masses and names"
601 help="first column m/z, second column name, tab separated file"/>
602 <param name="calibrant_file" type="data" optional="true" format="tabular" 784 <param name="calibrant_file" type="data" optional="true" format="tabular"
603 label="Internal calibrants and names" 785 label="File with internal calibrants" help="first column: m/z, second column: name (optional), tabular file"/>
604 help="Used for plot number of calibrant per spectrum and for zoomed in mass spectra"/> 786 <param name="peptide_file" type="data" optional="true" format="tabular" label="File with m/z of interest"
605 <param name="plusminus_dalton" value="0.25" type="text" label="Mass range in Dalton" help="Plusminus mass window in Dalton for calibrant and peptide plots"/> 787 help="first column: m/z, second column: name (optional), tabular file"/>
606 <repeat name="calibrantratio" title="Plot fold change of two masses for each spectrum" min="0" max="10"> 788 <param name="plusminus_dalton" value="0.25" type="float" label="M/z range for m/z heatmaps (x-y grid)" help="Will be added in both directions to input calibrants and peptide m/z"/>
607 <param name="mass1" value="1111" type="float" label="Mass 1" help="First mass in Dalton"/> 789 <param name="plusminus_ppm" value="50" type="float" label="Ppm range for accuracy and number of calibrants plots" help="Will be added in both directions to input calibrant m/z"/>
608 <param name="mass2" value="2222" type="float" label="Mass 2" help="Second mass in Dalton"/> 790 <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10">
609 <param name="distance" value="0.25" type="float" label="Distance in Dalton" help="Distance in Da used to find peak maximum from input masses in both directions"/> 791 <param name="mass1" value="1111" type="float" label="M/z 1" help="First m/z"/>
792 <param name="mass2" value="2222" type="float" label="M/z 2" help="Second m/z"/>
793 <param name="distance" value="0.25" type="float" label="M/z range" help="Plusminus m/z window added to input m/z. In both m/z ranges the maximum intensity is used to calculate the fold change"/>
610 <param name="filenameratioplot" type="text" optional="true" label="Title" help="Optional title for fold change plot."/> 794 <param name="filenameratioplot" type="text" optional="true" label="Title" help="Optional title for fold change plot."/>
611 </repeat> 795 </repeat>
612 </inputs> 796 </inputs>
613 <outputs> 797 <outputs>
614 <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} ${on_string}"/> 798 <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "$infile.display_name QC_report"/>
615 </outputs> 799 </outputs>
616
617 <tests> 800 <tests>
618 <test> 801 <test>
619 <param name="infile" value="" ftype="imzml"> 802 <param name="infile" value="" ftype="imzml">
620 <composite_data value="Example_Continuous.imzML" /> 803 <composite_data value="Example_Continuous.imzML" />
621 <composite_data value="Example_Continuous.ibd" /> 804 <composite_data value="Example_Continuous.ibd" />
622 </param> 805 </param>
623 <param name="peptide_file" value="inputpeptides.txt"/> 806 <param name="peptide_file" value="inputpeptides.txt"/>
624 <param name="calibrant_file" value="inputcalibrantfile1.txt"/> 807 <param name="calibrant_file" value="inputcalibrantfile1.txt"/>
625 <param name="plusminus_dalton" value="0.25"/> 808 <param name="plusminus_dalton" value="0.25"/>
809 <param name="plusminus_ppm" value="100"/>
626 <param name="filename" value="Testfile_imzml"/> 810 <param name="filename" value="Testfile_imzml"/>
627 <repeat name="calibrantratio"> 811 <repeat name="calibrantratio">
628 <param name="mass1" value="111"/> 812 <param name="mass1" value="328.9"/>
629 <param name="mass2" value="222"/> 813 <param name="mass2" value="398.8"/>
630 <param name="distance" value="0.25"/> 814 <param name="distance" value="0.25"/>
631 <param name="filenameratioplot" value = "Ratio of mass1 (111) / mass2 (222)"/> 815 <param name="filenameratioplot" value = "Ratio of mass1 (328.9) / mass2 (398.8)"/>
632 </repeat> 816 </repeat>
633 <output name="plots" file="QC_imzml.pdf" compare="sim_size" delta="20000"/> 817 <output name="plots" file="QC_imzml.pdf" compare="sim_size" delta="20000"/>
634 </test> 818 </test>
635 <test> 819 <test>
636 <param name="infile" value="" ftype="analyze75"> 820 <param name="infile" value="" ftype="analyze75">
643 <param name="plusminus_dalton" value="0.5"/> 827 <param name="plusminus_dalton" value="0.5"/>
644 <param name="filename" value="Testfile_analyze75"/> 828 <param name="filename" value="Testfile_analyze75"/>
645 <output name="plots" file="QC_analyze75.pdf" compare="sim_size" delta="20000"/> 829 <output name="plots" file="QC_analyze75.pdf" compare="sim_size" delta="20000"/>
646 </test> 830 </test>
647 <test> 831 <test>
648 <param name="infile" value="preprocessed.RData" ftype="rdata"/> 832 <param name="infile" value="123_combined.RData" ftype="rdata"/>
649 <param name="plusminus_dalton" value="0"/> 833 <param name="plusminus_dalton" value="0.2"/>
650 <param name="filename" value="Testfile_rdata"/> 834 <param name="filename" value="Testfile_rdata"/>
651 <output name="plots" file="QC_rdata.pdf" compare="sim_size" delta="20000"/> 835 <output name="plots" file="QC_rdata.pdf" compare="sim_size" delta="20000"/>
652 </test> 836 </test>
653 <test> 837 <test>
654 <param name="infile" value="empty_spectra.rdata" ftype="rdata"/> 838 <param name="infile" value="empty_spectra.rdata" ftype="rdata"/>
661 </tests> 845 </tests>
662 <help> 846 <help>
663 <![CDATA[ 847 <![CDATA[
664 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ 848 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_
665 849
666 This tool uses some Cardinal functions to create a quality control report with descriptive plots for mass-spectrometry imaging data. 850 This tool uses some Cardinal functions to create a quality control report with descriptive plots for mass spectrometry imaging data.
667 851
668 Input data: 3 types of input data can be used: 852 Input data: 3 types of input data can be used:
669 853
670 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 854 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
671 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) 855 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function)
672 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) 856 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData)
673 857
674 Options: 858 Options:
675 859
676 - masses of interest as tabular file, used to generate heatmap images 860 - internal calibrants are used for m/z heatmaps (x-y grid), heatmap of number of calibrants per spectrum (x-y grid), zoomed in mass spectra, m/z accuracy
677 - internal calibrants as tabular file, used for the following plots: Number of calibrant per spectrum, heatmap images, mass-spectrum plot zoomed in for calibrant region, ppm accuracy 861 - m/z of interest are used to generate m/z heatmaps (x-y grid)
678 - fold change plot: draws a heatmap of the fold change of two masses (log2(intensity ratio)) 862 - optional fold change plot: draws a heatmap (x-y grid) for the fold change of two m/z (log2(intensity ratio))
679 863
680 Output: 864 Output:
681 865
682 - pdf with numbers and descriptive plots to check the quality of the mass-spectrometry imaging data 866 - quality control report as pdf with key numbers and descriptive plots describing the mass spectrometry imaging data
683 867
684 Tip: 868 Tip:
685 869
686 - For additional heatmap images use the MSI ion images tool and to plot more mass spectra use the MSI massspectra tool. 870 - For additional m/z heatmaps use the MSI ion images tool and to plot more mass spectra use the MSI massspectra tool.
687 871
688 ]]> 872 ]]>
689 </help> 873 </help>
690 <citations> 874 <citations>
691 <citation type="doi">10.1093/bioinformatics/btv146</citation> 875 <citation type="doi">10.1093/bioinformatics/btv146</citation>