Mercurial > repos > galaxyp > cardinal_data_exporter
comparison data_exporter.xml @ 4:e521b5767819 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author | galaxyp |
---|---|
date | Fri, 22 Mar 2019 08:16:20 -0400 |
parents | d94770c22f13 |
children | 350a84ea795c |
comparison
equal
deleted
inserted
replaced
3:d94770c22f13 | 4:e521b5767819 |
---|---|
1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.1"> | 1 <tool id="cardinal_data_exporter" name="MSI data exporter" version="@VERSION@.2"> |
2 <description> | 2 <description> |
3 exports imzML and Analyze7.5 to tabular files | 3 exports imzML and Analyze7.5 to tabular files |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>macros.xml</import> | 6 <import>macros.xml</import> |
22 | 22 |
23 library(Cardinal) | 23 library(Cardinal) |
24 | 24 |
25 @READING_MSIDATA_INRAM@ | 25 @READING_MSIDATA_INRAM@ |
26 | 26 |
27 ## to make sure that processed files work as well: | |
28 iData(msidata) = iData(msidata)[] | |
29 | 27 |
30 ###################### Intensity matrix output ################################ | 28 ###################### Intensity matrix output ################################ |
31 | 29 |
32 #if "int_matrix" in str($output_options).split(","): | 30 #if "int_matrix" in str($output_options).split(","): |
33 print("intensity matrix output") | 31 print("intensity matrix output") |
35 mz_names = gsub(" = ", "_", names(features(msidata))) | 33 mz_names = gsub(" = ", "_", names(features(msidata))) |
36 mz_names = gsub("/", "", mz_names) | 34 mz_names = gsub("/", "", mz_names) |
37 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) | 35 pixel_names = gsub(", y = ", "_", names(pixels(msidata))) |
38 pixel_names = gsub(" = ", "y_", pixel_names) | 36 pixel_names = gsub(" = ", "y_", pixel_names) |
39 | 37 |
40 spectramatrix = cbind(mz_names,spectra(msidata)[]) | 38 ##spectramatrix = cbind(mz_names,spectra(msidata)) |
41 newmatrix = rbind(c("mz_name", pixel_names), spectramatrix) | 39 newmatrix = rbind(c("mz_name", pixel_names), cbind(mz_names,spectra(msidata))) |
42 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 40 write.table(newmatrix, file="$intensity_matrix", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
43 ## free up RAM space in case furhter steps will be run: | 41 ## free up RAM space in case furhter steps will be run: |
44 rm(newmatrix) | 42 rm(newmatrix) |
45 rm(spectramatrix) | |
46 gc() | 43 gc() |
47 | 44 |
48 #end if | 45 #end if |
49 | 46 |
50 | 47 |
54 | 51 |
55 mz_names = gsub(" = ", "_", names(features(msidata))) | 52 mz_names = gsub(" = ", "_", names(features(msidata))) |
56 mz_names = gsub("/", "", mz_names) | 53 mz_names = gsub("/", "", mz_names) |
57 | 54 |
58 ## mean, median, sd and SEM intensity per file and mz | 55 ## mean, median, sd and SEM intensity per file and mz |
59 full_sample_mean = apply(spectra(msidata)[],1,mean, na.rm=TRUE) | 56 full_sample_mean = rowMeans(spectra(msidata), na.rm=TRUE) |
60 full_sample_median = apply(spectra(msidata)[],1,median, na.rm=TRUE) | 57 full_sample_median = apply(spectra(msidata),1,median, na.rm=TRUE) |
61 full_sample_sd = apply(spectra(msidata)[],1,sd, na.rm=TRUE) | 58 full_sample_sd = apply(spectra(msidata),1,sd, na.rm=TRUE) |
62 full_sample_sem = full_sample_sd/full_sample_mean*100 | 59 full_sample_sem = full_sample_sd/full_sample_mean*100 |
63 ## npeaks and sum of all intensities per spectrum and mz | 60 ## npeaks and sum of all intensities per spectrum and mz |
64 npeaks= sum(spectra(msidata)[]>0, na.rm=TRUE) | 61 npeaks= sum(spectra(msidata)>0, na.rm=TRUE) |
65 mzTIC = rowSums(spectra(msidata)[], na.rm=TRUE) ## calculate intensity sum for each m/z | 62 mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z |
66 peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) | 63 peakspermz = rowSums(spectra(msidata) > 0, na.rm=TRUE) ## calculate number of intensities > 0 for each m/z (max = number of spectra) |
67 | 64 |
68 ## combine into dataframe, order is the same for all vectors | 65 ## combine into dataframe, order is the same for all vectors |
69 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) | 66 mz_df = data.frame(mz_names, mz(msidata), full_sample_mean, full_sample_median, full_sample_sd, full_sample_sem, mzTIC, peakspermz) |
70 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") | 67 colnames(mz_df) = c("mz_names", "mz", "sample_mean", "sample_median", "sample_sd", "sample_sem", "intensity_sum", "number_peaks") |
71 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 68 write.table(mz_df, file="$feature_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
69 ## free up RAM space in case furhter steps will be run: | |
70 rm(mz_df) | |
71 gc() | |
72 #end if | 72 #end if |
73 | 73 |
74 ###################### summarized m/z feature output ####################### | 74 ###################### summarized m/z feature output ####################### |
75 | 75 |
76 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 76 #if str($tabular_annotation.load_annotation) == 'yes_annotation': |
99 ## calculate mean per annotation group | 99 ## calculate mean per annotation group |
100 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | 100 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) |
101 count = 1 | 101 count = 1 |
102 for (subsample in levels(msidata\$annotation)){ | 102 for (subsample in levels(msidata\$annotation)){ |
103 subsample_pixels = msidata[,msidata\$annotation == subsample] | 103 subsample_pixels = msidata[,msidata\$annotation == subsample] |
104 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE) | 104 subsample_calc = rowMeans(spectra(subsample_pixels), na.rm=TRUE) |
105 sample_matrix = cbind(sample_matrix, subsample_calc) | 105 sample_matrix = cbind(sample_matrix, subsample_calc) |
106 count = count+1} | 106 count = count+1} |
107 sample_matrix_mean = cbind(mz_names,sample_matrix) | 107 sample_matrix_mean = cbind(mz_names,sample_matrix) |
108 sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean) | 108 sample_matrix_mean = rbind(c("mz_name", levels(msidata\$annotation)), sample_matrix_mean) |
109 write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 109 write.table(sample_matrix_mean, file="$summarized_mean", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
114 | 114 |
115 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | 115 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) |
116 count = 1 | 116 count = 1 |
117 for (subsample in levels(msidata\$annotation)){ | 117 for (subsample in levels(msidata\$annotation)){ |
118 subsample_pixels = msidata[,msidata\$annotation == subsample] | 118 subsample_pixels = msidata[,msidata\$annotation == subsample] |
119 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE) | 119 subsample_calc = apply(spectra(subsample_pixels),1,median, na.rm=TRUE) |
120 sample_matrix = cbind(sample_matrix, subsample_calc) | 120 sample_matrix = cbind(sample_matrix, subsample_calc) |
121 count = count+1} | 121 count = count+1} |
122 sample_matrix_median = cbind(mz_names,sample_matrix) | 122 sample_matrix_median = cbind(mz_names,sample_matrix) |
123 sample_matrix_median = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_median) | 123 sample_matrix_median = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_median) |
124 write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 124 write.table(sample_matrix_median, file="$summarized_median", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
129 | 129 |
130 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) | 130 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata)) |
131 count = 1 | 131 count = 1 |
132 for (subsample in levels(msidata\$annotation)){ | 132 for (subsample in levels(msidata\$annotation)){ |
133 subsample_pixels = msidata[,msidata\$annotation == subsample] | 133 subsample_pixels = msidata[,msidata\$annotation == subsample] |
134 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE) | 134 subsample_calc = apply(spectra(subsample_pixels),1,sd, na.rm=TRUE) |
135 sample_matrix = cbind(sample_matrix, subsample_calc) | 135 sample_matrix = cbind(sample_matrix, subsample_calc) |
136 count = count+1} | 136 count = count+1} |
137 sample_matrix_sd = cbind(mz_names,sample_matrix) | 137 sample_matrix_sd = cbind(mz_names,sample_matrix) |
138 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) | 138 sample_matrix_sd = rbind(c("mz name", levels(msidata\$annotation)), sample_matrix_sd) |
139 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") | 139 write.table(sample_matrix_sd, file="$summarized_sd", quote = FALSE, row.names = FALSE, col.names=FALSE, sep = "\t") |
154 | 154 |
155 ## pixel order | 155 ## pixel order |
156 pixelxyarray=1:length(pixels(msidata)) | 156 pixelxyarray=1:length(pixels(msidata)) |
157 | 157 |
158 ## number of pixels per spectrum: every intensity value > 0 counts as peak | 158 ## number of pixels per spectrum: every intensity value > 0 counts as peak |
159 peaksperpixel = apply(spectra(msidata)[]> 0, 2, sum, na.rm=TRUE) | 159 peaksperpixel = colSums(spectra(msidata)>0, na.rm=TRUE) |
160 | 160 |
161 ## Total ion chromatogram per spectrum | 161 ## Total ion chromatogram per spectrum |
162 TICs = round(apply(spectra(msidata)[],2, sum, na.rm=TRUE), digits = 2) | 162 TICs = round(colSums(spectra(msidata), na.rm=TRUE), digits = 2) |
163 | 163 |
164 ## Median ion intensity per spectrum | 164 ## Median ion intensity per spectrum |
165 med_int = round(apply(spectra(msidata)[], 2, median, na.rm=TRUE), digits = 2) | 165 med_int = round(apply(spectra(msidata), 2, median, na.rm=TRUE), digits = 2) |
166 | 166 |
167 ## Maximum ion intensity per spectrum | 167 ## Maximum ion intensity per spectrum |
168 max_int = round(apply(spectra(msidata)[], 2, max, na.rm=TRUE), digits = 2) | 168 max_int = round(apply(spectra(msidata), 2, max, na.rm=TRUE), digits = 2) |
169 | 169 |
170 ## Highest m/z per spectrum | 170 ## Highest m/z per spectrum |
171 highestmz = apply(spectra(msidata)[],2,which.max) | 171 highestmz = apply(spectra(msidata),2,which.max) |
172 highestmz_data = mz(msidata)[highestmz] | 172 highestmz_data = mz(msidata)[highestmz] |
173 | 173 |
174 ## Combine into dataframe; order is the same for all vectors | 174 ## Combine into dataframe; order is the same for all vectors |
175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) | 175 spectra_df = data.frame(pixel_names, xycoordinates, pixelxyarray, peaksperpixel, med_int, TICs, max_int, highestmz_data) |
176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") | 176 colnames(spectra_df) = c("spectra_names", "x_values", "y_values","pixel_order", "peaks_per_spectrum", "median_intensity", "spectrum_TIC", "maximum_intensity", "most_abundant_mz") |
195 | 195 |
196 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 | 196 ## filter for m/z window of each calibrant and calculate if sum of peak intensities > 0 |
197 | 197 |
198 for (mass in 1:length(inputcalibrantmasses)){ | 198 for (mass in 1:length(inputcalibrantmasses)){ |
199 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] | 199 filtered_data = msidata[mz(msidata) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata) <= inputcalibrantmasses[mass]+plusminusvalues[mass],] |
200 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data)[],na.rm=TRUE) > 0){ | 200 if (nrow(filtered_data) > 1 & sum(spectra(filtered_data),na.rm=TRUE) > 0){ |
201 ## intensity of all m/z > 0 | 201 ## intensity of all m/z > 0 |
202 intensity_sum = apply(spectra(filtered_data)[],2,sum, na.rm=TRUE) > 0 | 202 intensity_sum = colSums(spectra(filtered_data), na.rm=TRUE) > 0 |
203 | 203 |
204 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data)[], na.rm=TRUE) > 0){ | 204 }else if(nrow(filtered_data) == 1 & sum(spectra(filtered_data), na.rm=TRUE) > 0){ |
205 ## intensity of only m/z > 0 | 205 ## intensity of only m/z > 0 |
206 intensity_sum = spectra(filtered_data)[] > 0 | 206 intensity_sum = spectra(filtered_data) > 0 |
207 }else{ | 207 }else{ |
208 intensity_sum = rep(FALSE, ncol(filtered_data))} | 208 intensity_sum = rep(FALSE, ncol(filtered_data))} |
209 ## for each pixel add sum of intensities > 0 in the given m/z range | 209 ## for each pixel add sum of intensities > 0 in the given m/z range |
210 pixelmatrix = rbind(pixelmatrix, intensity_sum) | 210 pixelmatrix = rbind(pixelmatrix, intensity_sum) |
211 } | 211 } |