comparison FlowSOMMApIndividualFCS.R @ 1:a1054bd1060a draft

planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_cross_comp commit f2ed3cfcc71ccf61cf6058533d217443318fd024
author azomics
date Mon, 25 Sep 2023 22:04:40 +0000
parents e796ed5dfd02
children
comparison
equal deleted inserted replaced
0:e796ed5dfd02 1:a1054bd1060a
14 library(FlowSOM) 14 library(FlowSOM)
15 library(flowCore) 15 library(flowCore)
16 16
17 ## geometric mean from 17 ## geometric mean from
18 # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in 18 # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in
19 gm_mean = function(x, na.rm=TRUE){ 19 gm_mean <- function(x, na_rm = TRUE) {
20 exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) 20 exp(sum(log(x[x > 0]), na.rm = na_rm) / length(x))
21 } 21 }
22 22
23 prettyMarkerNames <- function(flowFrame){ 23 pretty_marker_names <- function(flow_frame) {
24 n <- flowFrame@parameters@data[, "name"] 24 n <- flow_frame@parameters@data[, "name"]
25 d <- flowFrame@parameters@data[, "desc"] 25 d <- flow_frame@parameters@data[, "desc"]
26 d[is.na(d)] <- n[is.na(d)] 26 d[is.na(d)] <- n[is.na(d)]
27 prettyNames <-list() 27 pretty_names <- list()
28 if(any(grepl("#",d))){ 28 if (any(grepl("#", d))) {
29 # Support for hashtag notation: 29 # Support for hashtag notation:
30 # antibody#fluorochrome -> antibody (fluorochrome) 30 pretty_names <- gsub("#(.*)$", " (\\1)", d)
31 prettyNames <- gsub("#(.*)$"," (\\1)",d)
32 } else { 31 } else {
33 prettyNames <- paste(d, " <", n, ">", sep="") 32 pretty_names <- paste(d, " <", n, ">", sep = "")
34 } 33 }
35 return(prettyNames) 34 return(pretty_names)
36 } 35 }
37 36
38 checkMarkers <- function(fcsfiles, flag_ff=FALSE){ 37 check_markers <- function(fcsfiles, flag_ff = FALSE) {
39 markerCheck <- T 38 marker_check <- TRUE
40 for (i in 1:length(fcsfiles)){ 39 for (i in seq_along(fcsfiles)) {
41 if (flag_ff){ 40 if (flag_ff) {
42 fcs <- readRDS(fcsfiles[i]) 41 fcs <- readRDS(fcsfiles[i])
43 } else { 42 } else {
44 fcs <- read.FCS(fcsfiles[i], transformation=FALSE) 43 fcs <- read.FCS(fcsfiles[i], transformation = FALSE)
45 } 44 }
46 if (i == 1) { 45 if (i == 1) {
47 m1 <- as.vector(pData(parameters(fcs))$desc) 46 m1 <- as.vector(pData(parameters(fcs))$desc)
48 } else { 47 } else {
49 m2 <- as.vector(pData(parameters(fcs))$desc) 48 m2 <- as.vector(pData(parameters(fcs))$desc)
50 if (is.na(all(m1==m2))) { 49 if (is.na(all(m1 == m2))) {
51 mm1 <- is.na(m1) 50 mm1 <- is.na(m1)
52 mm2 <- is.na(m2) 51 mm2 <- is.na(m2)
53 if (all(mm1==mm2)){ 52 if (all(mm1 == mm2)) {
54 if (!all(m1==m2, na.rm=TRUE)){ 53 if (!all(m1 == m2, na.rm = TRUE)) {
55 markerCheck <- F 54 marker_check <- FALSE
56 } 55 }
57 } else { 56 } else {
58 markerCheck <- F 57 marker_check <- FALSE
59 } 58 }
60 } else if (!all(m1==m2)) { 59 } else if (!all(m1 == m2)) {
61 markerCheck <- F 60 marker_check <- FALSE
62 } 61 }
63 } 62 }
64 } 63 }
65 if (!markerCheck) { 64 if (!marker_check) {
66 quit(save = "no", status = 13, runLast = FALSE) 65 quit(save = "no", status = 13, runLast = FALSE)
67 } 66 }
68 } 67 }
69 68
70 mapToTree <- function(filenames, filepaths, flag_ff=FALSE, reftree, cluster=10, 69 map_to_tree <- function(filenames, filepaths,
71 outdir="",flag_meta=FALSE, mfi='mfi', stat1="", stat2="", 70 flag_ff = FALSE, reftree, cluster = 10,
72 stat3="", plot="", mplot="") { 71 outdir = "", flag_meta = FALSE,
73 72 mfi = "mfi", stat1 = "", stat2 = "",
74 checkMarkers(filepaths, flag_ff) 73 stat3 = "", plot = "", mplot = "") {
75 74
75 check_markers(filepaths, flag_ff)
76 # get tree 76 # get tree
77 fst <- readRDS(reftree) 77 fst <- readRDS(reftree)
78 plots <- FALSE 78 plots <- FALSE
79 mplots <- FALSE 79 mplots <- FALSE
80 dir.create(outdir) 80 dir.create(outdir)
81 if (!plot==""){ 81 if (!plot == "") {
82 dir.create(plot) 82 dir.create(plot)
83 plots <- TRUE 83 plots <- TRUE
84 } 84 }
85 if (!mplot==""){ 85 if (!mplot == "") {
86 dir.create(mplot) 86 dir.create(mplot)
87 mplots <- TRUE 87 mplots <- TRUE
88 } 88 }
89 metaC <- metaClustering_consensus(fst$map$codes, k=cluster, seed=33) 89 meta_c <- metaClustering_consensus(fst$map$codes, k = cluster, seed = 33)
90 nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[,1]) 90 nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[, 1])
91 nb_samples <- length(filepaths) 91 nb_samples <- length(filepaths)
92 nb_marker <- length(fst$prettyColnames) 92 nb_marker <- length(fst$prettyColnames)
93 print_markers <- gsub(' <.*>','',fst$prettyColnames) 93 print_markers <- gsub(" <.*>", "", fst$prettyColnames)
94 print_markers_ff <- append(print_markers, "Population") 94 print_markers_ff <- append(print_markers, "Population")
95 95
96 m_stat1 <- matrix(0, nrow=nb_samples, ncol=(nb_pop+2)) 96 m_stat1 <- matrix(0, nrow = nb_samples, ncol = (nb_pop + 2))
97 colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop)) 97 colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop))
98 98
99 sink(stat2) 99 sink(stat2)
100 cat(print_markers, sep="\t") 100 cat(print_markers, sep = "\t")
101 cat("\tPercentage\tPopulation\tSampleName\n") 101 cat("\tPercentage\tPopulation\tSampleName\n")
102 sink() 102 sink()
103 103
104 col_m3 <- c("Population") 104 col_m3 <- c("Population")
105 for (m in print_markers){ 105 for (m in print_markers){
106 m1 <- paste(m, "mean", sep="_") 106 m1 <- paste(m, "mean", sep = "_")
107 m2 <- paste(m, "median", sep="_") 107 m2 <- paste(m, "median", sep = "_")
108 m3 <- paste(m, "stdev", sep="_") 108 m3 <- paste(m, "stdev", sep = "_")
109 col_m3 <- append(col_m3, c(m1,m2,m3)) 109 col_m3 <- append(col_m3, c(m1, m2, m3))
110 } 110 }
111 col_stat3 <- c(col_m3, "Percentage_mean","Percentage_median","Percentage_stdev") 111 col_stat3 <- c(col_m3,
112 112 "Percentage_mean",
113 for (i in 1:length(filepaths)){ 113 "Percentage_median",
114 if (flag_ff){ 114 "Percentage_stdev")
115
116 for (i in seq_along(filepaths)) {
117 if (flag_ff) {
115 ff <- readRDS(filepaths[i]) 118 ff <- readRDS(filepaths[i])
116 } else { 119 } else {
117 ff <- read.FCS(filepaths[i], transformation=FALSE) 120 ff <- read.FCS(filepaths[i], transformation = FALSE)
118 } 121 }
119 if (i==1) { 122 if (i == 1) {
120 # compare to tree markers 123 # compare to tree markers
121 pm <- prettyMarkerNames(ff) 124 pm <- pretty_marker_names(ff)
122 if (!all(fst$prettyColnames %in% pm)){ 125 if (!all(fst$prettyColnames %in% pm)) {
123 quit(save = "no", status = 14, runLast = FALSE) 126 quit(save = "no", status = 14, runLast = FALSE)
124 } 127 }
125 } 128 }
126 129
127 fsom <- NewData(fst, ff) 130 fsom <- NewData(fst, ff)
128 131
129 if (mplots){ 132 if (mplots) {
130 markers <- colnames(ff) 133 markers <- colnames(ff)
131 tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep="_") 134 tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep = "_")
132 pdf(file.path(mplot,tmpmplot), useDingbats=FALSE, onefile=TRUE) 135 pdf(file.path(mplot, tmpmplot), useDingbats = FALSE, onefile = TRUE)
133 for (marker in markers){ 136 for (marker in markers){
134 PlotMarker(fsom, marker) 137 PlotMarker(fsom, marker)
135 } 138 }
136 dev.off() 139 dev.off()
137 } 140 }
138 141
139 if (!plot==""){ 142 if (!plot == "") {
140 plotpath <- paste(filenames[i], "tree.png", sep="_") 143 plotpath <- paste(filenames[i], "tree.png", sep = "_")
141 png(file.path(plot, plotpath), type="cairo", height=800, width=800) 144 png(file.path(plot, plotpath), type = "cairo", height = 800, width = 800)
142 PlotStars(fsom, backgroundValues = as.factor(metaC)) 145 PlotStars(fsom, backgroundValues = as.factor(meta_c))
143 dev.off() 146 dev.off()
144 } 147 }
145 148
146 m <- matrix(0,nrow=nrow(ff),ncol=1) 149 m <- matrix(0, nrow = nrow(ff), ncol = 1)
147 s <- seq_len(nrow(ff)) 150 s <- seq_len(nrow(ff))
148 if (flag_meta){ 151 if (flag_meta) {
149 m[s,] <- metaC[fsom$map$mapping[,1]] 152 m[s, ] <- meta_c[fsom$map$mapping[, 1]]
150 } else { 153 } else {
151 m[s,] <- fsom$map$mapping[,1] 154 m[s, ] <- fsom$map$mapping[, 1]
152 } 155 }
153 colnames(m) <- "FlowSOM" 156 colnames(m) <- "FlowSOM"
154 ff <- cbind2(ff,m) 157 ff <- cbind2(ff, m)
155 out <- exprs(ff) 158 out <- exprs(ff)
156 colnames(out) <- print_markers_ff 159 colnames(out) <- print_markers_ff
157 160
158 clr_table <- paste(filenames[i], "clustered.flowclr", sep="_") 161 clr_table <- paste(filenames[i], "clustered.flowclr", sep = "_")
159 write.table(out, file=file.path(outdir, clr_table), quote=F, row.names=F, col.names=T, sep='\t', 162 write.table(out,
160 append=F) 163 file = file.path(outdir, clr_table),
161 164 quote = FALSE,
162 cluster_count <- table(out[,"Population"]) 165 row.names = FALSE,
163 cluster_prop <- prop.table(cluster_count)*100 166 col.names = TRUE,
167 sep = "\t",
168 append = FALSE)
169
170 cluster_count <- table(out[, "Population"])
171 cluster_prop <- prop.table(cluster_count) * 100
164 m1_tmp <- numeric(nb_pop) 172 m1_tmp <- numeric(nb_pop)
165 for (j in 1:nb_pop){ 173 for (j in 1:nb_pop){
166 if (as.character(j) %in% names(cluster_count)){ 174 if (as.character(j) %in% names(cluster_count)) {
167 m1_tmp[[j]] <- format(round(cluster_prop[[as.character(j)]], 2), nsmall=2) 175 m1_tmp[[j]] <- format(
176 round(cluster_prop[[as.character(j)]], 2),
177 nsmall = 2)
168 } 178 }
169 } 179 }
170 samplename <- paste("Sample", i, sep="") 180 samplename <- paste("Sample", i, sep = "")
171 m_stat1[i,] <- c(filenames[i], samplename, m1_tmp) 181 m_stat1[i, ] <- c(filenames[i], samplename, m1_tmp)
172 # flowstat2 182 # flowstat2
173 # Marker1 Marker2 Marker3 ... Population Percentage SampleName 183 # Marker1 Marker2 Marker3 ... Population Percentage SampleName
174 # MFIs for each marker 184 # MFIs for each marker
175 # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters 185 # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters
176 if (mfi=="mfi"){ 186 if (mfi == "mfi") {
177 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), mean) 187 m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), mean)
178 } else if (mfi=="mdfi") { 188 } else if (mfi == "mdfi") {
179 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), median) 189 m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), median)
180 } else { 190 } else {
181 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), gm_mean) 191 m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), gm_mean)
182 } 192 }
183 193
184 m2["Percentage"] <- as.character(cluster_prop) 194 m2["Percentage"] <- as.character(cluster_prop)
185 m2["Population"] <- as.character(m2$Group.1) 195 m2["Population"] <- as.character(m2$Group.1)
186 m2["SampleName"] <- samplename 196 m2["SampleName"] <- samplename
187 m2t <- as.matrix(m2[2:length(m2)]) 197 m2t <- as.matrix(m2[2:length(m2)])
188 write.table(m2t, file=stat2, quote=F, row.names=F, col.names=F, sep='\t', 198 write.table(m2t,
189 append=T) 199 file = stat2,
190 200 quote = FALSE,
191 } 201 row.names = FALSE,
192 write.table(m_stat1, file=stat1, quote=F, row.names=F, col.names=T, sep='\t', 202 col.names = FALSE,
193 append=F) 203 sep = "\t",
194 204 append = TRUE)
195 m2df <- read.table(stat2, sep="\t", header=T) 205
196 ag <- aggregate(m2df[,0:nb_marker+1], list(m2df[,nb_marker+2]), function(x) c(mn=mean(x), med=median(x), stdv=sd(x))) 206 }
207 write.table(m_stat1,
208 file = stat1,
209 quote = FALSE,
210 row.names = FALSE,
211 col.names = TRUE,
212 sep = "\t",
213 append = FALSE)
214
215 m2df <- read.table(stat2, sep = "\t", header = TRUE)
216 ag <- aggregate(m2df[, 0:nb_marker + 1],
217 list(m2df[, nb_marker + 2]),
218 function(x) c(mn = mean(x), med = median(x), stdv = sd(x)))
197 m3t <- as.matrix(ag) 219 m3t <- as.matrix(ag)
198 colnames(m3t) <- col_stat3 220 colnames(m3t) <- col_stat3
199 write.table(m3t, file=stat3, quote=F, row.names=F, col.names=T, sep='\t', 221 write.table(m3t, file = stat3,
200 append=F) 222 quote = FALSE,
201 } 223 row.names = FALSE,
202 224 col.names = TRUE, sep = "\t",
203 flowFrameOrFCS <- function(filenames, filepaths, reftree, cluster=10, outdir="", 225 append = FALSE)
204 flag_meta=FALSE, mfi='mfi', stat1="", stat2="", 226 }
205 stat3="", plot="", mplot=""){ 227
206 228 flow_frame_or_fcs <- function(filenames,
207 isValid <- F 229 filepaths,
208 is_fcs <- F 230 reftree,
209 is_ff <- F 231 cluster = 10, outdir = "",
232 flag_meta = FALSE,
233 mfi = "mfi", stat1 = "", stat2 = "",
234 stat3 = "", plot = "", mplot = "") {
235 is_fcs <- FALSE
236 is_ff <- FALSE
210 flag_ff <- FALSE 237 flag_ff <- FALSE
211 i <- 0 238 i <- 0
212 for (f in filepaths){ 239 for (f in filepaths){
213 tryCatch({ 240 tryCatch({
214 is_fcs <- isFCSfile(f) 241 is_fcs <- isFCSfile(f)
215 }, error = function(ex) { 242 }, error = function(ex) {
216 print(paste(ex)) 243 print(paste(ex))
217 }) 244 })
218 if (!is_fcs){ 245 if (!is_fcs) {
219 tryCatch({ 246 tryCatch({
220 ff <- readRDS(f) 247 ff <- readRDS(f)
221 is_ff <- T 248 is_ff <- TRUE
222 }, error = function(ex) { 249 }, error = function(ex) {
223 print(paste(ex)) 250 print(paste(ex))
224 }) 251 })
225 } else { 252 } else {
226 i <- i + 1 253 i <- i + 1
227 } 254 }
228 if (!is_ff && !is_fcs) { 255 if (!is_ff && !is_fcs) {
229 quit(save = "no", status = 10, runLast = FALSE) 256 quit(save = "no", status = 10, runLast = FALSE)
230 } 257 }
231 } 258 }
232 if (i==0) { 259 if (i == 0) {
233 flag_ff <- TRUE 260 flag_ff <- TRUE
234 } else if (!i==length(filenames)){ 261 } else if (!i == length(filenames)) {
235 quit(save = "no", status = 12, runLast = FALSE) 262 quit(save = "no", status = 12, runLast = FALSE)
236 } 263 }
237 mapToTree(filenames, filepaths, flag_ff, reftree, cluster, outdir, flag_meta, 264 map_to_tree(filenames,
238 mfi, stat1, stat2, stat3, plot, mplot) 265 filepaths,
266 flag_ff, reftree, cluster, outdir, flag_meta,
267 mfi, stat1, stat2, stat3, plot, mplot)
239 } 268 }
240 269
241 args <- commandArgs(trailingOnly = TRUE) 270 args <- commandArgs(trailingOnly = TRUE)
242 plot <- "" 271 plot <- ""
243 mplot <- "" 272 mplot <- ""
244 m <- 8 273 m <- 8
245 flag_meta <- FALSE 274 flag_meta <- FALSE
246 if (args[4]=='meta') { 275 if (args[4] == "meta") {
247 flag_meta <- TRUE 276 flag_meta <- TRUE
248 } 277 }
249 if (args[9] == 'newDataTrees') { 278 if (args[9] == "newDataTrees") {
250 plot <- 'newDataTrees' 279 plot <- "newDataTrees"
251 m <- m + 1 280 m <- m + 1
252 if (args[10] == 'newDataMarkers') { 281 if (args[10] == "newDataMarkers") {
253 mplot <- "newDataMarkers" 282 mplot <- "newDataMarkers"
254 m <- m + 1 283 m <- m + 1
255 } 284 }
256 } else if (args[9] == 'newDataMarkers') { 285 } else if (args[9] == "newDataMarkers") {
257 mplot <- "newDataMarkers" 286 mplot <- "newDataMarkers"
258 m <- m + 1 287 m <- m + 1
259 } 288 }
260 289
261 n <- m+1 290 n <- m + 1
262 # files + filenames
263 nb_files <- (length(args) - m) / 2 291 nb_files <- (length(args) - m) / 2
264 files1 <- character(nb_files) 292 files1 <- character(nb_files)
265 files2 <- character(nb_files) 293 files2 <- character(nb_files)
266 j <- 1 294 j <- 1
267 file_list <- args[n:length(args)] 295 file_list <- args[n:length(args)]
268 for (i in 1:length(file_list)) { 296 for (i in seq_along(file_list)) {
269 if (i%%2){ 297 if (i %% 2) {
270 files1[[j]] <- file_list[i] 298 files1[[j]] <- file_list[i]
271 files2[[j]] <- file_list[i+1] 299 files2[[j]] <- file_list[i + 1]
272 j <- j + 1 300 j <- j + 1
273 } 301 }
274 } 302 }
275 303
276 flowFrameOrFCS(files2, files1, args[1], as.numeric(args[3]), args[2], flag_meta, 304 flow_frame_or_fcs(files2,
277 args[5], args[6], args[7], args[8], plot, mplot) 305 files1, args[1], as.numeric(args[3]), args[2], flag_meta,
306 args[5], args[6], args[7], args[8], plot, mplot)