Mercurial > repos > azomics > flowsom_cross_comp
comparison FlowSOMMApIndividualFCS.R @ 1:a1054bd1060a draft
planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_cross_comp commit f2ed3cfcc71ccf61cf6058533d217443318fd024
author | azomics |
---|---|
date | Mon, 25 Sep 2023 22:04:40 +0000 |
parents | e796ed5dfd02 |
children |
comparison
equal
deleted
inserted
replaced
0:e796ed5dfd02 | 1:a1054bd1060a |
---|---|
14 library(FlowSOM) | 14 library(FlowSOM) |
15 library(flowCore) | 15 library(flowCore) |
16 | 16 |
17 ## geometric mean from | 17 ## geometric mean from |
18 # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in | 18 # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in |
19 gm_mean = function(x, na.rm=TRUE){ | 19 gm_mean <- function(x, na_rm = TRUE) { |
20 exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) | 20 exp(sum(log(x[x > 0]), na.rm = na_rm) / length(x)) |
21 } | 21 } |
22 | 22 |
23 prettyMarkerNames <- function(flowFrame){ | 23 pretty_marker_names <- function(flow_frame) { |
24 n <- flowFrame@parameters@data[, "name"] | 24 n <- flow_frame@parameters@data[, "name"] |
25 d <- flowFrame@parameters@data[, "desc"] | 25 d <- flow_frame@parameters@data[, "desc"] |
26 d[is.na(d)] <- n[is.na(d)] | 26 d[is.na(d)] <- n[is.na(d)] |
27 prettyNames <-list() | 27 pretty_names <- list() |
28 if(any(grepl("#",d))){ | 28 if (any(grepl("#", d))) { |
29 # Support for hashtag notation: | 29 # Support for hashtag notation: |
30 # antibody#fluorochrome -> antibody (fluorochrome) | 30 pretty_names <- gsub("#(.*)$", " (\\1)", d) |
31 prettyNames <- gsub("#(.*)$"," (\\1)",d) | |
32 } else { | 31 } else { |
33 prettyNames <- paste(d, " <", n, ">", sep="") | 32 pretty_names <- paste(d, " <", n, ">", sep = "") |
34 } | 33 } |
35 return(prettyNames) | 34 return(pretty_names) |
36 } | 35 } |
37 | 36 |
38 checkMarkers <- function(fcsfiles, flag_ff=FALSE){ | 37 check_markers <- function(fcsfiles, flag_ff = FALSE) { |
39 markerCheck <- T | 38 marker_check <- TRUE |
40 for (i in 1:length(fcsfiles)){ | 39 for (i in seq_along(fcsfiles)) { |
41 if (flag_ff){ | 40 if (flag_ff) { |
42 fcs <- readRDS(fcsfiles[i]) | 41 fcs <- readRDS(fcsfiles[i]) |
43 } else { | 42 } else { |
44 fcs <- read.FCS(fcsfiles[i], transformation=FALSE) | 43 fcs <- read.FCS(fcsfiles[i], transformation = FALSE) |
45 } | 44 } |
46 if (i == 1) { | 45 if (i == 1) { |
47 m1 <- as.vector(pData(parameters(fcs))$desc) | 46 m1 <- as.vector(pData(parameters(fcs))$desc) |
48 } else { | 47 } else { |
49 m2 <- as.vector(pData(parameters(fcs))$desc) | 48 m2 <- as.vector(pData(parameters(fcs))$desc) |
50 if (is.na(all(m1==m2))) { | 49 if (is.na(all(m1 == m2))) { |
51 mm1 <- is.na(m1) | 50 mm1 <- is.na(m1) |
52 mm2 <- is.na(m2) | 51 mm2 <- is.na(m2) |
53 if (all(mm1==mm2)){ | 52 if (all(mm1 == mm2)) { |
54 if (!all(m1==m2, na.rm=TRUE)){ | 53 if (!all(m1 == m2, na.rm = TRUE)) { |
55 markerCheck <- F | 54 marker_check <- FALSE |
56 } | 55 } |
57 } else { | 56 } else { |
58 markerCheck <- F | 57 marker_check <- FALSE |
59 } | 58 } |
60 } else if (!all(m1==m2)) { | 59 } else if (!all(m1 == m2)) { |
61 markerCheck <- F | 60 marker_check <- FALSE |
62 } | 61 } |
63 } | 62 } |
64 } | 63 } |
65 if (!markerCheck) { | 64 if (!marker_check) { |
66 quit(save = "no", status = 13, runLast = FALSE) | 65 quit(save = "no", status = 13, runLast = FALSE) |
67 } | 66 } |
68 } | 67 } |
69 | 68 |
70 mapToTree <- function(filenames, filepaths, flag_ff=FALSE, reftree, cluster=10, | 69 map_to_tree <- function(filenames, filepaths, |
71 outdir="",flag_meta=FALSE, mfi='mfi', stat1="", stat2="", | 70 flag_ff = FALSE, reftree, cluster = 10, |
72 stat3="", plot="", mplot="") { | 71 outdir = "", flag_meta = FALSE, |
73 | 72 mfi = "mfi", stat1 = "", stat2 = "", |
74 checkMarkers(filepaths, flag_ff) | 73 stat3 = "", plot = "", mplot = "") { |
75 | 74 |
75 check_markers(filepaths, flag_ff) | |
76 # get tree | 76 # get tree |
77 fst <- readRDS(reftree) | 77 fst <- readRDS(reftree) |
78 plots <- FALSE | 78 plots <- FALSE |
79 mplots <- FALSE | 79 mplots <- FALSE |
80 dir.create(outdir) | 80 dir.create(outdir) |
81 if (!plot==""){ | 81 if (!plot == "") { |
82 dir.create(plot) | 82 dir.create(plot) |
83 plots <- TRUE | 83 plots <- TRUE |
84 } | 84 } |
85 if (!mplot==""){ | 85 if (!mplot == "") { |
86 dir.create(mplot) | 86 dir.create(mplot) |
87 mplots <- TRUE | 87 mplots <- TRUE |
88 } | 88 } |
89 metaC <- metaClustering_consensus(fst$map$codes, k=cluster, seed=33) | 89 meta_c <- metaClustering_consensus(fst$map$codes, k = cluster, seed = 33) |
90 nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[,1]) | 90 nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[, 1]) |
91 nb_samples <- length(filepaths) | 91 nb_samples <- length(filepaths) |
92 nb_marker <- length(fst$prettyColnames) | 92 nb_marker <- length(fst$prettyColnames) |
93 print_markers <- gsub(' <.*>','',fst$prettyColnames) | 93 print_markers <- gsub(" <.*>", "", fst$prettyColnames) |
94 print_markers_ff <- append(print_markers, "Population") | 94 print_markers_ff <- append(print_markers, "Population") |
95 | 95 |
96 m_stat1 <- matrix(0, nrow=nb_samples, ncol=(nb_pop+2)) | 96 m_stat1 <- matrix(0, nrow = nb_samples, ncol = (nb_pop + 2)) |
97 colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop)) | 97 colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop)) |
98 | 98 |
99 sink(stat2) | 99 sink(stat2) |
100 cat(print_markers, sep="\t") | 100 cat(print_markers, sep = "\t") |
101 cat("\tPercentage\tPopulation\tSampleName\n") | 101 cat("\tPercentage\tPopulation\tSampleName\n") |
102 sink() | 102 sink() |
103 | 103 |
104 col_m3 <- c("Population") | 104 col_m3 <- c("Population") |
105 for (m in print_markers){ | 105 for (m in print_markers){ |
106 m1 <- paste(m, "mean", sep="_") | 106 m1 <- paste(m, "mean", sep = "_") |
107 m2 <- paste(m, "median", sep="_") | 107 m2 <- paste(m, "median", sep = "_") |
108 m3 <- paste(m, "stdev", sep="_") | 108 m3 <- paste(m, "stdev", sep = "_") |
109 col_m3 <- append(col_m3, c(m1,m2,m3)) | 109 col_m3 <- append(col_m3, c(m1, m2, m3)) |
110 } | 110 } |
111 col_stat3 <- c(col_m3, "Percentage_mean","Percentage_median","Percentage_stdev") | 111 col_stat3 <- c(col_m3, |
112 | 112 "Percentage_mean", |
113 for (i in 1:length(filepaths)){ | 113 "Percentage_median", |
114 if (flag_ff){ | 114 "Percentage_stdev") |
115 | |
116 for (i in seq_along(filepaths)) { | |
117 if (flag_ff) { | |
115 ff <- readRDS(filepaths[i]) | 118 ff <- readRDS(filepaths[i]) |
116 } else { | 119 } else { |
117 ff <- read.FCS(filepaths[i], transformation=FALSE) | 120 ff <- read.FCS(filepaths[i], transformation = FALSE) |
118 } | 121 } |
119 if (i==1) { | 122 if (i == 1) { |
120 # compare to tree markers | 123 # compare to tree markers |
121 pm <- prettyMarkerNames(ff) | 124 pm <- pretty_marker_names(ff) |
122 if (!all(fst$prettyColnames %in% pm)){ | 125 if (!all(fst$prettyColnames %in% pm)) { |
123 quit(save = "no", status = 14, runLast = FALSE) | 126 quit(save = "no", status = 14, runLast = FALSE) |
124 } | 127 } |
125 } | 128 } |
126 | 129 |
127 fsom <- NewData(fst, ff) | 130 fsom <- NewData(fst, ff) |
128 | 131 |
129 if (mplots){ | 132 if (mplots) { |
130 markers <- colnames(ff) | 133 markers <- colnames(ff) |
131 tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep="_") | 134 tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep = "_") |
132 pdf(file.path(mplot,tmpmplot), useDingbats=FALSE, onefile=TRUE) | 135 pdf(file.path(mplot, tmpmplot), useDingbats = FALSE, onefile = TRUE) |
133 for (marker in markers){ | 136 for (marker in markers){ |
134 PlotMarker(fsom, marker) | 137 PlotMarker(fsom, marker) |
135 } | 138 } |
136 dev.off() | 139 dev.off() |
137 } | 140 } |
138 | 141 |
139 if (!plot==""){ | 142 if (!plot == "") { |
140 plotpath <- paste(filenames[i], "tree.png", sep="_") | 143 plotpath <- paste(filenames[i], "tree.png", sep = "_") |
141 png(file.path(plot, plotpath), type="cairo", height=800, width=800) | 144 png(file.path(plot, plotpath), type = "cairo", height = 800, width = 800) |
142 PlotStars(fsom, backgroundValues = as.factor(metaC)) | 145 PlotStars(fsom, backgroundValues = as.factor(meta_c)) |
143 dev.off() | 146 dev.off() |
144 } | 147 } |
145 | 148 |
146 m <- matrix(0,nrow=nrow(ff),ncol=1) | 149 m <- matrix(0, nrow = nrow(ff), ncol = 1) |
147 s <- seq_len(nrow(ff)) | 150 s <- seq_len(nrow(ff)) |
148 if (flag_meta){ | 151 if (flag_meta) { |
149 m[s,] <- metaC[fsom$map$mapping[,1]] | 152 m[s, ] <- meta_c[fsom$map$mapping[, 1]] |
150 } else { | 153 } else { |
151 m[s,] <- fsom$map$mapping[,1] | 154 m[s, ] <- fsom$map$mapping[, 1] |
152 } | 155 } |
153 colnames(m) <- "FlowSOM" | 156 colnames(m) <- "FlowSOM" |
154 ff <- cbind2(ff,m) | 157 ff <- cbind2(ff, m) |
155 out <- exprs(ff) | 158 out <- exprs(ff) |
156 colnames(out) <- print_markers_ff | 159 colnames(out) <- print_markers_ff |
157 | 160 |
158 clr_table <- paste(filenames[i], "clustered.flowclr", sep="_") | 161 clr_table <- paste(filenames[i], "clustered.flowclr", sep = "_") |
159 write.table(out, file=file.path(outdir, clr_table), quote=F, row.names=F, col.names=T, sep='\t', | 162 write.table(out, |
160 append=F) | 163 file = file.path(outdir, clr_table), |
161 | 164 quote = FALSE, |
162 cluster_count <- table(out[,"Population"]) | 165 row.names = FALSE, |
163 cluster_prop <- prop.table(cluster_count)*100 | 166 col.names = TRUE, |
167 sep = "\t", | |
168 append = FALSE) | |
169 | |
170 cluster_count <- table(out[, "Population"]) | |
171 cluster_prop <- prop.table(cluster_count) * 100 | |
164 m1_tmp <- numeric(nb_pop) | 172 m1_tmp <- numeric(nb_pop) |
165 for (j in 1:nb_pop){ | 173 for (j in 1:nb_pop){ |
166 if (as.character(j) %in% names(cluster_count)){ | 174 if (as.character(j) %in% names(cluster_count)) { |
167 m1_tmp[[j]] <- format(round(cluster_prop[[as.character(j)]], 2), nsmall=2) | 175 m1_tmp[[j]] <- format( |
176 round(cluster_prop[[as.character(j)]], 2), | |
177 nsmall = 2) | |
168 } | 178 } |
169 } | 179 } |
170 samplename <- paste("Sample", i, sep="") | 180 samplename <- paste("Sample", i, sep = "") |
171 m_stat1[i,] <- c(filenames[i], samplename, m1_tmp) | 181 m_stat1[i, ] <- c(filenames[i], samplename, m1_tmp) |
172 # flowstat2 | 182 # flowstat2 |
173 # Marker1 Marker2 Marker3 ... Population Percentage SampleName | 183 # Marker1 Marker2 Marker3 ... Population Percentage SampleName |
174 # MFIs for each marker | 184 # MFIs for each marker |
175 # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters | 185 # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters |
176 if (mfi=="mfi"){ | 186 if (mfi == "mfi") { |
177 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), mean) | 187 m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), mean) |
178 } else if (mfi=="mdfi") { | 188 } else if (mfi == "mdfi") { |
179 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), median) | 189 m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), median) |
180 } else { | 190 } else { |
181 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), gm_mean) | 191 m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), gm_mean) |
182 } | 192 } |
183 | 193 |
184 m2["Percentage"] <- as.character(cluster_prop) | 194 m2["Percentage"] <- as.character(cluster_prop) |
185 m2["Population"] <- as.character(m2$Group.1) | 195 m2["Population"] <- as.character(m2$Group.1) |
186 m2["SampleName"] <- samplename | 196 m2["SampleName"] <- samplename |
187 m2t <- as.matrix(m2[2:length(m2)]) | 197 m2t <- as.matrix(m2[2:length(m2)]) |
188 write.table(m2t, file=stat2, quote=F, row.names=F, col.names=F, sep='\t', | 198 write.table(m2t, |
189 append=T) | 199 file = stat2, |
190 | 200 quote = FALSE, |
191 } | 201 row.names = FALSE, |
192 write.table(m_stat1, file=stat1, quote=F, row.names=F, col.names=T, sep='\t', | 202 col.names = FALSE, |
193 append=F) | 203 sep = "\t", |
194 | 204 append = TRUE) |
195 m2df <- read.table(stat2, sep="\t", header=T) | 205 |
196 ag <- aggregate(m2df[,0:nb_marker+1], list(m2df[,nb_marker+2]), function(x) c(mn=mean(x), med=median(x), stdv=sd(x))) | 206 } |
207 write.table(m_stat1, | |
208 file = stat1, | |
209 quote = FALSE, | |
210 row.names = FALSE, | |
211 col.names = TRUE, | |
212 sep = "\t", | |
213 append = FALSE) | |
214 | |
215 m2df <- read.table(stat2, sep = "\t", header = TRUE) | |
216 ag <- aggregate(m2df[, 0:nb_marker + 1], | |
217 list(m2df[, nb_marker + 2]), | |
218 function(x) c(mn = mean(x), med = median(x), stdv = sd(x))) | |
197 m3t <- as.matrix(ag) | 219 m3t <- as.matrix(ag) |
198 colnames(m3t) <- col_stat3 | 220 colnames(m3t) <- col_stat3 |
199 write.table(m3t, file=stat3, quote=F, row.names=F, col.names=T, sep='\t', | 221 write.table(m3t, file = stat3, |
200 append=F) | 222 quote = FALSE, |
201 } | 223 row.names = FALSE, |
202 | 224 col.names = TRUE, sep = "\t", |
203 flowFrameOrFCS <- function(filenames, filepaths, reftree, cluster=10, outdir="", | 225 append = FALSE) |
204 flag_meta=FALSE, mfi='mfi', stat1="", stat2="", | 226 } |
205 stat3="", plot="", mplot=""){ | 227 |
206 | 228 flow_frame_or_fcs <- function(filenames, |
207 isValid <- F | 229 filepaths, |
208 is_fcs <- F | 230 reftree, |
209 is_ff <- F | 231 cluster = 10, outdir = "", |
232 flag_meta = FALSE, | |
233 mfi = "mfi", stat1 = "", stat2 = "", | |
234 stat3 = "", plot = "", mplot = "") { | |
235 is_fcs <- FALSE | |
236 is_ff <- FALSE | |
210 flag_ff <- FALSE | 237 flag_ff <- FALSE |
211 i <- 0 | 238 i <- 0 |
212 for (f in filepaths){ | 239 for (f in filepaths){ |
213 tryCatch({ | 240 tryCatch({ |
214 is_fcs <- isFCSfile(f) | 241 is_fcs <- isFCSfile(f) |
215 }, error = function(ex) { | 242 }, error = function(ex) { |
216 print(paste(ex)) | 243 print(paste(ex)) |
217 }) | 244 }) |
218 if (!is_fcs){ | 245 if (!is_fcs) { |
219 tryCatch({ | 246 tryCatch({ |
220 ff <- readRDS(f) | 247 ff <- readRDS(f) |
221 is_ff <- T | 248 is_ff <- TRUE |
222 }, error = function(ex) { | 249 }, error = function(ex) { |
223 print(paste(ex)) | 250 print(paste(ex)) |
224 }) | 251 }) |
225 } else { | 252 } else { |
226 i <- i + 1 | 253 i <- i + 1 |
227 } | 254 } |
228 if (!is_ff && !is_fcs) { | 255 if (!is_ff && !is_fcs) { |
229 quit(save = "no", status = 10, runLast = FALSE) | 256 quit(save = "no", status = 10, runLast = FALSE) |
230 } | 257 } |
231 } | 258 } |
232 if (i==0) { | 259 if (i == 0) { |
233 flag_ff <- TRUE | 260 flag_ff <- TRUE |
234 } else if (!i==length(filenames)){ | 261 } else if (!i == length(filenames)) { |
235 quit(save = "no", status = 12, runLast = FALSE) | 262 quit(save = "no", status = 12, runLast = FALSE) |
236 } | 263 } |
237 mapToTree(filenames, filepaths, flag_ff, reftree, cluster, outdir, flag_meta, | 264 map_to_tree(filenames, |
238 mfi, stat1, stat2, stat3, plot, mplot) | 265 filepaths, |
266 flag_ff, reftree, cluster, outdir, flag_meta, | |
267 mfi, stat1, stat2, stat3, plot, mplot) | |
239 } | 268 } |
240 | 269 |
241 args <- commandArgs(trailingOnly = TRUE) | 270 args <- commandArgs(trailingOnly = TRUE) |
242 plot <- "" | 271 plot <- "" |
243 mplot <- "" | 272 mplot <- "" |
244 m <- 8 | 273 m <- 8 |
245 flag_meta <- FALSE | 274 flag_meta <- FALSE |
246 if (args[4]=='meta') { | 275 if (args[4] == "meta") { |
247 flag_meta <- TRUE | 276 flag_meta <- TRUE |
248 } | 277 } |
249 if (args[9] == 'newDataTrees') { | 278 if (args[9] == "newDataTrees") { |
250 plot <- 'newDataTrees' | 279 plot <- "newDataTrees" |
251 m <- m + 1 | 280 m <- m + 1 |
252 if (args[10] == 'newDataMarkers') { | 281 if (args[10] == "newDataMarkers") { |
253 mplot <- "newDataMarkers" | 282 mplot <- "newDataMarkers" |
254 m <- m + 1 | 283 m <- m + 1 |
255 } | 284 } |
256 } else if (args[9] == 'newDataMarkers') { | 285 } else if (args[9] == "newDataMarkers") { |
257 mplot <- "newDataMarkers" | 286 mplot <- "newDataMarkers" |
258 m <- m + 1 | 287 m <- m + 1 |
259 } | 288 } |
260 | 289 |
261 n <- m+1 | 290 n <- m + 1 |
262 # files + filenames | |
263 nb_files <- (length(args) - m) / 2 | 291 nb_files <- (length(args) - m) / 2 |
264 files1 <- character(nb_files) | 292 files1 <- character(nb_files) |
265 files2 <- character(nb_files) | 293 files2 <- character(nb_files) |
266 j <- 1 | 294 j <- 1 |
267 file_list <- args[n:length(args)] | 295 file_list <- args[n:length(args)] |
268 for (i in 1:length(file_list)) { | 296 for (i in seq_along(file_list)) { |
269 if (i%%2){ | 297 if (i %% 2) { |
270 files1[[j]] <- file_list[i] | 298 files1[[j]] <- file_list[i] |
271 files2[[j]] <- file_list[i+1] | 299 files2[[j]] <- file_list[i + 1] |
272 j <- j + 1 | 300 j <- j + 1 |
273 } | 301 } |
274 } | 302 } |
275 | 303 |
276 flowFrameOrFCS(files2, files1, args[1], as.numeric(args[3]), args[2], flag_meta, | 304 flow_frame_or_fcs(files2, |
277 args[5], args[6], args[7], args[8], plot, mplot) | 305 files1, args[1], as.numeric(args[3]), args[2], flag_meta, |
306 args[5], args[6], args[7], args[8], plot, mplot) |