Mercurial > repos > azomics > flowsom_cross_comp
comparison FlowSOMMApIndividualFCS.R @ 0:e796ed5dfd02 draft
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_cross_comp commit 2f04d23cb47d9b233d159c54ff406e56c87746e0"
author | azomics |
---|---|
date | Tue, 23 Jun 2020 19:49:33 -0400 |
parents | |
children | a1054bd1060a |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e796ed5dfd02 |
---|---|
1 #!/usr/bin/Rscript | |
2 # Module for Galaxy | |
3 # Generates FlowSOM reference tree | |
4 # with FlowSOM AggregateFlowFrames | |
5 ###################################################################### | |
6 # Copyright (c) 2017 Northrop Grumman. | |
7 # All rights reserved. | |
8 ###################################################################### | |
9 # | |
10 # Version 1 | |
11 # Cristel Thomas | |
12 # | |
13 # | |
14 library(FlowSOM) | |
15 library(flowCore) | |
16 | |
17 ## geometric mean from | |
18 # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in | |
19 gm_mean = function(x, na.rm=TRUE){ | |
20 exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) | |
21 } | |
22 | |
23 prettyMarkerNames <- function(flowFrame){ | |
24 n <- flowFrame@parameters@data[, "name"] | |
25 d <- flowFrame@parameters@data[, "desc"] | |
26 d[is.na(d)] <- n[is.na(d)] | |
27 prettyNames <-list() | |
28 if(any(grepl("#",d))){ | |
29 # Support for hashtag notation: | |
30 # antibody#fluorochrome -> antibody (fluorochrome) | |
31 prettyNames <- gsub("#(.*)$"," (\\1)",d) | |
32 } else { | |
33 prettyNames <- paste(d, " <", n, ">", sep="") | |
34 } | |
35 return(prettyNames) | |
36 } | |
37 | |
38 checkMarkers <- function(fcsfiles, flag_ff=FALSE){ | |
39 markerCheck <- T | |
40 for (i in 1:length(fcsfiles)){ | |
41 if (flag_ff){ | |
42 fcs <- readRDS(fcsfiles[i]) | |
43 } else { | |
44 fcs <- read.FCS(fcsfiles[i], transformation=FALSE) | |
45 } | |
46 if (i == 1) { | |
47 m1 <- as.vector(pData(parameters(fcs))$desc) | |
48 } else { | |
49 m2 <- as.vector(pData(parameters(fcs))$desc) | |
50 if (is.na(all(m1==m2))) { | |
51 mm1 <- is.na(m1) | |
52 mm2 <- is.na(m2) | |
53 if (all(mm1==mm2)){ | |
54 if (!all(m1==m2, na.rm=TRUE)){ | |
55 markerCheck <- F | |
56 } | |
57 } else { | |
58 markerCheck <- F | |
59 } | |
60 } else if (!all(m1==m2)) { | |
61 markerCheck <- F | |
62 } | |
63 } | |
64 } | |
65 if (!markerCheck) { | |
66 quit(save = "no", status = 13, runLast = FALSE) | |
67 } | |
68 } | |
69 | |
70 mapToTree <- function(filenames, filepaths, flag_ff=FALSE, reftree, cluster=10, | |
71 outdir="",flag_meta=FALSE, mfi='mfi', stat1="", stat2="", | |
72 stat3="", plot="", mplot="") { | |
73 | |
74 checkMarkers(filepaths, flag_ff) | |
75 | |
76 # get tree | |
77 fst <- readRDS(reftree) | |
78 plots <- FALSE | |
79 mplots <- FALSE | |
80 dir.create(outdir) | |
81 if (!plot==""){ | |
82 dir.create(plot) | |
83 plots <- TRUE | |
84 } | |
85 if (!mplot==""){ | |
86 dir.create(mplot) | |
87 mplots <- TRUE | |
88 } | |
89 metaC <- metaClustering_consensus(fst$map$codes, k=cluster, seed=33) | |
90 nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[,1]) | |
91 nb_samples <- length(filepaths) | |
92 nb_marker <- length(fst$prettyColnames) | |
93 print_markers <- gsub(' <.*>','',fst$prettyColnames) | |
94 print_markers_ff <- append(print_markers, "Population") | |
95 | |
96 m_stat1 <- matrix(0, nrow=nb_samples, ncol=(nb_pop+2)) | |
97 colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop)) | |
98 | |
99 sink(stat2) | |
100 cat(print_markers, sep="\t") | |
101 cat("\tPercentage\tPopulation\tSampleName\n") | |
102 sink() | |
103 | |
104 col_m3 <- c("Population") | |
105 for (m in print_markers){ | |
106 m1 <- paste(m, "mean", sep="_") | |
107 m2 <- paste(m, "median", sep="_") | |
108 m3 <- paste(m, "stdev", sep="_") | |
109 col_m3 <- append(col_m3, c(m1,m2,m3)) | |
110 } | |
111 col_stat3 <- c(col_m3, "Percentage_mean","Percentage_median","Percentage_stdev") | |
112 | |
113 for (i in 1:length(filepaths)){ | |
114 if (flag_ff){ | |
115 ff <- readRDS(filepaths[i]) | |
116 } else { | |
117 ff <- read.FCS(filepaths[i], transformation=FALSE) | |
118 } | |
119 if (i==1) { | |
120 # compare to tree markers | |
121 pm <- prettyMarkerNames(ff) | |
122 if (!all(fst$prettyColnames %in% pm)){ | |
123 quit(save = "no", status = 14, runLast = FALSE) | |
124 } | |
125 } | |
126 | |
127 fsom <- NewData(fst, ff) | |
128 | |
129 if (mplots){ | |
130 markers <- colnames(ff) | |
131 tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep="_") | |
132 pdf(file.path(mplot,tmpmplot), useDingbats=FALSE, onefile=TRUE) | |
133 for (marker in markers){ | |
134 PlotMarker(fsom, marker) | |
135 } | |
136 dev.off() | |
137 } | |
138 | |
139 if (!plot==""){ | |
140 plotpath <- paste(filenames[i], "tree.png", sep="_") | |
141 png(file.path(plot, plotpath), type="cairo", height=800, width=800) | |
142 PlotStars(fsom, backgroundValues = as.factor(metaC)) | |
143 dev.off() | |
144 } | |
145 | |
146 m <- matrix(0,nrow=nrow(ff),ncol=1) | |
147 s <- seq_len(nrow(ff)) | |
148 if (flag_meta){ | |
149 m[s,] <- metaC[fsom$map$mapping[,1]] | |
150 } else { | |
151 m[s,] <- fsom$map$mapping[,1] | |
152 } | |
153 colnames(m) <- "FlowSOM" | |
154 ff <- cbind2(ff,m) | |
155 out <- exprs(ff) | |
156 colnames(out) <- print_markers_ff | |
157 | |
158 clr_table <- paste(filenames[i], "clustered.flowclr", sep="_") | |
159 write.table(out, file=file.path(outdir, clr_table), quote=F, row.names=F, col.names=T, sep='\t', | |
160 append=F) | |
161 | |
162 cluster_count <- table(out[,"Population"]) | |
163 cluster_prop <- prop.table(cluster_count)*100 | |
164 m1_tmp <- numeric(nb_pop) | |
165 for (j in 1:nb_pop){ | |
166 if (as.character(j) %in% names(cluster_count)){ | |
167 m1_tmp[[j]] <- format(round(cluster_prop[[as.character(j)]], 2), nsmall=2) | |
168 } | |
169 } | |
170 samplename <- paste("Sample", i, sep="") | |
171 m_stat1[i,] <- c(filenames[i], samplename, m1_tmp) | |
172 # flowstat2 | |
173 # Marker1 Marker2 Marker3 ... Population Percentage SampleName | |
174 # MFIs for each marker | |
175 # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters | |
176 if (mfi=="mfi"){ | |
177 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), mean) | |
178 } else if (mfi=="mdfi") { | |
179 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), median) | |
180 } else { | |
181 m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), gm_mean) | |
182 } | |
183 | |
184 m2["Percentage"] <- as.character(cluster_prop) | |
185 m2["Population"] <- as.character(m2$Group.1) | |
186 m2["SampleName"] <- samplename | |
187 m2t <- as.matrix(m2[2:length(m2)]) | |
188 write.table(m2t, file=stat2, quote=F, row.names=F, col.names=F, sep='\t', | |
189 append=T) | |
190 | |
191 } | |
192 write.table(m_stat1, file=stat1, quote=F, row.names=F, col.names=T, sep='\t', | |
193 append=F) | |
194 | |
195 m2df <- read.table(stat2, sep="\t", header=T) | |
196 ag <- aggregate(m2df[,0:nb_marker+1], list(m2df[,nb_marker+2]), function(x) c(mn=mean(x), med=median(x), stdv=sd(x))) | |
197 m3t <- as.matrix(ag) | |
198 colnames(m3t) <- col_stat3 | |
199 write.table(m3t, file=stat3, quote=F, row.names=F, col.names=T, sep='\t', | |
200 append=F) | |
201 } | |
202 | |
203 flowFrameOrFCS <- function(filenames, filepaths, reftree, cluster=10, outdir="", | |
204 flag_meta=FALSE, mfi='mfi', stat1="", stat2="", | |
205 stat3="", plot="", mplot=""){ | |
206 | |
207 isValid <- F | |
208 is_fcs <- F | |
209 is_ff <- F | |
210 flag_ff <- FALSE | |
211 i <- 0 | |
212 for (f in filepaths){ | |
213 tryCatch({ | |
214 is_fcs <- isFCSfile(f) | |
215 }, error = function(ex) { | |
216 print(paste(ex)) | |
217 }) | |
218 if (!is_fcs){ | |
219 tryCatch({ | |
220 ff <- readRDS(f) | |
221 is_ff <- T | |
222 }, error = function(ex) { | |
223 print(paste(ex)) | |
224 }) | |
225 } else { | |
226 i <- i + 1 | |
227 } | |
228 if (!is_ff && !is_fcs) { | |
229 quit(save = "no", status = 10, runLast = FALSE) | |
230 } | |
231 } | |
232 if (i==0) { | |
233 flag_ff <- TRUE | |
234 } else if (!i==length(filenames)){ | |
235 quit(save = "no", status = 12, runLast = FALSE) | |
236 } | |
237 mapToTree(filenames, filepaths, flag_ff, reftree, cluster, outdir, flag_meta, | |
238 mfi, stat1, stat2, stat3, plot, mplot) | |
239 } | |
240 | |
241 args <- commandArgs(trailingOnly = TRUE) | |
242 plot <- "" | |
243 mplot <- "" | |
244 m <- 8 | |
245 flag_meta <- FALSE | |
246 if (args[4]=='meta') { | |
247 flag_meta <- TRUE | |
248 } | |
249 if (args[9] == 'newDataTrees') { | |
250 plot <- 'newDataTrees' | |
251 m <- m + 1 | |
252 if (args[10] == 'newDataMarkers') { | |
253 mplot <- "newDataMarkers" | |
254 m <- m + 1 | |
255 } | |
256 } else if (args[9] == 'newDataMarkers') { | |
257 mplot <- "newDataMarkers" | |
258 m <- m + 1 | |
259 } | |
260 | |
261 n <- m+1 | |
262 # files + filenames | |
263 nb_files <- (length(args) - m) / 2 | |
264 files1 <- character(nb_files) | |
265 files2 <- character(nb_files) | |
266 j <- 1 | |
267 file_list <- args[n:length(args)] | |
268 for (i in 1:length(file_list)) { | |
269 if (i%%2){ | |
270 files1[[j]] <- file_list[i] | |
271 files2[[j]] <- file_list[i+1] | |
272 j <- j + 1 | |
273 } | |
274 } | |
275 | |
276 flowFrameOrFCS(files2, files1, args[1], as.numeric(args[3]), args[2], flag_meta, | |
277 args[5], args[6], args[7], args[8], plot, mplot) |