Mercurial > repos > azomics > flowsom_compare
comparison FlowSOMCompare.R @ 1:33b8673272cf draft default tip
planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_compare commit bbff20e20dc2b9dbb40b613a0d5f16ee8132446d
author | azomics |
---|---|
date | Fri, 29 Sep 2023 07:19:42 +0000 |
parents | bd35f3b66a1e |
children |
comparison
equal
deleted
inserted
replaced
0:bd35f3b66a1e | 1:33b8673272cf |
---|---|
1 #!/usr/bin/Rscript | 1 #!/usr/bin/env Rscript |
2 # Module for Galaxy | 2 # Module for Galaxy |
3 # Compares groups of FCS to FlowSOM reference tree | 3 # Compares groups of FCS to FlowSOM reference tree |
4 # with FlowSOM | 4 # with FlowSOM |
5 ###################################################################### | 5 ###################################################################### |
6 # Copyright (c) 2017 Northrop Grumman. | 6 # Copyright (c) 2017 Northrop Grumman. |
12 # | 12 # |
13 # | 13 # |
14 library(FlowSOM) | 14 library(FlowSOM) |
15 library(flowCore) | 15 library(flowCore) |
16 | 16 |
17 checkFiles <- function(groups){ | 17 check_files <- function(groups) { |
18 all_files <- unlist(groups) | 18 all_files <- unlist(groups) |
19 all_unique <- unique(all_files) | 19 all_unique <- unique(all_files) |
20 if (length(all_unique) != length(all_files)) { | 20 if (length(all_unique) != length(all_files)) { |
21 quit(save = "no", status = 14, runLast = FALSE) | 21 quit(save = "no", status = 14, runLast = FALSE) |
22 } | 22 } |
23 } | 23 } |
24 | 24 |
25 compareLists <- function(m1, m2){ | 25 compare_lists <- function(m1, m2) { |
26 listCheck <- T | 26 list_check <- TRUE |
27 if (is.na(all(m1==m2))) { | 27 if (is.na(all(m1 == m2))) { |
28 mm1 <- is.na(m1) | 28 mm1 <- is.na(m1) |
29 mm2 <- is.na(m2) | 29 mm2 <- is.na(m2) |
30 if (all(mm1==mm2)){ | 30 if (all(mm1 == mm2)) { |
31 if (!all(m1==m2, na.rm=TRUE)){ | 31 if (!all(m1 == m2, na.rm = TRUE)) { |
32 listCheck <- F | 32 list_check <- FALSE |
33 } | 33 } |
34 } else { | 34 } else { |
35 listCheck <- F | 35 list_check <- FALSE |
36 } | 36 } |
37 } else if (!all(m1==m2)) { | 37 } else if (!all(m1 == m2)) { |
38 listCheck <- F | 38 list_check <- FALSE |
39 } | 39 } |
40 return(listCheck) | 40 return(list_check) |
41 } | 41 } |
42 | 42 |
43 prettyMarkerNames <- function(flowFrame){ | 43 pretty_marker_names <- function(flow_frame) { |
44 n <- flowFrame@parameters@data[, "name"] | 44 n <- flow_frame@parameters@data[, "name"] |
45 d <- flowFrame@parameters@data[, "desc"] | 45 d <- flow_frame@parameters@data[, "desc"] |
46 d[is.na(d)] <- n[is.na(d)] | 46 d[is.na(d)] <- n[is.na(d)] |
47 prettyNames <- list() | 47 pretty_names <- list() |
48 if(any(grepl("#",d))){ | 48 if (any(grepl("#", d))) { |
49 # Support for hashtag notation: | 49 # Support for hashtag notation: |
50 # antibody#fluorochrome -> antibody (fluorochrome) | 50 pretty_names <- gsub("#(.*)$", " (\\1)", d) |
51 prettyNames <- gsub("#(.*)$"," (\\1)",d) | |
52 } else { | 51 } else { |
53 prettyNames <- paste(d, " <", n, ">", sep="") | 52 pretty_names <- paste(d, " <", n, ">", sep = "") |
54 } | 53 } |
55 return(prettyNames) | 54 return(pretty_names) |
56 } | 55 } |
57 | 56 |
58 compareToTree <- function(fst, wilc_thresh=0.05, output="", plot="", stats, | 57 compare_to_tree <- function(fst, |
59 comp_groups, filenames) { | 58 wilc_thresh = 0.05, output = "", plot = "", stats, |
60 groupRes <- CountGroups(fst, groups=comp_groups, plot=FALSE) | 59 comp_groups, filenames) { |
61 pdf(plot, useDingbats=FALSE, onefile=TRUE) | 60 group_res <- CountGroups(fst, groups = comp_groups, plot = FALSE) |
61 pdf(plot, useDingbats = FALSE, onefile = TRUE) | |
62 tresh <- wilc_thresh | 62 tresh <- wilc_thresh |
63 pg <- PlotGroups(fst, groupRes, p_tresh=tresh) | 63 pg <- PlotGroups(fst, group_res, p_tresh = tresh) |
64 dev.off() | 64 dev.off() |
65 | 65 |
66 nb_nodes <- length(pg[[1]]) | 66 nb_nodes <- length(pg[[1]]) |
67 nb_comp <- length(pg) | 67 nb_comp <- length(pg) |
68 m <- matrix(0, nrow=nb_nodes, ncol=nb_comp+1) | 68 m <- matrix(0, nrow = nb_nodes, ncol = nb_comp + 1) |
69 s <- seq_len(nb_nodes) | 69 s <- seq_len(nb_nodes) |
70 m[,1]<- as.character(s) | 70 m[, 1] <- as.character(s) |
71 for (i in 1:nb_comp){ | 71 for (i in 1:nb_comp) { |
72 m[s,i+1]<- as.character(pg[[i]]) | 72 m[s, i + 1] <- as.character(pg[[i]]) |
73 } | 73 } |
74 groupnames <- attr(comp_groups,"names") | 74 groupnames <- attr(comp_groups, "names") |
75 out_colnames <- paste(groupnames, collapse="-") | 75 out_colnames <- paste(groupnames, collapse = "-") |
76 colnames(m) <- c("Node",out_colnames) | 76 colnames(m) <- c("Node", out_colnames) |
77 write.table(m, file=output, quote=F, row.names=F, col.names=T, sep='\t', | 77 write.table(m, file = output, quote = FALSE, row.names = FALSE, |
78 append=F) | 78 col.names = TRUE, sep = "\t", |
79 append = FALSE) | |
79 | 80 |
80 ## get filenames | 81 ## get filenames |
81 filepaths <- unlist(comp_groups) | 82 filepaths <- unlist(comp_groups) |
82 fnames <- unlist(filenames) | 83 fnames <- unlist(filenames) |
83 nb_files <- length(filepaths) | 84 nb_files <- length(filepaths) |
84 comp_files <- list() | 85 comp_files <- list() |
85 for (i in 1:length(filepaths)){ | 86 for (i in seq_along(filepaths)) { |
86 comp_files[[filepaths[[i]]]] <- fnames[[i]] | 87 comp_files[[filepaths[[i]]]] <- fnames[[i]] |
87 } | 88 } |
88 | 89 |
89 group_list <- list() | 90 group_list <- list() |
90 for (grp in attr(comp_groups, "names")) { | 91 for (grp in attr(comp_groups, "names")) { |
91 for (f in comp_groups[[grp]]){ | 92 for (f in comp_groups[[grp]]) { |
92 group_list[[f]] <- grp | 93 group_list[[f]] <- grp |
93 } | 94 } |
94 } | 95 } |
95 out_stats <- attr(stats, "names") | 96 out_stats <- attr(stats, "names") |
96 if ("counts" %in% out_stats){ | 97 if ("counts" %in% out_stats) { |
97 gp_counts <- as.matrix(groupRes$counts) | 98 gp_counts <- as.matrix(group_res$counts) |
98 tpc <- matrix("", nrow=nb_files, ncol=2) | 99 tpc <- matrix("", nrow = nb_files, ncol = 2) |
99 tpc[,1] <- as.character(lapply(rownames(gp_counts), function(x) comp_files[[x]])) | 100 tpc[, 1] <- as.character( |
100 tpc[,2] <- as.character(lapply(rownames(gp_counts), function(x) group_list[[x]])) | 101 lapply(rownames(gp_counts), |
102 function(x) comp_files[[x]])) | |
103 tpc[, 2] <- as.character( | |
104 lapply(rownames(gp_counts), | |
105 function(x) group_list[[x]])) | |
101 gp_counts <- cbind(tpc, gp_counts) | 106 gp_counts <- cbind(tpc, gp_counts) |
102 colnames(gp_counts)[[1]] <- "Filename" | 107 colnames(gp_counts)[[1]] <- "Filename" |
103 colnames(gp_counts)[[2]] <- "Group" | 108 colnames(gp_counts)[[2]] <- "Group" |
104 t_gp_counts <- t(gp_counts) | 109 t_gp_counts <- t(gp_counts) |
105 write.table(t_gp_counts, file=stats[["counts"]], quote=F, row.names=T, col.names=F, sep='\t', | 110 write.table(t_gp_counts, |
106 append=F) | 111 file = stats[["counts"]], |
107 } | 112 quote = FALSE, |
108 if ("pctgs" %in% out_stats){ | 113 row.names = TRUE, |
109 gp_prop <- as.matrix(groupRes$pctgs) | 114 col.names = FALSE, |
110 tpp <- matrix("", nrow=nb_files, ncol=2) | 115 sep = "\t", |
111 tpp[,1] <- as.character(lapply(rownames(gp_prop), function(x) comp_files[[x]])) | 116 append = FALSE) |
112 tpp[,2] <- as.character(lapply(rownames(gp_prop), function(x) group_list[[x]])) | 117 } |
118 if ("pctgs" %in% out_stats) { | |
119 gp_prop <- as.matrix(group_res$pctgs) | |
120 tpp <- matrix("", nrow = nb_files, ncol = 2) | |
121 tpp[, 1] <- as.character( | |
122 lapply(rownames(gp_prop), | |
123 function(x) comp_files[[x]])) | |
124 tpp[, 2] <- as.character( | |
125 lapply(rownames(gp_prop), | |
126 function(x) group_list[[x]])) | |
113 gp_prop <- cbind(tpp, gp_prop) | 127 gp_prop <- cbind(tpp, gp_prop) |
114 colnames(gp_prop)[[1]] <- "Filename" | 128 colnames(gp_prop)[[1]] <- "Filename" |
115 colnames(gp_prop)[[2]] <- "Group" | 129 colnames(gp_prop)[[2]] <- "Group" |
116 t_gp_prop <- t(gp_prop) | 130 t_gp_prop <- t(gp_prop) |
117 write.table(t_gp_prop, file=stats[["pctgs"]], quote=F, row.names=T, col.names=F, sep='\t', | 131 write.table(t_gp_prop, |
118 append=F) | 132 file = stats[["pctgs"]], |
119 } | 133 quote = FALSE, |
120 if ("means" %in% out_stats){ | 134 row.names = TRUE, |
121 gp_mean <- as.matrix(groupRes$means) | 135 col.names = FALSE, |
136 sep = "\t", | |
137 append = FALSE) | |
138 } | |
139 if ("means" %in% out_stats) { | |
140 gp_mean <- as.matrix(group_res$means) | |
122 t_gp_mean <- t(gp_mean) | 141 t_gp_mean <- t(gp_mean) |
123 tpm <- matrix(0, nrow=nb_nodes, ncol=1) | 142 tpm <- matrix(0, nrow = nb_nodes, ncol = 1) |
124 tpm[,1] <- seq_len(nb_nodes) | 143 tpm[, 1] <- seq_len(nb_nodes) |
125 t_gp_mean <- cbind(tpm, t_gp_mean) | 144 t_gp_mean <- cbind(tpm, t_gp_mean) |
126 colnames(t_gp_mean)[[1]] <- "Nodes" | 145 colnames(t_gp_mean)[[1]] <- "Nodes" |
127 write.table(t_gp_mean, file=stats[["means"]], quote=F, row.names=F, col.names=T, sep='\t', | 146 write.table(t_gp_mean, |
128 append=F) | 147 file = stats[["means"]], |
129 } | 148 quote = FALSE, |
130 if ("medians" %in% out_stats){ | 149 row.names = FALSE, |
131 gp_med <- as.matrix(groupRes$medians) | 150 col.names = TRUE, |
151 sep = "\t", | |
152 append = FALSE) | |
153 } | |
154 if ("medians" %in% out_stats) { | |
155 gp_med <- as.matrix(group_res$medians) | |
132 t_gp_med <- t(gp_med) | 156 t_gp_med <- t(gp_med) |
133 tpd <- matrix(0, nrow=nb_nodes, ncol=1) | 157 tpd <- matrix(0, nrow = nb_nodes, ncol = 1) |
134 tpd[,1] <- seq_len(nb_nodes) | 158 tpd[, 1] <- seq_len(nb_nodes) |
135 t_gp_med <- cbind(tpd, t_gp_med) | 159 t_gp_med <- cbind(tpd, t_gp_med) |
136 colnames(t_gp_med)[[1]] <- "Nodes" | 160 colnames(t_gp_med)[[1]] <- "Nodes" |
137 write.table(t_gp_med, file=stats[["medians"]], quote=F, row.names=F, col.names=T, sep='\t', | 161 write.table(t_gp_med, |
138 append=F) | 162 file = stats[["medians"]], |
139 } | 163 quote = FALSE, |
140 } | 164 row.names = FALSE, |
141 | 165 col.names = TRUE, |
142 checkFCS <- function(tree, output="", plot="", thresh = 0.05, stats, groups, | 166 sep = "\t", |
143 filenames) { | 167 append = FALSE) |
168 } | |
169 } | |
170 | |
171 check_fcs <- function(tree, | |
172 output = "", plot = "", thresh = 0.05, stats, groups, | |
173 filenames) { | |
144 | 174 |
145 fcsfiles <- unlist(groups) | 175 fcsfiles <- unlist(groups) |
146 tree_valid <- F | 176 tree_valid <- FALSE |
147 markerCheck <- T | 177 marker_check <- TRUE |
148 tryCatch({ | 178 tryCatch({ |
149 fsomtree <- readRDS(tree) | 179 fsomtree <- readRDS(tree) |
150 tree_valid <- T | 180 tree_valid <- TRUE |
151 }, error = function(ex) { | 181 }, error = function(ex) { |
152 print(paste(ex)) | 182 print(paste(ex)) |
153 }) | 183 }) |
154 | 184 |
155 fst <- if (length(fsomtree)==2) fsomtree[[1]] else fsomtree | 185 fst <- if (length(fsomtree) == 2) fsomtree[[1]] else fsomtree |
156 | 186 |
157 if (tree_valid){ | 187 if (tree_valid) { |
158 tree_markers <- as.vector(fst$prettyColnames) | 188 tree_markers <- as.vector(fst$prettyColnames) |
159 tree_channels <- as.vector(colnames(fst$data)) | 189 if (length(tree_markers) < 1) { |
160 if (length(tree_markers) < 1){ | |
161 quit(save = "no", status = 11, runLast = FALSE) | 190 quit(save = "no", status = 11, runLast = FALSE) |
162 } | 191 } |
163 } else { | 192 } else { |
164 quit(save = "no", status = 11, runLast = FALSE) | 193 quit(save = "no", status = 11, runLast = FALSE) |
165 } | 194 } |
166 | 195 |
167 for (i in 1:length(fcsfiles)){ | 196 for (i in seq_along(fcsfiles)) { |
168 is_file_valid <- F | |
169 tryCatch({ | 197 tryCatch({ |
170 fcs <- read.FCS(fcsfiles[i], transformation=FALSE) | 198 fcs <- read.FCS(fcsfiles[i], transformation = FALSE) |
171 is_file_valid <- T | |
172 }, error = function(ex) { | 199 }, error = function(ex) { |
173 print(paste(ex)) | 200 print(paste(ex)) |
174 }) | 201 }) |
175 if (i == 1) { | 202 if (i == 1) { |
176 m1 <- as.vector(pData(parameters(fcs))$desc) | 203 m1 <- as.vector(pData(parameters(fcs))$desc) |
177 c1 <- colnames(fcs) | 204 c1 <- colnames(fcs) |
178 # compare to tree markers | 205 # compare to tree markers |
179 pm <- prettyMarkerNames(fcs) | 206 pm <- pretty_marker_names(fcs) |
180 if (!all(tree_markers %in% pm)){ | 207 if (!all(tree_markers %in% pm)) { |
181 quit(save = "no", status = 13, runLast = FALSE) | 208 quit(save = "no", status = 13, runLast = FALSE) |
182 } | 209 } |
183 } else { | 210 } else { |
184 m2 <- as.vector(pData(parameters(fcs))$desc) | 211 m2 <- as.vector(pData(parameters(fcs))$desc) |
185 c2 <- colnames(fcs) | 212 c2 <- colnames(fcs) |
186 markerCheck <- compareLists(m1,m2) | 213 marker_check <- compare_lists(m1, m2) |
187 markerChannel <- compareLists(c1,c2) | 214 marker_channel <- compare_lists(c1, c2) |
188 } | 215 } |
189 } | 216 } |
190 if (markerCheck && markerChannel) { | 217 if (marker_check && marker_channel) { |
191 compareToTree(fst, thresh, output, plot, stats, groups, filenames) | 218 compare_to_tree(fst, thresh, output, plot, stats, groups, filenames) |
192 } else { | 219 } else { |
193 quit(save = "no", status = 12, runLast = FALSE) | 220 quit(save = "no", status = 12, runLast = FALSE) |
194 } | 221 } |
195 } | 222 } |
196 | 223 |
199 first_g1 <- 5 | 226 first_g1 <- 5 |
200 tot_args <- length(args) | 227 tot_args <- length(args) |
201 g <- list() | 228 g <- list() |
202 tmplist <- c("counts", "means", "medians", "pctgs") | 229 tmplist <- c("counts", "means", "medians", "pctgs") |
203 | 230 |
204 for (i in 5:13){ | 231 for (i in 5:13) { |
205 if (args[i] %in% tmplist){ | 232 if (args[i] %in% tmplist) { |
206 first_g1 <- first_g1 + 2 | 233 first_g1 <- first_g1 + 2 |
207 g[[args[i]]] <- args[i+1] | 234 g[[args[i]]] <- args[i + 1] |
208 } | 235 } |
209 } | 236 } |
210 | 237 |
211 tmpargs <- paste(args[first_g1:tot_args], collapse="=%=") | 238 tmpargs <- paste(args[first_g1:tot_args], collapse = "=%=") |
212 tmpgroups <- strsplit(tmpargs, "=%=DONE=%=") | 239 tmpgroups <- strsplit(tmpargs, "=%=DONE=%=") |
213 | 240 |
214 groups <- list() | 241 groups <- list() |
215 filenames <- list() | 242 filenames <- list() |
216 for (gps in tmpgroups[[1]]) { | 243 for (gps in tmpgroups[[1]]) { |
217 tmpgroup <- strsplit(gps, "=%=") | 244 tmpgroup <- strsplit(gps, "=%=") |
218 nb_files <- (length(tmpgroup[[1]]) - 1 ) /2 | 245 nb_files <- (length(tmpgroup[[1]]) - 1) / 2 |
219 tmplist <- character(nb_files) | 246 tmplist <- character(nb_files) |
220 tmpnames <- character(nb_files) | 247 tmpnames <- character(nb_files) |
221 j <- 1 | 248 j <- 1 |
222 for (i in 2:length(tmpgroup[[1]])){ | 249 for (i in 2:length(tmpgroup[[1]])) { |
223 if (!i%%2){ | 250 if (!i %% 2) { |
224 tmplist[[j]] <- tmpgroup[[1]][i] | 251 tmplist[[j]] <- tmpgroup[[1]][i] |
225 tmpnames[[j]]<- tmpgroup[[1]][i+1] | 252 tmpnames[[j]] <- tmpgroup[[1]][i + 1] |
226 j <- j + 1 | 253 j <- j + 1 |
227 } | 254 } |
228 } | 255 } |
229 groups[[tmpgroup[[1]][1]]] <- tmplist | 256 groups[[tmpgroup[[1]][1]]] <- tmplist |
230 filenames[[tmpgroup[[1]][1]]] <- tmpnames | 257 filenames[[tmpgroup[[1]][1]]] <- tmpnames |
231 } | 258 } |
232 | 259 |
233 checkFiles(groups) | 260 check_files(groups) |
234 checkFCS(args[1], args[2], args[3], args[4], g, groups, filenames) | 261 check_fcs(args[1], args[2], args[3], args[4], g, groups, filenames) |