Mercurial > repos > azomics > flowsom_compare
comparison FlowSOMCompare.R @ 1:33b8673272cf draft default tip
planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_compare commit bbff20e20dc2b9dbb40b613a0d5f16ee8132446d
| author | azomics |
|---|---|
| date | Fri, 29 Sep 2023 07:19:42 +0000 |
| parents | bd35f3b66a1e |
| children |
comparison
equal
deleted
inserted
replaced
| 0:bd35f3b66a1e | 1:33b8673272cf |
|---|---|
| 1 #!/usr/bin/Rscript | 1 #!/usr/bin/env Rscript |
| 2 # Module for Galaxy | 2 # Module for Galaxy |
| 3 # Compares groups of FCS to FlowSOM reference tree | 3 # Compares groups of FCS to FlowSOM reference tree |
| 4 # with FlowSOM | 4 # with FlowSOM |
| 5 ###################################################################### | 5 ###################################################################### |
| 6 # Copyright (c) 2017 Northrop Grumman. | 6 # Copyright (c) 2017 Northrop Grumman. |
| 12 # | 12 # |
| 13 # | 13 # |
| 14 library(FlowSOM) | 14 library(FlowSOM) |
| 15 library(flowCore) | 15 library(flowCore) |
| 16 | 16 |
| 17 checkFiles <- function(groups){ | 17 check_files <- function(groups) { |
| 18 all_files <- unlist(groups) | 18 all_files <- unlist(groups) |
| 19 all_unique <- unique(all_files) | 19 all_unique <- unique(all_files) |
| 20 if (length(all_unique) != length(all_files)) { | 20 if (length(all_unique) != length(all_files)) { |
| 21 quit(save = "no", status = 14, runLast = FALSE) | 21 quit(save = "no", status = 14, runLast = FALSE) |
| 22 } | 22 } |
| 23 } | 23 } |
| 24 | 24 |
| 25 compareLists <- function(m1, m2){ | 25 compare_lists <- function(m1, m2) { |
| 26 listCheck <- T | 26 list_check <- TRUE |
| 27 if (is.na(all(m1==m2))) { | 27 if (is.na(all(m1 == m2))) { |
| 28 mm1 <- is.na(m1) | 28 mm1 <- is.na(m1) |
| 29 mm2 <- is.na(m2) | 29 mm2 <- is.na(m2) |
| 30 if (all(mm1==mm2)){ | 30 if (all(mm1 == mm2)) { |
| 31 if (!all(m1==m2, na.rm=TRUE)){ | 31 if (!all(m1 == m2, na.rm = TRUE)) { |
| 32 listCheck <- F | 32 list_check <- FALSE |
| 33 } | 33 } |
| 34 } else { | 34 } else { |
| 35 listCheck <- F | 35 list_check <- FALSE |
| 36 } | 36 } |
| 37 } else if (!all(m1==m2)) { | 37 } else if (!all(m1 == m2)) { |
| 38 listCheck <- F | 38 list_check <- FALSE |
| 39 } | 39 } |
| 40 return(listCheck) | 40 return(list_check) |
| 41 } | 41 } |
| 42 | 42 |
| 43 prettyMarkerNames <- function(flowFrame){ | 43 pretty_marker_names <- function(flow_frame) { |
| 44 n <- flowFrame@parameters@data[, "name"] | 44 n <- flow_frame@parameters@data[, "name"] |
| 45 d <- flowFrame@parameters@data[, "desc"] | 45 d <- flow_frame@parameters@data[, "desc"] |
| 46 d[is.na(d)] <- n[is.na(d)] | 46 d[is.na(d)] <- n[is.na(d)] |
| 47 prettyNames <- list() | 47 pretty_names <- list() |
| 48 if(any(grepl("#",d))){ | 48 if (any(grepl("#", d))) { |
| 49 # Support for hashtag notation: | 49 # Support for hashtag notation: |
| 50 # antibody#fluorochrome -> antibody (fluorochrome) | 50 pretty_names <- gsub("#(.*)$", " (\\1)", d) |
| 51 prettyNames <- gsub("#(.*)$"," (\\1)",d) | |
| 52 } else { | 51 } else { |
| 53 prettyNames <- paste(d, " <", n, ">", sep="") | 52 pretty_names <- paste(d, " <", n, ">", sep = "") |
| 54 } | 53 } |
| 55 return(prettyNames) | 54 return(pretty_names) |
| 56 } | 55 } |
| 57 | 56 |
| 58 compareToTree <- function(fst, wilc_thresh=0.05, output="", plot="", stats, | 57 compare_to_tree <- function(fst, |
| 59 comp_groups, filenames) { | 58 wilc_thresh = 0.05, output = "", plot = "", stats, |
| 60 groupRes <- CountGroups(fst, groups=comp_groups, plot=FALSE) | 59 comp_groups, filenames) { |
| 61 pdf(plot, useDingbats=FALSE, onefile=TRUE) | 60 group_res <- CountGroups(fst, groups = comp_groups, plot = FALSE) |
| 61 pdf(plot, useDingbats = FALSE, onefile = TRUE) | |
| 62 tresh <- wilc_thresh | 62 tresh <- wilc_thresh |
| 63 pg <- PlotGroups(fst, groupRes, p_tresh=tresh) | 63 pg <- PlotGroups(fst, group_res, p_tresh = tresh) |
| 64 dev.off() | 64 dev.off() |
| 65 | 65 |
| 66 nb_nodes <- length(pg[[1]]) | 66 nb_nodes <- length(pg[[1]]) |
| 67 nb_comp <- length(pg) | 67 nb_comp <- length(pg) |
| 68 m <- matrix(0, nrow=nb_nodes, ncol=nb_comp+1) | 68 m <- matrix(0, nrow = nb_nodes, ncol = nb_comp + 1) |
| 69 s <- seq_len(nb_nodes) | 69 s <- seq_len(nb_nodes) |
| 70 m[,1]<- as.character(s) | 70 m[, 1] <- as.character(s) |
| 71 for (i in 1:nb_comp){ | 71 for (i in 1:nb_comp) { |
| 72 m[s,i+1]<- as.character(pg[[i]]) | 72 m[s, i + 1] <- as.character(pg[[i]]) |
| 73 } | 73 } |
| 74 groupnames <- attr(comp_groups,"names") | 74 groupnames <- attr(comp_groups, "names") |
| 75 out_colnames <- paste(groupnames, collapse="-") | 75 out_colnames <- paste(groupnames, collapse = "-") |
| 76 colnames(m) <- c("Node",out_colnames) | 76 colnames(m) <- c("Node", out_colnames) |
| 77 write.table(m, file=output, quote=F, row.names=F, col.names=T, sep='\t', | 77 write.table(m, file = output, quote = FALSE, row.names = FALSE, |
| 78 append=F) | 78 col.names = TRUE, sep = "\t", |
| 79 append = FALSE) | |
| 79 | 80 |
| 80 ## get filenames | 81 ## get filenames |
| 81 filepaths <- unlist(comp_groups) | 82 filepaths <- unlist(comp_groups) |
| 82 fnames <- unlist(filenames) | 83 fnames <- unlist(filenames) |
| 83 nb_files <- length(filepaths) | 84 nb_files <- length(filepaths) |
| 84 comp_files <- list() | 85 comp_files <- list() |
| 85 for (i in 1:length(filepaths)){ | 86 for (i in seq_along(filepaths)) { |
| 86 comp_files[[filepaths[[i]]]] <- fnames[[i]] | 87 comp_files[[filepaths[[i]]]] <- fnames[[i]] |
| 87 } | 88 } |
| 88 | 89 |
| 89 group_list <- list() | 90 group_list <- list() |
| 90 for (grp in attr(comp_groups, "names")) { | 91 for (grp in attr(comp_groups, "names")) { |
| 91 for (f in comp_groups[[grp]]){ | 92 for (f in comp_groups[[grp]]) { |
| 92 group_list[[f]] <- grp | 93 group_list[[f]] <- grp |
| 93 } | 94 } |
| 94 } | 95 } |
| 95 out_stats <- attr(stats, "names") | 96 out_stats <- attr(stats, "names") |
| 96 if ("counts" %in% out_stats){ | 97 if ("counts" %in% out_stats) { |
| 97 gp_counts <- as.matrix(groupRes$counts) | 98 gp_counts <- as.matrix(group_res$counts) |
| 98 tpc <- matrix("", nrow=nb_files, ncol=2) | 99 tpc <- matrix("", nrow = nb_files, ncol = 2) |
| 99 tpc[,1] <- as.character(lapply(rownames(gp_counts), function(x) comp_files[[x]])) | 100 tpc[, 1] <- as.character( |
| 100 tpc[,2] <- as.character(lapply(rownames(gp_counts), function(x) group_list[[x]])) | 101 lapply(rownames(gp_counts), |
| 102 function(x) comp_files[[x]])) | |
| 103 tpc[, 2] <- as.character( | |
| 104 lapply(rownames(gp_counts), | |
| 105 function(x) group_list[[x]])) | |
| 101 gp_counts <- cbind(tpc, gp_counts) | 106 gp_counts <- cbind(tpc, gp_counts) |
| 102 colnames(gp_counts)[[1]] <- "Filename" | 107 colnames(gp_counts)[[1]] <- "Filename" |
| 103 colnames(gp_counts)[[2]] <- "Group" | 108 colnames(gp_counts)[[2]] <- "Group" |
| 104 t_gp_counts <- t(gp_counts) | 109 t_gp_counts <- t(gp_counts) |
| 105 write.table(t_gp_counts, file=stats[["counts"]], quote=F, row.names=T, col.names=F, sep='\t', | 110 write.table(t_gp_counts, |
| 106 append=F) | 111 file = stats[["counts"]], |
| 107 } | 112 quote = FALSE, |
| 108 if ("pctgs" %in% out_stats){ | 113 row.names = TRUE, |
| 109 gp_prop <- as.matrix(groupRes$pctgs) | 114 col.names = FALSE, |
| 110 tpp <- matrix("", nrow=nb_files, ncol=2) | 115 sep = "\t", |
| 111 tpp[,1] <- as.character(lapply(rownames(gp_prop), function(x) comp_files[[x]])) | 116 append = FALSE) |
| 112 tpp[,2] <- as.character(lapply(rownames(gp_prop), function(x) group_list[[x]])) | 117 } |
| 118 if ("pctgs" %in% out_stats) { | |
| 119 gp_prop <- as.matrix(group_res$pctgs) | |
| 120 tpp <- matrix("", nrow = nb_files, ncol = 2) | |
| 121 tpp[, 1] <- as.character( | |
| 122 lapply(rownames(gp_prop), | |
| 123 function(x) comp_files[[x]])) | |
| 124 tpp[, 2] <- as.character( | |
| 125 lapply(rownames(gp_prop), | |
| 126 function(x) group_list[[x]])) | |
| 113 gp_prop <- cbind(tpp, gp_prop) | 127 gp_prop <- cbind(tpp, gp_prop) |
| 114 colnames(gp_prop)[[1]] <- "Filename" | 128 colnames(gp_prop)[[1]] <- "Filename" |
| 115 colnames(gp_prop)[[2]] <- "Group" | 129 colnames(gp_prop)[[2]] <- "Group" |
| 116 t_gp_prop <- t(gp_prop) | 130 t_gp_prop <- t(gp_prop) |
| 117 write.table(t_gp_prop, file=stats[["pctgs"]], quote=F, row.names=T, col.names=F, sep='\t', | 131 write.table(t_gp_prop, |
| 118 append=F) | 132 file = stats[["pctgs"]], |
| 119 } | 133 quote = FALSE, |
| 120 if ("means" %in% out_stats){ | 134 row.names = TRUE, |
| 121 gp_mean <- as.matrix(groupRes$means) | 135 col.names = FALSE, |
| 136 sep = "\t", | |
| 137 append = FALSE) | |
| 138 } | |
| 139 if ("means" %in% out_stats) { | |
| 140 gp_mean <- as.matrix(group_res$means) | |
| 122 t_gp_mean <- t(gp_mean) | 141 t_gp_mean <- t(gp_mean) |
| 123 tpm <- matrix(0, nrow=nb_nodes, ncol=1) | 142 tpm <- matrix(0, nrow = nb_nodes, ncol = 1) |
| 124 tpm[,1] <- seq_len(nb_nodes) | 143 tpm[, 1] <- seq_len(nb_nodes) |
| 125 t_gp_mean <- cbind(tpm, t_gp_mean) | 144 t_gp_mean <- cbind(tpm, t_gp_mean) |
| 126 colnames(t_gp_mean)[[1]] <- "Nodes" | 145 colnames(t_gp_mean)[[1]] <- "Nodes" |
| 127 write.table(t_gp_mean, file=stats[["means"]], quote=F, row.names=F, col.names=T, sep='\t', | 146 write.table(t_gp_mean, |
| 128 append=F) | 147 file = stats[["means"]], |
| 129 } | 148 quote = FALSE, |
| 130 if ("medians" %in% out_stats){ | 149 row.names = FALSE, |
| 131 gp_med <- as.matrix(groupRes$medians) | 150 col.names = TRUE, |
| 151 sep = "\t", | |
| 152 append = FALSE) | |
| 153 } | |
| 154 if ("medians" %in% out_stats) { | |
| 155 gp_med <- as.matrix(group_res$medians) | |
| 132 t_gp_med <- t(gp_med) | 156 t_gp_med <- t(gp_med) |
| 133 tpd <- matrix(0, nrow=nb_nodes, ncol=1) | 157 tpd <- matrix(0, nrow = nb_nodes, ncol = 1) |
| 134 tpd[,1] <- seq_len(nb_nodes) | 158 tpd[, 1] <- seq_len(nb_nodes) |
| 135 t_gp_med <- cbind(tpd, t_gp_med) | 159 t_gp_med <- cbind(tpd, t_gp_med) |
| 136 colnames(t_gp_med)[[1]] <- "Nodes" | 160 colnames(t_gp_med)[[1]] <- "Nodes" |
| 137 write.table(t_gp_med, file=stats[["medians"]], quote=F, row.names=F, col.names=T, sep='\t', | 161 write.table(t_gp_med, |
| 138 append=F) | 162 file = stats[["medians"]], |
| 139 } | 163 quote = FALSE, |
| 140 } | 164 row.names = FALSE, |
| 141 | 165 col.names = TRUE, |
| 142 checkFCS <- function(tree, output="", plot="", thresh = 0.05, stats, groups, | 166 sep = "\t", |
| 143 filenames) { | 167 append = FALSE) |
| 168 } | |
| 169 } | |
| 170 | |
| 171 check_fcs <- function(tree, | |
| 172 output = "", plot = "", thresh = 0.05, stats, groups, | |
| 173 filenames) { | |
| 144 | 174 |
| 145 fcsfiles <- unlist(groups) | 175 fcsfiles <- unlist(groups) |
| 146 tree_valid <- F | 176 tree_valid <- FALSE |
| 147 markerCheck <- T | 177 marker_check <- TRUE |
| 148 tryCatch({ | 178 tryCatch({ |
| 149 fsomtree <- readRDS(tree) | 179 fsomtree <- readRDS(tree) |
| 150 tree_valid <- T | 180 tree_valid <- TRUE |
| 151 }, error = function(ex) { | 181 }, error = function(ex) { |
| 152 print(paste(ex)) | 182 print(paste(ex)) |
| 153 }) | 183 }) |
| 154 | 184 |
| 155 fst <- if (length(fsomtree)==2) fsomtree[[1]] else fsomtree | 185 fst <- if (length(fsomtree) == 2) fsomtree[[1]] else fsomtree |
| 156 | 186 |
| 157 if (tree_valid){ | 187 if (tree_valid) { |
| 158 tree_markers <- as.vector(fst$prettyColnames) | 188 tree_markers <- as.vector(fst$prettyColnames) |
| 159 tree_channels <- as.vector(colnames(fst$data)) | 189 if (length(tree_markers) < 1) { |
| 160 if (length(tree_markers) < 1){ | |
| 161 quit(save = "no", status = 11, runLast = FALSE) | 190 quit(save = "no", status = 11, runLast = FALSE) |
| 162 } | 191 } |
| 163 } else { | 192 } else { |
| 164 quit(save = "no", status = 11, runLast = FALSE) | 193 quit(save = "no", status = 11, runLast = FALSE) |
| 165 } | 194 } |
| 166 | 195 |
| 167 for (i in 1:length(fcsfiles)){ | 196 for (i in seq_along(fcsfiles)) { |
| 168 is_file_valid <- F | |
| 169 tryCatch({ | 197 tryCatch({ |
| 170 fcs <- read.FCS(fcsfiles[i], transformation=FALSE) | 198 fcs <- read.FCS(fcsfiles[i], transformation = FALSE) |
| 171 is_file_valid <- T | |
| 172 }, error = function(ex) { | 199 }, error = function(ex) { |
| 173 print(paste(ex)) | 200 print(paste(ex)) |
| 174 }) | 201 }) |
| 175 if (i == 1) { | 202 if (i == 1) { |
| 176 m1 <- as.vector(pData(parameters(fcs))$desc) | 203 m1 <- as.vector(pData(parameters(fcs))$desc) |
| 177 c1 <- colnames(fcs) | 204 c1 <- colnames(fcs) |
| 178 # compare to tree markers | 205 # compare to tree markers |
| 179 pm <- prettyMarkerNames(fcs) | 206 pm <- pretty_marker_names(fcs) |
| 180 if (!all(tree_markers %in% pm)){ | 207 if (!all(tree_markers %in% pm)) { |
| 181 quit(save = "no", status = 13, runLast = FALSE) | 208 quit(save = "no", status = 13, runLast = FALSE) |
| 182 } | 209 } |
| 183 } else { | 210 } else { |
| 184 m2 <- as.vector(pData(parameters(fcs))$desc) | 211 m2 <- as.vector(pData(parameters(fcs))$desc) |
| 185 c2 <- colnames(fcs) | 212 c2 <- colnames(fcs) |
| 186 markerCheck <- compareLists(m1,m2) | 213 marker_check <- compare_lists(m1, m2) |
| 187 markerChannel <- compareLists(c1,c2) | 214 marker_channel <- compare_lists(c1, c2) |
| 188 } | 215 } |
| 189 } | 216 } |
| 190 if (markerCheck && markerChannel) { | 217 if (marker_check && marker_channel) { |
| 191 compareToTree(fst, thresh, output, plot, stats, groups, filenames) | 218 compare_to_tree(fst, thresh, output, plot, stats, groups, filenames) |
| 192 } else { | 219 } else { |
| 193 quit(save = "no", status = 12, runLast = FALSE) | 220 quit(save = "no", status = 12, runLast = FALSE) |
| 194 } | 221 } |
| 195 } | 222 } |
| 196 | 223 |
| 199 first_g1 <- 5 | 226 first_g1 <- 5 |
| 200 tot_args <- length(args) | 227 tot_args <- length(args) |
| 201 g <- list() | 228 g <- list() |
| 202 tmplist <- c("counts", "means", "medians", "pctgs") | 229 tmplist <- c("counts", "means", "medians", "pctgs") |
| 203 | 230 |
| 204 for (i in 5:13){ | 231 for (i in 5:13) { |
| 205 if (args[i] %in% tmplist){ | 232 if (args[i] %in% tmplist) { |
| 206 first_g1 <- first_g1 + 2 | 233 first_g1 <- first_g1 + 2 |
| 207 g[[args[i]]] <- args[i+1] | 234 g[[args[i]]] <- args[i + 1] |
| 208 } | 235 } |
| 209 } | 236 } |
| 210 | 237 |
| 211 tmpargs <- paste(args[first_g1:tot_args], collapse="=%=") | 238 tmpargs <- paste(args[first_g1:tot_args], collapse = "=%=") |
| 212 tmpgroups <- strsplit(tmpargs, "=%=DONE=%=") | 239 tmpgroups <- strsplit(tmpargs, "=%=DONE=%=") |
| 213 | 240 |
| 214 groups <- list() | 241 groups <- list() |
| 215 filenames <- list() | 242 filenames <- list() |
| 216 for (gps in tmpgroups[[1]]) { | 243 for (gps in tmpgroups[[1]]) { |
| 217 tmpgroup <- strsplit(gps, "=%=") | 244 tmpgroup <- strsplit(gps, "=%=") |
| 218 nb_files <- (length(tmpgroup[[1]]) - 1 ) /2 | 245 nb_files <- (length(tmpgroup[[1]]) - 1) / 2 |
| 219 tmplist <- character(nb_files) | 246 tmplist <- character(nb_files) |
| 220 tmpnames <- character(nb_files) | 247 tmpnames <- character(nb_files) |
| 221 j <- 1 | 248 j <- 1 |
| 222 for (i in 2:length(tmpgroup[[1]])){ | 249 for (i in 2:length(tmpgroup[[1]])) { |
| 223 if (!i%%2){ | 250 if (!i %% 2) { |
| 224 tmplist[[j]] <- tmpgroup[[1]][i] | 251 tmplist[[j]] <- tmpgroup[[1]][i] |
| 225 tmpnames[[j]]<- tmpgroup[[1]][i+1] | 252 tmpnames[[j]] <- tmpgroup[[1]][i + 1] |
| 226 j <- j + 1 | 253 j <- j + 1 |
| 227 } | 254 } |
| 228 } | 255 } |
| 229 groups[[tmpgroup[[1]][1]]] <- tmplist | 256 groups[[tmpgroup[[1]][1]]] <- tmplist |
| 230 filenames[[tmpgroup[[1]][1]]] <- tmpnames | 257 filenames[[tmpgroup[[1]][1]]] <- tmpnames |
| 231 } | 258 } |
| 232 | 259 |
| 233 checkFiles(groups) | 260 check_files(groups) |
| 234 checkFCS(args[1], args[2], args[3], args[4], g, groups, filenames) | 261 check_fcs(args[1], args[2], args[3], args[4], g, groups, filenames) |
