Mercurial > repos > azomics > metacyto_search_cl
comparison metacyto_search_clr.R @ 0:94ac403d134a draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/master/flowtools/metacyto_search_clr commit a1b796a09f6b30919a73b5ded0ce5a6378317007"
author | azomics |
---|---|
date | Wed, 28 Jul 2021 22:02:38 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:94ac403d134a |
---|---|
1 #!/usr/bin/env Rscript | |
2 ###################################################################### | |
3 # Copyright (c) 2018 Northrop Grumman. | |
4 # All rights reserved. | |
5 ###################################################################### | |
6 # | |
7 # Version 1 - January 2018 | |
8 # Author: Cristel Thomas | |
9 # | |
10 # | |
11 | |
12 library(flowCore) | |
13 library(MetaCyto) | |
14 | |
# Normalise a single cluster definition given on the command line.
#
# Args:
#   cl_def: one cluster definition string, markers separated by commas.
#
# Returns:
#   The definition with all spaces removed, commas turned into "|" and
#   every letter upper-cased.
#
# An empty definition, or the literal "None", terminates the R session with
# exit status 14.
check_cluster_def <- function(cl_def) {
  is_missing <- cl_def == "" || cl_def == "None"
  if (is_missing) {
    quit(save = "no", status = 14, runLast = FALSE)
  }
  # Both substitutions are literal (fixed = TRUE), not regular expressions.
  no_blanks <- gsub(" ", "", cl_def, fixed = TRUE)
  toupper(gsub(",", "|", no_blanks, fixed = TRUE))
}
24 | |
# Derive a group FCS file name from the path of a result file.
#
# Args:
#   path_to_result: path to a file sitting inside a per-group directory.
#
# Returns:
#   The name of the directory containing the file, with ".fcs" appended.
path_to_group_file <- function(path_to_result) {
  parent_dir <- dirname(path_to_result)
  paste0(basename(parent_dir), ".fcs")
}
29 | |
# Extract the group name from a group file name.
#
# Args:
#   result_file: a file name such as "<group>.fcs".
#
# Returns:
#   Everything before the first literal "." of the first element of
#   `result_file`.
group_file_to_group_name <- function(result_file) {
  pieces <- strsplit(result_file, ".", fixed = TRUE)
  pieces[[1]][1]
}
33 | |
34 | |
# Run the MetaCyto cluster search on a set of group FCS files and write the
# per-group statistics plus a report of cluster labels that were not found.
#
# Args:
#   df:       sample summary data frame; its `filenames` column is copied to
#             the `fcs_names` and `fcs_files` columns before being written
#             out as CSV for searchCluster.batch().
#   fcspaths: paths of the FCS files to analyse.
#   fcsnames: file names the FCS files are given inside the working
#             directory, in the same order as `fcspaths`.
#   outdir:   directory (created here) receiving one
#             "<group>_cluster_stats.txt" tab-separated table per result.
#   uc:       path of the "unused clusters" report.
#   clusters: cluster labels handed to searchCluster.batch().
#
# Side effects:
#   Creates "tmp_metacyto" and "tmp_metacyto_out" in the current working
#   directory, creates `outdir`, and writes `uc`.
#
# Returns:
#   NULL, invisibly.
search_cluster_panels <- function(df, fcspaths, fcsnames, outdir = "", uc = "",
                                  clusters = vector()) {
  working_dir <- "tmp_metacyto"
  working_out <- "tmp_metacyto_out"
  dir.create(working_dir)
  dir.create(outdir)

  # reformat summary -- expects csv + 'fcs_names' && 'fcs_files'
  new_df <- file.path(working_dir, "processed_sample_summary.csv")
  df$fcs_names <- df$filenames
  df$fcs_files <- df$filenames
  write.csv(df, file = new_df, row.names = FALSE)

  # move && rename FCS files to same directory
  for (i in seq_along(fcspaths)) {
    file.copy(fcspaths[[i]], file.path(working_dir, fcsnames[[i]]))
  }

  searchCluster.batch(preprocessOutputFolder = working_dir,
                      outpath = working_out,
                      clusterLabel = clusters)

  result_files <- list.files(working_out,
                             pattern = "cluster_stats_in_each_sample",
                             recursive = TRUE,
                             full.names = TRUE)

  nb_groups <- length(fcsnames)
  no_results <- character(0)
  if (length(result_files) != nb_groups) {
    # one or more groups with no results, figure out which
    groups_with_results <- vapply(result_files, path_to_group_file,
                                  character(1), USE.NAMES = FALSE)
    no_results <- setdiff(fcsnames, groups_with_results)
  }

  if (length(no_results) == nb_groups) {
    # cat(file = ) instead of sink(): nothing is left diverted if it fails.
    cat("No clusters were found in none of the groups.", file = uc)
    return(invisible(NULL))
  }

  # Pieces of the "unused clusters" report, bound together once at the end.
  unused_parts <- list()

  if (length(no_results) > 0) {
    grp_no_results <- vapply(no_results, group_file_to_group_name,
                             character(1), USE.NAMES = FALSE)
    unused_parts[[1]] <- data.frame(cluster_label = "any",
                                    not_found_in = grp_no_results,
                                    stringsAsFactors = FALSE)
  }

  for (result in result_files) {
    # The group name is the second component of the result path, i.e. the
    # directory directly below `working_out`.
    group_name <- strsplit(result, .Platform$file.sep, fixed = TRUE)[[1]][2]
    new_filename <- paste(c(group_name, "cluster_stats.txt"), collapse = "_")
    new_path <- file.path(outdir, new_filename)
    tmp_df <- read.csv(result)

    used_clr <- as.character(unique(tmp_df$label))
    if (length(used_clr) != length(clusters)) {
      unused <- setdiff(clusters, used_clr)
      # The lengths can differ while nothing is actually missing (e.g. a
      # label requested twice); data.frame() would fail on a 0-length column.
      if (length(unused) > 0) {
        unused_parts[[length(unused_parts) + 1]] <-
          data.frame(cluster_label = unused,
                     not_found_in = group_name,
                     stringsAsFactors = FALSE)
      }
    }
    colnames(tmp_df)[[1]] <- "group_name"
    write.table(tmp_df, new_path, quote = FALSE, row.names = FALSE,
                col.names = TRUE, sep = "\t")
  }

  if (length(unused_parts) == 0) {
    cat("All provided cluster definition were found in all provided FCS files.",
        file = uc)
  } else {
    unused_clrs <- do.call(rbind, unused_parts)
    write.table(unused_clrs, uc, quote = FALSE, row.names = FALSE,
                col.names = TRUE, sep = "\t")
  }
  invisible(NULL)
}
109 | |
110 | |
# Validate the inputs of the tool, then launch the cluster search.
#
# Args:
#   report:        path to the tab-separated sample summary; it must contain
#                  the columns `antibodies` and `study_id`.
#   outdir:        output directory passed on to search_cluster_panels().
#   list_unused:   path of the "unused clusters" report, passed on to
#                  search_cluster_panels().
#   list_clusters: path where the cluster definitions are written, one per
#                  line.
#   fcs_files:     list of FCS file paths.
#   grp_names:     list of group file names, parallel to `fcs_files`.
#   clusters:      cleaned cluster definitions.
#
# Exit statuses (the R session is terminated with quit()):
#   10  at least one file could not be read as FCS
#   11  an FCS file has no "sample_id" column
#   12  the number of samples found in the FCS files differs from the number
#       of rows in the summary
#   13  the summary lacks the `antibodies` or `study_id` column
check_input <- function(report = "", outdir = "", list_unused = "",
                        list_clusters = "", fcs_files = list(),
                        grp_names = list(), clusters = vector()) {
  # check FCS files
  fcspaths <- unlist(fcs_files)
  fcsnames <- unlist(grp_names)
  ct_files <- 0
  some_pb <- FALSE
  for (i in seq_along(fcspaths)) {
    fcs <- tryCatch(
      read.FCS(fcspaths[[i]], transformation = FALSE),
      error = function(ex) {
        # conditionMessage() keeps this a single line; pasting the condition
        # object itself yields one string per field of the condition.
        print(paste("File is not a valid FCS file:", fcsnames[[i]],
                    conditionMessage(ex)))
        NULL
      }
    )
    if (is.null(fcs)) {
      some_pb <- TRUE
      next
    }
    if (!("sample_id" %in% colnames(fcs))) {
      quit(save = "no", status = 11, runLast = FALSE)
    }
    # Highest sample index in this group file. Read the column by name, the
    # same name that was just checked for, instead of assuming it is last.
    ct_files <- ct_files + max(fcs@exprs[, "sample_id"])
  }

  # Report unreadable FCS files before comparing counts: a skipped file makes
  # ct_files too small, which would otherwise surface as status 12.
  if (some_pb) {
    quit(save = "no", status = 10, runLast = FALSE)
  }

  # check summary file format
  df <- read.table(report, sep = "\t", header = TRUE, colClasses = "character")
  nm <- colnames(df)
  check_ab <- "antibodies" %in% nm
  check_sdy <- "study_id" %in% nm

  if (!(check_sdy && check_ab)) {
    quit(save = "no", status = 13, runLast = FALSE)
  }
  # check that summary index compatible with FCSs in collection - by number
  # of files == index nb
  if (ct_files != length(df$antibodies)) {
    quit(save = "no", status = 12, runLast = FALSE)
  }

  write.table(clusters, list_clusters, quote = FALSE, row.names = FALSE,
              col.names = FALSE)
  search_cluster_panels(df, fcspaths, fcsnames, outdir, list_unused, clusters)
}
160 | |
161 ################################################################################ | |
162 ################################################################################ | |
# Command line layout:
#   args[1]        sample summary (report)
#   args[2]        output directory
#   args[3]        first cluster definition
#   args[4]        path of the "unused clusters" report
#   args[5]        path of the cluster definition list
#   args[6..i-1]   optional additional cluster definitions
#   args[i]        the FCS_FILES marker
#   args[i+1..]    alternating <FCS file path> <group file name> pairs
args <- commandArgs(trailingOnly = TRUE)

# Index of the first argument matching the FCS_FILES marker.
i <- grep("FCS_FILES", args)[1]
if (is.na(i)) {
  stop("FCS_FILES marker not found in the command line arguments",
       call. = FALSE)
}

cl_df <- args[3]
if (i > 6) {
  cl_df <- c(cl_df, args[6:(i - 1)])
}
# vapply() guarantees a character vector; names are kept as sapply() did.
cluster_def <- vapply(cl_df, check_cluster_def, character(1))

# Everything after the marker; empty when the marker is the last argument
# (args[(i + 1):length(args)] would count backwards in that case).
tmp_fcs <- args[seq_len(length(args) - i) + i]
if (length(tmp_fcs) == 0 || length(tmp_fcs) %% 2 != 0) {
  stop("expected one or more <FCS file> <group name> pairs after FCS_FILES",
       call. = FALSE)
}

# Odd positions are file paths, even positions are the matching names.
is_path <- seq_along(tmp_fcs) %% 2 == 1
fcs_files <- as.list(tmp_fcs[is_path])
fcs_names <- as.list(tmp_fcs[!is_path])

check_input(args[1], args[2], args[4], args[5], fcs_files, fcs_names,
            cluster_def)