Mercurial > repos > azomics > metacyto_preprocess
comparison metacyto_preprocess.R @ 0:bf6470882a15 draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/master/flowtools/metacyto_preprocess commit c3d761b4fca140636c3f22ef0fdbb855f3ecbdb8"
author | azomics |
---|---|
date | Sun, 25 Jul 2021 10:36:03 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bf6470882a15 |
---|---|
1 #!/usr/bin/env Rscript | |
2 ###################################################################### | |
3 # Copyright (c) 2018 Northrop Grumman. | |
4 # All rights reserved. | |
5 ###################################################################### | |
6 # | |
7 # Version 1 - January 2018 | |
8 # Author: Cristel Thomas | |
9 # | |
10 # | |
11 | |
12 library(flowCore) | |
13 library(MetaCyto) | |
14 | |
15 compare_lists <- function(m1, m2) { | |
16 list_check <- T | |
17 if (is.na(all(m1 == m2))) { | |
18 mm1 <- is.na(m1) | |
19 mm2 <- is.na(m2) | |
20 if (all(mm1 == mm2)) { | |
21 if (!all(m1 == m2, na.rm = TRUE)) { | |
22 list_check <- F | |
23 } | |
24 } else { | |
25 list_check <- F | |
26 } | |
27 } else if (!all(m1 == m2)) { | |
28 list_check <- F | |
29 } | |
30 return(list_check) | |
31 } | |
32 | |
33 | |
34 run_batch_processing <- function(sampling_size = 5000, flag_default = T, | |
35 to_exclude, outdir = "", outfile = "", | |
36 labels, assays, factors, fcspaths, fcsnames) { | |
37 # Create meta_data object | |
38 fp <- unlist(fcspaths) | |
39 file_counts <- lengths(fcspaths) | |
40 group_names <- rep(labels, times = file_counts) | |
41 group_bs <- rep(factors, times = file_counts) | |
42 group_types <- rep(assays, times = file_counts) | |
43 | |
44 meta_data <- data.frame(fcs_files = fp, study_id = group_names) | |
45 | |
46 # excluded_parameters | |
47 default_param <- c("FSC-A", "FSC-H", "FSC-W", "FSC", "SSC-A", "SSC-H", | |
48 "SSC-W", "SSC", "Time", "Cell_length", "cell_length", | |
49 "CELL_LENGTH") | |
50 excluded_parameters <- if (flag_default) default_param else to_exclude | |
51 # Run preprocessing.batch | |
52 preprocessing.batch(inputMeta = meta_data, | |
53 assay = group_types, | |
54 b = group_bs, | |
55 fileSampleSize = sampling_size, | |
56 outpath = outdir, | |
57 excludeTransformParameters = excluded_parameters) | |
58 | |
59 # deal with outputs | |
60 # output[2]: a csv file summarizing the pre-processing result. | |
61 ## -> open file to pull info and print out filenames rather than path. | |
62 tmp_csv <- file.path(outdir, "processed_sample_summary.csv") | |
63 tmp <- read.csv(tmp_csv) | |
64 tmp$old_index <- seq(1, length(tmp$fcs_names)) | |
65 | |
66 fn <- unlist(fcsnames) | |
67 df <- data.frame(fcs_files = fp, filenames = fn) | |
68 | |
69 # merge two data frames by ID | |
70 total <- merge(tmp, df, by = "fcs_files") | |
71 total2 <- total[order(total$old_index), ] | |
72 to_drop <- c("fcs_names", "fcs_files", "old_index") | |
73 newdf <- total2[, !(names(total2) %in% to_drop)] | |
74 write.table(newdf, file = outfile, quote = F, row.names = F, col.names = T, sep = "\t") | |
75 | |
76 file.remove(tmp_csv) | |
77 } | |
78 | |
79 check_fcs <- function(sampling = 5000, flag_default = TRUE, to_exclude, | |
80 outdir = "", outfile = "", labels, assays, factors, | |
81 fcspaths, fcsnames) { | |
82 | |
83 if (length(labels) > length(unique(labels))) { | |
84 # we have repeated group names, all group names need to be different | |
85 print("ERROR: repeated labels among groups, make sure that labels are all different for groups.") | |
86 print("The following labels are repeated") | |
87 table(labels)[table(labels) > 1] | |
88 quit(save = "no", status = 13, runLast = FALSE) | |
89 } | |
90 | |
91 marker_pb <- FALSE | |
92 for (i in seq_len(length(fcspaths))) { | |
93 for (n in seq_len(length(fcspaths[[i]]))) { | |
94 marker_check <- FALSE | |
95 marker_channel <- FALSE | |
96 tryCatch({ | |
97 fcs <- read.FCS(fcspaths[[i]][[n]], transformation = FALSE) | |
98 }, error = function(ex) { | |
99 print(paste("File is not a valid FCS file:", fnames[[i]][[n]], ex)) | |
100 quit(save = "no", status = 10, runLast = FALSE) | |
101 }) | |
102 | |
103 if (n == 1) { | |
104 m1 <- as.vector(pData(parameters(fcs))$desc) | |
105 c1 <- colnames(fcs) | |
106 } else { | |
107 m2 <- as.vector(pData(parameters(fcs))$desc) | |
108 c2 <- colnames(fcs) | |
109 marker_check <- compare_lists(m1, m2) | |
110 marker_channel <- compare_lists(c1, c2) | |
111 } | |
112 if (n > 1 && marker_check == F) { | |
113 marker_pb <- TRUE | |
114 print(paste("Marker discrepancy detected in markers -- group", labels[[i]])) | |
115 } else if (n > 1 && marker_channel == F) { | |
116 marker_pb <- TRUE | |
117 print(paste("Marker discrepancy detected in channels -- group", labels[[i]])) | |
118 } | |
119 } | |
120 } | |
121 | |
122 if (marker_pb) { | |
123 quit(save = "no", status = 12, runLast = FALSE) | |
124 } else { | |
125 run_batch_processing(sampling, flag_default, to_exclude, outdir, outfile, | |
126 labels, assays, factors, fcspaths, fcsnames) | |
127 } | |
128 } | |
129 | |
130 ################################################################################ | |
131 ################################################################################ | |
132 args <- commandArgs(trailingOnly = TRUE) | |
133 | |
134 # Arg 1: sub_sampling number | |
135 # Arg 2: output dir for processed FCS files for check_fcs and run_batch_processing | |
136 # Arg 3: Main output file (text file) | |
137 # Arg 4: excluded params | |
138 # Arg 5: Group 1 Name | |
139 # Arg 6: Group 1 format | |
140 # Arg 7: Group 1 Scaling factor | |
141 # Cycle through files in group 1 | |
142 # Arg : file path in Galaxy | |
143 # Arg : desired real file name | |
144 # Cycle through at at least one additional group | |
145 # Arg : 'new_panel' - used as some sort of delimiter | |
146 # Arg : Group n+1 Name | |
147 # Arg : Group n+1 format | |
148 # Arg : Group n+1 Scaling factor | |
149 ## Cycle through files in that group | |
150 ## Arg : file path in Galaxy | |
151 ## Arg : desired real file path | |
152 | |
153 sub_sampling <- NULL | |
154 if (as.numeric(args[1]) > 0) { | |
155 sub_sampling <- as.numeric(args[1]) | |
156 } | |
157 | |
158 # parameters to exclude => args[4] | |
159 to_exclude <- vector() | |
160 flag_default <- FALSE | |
161 i <- 1 | |
162 if (args[4] == "None" || args[4] == "") { | |
163 flag_default <- TRUE | |
164 } else { | |
165 excluded <- unlist(strsplit(args[4], ",")) | |
166 for (channel in excluded) { | |
167 stripped_chan <- gsub(" ", "", channel, fixed = TRUE) | |
168 if (!is.na(stripped_chan)) { | |
169 to_exclude[[i]] <- stripped_chan | |
170 } | |
171 i <- i + 1 | |
172 } | |
173 } | |
174 | |
175 # handle group cycle in arguments to produce iterable panels | |
176 tot_args <- length(args) | |
177 tmpargs <- paste(args[5:tot_args], collapse = "=%=") | |
178 tmppanels <- strsplit(tmpargs, "=%=new_panel=%=") | |
179 nb_panel <- length(tmppanels[[1]]) | |
180 | |
181 labels <- vector(mode = "character", length = nb_panel) | |
182 assay_types <- vector(mode = "character", length = nb_panel) | |
183 scaling_factors <- vector(mode = "numeric", length = nb_panel) | |
184 filepaths <- list() | |
185 filenames <- list() | |
186 | |
187 # iterate over panels (groups of fcs files) | |
188 j <- 1 | |
189 for (pnl in tmppanels[[1]]) { | |
190 tmppanel <- strsplit(pnl, "=%=") | |
191 # number of FCS files | |
192 nb_files <- (length(tmppanel[[1]]) - 3) / 2 | |
193 tmplist <- character(nb_files) | |
194 tmpnames <- character(nb_files) | |
195 if (tmppanel[[1]][[1]] == "None" || tmppanel[[1]][[1]] == "") { | |
196 print(paste("ERROR: Empty group name/label for group ", j)) | |
197 quit(save = "no", status = 11, runLast = FALSE) | |
198 } else { | |
199 labels[[j]] <- tmppanel[[1]][[1]] | |
200 } | |
201 # assay type | |
202 assay_types[[j]] <- tmppanel[[1]][[2]] | |
203 | |
204 scaling_factors[[j]] <- 0 | |
205 if (as.numeric(tmppanel[[1]][[3]]) > 0) { | |
206 scaling_factors[[j]] <- 1 / as.numeric(tmppanel[[1]][[3]]) | |
207 } | |
208 | |
209 k <- 1 | |
210 for (m in 4:length(tmppanel[[1]])) { | |
211 if (!m %% 2) { | |
212 tmplist[[k]] <- tmppanel[[1]][[m]] | |
213 tmpnames[[k]] <- tmppanel[[1]][[m + 1]] | |
214 k <- k + 1 | |
215 } | |
216 } | |
217 filepaths[[tmppanel[[1]][1]]] <- tmplist | |
218 filenames[[tmppanel[[1]][1]]] <- tmpnames | |
219 j <- j + 1 | |
220 } | |
221 | |
222 check_fcs(sub_sampling, flag_default, to_exclude, args[2], args[3], labels, | |
223 assay_types, scaling_factors, filepaths, filenames) | |
224 | |
225 # check_fcs <- function(sampling = 5000, flag_default = TRUE, to_exclude, | |
226 # outdir = "", outfile = "", labels, assays, factors, | |
227 # fcspaths, fcsnames) |