Mercurial > repos > immport-devteam > fcs_gate_trans
comparison FCSGateTrans.R @ 1:c28c2e680bf5 draft
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/fcs_gate_trans commit f34ed6ca8e77b9792a270890262c2936b13e30b9"
| author | azomics |
|---|---|
| date | Mon, 22 Jun 2020 20:30:34 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:78b8ab344edd | 1:c28c2e680bf5 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 ###################################################################### | |
| 3 # Copyright (c) 2016 Northrop Grumman. | |
| 4 # All rights reserved. | |
| 5 ###################################################################### | |
| 6 # ImmPort FCS conversion program | |
| 7 # Authors: Yue Liu and Yu "Max" Qian | |
| 8 # | |
| 9 # Reference: FCSTrans: An open source software system for FCS | |
| 10 # file conversion and data transformation | |
| 11 # Qian Y, Liu Y, Campbell J, Thomson E, Kong YM, | |
| 12 # Scheuermann RH. 2012 Cytometry Part A. 81A(5) | |
| 13 # doi.org/10.1002/cyto.a.22037 | |
| 14 # | |
| 15 # To run in R | |
| 16 # 1) library(flowCore) | |
| 17 # 2) source("FCSTrans.R") | |
| 18 # 3) transformFCS("filename") | |
| 19 # | |
| 20 # | |
| 21 # Automated Gating of Lymphocytes with FlowDensity | |
| 22 # Authors of FlowDensity: Jafar Taghiyar, Mehrnoush Malek | |
| 23 # | |
| 24 # Reference: flowDensity: reproducing manual gating of flow | |
| 25 # cytometry data by automated density-based cell | |
| 26 # population identification | |
| 27 # Malek M, Taghiyar MJ, Chong L, Finak G, | |
| 28 # Gottardo R, Brinkman RR. 2015 Bioinformatics 31(4) | |
| 29 # doi: 10.1093/bioinformatics/btu677 | |
| 30 # | |
| 31 # | |
| 32 # Version 1.5 | |
| 33 # March 2016 -- added lines to run directly from command line (cristel thomas) | |
| 34 # May 2016 -- added automated gating (cristel thomas) | |
| 35 # August 2016 -- added options for data transformation (cristel thomas) | |
| 36 # April 2017 -- added logicle to transformation options (cristel thomas) | |
| 37 # July 2017 -- added options for outputs (cristel thomas) | |
| 38 | |
| 39 library(flowCore) | |
| 40 library(flowDensity) | |
| 41 library(GEOmap) | |
#
# Floor values at a cutoff: every element of x that is less than or equal
# to `cutoff` is replaced by `target`; all other elements pass through.
#
#   x       numeric scalar or vector of values to clamp
#   cutoff  threshold; values <= cutoff are replaced (default 0)
#   target  replacement value (default 0)
#
# Returns a vector of the same length as x. NA elements are left as NA.
#
ipfloor <- function(x, cutoff = 0, target = 0) {
  y <- x
  # Vectorised replacement; the NA guard leaves missing values untouched
  # instead of failing on an NA condition.
  y[!is.na(x) & x <= cutoff] <- target
  y
}
#
# Cap values at a cutoff: every element of x that is greater than or equal
# to `cutoff` is replaced by `target`; all other elements pass through.
#
#   x       numeric scalar or vector of values to clamp
#   cutoff  threshold; values >= cutoff are replaced (default 0)
#   target  replacement value (default 0)
#
# Returns a vector of the same length as x. NA elements are left as NA.
#
ipceil <- function(x, cutoff = 0, target = 0) {
  y <- x
  # Vectorised replacement; the NA guard leaves missing values untouched
  # instead of failing on an NA condition.
  y[!is.na(x) & x >= cutoff] <- target
  y
}
#
# Calculation core of iplogicle
#
#   x      values to transform (coerced with as.double)
#   w      linearisation width; w == 0 short-circuits the root search (p = 1)
#   r      top-of-scale data value
#   d      number of decades (multiplied by log(10) before use)
#   scale  output scale; divided by d * log(10) before being applied
#
# Derives the coefficients for flowCore's compiled biexponential transform,
# runs it, rescales the result and floors it at 0 with ipfloor.
#
iplogicore <- function(x, w, r, d, scale) {
  tol <- .Machine$double.eps^0.8
  maxit <- as.integer(5000)

  # Work in natural-log units from here on.
  decades <- d * log(10)
  scale <- scale / decades

  # p solves w = 2 * p * log(p) / (p + 1); when w is 0 the solution is 1.
  p <- if (w == 0) {
    1
  } else {
    uniroot(function(p) -w + 2 * p * log(p) / (p + 1),
            c(.Machine$double.eps, 2 * (w + decades)))$root
  }

  # Coefficients handed to the compiled routine, in its argument order.
  coef_a <- r * exp(-(decades - w))
  coef_b <- 1
  coef_c <- coef_a * p^2
  coef_d <- 1 / p
  coef_f <- coef_a * (p^2 - 1)

  y <- .Call("flowCore_biexponential_transform", PACKAGE = "flowCore",
             as.double(x), coef_a, coef_b, coef_c, coef_d, coef_f, w, tol,
             maxit)

  # vapply keeps the result a numeric vector even for zero-length input
  # (sapply would return an empty list there).
  vapply(y * scale, ipfloor, numeric(1))
}
#
# Objective function used to search for w
#
# Evaluates iplogicore at `cutoff` for a candidate width `w` (capped at `d`)
# and subtracts a small offset (.Machine$double.eps^0.6). iplogicle hands
# this function to uniroot to locate w.
#
iplogiclew <- function(w, cutoff = -111, r = 262144, d = 4.5, scale = 1) {
  # A width larger than the number of decades is capped at d.
  if (w > d) {
    w <- d
  }
  offset <- .Machine$double.eps^0.6
  iplogicore(cutoff, w, r, d, scale) - offset
}
#
# ImmPort logicle function - convert fluorescent marker values to channel
# output
#
#   x       values to transform
#   r       top-of-scale data value (default 262144)
#   d       number of decades (default 4.5)
#   range   output channel range (default 4096)
#   cutoff  data value used when estimating w (default -111)
#   w       linearisation width; a negative value (the default) means
#           "estimate w from cutoff"
#
# Stops with an error when w is larger than d.
#
iplogicle <- function(x, r = 262144, d = 4.5, range = 4096, cutoff = -111,
                      w = -1) {
  if (w > d) {
    stop("Negative range decades must be smaller than total number of decades")
  }
  if (w < 0) {
    # Forward r and d so the search uses the same parameters as the final
    # transform; previously iplogiclew silently fell back to its own
    # defaults (r = 262144, d = 4.5) whenever the caller overrode them.
    w <- uniroot(iplogiclew, c(0, d), cutoff = cutoff, r = r, d = d)$root
  }
  iplogicore(x, w, r, d, range)
}
#
# Convert fluorescent values to channel output using log transformation
#
#   x  numeric scalar or vector of fluorescent values
#
# Values less than or equal to 1 are raised to 1 before the logarithm is
# taken, then the result is 1024 * log10(x) - 488.6.
# NA elements stay NA; zero-length input gives zero-length output.
#
iplog <- function(x) {
  # Vectorised floor at 1 (same rule as ipfloor with cutoff = 1, target = 1).
  x[!is.na(x) & x <= 1] <- 1
  1024 * log10(x) - 488.6
}
#
# ImmPort linear function - convert scatter values to channel output
# linear transformation
#
#   x             numeric scalar or vector of scatter values
#   channelrange  input value that maps to channel 4095 (default 262144)
#
# Rescales x by 4095 / channelrange and clamps the result to [0, 4095].
#
ipscatter <- function(x, channelrange = 262144) {
  y <- 4095.0 * x / channelrange
  # Vectorised clamp: values <= 0 become 0, values >= 4095 become 4095.
  pmin(pmax(y, 0), 4095)
}
#
# ImmPort time function - convert time values to channel output
#
#   x             numeric scalar or vector of time values
#   channelrange  accepted for signature compatibility; not used
#
# Only a simple cutoff is applied for now: values <= 0 become 0.
#
iptime <- function(x, channelrange) {
  # Vectorised floor at 0; NA stays NA and empty input stays empty.
  pmax(x, 0)
}
#
# Determine the type of marker. Marker type is used
# to determine type of transformation to apply for this channel.
# Before 2010 FLUO_AREA type used iplogicle and
# FLUO_NON_AREA type used iplog. In 2010 Yue changed the code so
# all fluorescent channels use iplogicle. Below is the note from SVN
#
# Version 1.1
# 2010-07-02
# -----------
# Added data type checking on both FCS version 2 and 3
# Removed log conversion for non-area fluorescent channel
# Applied logicle conversion for all fluorescent channels
#
# The GenePattern version uses iplog for FLUO_NON_AREA, rather
# than iplogicle.
#
#   name   channel name (single string); matching is case-insensitive
#   debug  when TRUE, print the name and the detected type
#
# Returns one of "SCATTER" (name starts with FS or SS), "TIME" (name starts
# with TIME), "FLUO_AREA" (last "-"-separated piece is A) or
# "FLUO_NON_AREA" (everything else, including an empty name).
#
getMarkerType <- function(name, debug = FALSE) {
  prefix2 <- toupper(substr(name, 1, 2))
  prefix4 <- toupper(substr(name, 1, 4))

  if (prefix2 == "FS" || prefix2 == "SS") {
    # Also covers FSC / SSC, which share the two-letter prefix.
    type <- "SCATTER"
  } else if (prefix4 == "TIME") {
    type <- "TIME"
  } else {
    pieces <- unlist(strsplit(name, "-"))
    # strsplit yields no pieces for an empty name; treat that as "no suffix"
    # rather than failing on a zero-length condition.
    suffix <- if (length(pieces) > 0) toupper(pieces[length(pieces)]) else ""
    type <- if (suffix == "A") "FLUO_AREA" else "FLUO_NON_AREA"
  }

  if (debug) {
    print(paste("Marker:", name, ", Type:", type))
  }
  type
}
#
# Scale data
#
#   data          numeric vector of raw channel values
#   channelrange  declared range of the channel (default 0)
#
# Rescales data so that the effective range maps to 262144. The effective
# range is channelrange, unless the largest data value exceeds it, in which
# case that largest value is used instead.
#
scaleData <- function(data, channelrange = 0) {
  observed_max <- range(data)[2] # range() returns [min, max]
  effective_range <- channelrange
  if (observed_max > effective_range) {
    effective_range <- observed_max
  }
  262144 * data / effective_range
}
#
# Check if AccuriData. Accuri data needs different conversion
#
#   keywords  named list of FCS keywords
#
# Returns TRUE only when the "$CYT" keyword is exactly "Accuri C6".
#
isAccuriData <- function(keywords) {
  # Exact lookup: `$` partially matches list names, so a file without $CYT
  # but with e.g. $CYTSN would have had that value inspected instead.
  cytometer <- keywords[["$CYT"]]
  isTRUE(as.character(cytometer) == "Accuri C6")
}
#
# Convert FCS file
#
#   fcs    flowFrame to convert
#   debug  when TRUE, print per-channel diagnostics
#
# For FCS version 2, 3 or 3.1 files whose $DATATYPE is "F", every channel is
# transformed according to its marker type (see getMarkerType):
#   TIME     -> iptime
#   SCATTER  -> ipscatter(scaleData(...))
#   other    -> iplogicle(scaleData(...))
# and rounded. For non-TIME channels whose declared range is above 4096,
# the range metadata is reset to 0..4096. Any other data type or FCS
# version is passed through untransformed (with a printed message).
#
# Returns a new flowFrame, or FALSE when fcs is not a flowFrame.
#
convertFCS <- function(fcs, debug = FALSE) {
  # Check file type and FCS version
  if (!inherits(fcs, "flowFrame")) {
    print("convertFCS requires flowFrame object as input")
    return(FALSE)
  }
  keywords <- keyword(fcs)
  markers <- colnames(fcs)
  params <- fcs@parameters
  list_description <- fcs@description

  if (debug) {
    print("****Inside convertFCS")
    print(paste(" FCS version:", keywords$FCSversion))
    print(paste(" DATATYPE:", keywords['$DATATYPE']))
  }

  # isTRUE() guards keep a missing keyword from crashing the condition; a
  # file without the keyword falls through to the pass-through branches.
  fcs_version <- keywords[["FCSversion"]]
  version_supported <- !is.null(fcs_version) &&
    isTRUE(fcs_version %in% c("2", "3", "3.1"))

  if (version_supported) {
    datatype <- keywords[["$DATATYPE"]]
    if (isTRUE(datatype == "F")) {
      # Process fcs expression data, using transformation
      # based on category of the marker.
      fcs_exprs <- exprs(fcs)
      # Preallocate the output instead of growing it with cbind().
      fcs_channel <- matrix(0, nrow = nrow(fcs_exprs), ncol = length(markers))
      for (i in seq_along(markers)) {
        markertype <- getMarkerType(markers[i], debug)
        rangekeyword <- paste0("$P", i, "R")
        flowcore_min <- paste0("flowCore_", rangekeyword, "min")
        flowcore_max <- paste0("flowCore_", rangekeyword, "max")
        channelrange <- as.numeric(keywords[rangekeyword])
        if (debug) {
          print(paste(" Marker name:", markers[i]))
          print(paste(" Marker type:", markertype))
          print(paste(" Range value:", keywords[rangekeyword]))
        }

        if (markertype == "TIME") {
          channel <- iptime(fcs_exprs[, i])
        } else {
          if (markertype == "SCATTER") {
            channel <- ipscatter(scaleData(fcs_exprs[, i], channelrange))
          } else {
            # Apply logicle transformation on fluorescent channels
            channel <- iplogicle(scaleData(fcs_exprs[, i], channelrange))
          }
          # adjust range in parameters and list description
          if (params@data$range[i] > 4096) {
            params@data$range[i] <- 4096
            params@data$minRange[i] <- 0
            params@data$maxRange[i] <- 4096
            list_description[rangekeyword] <- 4096
            list_description[flowcore_min] <- 0
            list_description[flowcore_max] <- 4096
          }
        }
        fcs_channel[, i] <- round(channel)
      }
      colnames(fcs_channel) <- markers
    } else {
      if (!isTRUE(datatype == "I")) {
        print(paste("Data type", datatype, "in FCS 3 is not supported"))
      }
      fcs_channel <- exprs(fcs)
      colnames(fcs_channel) <- markers
    }
  } else {
    print(paste("FCS version", keyword(fcs)$FCSversion, "is not supported"))
    fcs_channel <- exprs(fcs)
    colnames(fcs_channel) <- markers
  }
  flowFrame(fcs_channel, params, list_description)
}
#
# Starting function for processing a FCS file: read it, optionally
# compensate, transform, optionally gate (writing a report and plots), then
# write the result.
#
#   input_file      path of the FCS file to read
#   output_file     path the converted data is written to
#   compensate      compared against TRUE; when it matches and the file has
#                   a SPILL keyword, compensation is attempted
#   outformat       "FCS" (write.FCS), "flowFrame" (saveRDS); any other
#                   value writes a tab-separated text table
#   gate            when TRUE, run flowDensity gating on the scatter channels
#   graph_file      PNG file for the before/after density plots
#   report          text file for the before/after gating summaries
#   method          "None", "fcstrans", "logicle_auto", "arcsinh" or
#                   "logicle"; any other value is an error
#   scaling_factor  passed as `b` to arcsinhTransform (method "arcsinh")
#   logicle_w, logicle_t, logicle_m
#                   passed as w, t, m to logicleTransform (method "logicle")
#   debug           when TRUE, print progress information
#
processFCSFile <- function(input_file, output_file = "", compensate = FALSE,
                           outformat = "flowtext", gate = FALSE,
                           graph_file = "", report = "", method = "",
                           scaling_factor, logicle_w = 0.5,
                           logicle_t = 262144, logicle_m = 4.5,
                           debug = FALSE) {
  #
  # Generate the file names for the output_file
  #
  pieces <- unlist(strsplit(input_file, .Platform$file.sep))
  filename <- pieces[length(pieces)]
  if (debug) {
    print(paste("Converting file: ", input_file))
    print(paste("Original file name: ", filename))
    print(paste("Output file name: ", output_file))
  }
  fcs <- read.FCS(input_file, transformation = FALSE)
  keywords <- keyword(fcs)
  markers <- colnames(fcs)
  print_markers <- as.vector(pData(parameters(fcs))$desc)
  # Update print_markers if the $P?S not in the FCS file
  no_desc <- is.na(print_markers)
  print_markers[no_desc] <- markers[no_desc]
  #
  # Compensate
  #
  spill <- keywords$SPILL

  if (!is.null(spill) && compensate == TRUE) {
    if (debug) {
      print("Attempting compensation")
    }
    # `compensate(...)` resolves to the function of that name, not to the
    # argument, because R skips non-function bindings in call position.
    # On failure the error is dumped and the uncompensated data is kept.
    fcs <- tryCatch(compensate(fcs, spill),
                    error = function(ex) {
                      str(ex)
                      fcs
                    })
  }
  #
  # Transform the data
  #
  transformed_data <- fcs
  # Logical mask instead of negative indexing: x[-integer(0)] selects
  # nothing, so files without any FSC/SSC/Time column used to end up with
  # an empty channel list.
  is_excluded <- grepl("FSC", colnames(fcs)) |
    grepl("SSC", colnames(fcs)) |
    grepl("Time", colnames(fcs))
  list_channels <- colnames(fcs)[!is_excluded]
  if (isAccuriData(keywords)) {
    print("Accuri data is not supported")
  } else if (method != "None") {
    if (method == "fcstrans") {
      transformed_data <- convertFCS(fcs, debug)
    } else if (method == "logicle_auto") {
      lgcl <- estimateLogicle(fcs, channels = list_channels)
      transformed_data <- transform(fcs, lgcl)
    } else {
      if (method == "arcsinh") {
        trans <- arcsinhTransform(transformationId = "defaultArcsinhTransform",
                                  a = 0, b = scaling_factor, c = 0)
      } else if (method == "logicle") {
        trans <- logicleTransform(w = logicle_w, t = logicle_t, m = logicle_m)
      } else {
        # Previously fell through to "object 'trans' not found".
        stop("Unknown transformation method: '", method, "'", call. = FALSE)
      }
      translist <- transformList(list_channels, trans)
      transformed_data <- transform(fcs, translist)
    }
  }
  trans_gated_data <- transformed_data
  #
  # Gate data
  #
  if (gate) {
    # check that there are SSC and FSC channels to gate on
    chans <- c(grep(colnames(transformed_data), pattern = "FSC"),
               grep(colnames(transformed_data), pattern = "SSC"))
    totalchans <- chans
    if (length(chans) > 2) {
      # get first FSC and corresponding SSC
      chans <- c(grep(colnames(transformed_data), pattern = "FSC-A"),
                 grep(colnames(transformed_data), pattern = "SSC-A"))
      if (length(chans) == 0) {
        chans <- c(grep(colnames(transformed_data), pattern = "FSC-H"),
                   grep(colnames(transformed_data), pattern = "SSC-H"))
        if (length(chans) == 0) {
          chans <- c(grep(colnames(transformed_data), pattern = "FSC-W"),
                     grep(colnames(transformed_data), pattern = "SSC-W"))
        }
      }
    }
    if (length(chans) == 0) {
      warning('No forward/side scatter channels found, gating aborted.')
    } else {
      # gate lymphocytes
      lymph <- flowDensity(obj = transformed_data, channels = chans,
                           position = c(TRUE, NA),
                           debris.gate = c(TRUE, FALSE))
      # gate singlets if A and H/W
      if (length(totalchans) > 2) {
        trans_gated_data <- getflowFrame(flowDensity(obj = lymph,
                                                     singlet.gate = TRUE))
      } else {
        trans_gated_data <- getflowFrame(lymph)
      }
      # report
      pregating_summary <- capture.output(summary(transformed_data))
      pregating_dim <- capture.output(dim(transformed_data))
      postgating_summary <- capture.output(summary(trans_gated_data))
      postgating_dim <- capture.output(dim(trans_gated_data))
      sink(report)
      # `finally` restores normal output even if writing the report fails.
      tryCatch({
        cat("#########################\n")
        cat("## BEFORE GATING ##\n")
        cat("#########################\n")
        cat(pregating_dim, pregating_summary, sep = "\n")
        cat("\n#########################\n")
        cat("## AFTER GATING ##\n")
        cat("#########################\n")
        cat(postgating_dim, postgating_summary, sep = "\n")
      }, finally = sink())
      # plots
      time_channel <- grep(toupper(colnames(transformed_data)),
                           pattern = "TIME")
      nb_markers <- length(colnames(transformed_data)) - length(time_channel)
      # NOTE(review): each marker pair draws two panels, i.e.
      # nb_markers * (nb_markers - 1) panels in total, while this grid holds
      # about half of that -- confirm whether the row count is intended.
      nb_rows <- ceiling(((nb_markers - 1) * nb_markers) / 4)
      h <- 400 * nb_rows
      maxrange <- transformed_data@parameters@data$range[1]

      png(graph_file, type = "cairo", height = h, width = 800)
      # `finally` closes the graphics device even if a plot call fails.
      tryCatch({
        par(mfrow = c(nb_rows, 2))
        # seq_len() avoids the 1:0 trap when there is a single marker.
        for (i in seq_len(max(nb_markers - 1, 0))) {
          for (j in (i + 1):nb_markers) {
            plotDens(transformed_data, c(i, j), xlab = print_markers[i],
                     ylab = print_markers[j], main = "Before Gating",
                     ylim = c(0, maxrange), xlim = c(0, maxrange))
            plotDens(trans_gated_data, c(i, j), xlab = print_markers[i],
                     ylab = print_markers[j], main = "After Gating",
                     ylim = c(0, maxrange), xlim = c(0, maxrange))
          }
        }
      }, finally = dev.off())
    }
  }
  if (outformat == "FCS") {
    write.FCS(trans_gated_data, output_file)
  } else if (outformat == "flowFrame") {
    saveRDS(trans_gated_data, file = output_file)
  } else {
    output_data <- exprs(trans_gated_data)
    colnames(output_data) <- print_markers
    write.table(output_data, file = output_file, quote = FALSE,
                row.names = FALSE, col.names = TRUE, sep = '\t',
                append = FALSE)
  }
}
# Validate an FCS file and convert it with processFCSFile
# @param input_file FCS file to be transformed
# @param output_file File the transformed data is written to
# @param compensate Flag indicating whether to apply compensation
#                   matrix if it exists.
# @param outformat Output format, forwarded to processFCSFile
# @param gate Flag indicating whether to run automated gating
# @param graph_file PNG file for the gating plots
# @param report_file Text file for the gating report
# @param trans_met Transformation method, forwarded to processFCSFile
# @param scaling_factor Scaling factor for the arcsinh transformation
# @param w,t,m Parameters for the logicle transformation
# @param debug Flag indicating whether to print debug information
transformFCS <- function(input_file, output_file, compensate = FALSE,
                         outformat = "flowtext", gate = FALSE,
                         graph_file = "", report_file = "", trans_met = "",
                         scaling_factor = 1 / 150, w = 0.5, t = 262144,
                         m = 4.5, debug = FALSE) {
  # Check file beginning matches FCS standard; a failing check is reported
  # and treated as "not valid".
  is_valid <- tryCatch(
    isFCSfile(input_file),
    error = function(ex) {
      print(paste(" ! Error in isFCSfile", ex))
      FALSE
    }
  )
  if (is_valid) {
    # debug is forwarded too; it used to be accepted here but dropped.
    processFCSFile(input_file, output_file, compensate, outformat,
                   gate, graph_file, report_file, trans_met, scaling_factor,
                   w, t, m, debug)
  } else {
    print(paste(input_file, "does not meet FCS standard"))
  }
}
#
# Run FCS Gate-Trans
#
# Positional command-line arguments:
#   1  input FCS file
#   2  output file
#   3  compensate flag
#   4  output format
#   5  graph file, or "None" to disable gating
#   6  report file (read only when gating is enabled)
#   7  transformation method, or "None"
#   8  arcsinh: divisor for the scaling factor; logicle: w
#   9  logicle: t
#   10 logicle: m
#
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 7) {
  # Fail with a usable message instead of an NA comparison error below.
  stop("Expected at least 7 arguments: input output compensate outformat ",
       "graph_file report_file method [method parameters]", call. = FALSE)
}
graphs <- ""
report <- ""
gate <- FALSE
trans_method <- "None"
scaling_factor <- 1 / 150
w <- 0.5
t <- 262144
m <- 4.5
if (args[5] != "None") {
  gate <- TRUE
  graphs <- args[5]
  report <- args[6]
}
if (args[7] != "None") {
  trans_method <- args[7]
  if (trans_method == "arcsinh") {
    scaling_factor <- 1 / as.numeric(args[8])
  } else if (trans_method == "logicle") {
    # Command-line arguments are strings; convert them like the arcsinh
    # branch does so the transform receives numbers.
    w <- as.numeric(args[8])
    t <- as.numeric(args[9])
    m <- as.numeric(args[10])
  }
}
transformFCS(args[1], args[2], args[3], args[4], gate, graphs,
             report, trans_method, scaling_factor, w, t, m)
