Mercurial > repos > immport-devteam > fcs_gate_trans
diff FCSGateTrans.R @ 1:c28c2e680bf5 draft
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/fcs_gate_trans commit f34ed6ca8e77b9792a270890262c2936b13e30b9"
author | azomics |
---|---|
date | Mon, 22 Jun 2020 20:30:34 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FCSGateTrans.R Mon Jun 22 20:30:34 2020 -0400 @@ -0,0 +1,474 @@ +#!/usr/bin/env Rscript +###################################################################### +# Copyright (c) 2016 Northrop Grumman. +# All rights reserved. +###################################################################### +# ImmPort FCS conversion program +# Authors: Yue Liu and Yu "Max" Qian +# +# Reference: FCSTrans: An open source software system for FCS +# file conversion and data transformation +# Qian Y, Liu Y, Campbell J, Thomson E, Kong YM, +# Scheuermann RH. 2012 Cytometry Part A. 81A(5) +# doi.org/10.1002/cyto.a.22037 +# +# To run in R +# 1) library(flowCore) +# 2) source("FCSTrans.R") +# 3) transformFCS("filename") +# +# +# Automated Gating of Lymphocytes with FlowDensity +# Authors of FlowDensity: Jafar Taghiyar, Mehrnoush Malek +# +# Reference: flowDensity: reproducing manual gating of flow +# cytometry data by automated density-based cell +# population identification +# Malek M, Taghiyar MJ, Chong L, Finak G, +# Gottardo R, Brinkman RR. 2015 Bioinformatics 31(4) +# doi: 10.1093/bioinformatics/btu677 +# +# +# Version 1.5 +# March 2016 -- added lines to run directly from command line (cristel thomas) +# May 2016 -- added automated gating (cristel thomas) +# August 2016 -- added options for data transformation (cristel thomas) +# April 2017 -- added logicle to transformation options (cristel thomas) +# July 2017 -- added options for outputs (cristel thomas) + +library(flowCore) +library(flowDensity) +library(GEOmap) +# +# Set output to 0 when input is less than cutoff value +# +ipfloor <- function (x, cutoff=0, target=0) { + y <- x + if (x <= cutoff) { + y <- target + } + return(y) +} +# +# Set output to 0 when input is less than cutoff value +# +ipceil <- function (x, cutoff=0, target=0) { + y <- x + if (x >= cutoff) { + y <- target + } + return(y) +} +# +# Calculation core of iplogicle +# +iplogicore <- function (x, w, r, d, scale) { + tol <- .Machine$double.eps^0.8 + maxit <- as.integer(5000) + d <- d * log(10) + scale <- scale / d + p <- if (w == 0) { + 1 + } else { + uniroot(function(p) -w + 2 * p * log(p)/(p + 1), c(.Machine$double.eps, + 2 * (w + d)))$root + } + a <- r * exp(-(d - w)) + b <- 1 + c <- r * exp(-(d - w)) * p^2 + d <- 1/p + f <- a * (p^2 - 1) + y <- .Call("flowCore_biexponential_transform", PACKAGE= 'flowCore', + as.double(x), a, b, c, d, f, w, tol, maxit) + y <- sapply(y * scale, ipfloor) + return(y) +} +# +# Function for calculating w +# +iplogiclew <- function (w, cutoff=-111, r=262144, d=4.5, scale=1) { + if (w > d) + w <- d + y <- iplogicore(cutoff, w, r, d, scale) - .Machine$double.eps^0.6 + return(y) +} +# +# ImmPort logicle function - convert fluorescent marker values to channel output +# +iplogicle <- function (x, r=262144, d=4.5, range=4096, cutoff=-111, w=-1) { + if (w > d) { + stop("Negative range decades must be smaller than total number of decades") + } + if (w < 0) { + w = uniroot(iplogiclew, c(0, d), cutoff=cutoff)$root + } + y <- iplogicore(x, w, r, d, range) + return(y) +} +# +# Convert fluorescent values to channel output using log transformation +# +iplog <- function(x) { + x <- sapply(x, ipfloor, cutoff=1, target=1) + y <- 1024 * log10(x) - 488.6 + return(y) +} +# +# ImmPort linear function - convert scatter values to channel output +# linear transformation +# +ipscatter <- function (x, channelrange=262144) { + y <- 4095.0 * x / channelrange + y <- sapply(y, ipfloor) + y <- sapply(y, ipceil, cutoff=4095, target=4095) + return(y) +} +# +# ImmPort time function - convert time values to channel output +# linear transformation +iptime <- function (x, channelrange) { + # use simple cutoff for now + y <- sapply(x, ipfloor) + return(y) +} +# +# Determine the type of marker. Marker type is used +# to determine type of transformation to apply for this channel. +# Before 2010 FLUO_AREA type used iplogicile and +# FLOU_NON_AREA type used iplog. In 2010 Yue, changed code so +# all fluorescent channels use iplogicle. Below is the note from SVN +# +# Version 1.1 +# 2010-07-02 +# ----------- +# Added data type checking on both FCS version 2 and 3 +# Removed log conversion for non-area fluorescent channel +# Applied logicle conversion for all fluorescent channels +# +# The GenePattern version uses iplog for FLOU_NON_AREA, rather +# than iplogicle. +# +getMarkerType <- function(name,debug=FALSE) { + type <- "" + prefix2 <- toupper(substr(name, 1, 2)) + prefix3 <- toupper(substr(name, 1, 3)) + prefix4 <- toupper(substr(name, 1, 4)) + if (prefix2 == "FS" || prefix2 == "SS") { + type <- "SCATTER" + } else if (prefix3 == "FSC" || prefix3 == "SSC") { + type <- "SCATTER" + } else if (prefix4 == "TIME") { + type <- "TIME" + } else { + pieces <- unlist(strsplit(name, "-")) + if (toupper(pieces[length(pieces)]) == "A") { + type <- "FLUO_AREA" + } else { + type <- "FLUO_NON_AREA" + } + } + if (debug) { + print(paste("Marker:", name, ", Type:", type)) + } + return(type) +} +# +# Scale data +# +scaleData <- function(data, channelrange=0) { + datamax <- range(data)[2] # range() returns [min, max] + if (datamax > channelrange) { + channelrange <- datamax + } + #if (channelrange == 0) { + # channelrange = range(data)[2] + #} + data <- 262144 * data / channelrange + return(data) +} +# +# Check if AccuriData. Accuri data needs different conversion +# +isAccuriData <- function(keywords) { + isTRUE(as.character(keywords$"$CYT") == "Accuri C6") +} +# +# Convert FCS file +# +convertFCS <- function(fcs, debug=FALSE) { + # Check file type and FCS version + if (class(fcs)[1] != "flowFrame") { + print("convertFCS requires flowFrame object as input") + return(FALSE) + } + keywords <- keyword(fcs) + markers <- colnames(fcs) + params <- fcs@parameters + list_description <- fcs@description + + if (debug) { + print("****Inside convertFCS") + print(paste(" FCS version:", keywords$FCSversion)) + print(paste(" DATATYPE:", keywords['$DATATYPE'])) + } + if (keywords$FCSversion == "2" || keywords$FCSversion == "3" || + keywords$FCSversion == "3.1" ) { + datatype <- unlist(keywords['$DATATYPE']) + if (datatype == 'F') { + # Process fcs expression data, using transformation + # based on category of the marker. + fcs_exprs <- exprs(fcs) + fcs_channel <- NULL + for (i in 1:length(markers)){ + markertype <- getMarkerType(markers[i], debug) + rangekeyword <- paste("$P", i, "R", sep="") + flowcore_min <- paste("flowCore_", rangekeyword, "min", sep="") + flowcore_max <- paste("flowCore_", rangekeyword, "max", sep="") + channelrange <- as.numeric(keywords[rangekeyword]) + if (debug) { + print(paste(" Marker name:", markers[i])) + print(paste(" Marker type:", markertype)) + print(paste(" Range value:", keywords[rangekeyword])) + } + + if (markertype == "TIME") { + channel <- iptime(fcs_exprs[, i]) + } else { + if (markertype == "SCATTER") { + channel <- ipscatter(scaleData(fcs_exprs[, i], channelrange)) + } else { + # Apply logicle transformation on fluorescent channels + channel <- iplogicle(scaleData(fcs_exprs[, i], channelrange)) + } + # adjust range in parameters and list description + if (params@data$range[i] > 4096){ + params@data$range[i] <- 4096 + params@data$minRange[i] <- 0 + params@data$maxRange[i] <- 4096 + list_description[rangekeyword] <- 4096 + list_description[flowcore_min] <- 0 + list_description[flowcore_max] <- 4096 + } + } + fcs_channel <- cbind(fcs_channel, round(channel)) + } + colnames(fcs_channel) <- markers + } else { + if (datatype != 'I') { + print(paste("Data type", datatype, "in FCS 3 is not supported")) + } + fcs_channel <- exprs(fcs) + colnames(fcs_channel) <- markers + } + } else { + print(paste("FCS version", keyword(fcs)$FCSversion, "is not supported")) + fcs_channel <- exprs(fcs) + colnames(fcs_channel) <- markers + } + newfcs <- flowFrame(fcs_channel, params, list_description) + return(newfcs) +} +# +# Starting function for processing a FCS file +# +processFCSFile <- function(input_file, output_file="", compensate=FALSE, + outformat="flowtext", gate=FALSE, + graph_file="", report="", method="", + scaling_factor, logicle_w=0.5, logicle_t=262144, + logicle_m=4.5, debug=FALSE) { + # + # Generate the file names for the output_file + # + pieces <- unlist(strsplit(input_file, .Platform$file.sep)) + filename <- pieces[length(pieces)] + if (debug) { + print (paste("Converting file: ",input_file)) + print (paste("Original file name: ",filename)) + print (paste("Output file name: ",output_file)) + } + fcs <- read.FCS(input_file, transformation=F) + keywords <- keyword(fcs) + markers <- colnames(fcs) + print_markers <- as.vector(pData(parameters(fcs))$desc) + # Update print_markers if the $P?S not in the FCS file + for (i in 1:length(print_markers)) { + if (is.na(print_markers[i])) { + print_markers[[i]] <- markers[i] + } + } + # + # Compensate + # + spill <- keywords$SPILL + + if (is.null(spill) == FALSE && compensate == TRUE) { + if (debug) { + print("Attempting compensation") + } + tryCatch({fcs = compensate(fcs, spill)}, + error = function(ex) {str(ex); }) + } + # + # Transform the data + # + transformed_data <- fcs + channels_to_exclude <- c(grep(colnames(fcs), pattern="FSC"), + grep(colnames(fcs), pattern="SSC"), + grep(colnames(fcs), pattern="Time")) + list_channels <- colnames(fcs)[-channels_to_exclude] + if (isAccuriData(keywords)) { + print("Accuri data is not supported") + } else if (method != "None"){ + if (method == "fcstrans"){ + transformed_data <- convertFCS(fcs, debug) + } else if (method == "logicle_auto"){ + lgcl <- estimateLogicle(fcs, channels = list_channels) + transformed_data <- transform(fcs, lgcl) + } else { + if (method == "arcsinh"){ + trans <- arcsinhTransform(transformationId="defaultArcsinhTransform", + a = 0, b = scaling_factor, c = 0) + } else if (method == "logicle"){ + trans <- logicleTransform(w = logicle_w, t = logicle_t, m = logicle_m) + } + translist <- transformList(list_channels, trans) + transformed_data <- transform(fcs, translist) + } + } + trans_gated_data <- transformed_data + # + # Gate data + # + if (gate){ + # check that there are SSC and FSC channels to gate on + chans <- c(grep(colnames(transformed_data), pattern="FSC"), + grep(colnames(transformed_data), pattern="SSC")) + totalchans <- chans + if (length(chans) > 2) { + #get first FSC and corresponding SSC + chans <- c(grep(colnames(transformed_data), pattern="FSC-A"), + grep(colnames(transformed_data), pattern="SSC-A")) + if (length(chans) == 0) { + chans <- c(grep(colnames(transformed_data), pattern="FSC-H"), + grep(colnames(transformed_data), pattern="SSC-H")) + if (length(chans) == 0) { + chans <- c(grep(colnames(transformed_data), pattern="FSC-W"), + grep(colnames(transformed_data), pattern="SSC-W")) + } + } + } + if (length(chans) == 0) { + warning('No forward/side scatter channels found, gating aborted.') + } else { + # gate lymphocytes + lymph <- flowDensity(obj=transformed_data, channels=chans, + position=c(TRUE, NA), + debris.gate=c(TRUE, FALSE)) + # gate singlets if A and H/W + if (length(totalchans) > 2) { + trans_gated_data <- getflowFrame(flowDensity(obj=lymph, + singlet.gate=TRUE)) + } else { + trans_gated_data <- getflowFrame(lymph) + } + # report + pregating_summary <- capture.output(summary(transformed_data)) + pregating_dim <- capture.output(dim(transformed_data)) + postgating_summary <- capture.output(summary(trans_gated_data)) + postgating_dim <- capture.output(dim(trans_gated_data)) + sink(report) + cat("#########################\n") + cat("## BEFORE GATING ##\n") + cat("#########################\n") + cat(pregating_dim, pregating_summary, sep="\n") + cat("\n#########################\n") + cat("## AFTER GATING ##\n") + cat("#########################\n") + cat(postgating_dim, postgating_summary, sep="\n") + sink() + # plots + time_channel <- grep(toupper(colnames(transformed_data)), pattern="TIME") + nb_markers <- length(colnames(transformed_data)) - length(time_channel) + nb_rows <- ceiling(((nb_markers-1)*nb_markers)/4) + h <- 400 * nb_rows + maxrange <- transformed_data@parameters@data$range[1] + + png(graph_file, type="cairo", height=h, width=800) + par(mfrow=c(nb_rows,2)) + for (m in 1:(nb_markers - 1)) { + for (n in (m+1):nb_markers) { + plotDens(transformed_data, c(m,n), xlab = print_markers[m], + ylab = print_markers[n], main = "Before Gating", + ylim = c(0, maxrange), xlim = c(0, maxrange)) + plotDens(trans_gated_data, c(m,n), xlab = print_markers[m], + ylab = print_markers[n], main = "After Gating", + ylim = c(0, maxrange), xlim = c(0, maxrange)) + } + } + dev.off() + } + } + if (outformat=="FCS") { + write.FCS(trans_gated_data, output_file) + } else if (outformat=="flowFrame") { + saveRDS(trans_gated_data, file = output_file) + } else { + output_data <- exprs(trans_gated_data) + colnames(output_data) <- print_markers + write.table(output_data, file=output_file, quote=F, + row.names=F,col.names=T, sep='\t', append=F) + } +} +# Convert FCS file using FCSTrans logicile transformation +# @param input_file FCS file to be transformed +# @param output_file FCS file transformed ".txt" extension +# @param compensate Flag indicating whether to apply compensation +# matrix if it exists. +transformFCS <- function(input_file, output_file, compensate=FALSE, + outformat="flowtext", gate=FALSE, graph_file="", + report_file="", trans_met="", scaling_factor=1/150, + w=0.5, t=262144, m=4.5, debug=FALSE) { + isValid <- F + # Check file beginning matches FCS standard + tryCatch({ + isValid <- isFCSfile(input_file) + }, error = function(ex) { + print (paste(" ! Error in isFCSfile", ex)) + }) + if (isValid) { + processFCSFile(input_file, output_file, compensate, outformat, + gate, graph_file, report_file, trans_met, scaling_factor, + w, t, m) + } else { + print (paste(input_file, "does not meet FCS standard")) + } +} +# +# Run FCS Gate-Trans +# +args <- commandArgs(trailingOnly = TRUE) +graphs <- "" +report <- "" +gate <- FALSE +trans_method <- "None" +scaling_factor <- 1 / 150 +w <- 0.5 +t <- 262144 +m <- 4.5 +if (args[5]!="None") { + gate <- TRUE + graphs <- args[5] + report <- args[6] +} +if (args[7]!="None"){ + trans_method <- args[7] + if (args[7] == "arcsinh"){ + scaling_factor <- 1 / as.numeric(args[8]) + } else if (args[7] == "logicle"){ + w <- args[8] + t <- args[9] + m <- args[10] + } +} +transformFCS(args[1], args[2], args[3], args[4], gate, graphs, + report, trans_method, scaling_factor, w, t, m)