Mercurial > repos > azomics > convert_fcstxt_to_sce
view FCStxtConvertSCE.R @ 0:4dc004880972 draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
author | azomics |
---|---|
date | Thu, 22 Jul 2021 21:44:59 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env Rscript # GECO flow text conversion tool # Authors: Emily Combe and Pablo Moreno # # This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object # The tool was written by Emily Combe and edited by Pablo Moreno # # There are the options to choose: the columns/markers to include in the assay, the columns to include in the meta data, descriptions of the markers and a metadata file. # # # # Version 1 # July 2020 (Emily Combe / Pablo Moreno) suppressPackageStartupMessages(library(SingleCellExperiment)) suppressPackageStartupMessages(library(optparse)) sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) { #---------------------# # reading in flowtext # #---------------------# flowtext <- read.table(input, sep = "\t", header = T) #----------------------------------# # extract-marker-fluorescence data # #----------------------------------# fl_cols_assay <- colnames(flowtext) if (length(fl_cols) > 0) { if (length(fl_cols) > ncol(flowtext)) { quit(save = "no", status = 13, runLast = FALSE) } fl_cols_assay <- fl_cols_assay[fl_cols_assay %in% fl_cols] } else { channels_to_exclude <- c(grep(fl_cols_assay, pattern = "FSC"), grep(fl_cols_assay, pattern = "SSC"), grep(fl_cols_assay, pattern = "FSC-A"), grep(fl_cols_assay, pattern = "SSC-A"), grep(fl_cols_assay, pattern = "FSC-W"), grep(fl_cols_assay, pattern = "SSC-W"), grep(fl_cols_assay, pattern = "FSC-H"), grep(fl_cols_assay, pattern = "SSC-H"), grep(fl_cols_assay, pattern = "Time", ignore.case = T), grep(fl_cols_assay, pattern = "Population|flowSOM|cluster|SOM|pop|cluster", ignore.case = T), grep(fl_cols_assay, pattern = "Live_Dead|live|dead", ignore.case = T)) fl_cols_assay <- fl_cols_assay[-channels_to_exclude] } counts <- flowtext[, fl_cols_assay, drop = FALSE] counts <- as.matrix(counts) # transpose data into assay as columns = cells and rows = features. counts <- base::t(counts) colnames(counts) <- seq_len(ncol(counts)) #-----------------# #coldata/meta data# #-----------------# # by default any columns with sample names or cluster results will be extracted - to over ride this user must provide a comma separated list of column name (mtd_cols) mtd_cols_assay <- colnames(flowtext) if (length(mtd_cols) > 0) { if (length(mtd_cols) > ncol(flowtext)) { quit(save = "no", status = 14, runLast = FALSE) } mtd_cols_assay <- mtd_cols_assay[mtd_cols_assay %in% mtd_cols] } else { #create warning here to the user - but without failing mtd_columns <- c(grep(marker_type, pattern = "sample", ignore.case = T), grep(marker_type, pattern = "population|flowsom|cluster|pop|som", ignore.case = T)) mtd_cols_assay <- mtd_cols_assay[mtd_columns] } md <- flowtext[, mtd_cols_assay, drop = FALSE] # if metadata available will be merged with meta data from flow text if (!is.null(meta_data)) { #match column names so case insensitive md_col <- tolower(colnames(md)) mtd_col <- tolower(colnames(meta_data)) #quit if < 1 or > 1 column names match if (length(intersect(md_col, mtd_col)) == 0) { quit(save = "no", status = 15, runLast = FALSE) } if (length(intersect(md_col, mtd_col)) > 1) { quit(save = "no", status = 16, runLast = FALSE) } #merge by matched column meta_data <- merge(x = md, y = meta_data, all = T) } #create Single Cell experiment object. SCOPE requires both counts and logcounts assays - for FLOW both assays contain the same data sce <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts)) if (!is.null(meta_data)) { colLabels(sce) <- meta_data } #-----------------# # row/marker data # #-----------------# if (length(marker_type) > 0) { if (length(marker_type) != nrow(rowData(sce))) { quit(save = "no", status = 17, runLast = FALSE) } marker_type[marker_type == "l"] <- "lineage" marker_type[marker_type == "f"] <- "functional" rowData(sce)$marker_type <- marker_type } return(sce) } option_list <- list( make_option( c("-i", "--input"), action = "store", default = NA, type = "character", help = "File name for FCS txt file with sample information." ), make_option( c("-o", "--output"), action = "store", default = NA, type = "character", help = "File name for output SCE R RDS Object." ), make_option( c("-f", "--fl_cols"), action = "store", default = NA, type = "character", help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay" ), make_option( c("-m", "--metadata_columns"), action = "store", default = NA, type = "character", help = "Columns to be included in the metadata of the Single Cell Experiment." ), make_option( c("--metadata_file"), action = "store", default = NA, type = "character", help = "Optional meta data txt file to include in Single Cell Experiment." ), make_option( c("--marker_type"), action = "store", default = NA, type = "character", help = "Marker type" ) ) opt <- parse_args(OptionParser(option_list = option_list)) # fluorescence markers to include in the assay fl_channels <- list() if (is.na(opt$fl_cols)) { flag_default <- TRUE } else { fl_channels <- as.character(strsplit(opt$fl_cols, ",")[[1]]) for (channel in fl_channels) { if (is.na(channel)) { quit(save = "no", status = 10, runLast = FALSE) } } } # meta data columns to go into colDaa in SCE mt_channels <- list() if (is.na(opt$metadata_columns)) { flag_default <- TRUE } else { mt_channels <- as.character(strsplit(opt$metadata_columns, ",")[[1]]) for (channel in mt_channels) { if (is.na(channel)) { quit(save = "no", status = 11, runLast = FALSE) } } } #metadata file to add to the coldata in SCE. Must have column matching the sample column in the flowtext file md <- NULL if (is.na(opt$metadata_file)) { flag_default <- TRUE } else { md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE) } #comma separated list of values to define the markers included in the assay mark_type <- list() if (is.na(opt$marker_type)) { flag_default <- TRUE } else { mark_type <- as.character(strsplit(opt$marker_type, ",")[[1]]) for (mt in mark_type) { if (is.na(mt)) { quit(save = "no", status = 12, runLast = FALSE) } } } sce <- sce(input = opt$input, fl_cols = fl_channels, mtd_cols = mt_channels, meta_data = md, marker_type = mark_type) saveRDS(sce, file = opt$output)