Mercurial > repos > azomics > convert_fcstxt_to_sce

diff FCStxtConvertSCE.R @ 0:4dc004880972 draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
author: azomics
date: Thu, 22 Jul 2021 21:44:59 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FCStxtConvertSCE.R	Thu Jul 22 21:44:59 2021 +0000
@@ -0,0 +1,226 @@
+#!/usr/bin/env Rscript
+# GECO flow text conversion tool
+# Authors: Emily Combe and Pablo Moreno
+#
+# This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object
+# The tool was written by Emily Combe and edited by Pablo Moreno
+#
+# Options are available to choose: the columns/markers to include in the assay, the columns to include in the metadata, descriptions of the markers and a metadata file.
+#
+#
+#
+# Version 1
+# July 2020 (Emily Combe / Pablo Moreno)
+
+
+suppressPackageStartupMessages(library(SingleCellExperiment))
+suppressPackageStartupMessages(library(optparse))
+
+#' Build a SingleCellExperiment from a tab-separated flow text file.
+#'
+#' @param input Path to the flow text (tabular) file; one row per cell.
+#' @param fl_cols Column names to use as assay markers. When empty, every
+#'   column is used except scatter, time, cluster/population and live/dead ones.
+#' @param mtd_cols Column names to keep as per-cell metadata. When empty,
+#'   columns whose names look like sample or cluster/population labels are kept.
+#' @param marker_type One value per assay marker ("l" is expanded to "lineage",
+#'   "f" to "functional"); stored in the marker_type column of the row data.
+#' @param meta_data Optional data frame sharing exactly one column name
+#'   (case-insensitive) with the per-cell metadata; joined onto it by that column.
+#' @return A SingleCellExperiment with identical counts and logcounts assays.
+#'   Invalid arguments terminate R via quit() with status 13 to 17.
+sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) {
+
+    #---------------------#
+    # reading in flowtext #
+    #---------------------#
+
+    # check.names is left at its default, so headers are made syntactic
+    flowtext <- read.table(input, sep = "\t", header = TRUE)
+    all_cols <- colnames(flowtext)
+
+    #----------------------------------#
+    # extract-marker-fluorescence data #
+    #----------------------------------#
+
+    if (length(fl_cols) > 0) {
+        if (length(fl_cols) > ncol(flowtext)) {
+            quit(save = "no", status = 13, runLast = FALSE)
+        }
+        # selection keeps the file's column order, not the order of fl_cols
+        fl_cols_assay <- all_cols[all_cols %in% fl_cols]
+    } else {
+        # A logical mask replaces the negative index: x[-integer(0)] is empty,
+        # so every marker was dropped whenever no column matched a pattern.
+        is_scatter <- grepl("FSC|SSC", all_cols)
+        is_non_marker <- grepl("time|population|flowsom|cluster|som|pop|live_dead|live|dead",
+                               all_cols, ignore.case = TRUE)
+        fl_cols_assay <- all_cols[!(is_scatter | is_non_marker)]
+    }
+
+    # assay layout: rows = features (markers), columns = cells
+    counts <- base::t(as.matrix(flowtext[, fl_cols_assay, drop = FALSE]))
+    colnames(counts) <- seq_len(ncol(counts))
+
+    #-----------------#
+    #coldata/meta data#
+    #-----------------#
+
+    if (length(mtd_cols) > 0) {
+        if (length(mtd_cols) > ncol(flowtext)) {
+            quit(save = "no", status = 14, runLast = FALSE)
+        }
+        mtd_cols_assay <- all_cols[all_cols %in% mtd_cols]
+    } else {
+        # default: sample and cluster-like columns, searched for in the column
+        # names (the search used to run over marker_type and found nothing)
+        mtd_cols_assay <- all_cols[grepl("sample|population|flowsom|cluster|pop|som",
+                                         all_cols, ignore.case = TRUE)]
+    }
+    md <- flowtext[, mtd_cols_assay, drop = FALSE]
+
+    if (!is.null(meta_data)) {
+        # exactly one column name must be shared, ignoring case
+        shared <- intersect(tolower(colnames(md)), tolower(colnames(meta_data)))
+        if (length(shared) == 0) {
+            quit(save = "no", status = 15, runLast = FALSE)
+        }
+        if (length(shared) > 1) {
+            quit(save = "no", status = 16, runLast = FALSE)
+        }
+
+        # Look each cell up in meta_data instead of calling merge(): merge()
+        # joins on exact-case names, re-sorts rows and (all = TRUE) can add
+        # rows, each of which breaks the one-row-per-cell alignment.
+        key_md <- match(shared, tolower(colnames(md)))
+        key_meta <- match(shared, tolower(colnames(meta_data)))
+        hit <- match(md[[key_md]], meta_data[[key_meta]])
+        md <- cbind(md, meta_data[hit, -key_meta, drop = FALSE])
+        rownames(md) <- NULL
+    }
+
+    # SCOPE requires both counts and logcounts assays - for FLOW both assays contain the same data
+    sce_obj <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts))
+
+    # Per-cell metadata is attached even when no metadata file was supplied.
+    # NOTE(review): colLabels() is kept from the original, although the comments
+    # in this script refer to colData - confirm which one downstream tools read.
+    if (ncol(md) > 0) {
+        colLabels(sce_obj) <- md
+    }
+
+    #-----------------#
+    # row/marker data #
+    #-----------------#
+
+    if (length(marker_type) > 0) {
+        if (length(marker_type) != nrow(rowData(sce_obj))) {
+            quit(save = "no", status = 17, runLast = FALSE)
+        }
+        marker_type[marker_type == "l"] <- "lineage"
+        marker_type[marker_type == "f"] <- "functional"
+
+        rowData(sce_obj)$marker_type <- marker_type
+    }
+    sce_obj
+}
+
+# Command line interface. All six options are optional character values that
+# default to NA, which lets the code below detect "not supplied" with is.na().
+option_list <- local({
+  # action, default and type are common to every option; only the flags and
+  # the help text vary, so they are the builder's sole arguments
+  character_option <- function(flags, help) {
+    make_option(
+      flags,
+      action = "store",
+      default = NA,
+      type = "character",
+      help = help
+    )
+  }
+
+  list(
+    # input / output files
+    character_option(
+      c("-i", "--input"),
+      "File name for FCS txt file with sample information."
+    ),
+    character_option(
+      c("-o", "--output"),
+      "File name for output SCE R RDS Object."
+    ),
+    # column / marker selection
+    character_option(
+      c("-f", "--fl_cols"),
+      "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
+    ),
+    character_option(
+      c("-m", "--metadata_columns"),
+      "Columns to be included in the metadata of the Single Cell Experiment."
+    ),
+    character_option(
+      c("--metadata_file"),
+      "Optional meta data txt file to include in Single Cell Experiment."
+    ),
+    character_option(
+      c("--marker_type"),
+      "Marker type"
+    )
+  )
+})
+
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# --input and --output default to NA; fail early with a clear message instead
+# of letting read.table() / saveRDS() stumble over NA later on.
+if (is.na(opt$input) || is.na(opt$output)) {
+    stop("both --input and --output must be supplied", call. = FALSE)
+}
+
+#' Split a comma separated command line value into its entries.
+#'
+#' @param value Option value as delivered by optparse: one string, or NA when
+#'   the option was not supplied.
+#' @param fail_status Exit status used when an entry turns out to be NA.
+#' @return An empty list for NA input, otherwise a character vector with one
+#'   element per comma separated entry.
+split_option <- function(value, fail_status) {
+    if (is.na(value)) {
+        return(list())
+    }
+
+    entries <- as.character(strsplit(value, ",")[[1]])
+
+    # one vectorised check replaces the per-element loop
+    if (anyNA(entries)) {
+        quit(save = "no", status = fail_status, runLast = FALSE)
+    }
+    entries
+}
+
+# fluorescence markers to include in the assay
+fl_channels <- split_option(opt$fl_cols, fail_status = 10)
+
+# meta data columns to go into colData in SCE
+mt_channels <- split_option(opt$metadata_columns, fail_status = 11)
+
+# comma separated list of values to define the markers included in the assay
+mark_type <- split_option(opt$marker_type, fail_status = 12)
+
+# metadata file to add to the coldata in SCE. Must have column matching the sample column in the flowtext file
+md <- NULL
+if (!is.na(opt$metadata_file)) {
+    md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE)
+}
+
+# The result gets its own name so that the sce() function is not overwritten.
+sce_object <- sce(
+    input = opt$input,
+    fl_cols = fl_channels,
+    mtd_cols = mt_channels,
+    meta_data = md,
+    marker_type = mark_type
+)
+
+saveRDS(sce_object, file = opt$output)
author	azomics
date	Thu, 22 Jul 2021 21:44:59 +0000
parents
children