Mercurial > repos > azomics > convert_fcstxt_to_sce

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/FCStxtConvertSCE.R	Thu Jul 22 21:44:59 2021 +0000
@@ -0,0 +1,226 @@
+#!/usr/bin/env Rscript
+# GECO flow text conversion tool
+# Authors: Emily Combe and Pablo Moreno
+#
+# This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object
+# The tool was written by Emily Combe and edited by Pablo Moreno
+#
+# Options are available to choose: the columns/markers to include in the assay, the columns to include in the meta data, descriptions of the markers and a metadata file.
+#
+#
+#
+# Version 1
+# July 2020 (Emily Combe / Pablo Moreno)
+
+
+suppressPackageStartupMessages(library(SingleCellExperiment))
+suppressPackageStartupMessages(library(optparse))
+
+# Build a SingleCellExperiment object from a flowtext (tab-separated) file.
+#
+# Arguments:
+#   input       Path to a tab-separated file with a header row, one row per cell.
+#   fl_cols     Names of the columns to use as assay features (markers). When
+#               empty, every column is used except those whose name looks like
+#               scatter, time, sample, population/cluster or live/dead data.
+#   mtd_cols    Names of the columns to store as per-cell metadata. When empty,
+#               columns whose name looks like sample or population/cluster data
+#               are used.
+#   marker_type Optional vector with one label per assay feature; "l" and "f"
+#               are expanded to "lineage" and "functional".
+#   meta_data   Optional data.frame of extra metadata. It must share exactly one
+#               column name (compared case-insensitively) with the per-cell
+#               metadata; that column is used as the merge key.
+#
+# Returns a SingleCellExperiment with identical "counts" and "logcounts" assays
+# (features x cells), the per-cell metadata in colData and, when marker_type is
+# given, a "marker_type" column in rowData.
+#
+# The process quits with status 13/14 when the requested assay/metadata columns
+# do not exist in the input, 15/16 when zero/several column names are shared
+# with meta_data, and 17 when marker_type does not have one entry per feature.
+sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) {
+
+    #---------------------#
+    # reading in flowtext #
+    #---------------------#
+
+    # read.table() keeps its default check.names = TRUE here, so header names
+    # are made syntactically valid (e.g. "FSC-A" is read as "FSC.A").
+    flowtext <- read.table(input, sep = "\t", header = TRUE)
+    all_cols <- colnames(flowtext)
+
+    #----------------------------------#
+    # extract-marker-fluorescence data #
+    #----------------------------------#
+
+    if (length(fl_cols) > 0) {
+        # reject requests that cannot be satisfied instead of silently dropping
+        # the unknown names
+        if (length(fl_cols) > ncol(flowtext) || !all(fl_cols %in% all_cols)) {
+            quit(save = "no", status = 13, runLast = FALSE)
+        }
+        fl_cols_assay <- all_cols[all_cols %in% fl_cols]
+    } else {
+        # A logical mask is used on purpose: negative indexing with an empty
+        # index vector (x[-integer(0)]) would drop every column when nothing
+        # matches. Sample columns are excluded as well, otherwise a character
+        # column would turn the whole assay matrix into character.
+        is_scatter <- grepl("FSC|SSC", all_cols)
+        is_annotation <- grepl("time|sample|population|flowsom|cluster|som|pop|live_dead|live|dead",
+                               all_cols, ignore.case = TRUE)
+        fl_cols_assay <- all_cols[!(is_scatter | is_annotation)]
+    }
+
+    # transpose data into assay as columns = cells and rows = features.
+    counts <- base::t(as.matrix(flowtext[, fl_cols_assay, drop = FALSE]))
+    colnames(counts) <- seq_len(ncol(counts))
+
+
+    #-----------------#
+    #coldata/meta data#
+    #-----------------#
+
+    # by default any columns with sample names or cluster results will be extracted - to over ride this user must provide a comma separated list of column name (mtd_cols)
+    if (length(mtd_cols) > 0) {
+        if (length(mtd_cols) > ncol(flowtext) || !all(mtd_cols %in% all_cols)) {
+            quit(save = "no", status = 14, runLast = FALSE)
+        }
+        mtd_cols_assay <- all_cols[all_cols %in% mtd_cols]
+    } else {
+        # search the flowtext column names (not the marker types) for sample
+        # and cluster columns
+        is_metadata <- grepl("sample|population|flowsom|cluster|pop|som", all_cols, ignore.case = TRUE)
+        mtd_cols_assay <- all_cols[is_metadata]
+    }
+
+    md <- flowtext[, mtd_cols_assay, drop = FALSE]
+    cell_metadata <- md
+
+    # if metadata available will be merged with meta data from flow text
+    if (!is.null(meta_data)) {
+
+        #match column names so case insensitive
+        md_col <- tolower(colnames(md))
+        mtd_col <- tolower(colnames(meta_data))
+        shared <- intersect(md_col, mtd_col)
+
+        #quit if < 1 or > 1 column names match
+        if (length(shared) == 0) {
+            quit(save = "no", status = 15, runLast = FALSE)
+        }
+        if (length(shared) > 1) {
+            quit(save = "no", status = 16, runLast = FALSE)
+        }
+
+        # merge on the real column names behind the case-insensitive match;
+        # merging on identical names only would give a cross join when the two
+        # names differ in case
+        key_md <- colnames(md)[match(shared, md_col)]
+        key_meta <- colnames(meta_data)[match(shared, mtd_col)]
+
+        # merge() does not keep the row order of x, so remember the position of
+        # every cell and restore it afterwards; all.x keeps one row per cell
+        md$.cell_order <- seq_len(nrow(md))
+        merged <- merge(x = md, y = meta_data, by.x = key_md, by.y = key_meta,
+                        all.x = TRUE, sort = FALSE)
+        if (nrow(merged) != nrow(md)) {
+            stop("The metadata file must contain at most one row per value of column '",
+                 key_meta, "'.", call. = FALSE)
+        }
+        merged <- merged[order(merged$.cell_order), , drop = FALSE]
+        merged$.cell_order <- NULL
+        cell_metadata <- merged
+    }
+
+    #create Single Cell experiment object. SCOPE requires both counts and logcounts assays - for FLOW both assays contain the same data
+    sce_object <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts))
+
+    # store the per-cell metadata as colData columns (also when no metadata
+    # file was supplied); colLabels() is meant for a single label vector
+    if (ncol(cell_metadata) > 0) {
+        cell_data <- S4Vectors::DataFrame(cell_metadata, check.names = FALSE)
+        rownames(cell_data) <- colnames(counts)
+        colData(sce_object) <- cell_data
+    }
+
+
+    #-----------------#
+    # row/marker data #
+    #-----------------#
+
+    if (length(marker_type) > 0) {
+        if (length(marker_type) != nrow(sce_object)) {
+            quit(save = "no", status = 17, runLast = FALSE)
+        }
+        marker_type[marker_type == "l"] <- "lineage"
+        marker_type[marker_type == "f"] <- "functional"
+
+        rowData(sce_object)$marker_type <- marker_type
+    }
+
+    sce_object
+}
+
+# Command line options. Every option stores a character value and defaults to
+# NA when the flag is not supplied, so the options are generated from a table
+# of flags and help texts.
+option_list <- lapply(
+  list(
+    list(
+      flags = c("-i", "--input"),
+      help = "File name for FCS txt file with sample information."
+    ),
+    list(
+      flags = c("-o", "--output"),
+      help = "File name for output SCE R RDS Object."
+    ),
+    list(
+      flags = c("-f", "--fl_cols"),
+      help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
+    ),
+    list(
+      flags = c("-m", "--metadata_columns"),
+      help = "Columns to be included in the metadata of the Single Cell Experiment."
+    ),
+    list(
+      flags = c("--metadata_file"),
+      help = "Optional meta data txt file to include in Single Cell Experiment."
+    ),
+    list(
+      flags = c("--marker_type"),
+      help = "Marker type"
+    )
+  ),
+  function(spec) {
+    make_option(
+      spec$flags,
+      action = "store",
+      default = NA,
+      type = "character",
+      help = spec$help
+    )
+  }
+)
+
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# the input and output files are mandatory
+if (is.na(opt$input) || is.na(opt$output)) {
+    stop("Both --input and --output must be provided.", call. = FALSE)
+}
+
+# Split a comma separated command line value into a character vector.
+#
+# value:  the raw option value; NA means the option was not supplied.
+# status: exit status used when the list contains an empty entry.
+#
+# Returns character(0) when the option was not supplied, otherwise the entries
+# with surrounding whitespace removed. strsplit() never produces NA entries, so
+# a malformed list shows up as empty strings (e.g. "a,,b"); those quit with the
+# given status.
+parse_list_option <- function(value, status) {
+    if (is.null(value) || is.na(value)) {
+        return(character(0))
+    }
+    entries <- trimws(strsplit(value, ",", fixed = TRUE)[[1]])
+    if (anyNA(entries) || any(!nzchar(entries))) {
+        quit(save = "no", status = status, runLast = FALSE)
+    }
+    entries
+}
+
+# fluorescence markers to include in the assay
+fl_channels <- parse_list_option(opt$fl_cols, status = 10)
+
+# meta data columns to go into colData in SCE
+mt_channels <- parse_list_option(opt$metadata_columns, status = 11)
+
+#metadata file to add to the coldata in SCE. Must have column matching the sample column in the flowtext file
+md <- NULL
+if (!is.na(opt$metadata_file)) {
+    md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE)
+}
+
+#comma separated list of values to define the markers included in the assay
+mark_type <- parse_list_option(opt$marker_type, status = 12)
+
+
+# keep the result under its own name so the sce() function is not overwritten
+sce_object <- sce(input = opt$input, fl_cols = fl_channels, mtd_cols = mt_channels, meta_data = md, marker_type = mark_type)
+
+saveRDS(sce_object, file = opt$output)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convertFCStxtToSCE.xml	Thu Jul 22 21:44:59 2021 +0000
@@ -0,0 +1,128 @@
+<tool id="convert_fcstxt_to_sce" name="Convert flowtext file to SCE" version="1.0+galaxy0" profile="18.01">
+    <description>Single Cell Experiment</description>
+    <requirements>
+        <requirement type="package" version="1.10.1">bioconductor-singlecellexperiment</requirement>
+        <requirement type="package" version="1.6.6">r-optparse</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:9" level="fatal" description="See stderr for more details." />
+        <exit_code range="10" level="fatal" description="Please provide comma separated character strings, of the columns to include in the assay" />
+        <exit_code range="11" level="fatal" description="Please provide comma separated character strings, of the columns to include in the meta data." />
+        <exit_code range="12" level="fatal" description="Please provide comma separated character strings, defining the marker type (included in the assay)." />
+        <exit_code range="13" level="fatal" description="Provided column names for the assay are out of range in the flowtext file given as input." />
+        <exit_code range="14" level="fatal" description="Provided column names for the metadata are out of range in the flowtext file given as input." />
+        <exit_code range="15" level="fatal" description="Please provide a metadata file with the 'sample' column name matching the 'sample' column name in the flowtext file." />
+        <exit_code range="16" level="fatal" description="Please make sure that only one of the column names in the flowtext file and metadata file match (the sample column)." />
+        <exit_code range="17" level="fatal" description="Please provide the same number of marker types, as there are markers in the assay." />
+        <exit_code range="18:"/>
+    </stdio>
+    <command><![CDATA[
+        Rscript '$__tool_directory__'/FCStxtConvertSCE.R -i '${input}' -o '${output_sce}'
+        #if $fl_cols
+        --fl_cols '${fl_cols}'
+        #end if
+        #if $mtd_cols
+        --metadata_columns '${mtd_cols}'
+        #end if
+        #if $meta_data
+        --metadata_file '${meta_data}'
+        #end if
+        #if $marker_type
+        --marker_type '${marker_type}'
+        #end if
+    ]]>
+    </command>
+    <inputs>
+        <param name="input" format="flowtext,tabular" type="data" label="Flow text file to be converted into a Single Cell Experiment object"/>
+        <param name="meta_data" format="txt" type="data" label="Metadata file" optional="true" help="Optional meta data txt file to include in Single Cell Experiment. Please see below for formatting."/>
+        <param name="fl_cols" type="text"  optional="true" label="Columns with markers to be included in the Single Cell Experiment assay" help="Comma-separated list of columns (e.g.: CD3,CD4,CD8). By default, all columns except for FSC, SSC, time and Live-dead will be taken into account."/>
+        <param name="mtd_cols" type="text"  optional="true" label="Columns to be included in the metadata of the Single Cell Experiment" help="Comma-separated list of columns (e.g.: Sample,Population). By default, the 'Population' and 'Sample' columns will be included in the colData. The colData stores meta data for each cell in the dataset."/>
+        <param name="marker_type" type="text" optional="true" value="" label="Marker type" help="Comma-separated list of marker types (e.g.: lineage,lineage,functional). This section labels the markers included in the assay. Labels must be in order of the markers in flowtext file or in order of the markers selected for the assay. Proteins labelled as 'l' will be replaced with lineage and those with 'f' will be replaced with functional. See below for more details.">
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output_sce" format="rdata.sce" from_work_dir="output.rds" label="SCE ${input.name}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input.flowtext"/>
+            <output name="output_sce" file="output.rds" ftype="rdata.sce" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="input" value="input.flowtext"/>
+            <param name="fl_cols" value="Marker1,Marker2,Marker3,Marker4"/>
+            <param name="mtd_cols" value="Population,Sample"/>
+            <param name="marker_type" value="f,l,f,l"/>
+            <output name="output_sce" file="output.rds" ftype="rdata.sce" compare="sim_size"/>
+        </test>
+        <test>
+            <param name="input" value="input.flowtext"/>
+            <param name="meta_data" value="metadata.txt"/>
+            <param name="fl_cols" value="Marker1,Marker2,Marker3,Marker4"/>
+            <param name="mtd_cols" value="Population,Sample"/>
+            <param name="marker_type" value="f,l,f,l"/>
+            <output name="output_sce" file="output.rds" ftype="rdata.sce" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+    This tool converts flow text files into a Single Cell Experiment object, with no transformation or compensation.
+
+-----
+
+**Input files**
+
+This tool requires a single valid flow text file. If you have multiple flowtext files please use the *Add sample information and merge tool* first.
+
+If metadata is supplied and there are multiple samples in the flowtext file; there must be a sample column in the metadata and flowtext file, with the same column name. The sample names within the column must also match.
+For example, if there is a column in the flow text file labelled 'Sample' with 2 samples labelled 1 and 2, there must also be a column in the meta data file labelled 'Sample' containing one row for each of the samples '1' and '2', with the relevant additional columns of metadata.
+
+By default the forward scatter, side scatter columns, time, live dead stains, cluster info and sample columns will be excluded.
+To override please supply a comma separated list of columns to include in the assay of the Single Cell Experiment.
+
+By default the columns which will be included in the meta data of the SCE will be the sample and cluster results.
+To override this please supply a comma separated list of columns to include in the meta data of the Single Cell Experiment.
+
+By default this
+
+**Output file**
+
+Support the following conversion:
+ * flowtext file -> SingleCellExperiment
+ * tabular file -> SingleCellExperiment
+
+The output of this tool is a Single Cell Experiment object. This is an R object which contains slots for fluorescence data (assay) and metaData (colData). Please see the image below.
+Converting to a SCE object will allow you to run tools for single cell RNAseq on your cytometry data.
+
+For more information on Single Cell Experiment objects please refer to the picture below or the `bioconductor page <https://bioconductor.org/packages/release/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html>`_.
+
+-----
+
+**Example**
+
+*Input*: flow text file
+
+  FSC      SSC     Marker1 Marker2 Marker3 Marker4 Population  Sample
+  449      157     551     129     169     292     1           sample1
+  894      1023    199     277     320     227     3           sample1
+  262      73      437     69      0       146     6           sample1
+  340      115     509     268     0       74      1           sample1
+  ...      ...     ...     ...     ...     ...     ...         ...
+  523      354     554     176     213     185     2           sample2
+  678      8096    98      74      417     267     6           sample2
+  226      89      467     123     0       324     2           sample2
+  660      175     589     178     1       89      7           sample2
+  ...      ...     ...     ...     ...     ...     ...         ...
+
+*Input*: Metadata Table
+   Sample     Patient ID  Sex      Age    ...
+   sample1    7245        female   54     ...
+   sample2    1423        male     36     ...
+   ...        ...         ...      ...    ...
+
+*Output*: Single Cell Experiment object
+
+.. image:: ./images/sce_modified.png
+
+]]>
+    </help>
+</tool>
Binary file images/sce.png has changed
Binary file images/sce_modified.png has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.flowtext	Thu Jul 22 21:44:59 2021 +0000
@@ -0,0 +1,10 @@
+FSC	SSC	Marker1	Marker2	Marker3	Marker4	Population	Sample
+449	157	551	129	169	292	1	sample1
+894	1023	199	277	320	227	3	sample1
+262	73	437	69	0	146	6	sample1
+340	115	509	268	0	74	1	sample1
+523	354	554	176	213	185	2	sample2
+678	8096	98	74	417	267	6	sample2
+226	89	467	123	0	324	2	sample2
+660	175	589	178	1	89	7	sample2
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata.txt	Thu Jul 22 21:44:59 2021 +0000
@@ -0,0 +1,3 @@
+Sample	Patient_ID	Sex	Age
+sample1	7245	female	54
+sample2	1423	male	26
Binary file test-data/output.rds has changed