Mercurial > repos > azomics > convert_fcstxt_to_sce
changeset 0:4dc004880972 draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
author | azomics |
---|---|
date | Thu, 22 Jul 2021 21:44:59 +0000 |
parents | |
children | |
files | FCStxtConvertSCE.R convertFCStxtToSCE.xml images/sce.png images/sce_modified.png test-data/input.flowtext test-data/metadata.txt test-data/output.rds |
diffstat | 7 files changed, 367 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env Rscript
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FCStxtConvertSCE.R Thu Jul 22 21:44:59 2021 +0000 @@ -0,0 +1,226 @@
# GECO flow text conversion tool
# Authors: Emily Combe and Pablo Moreno
#
# This tool converts a flowtext file (or tabular file) into a
# SingleCellExperiment object.
# The tool was written by Emily Combe and edited by Pablo Moreno.
#
# Options: the columns/markers to include in the assay, the columns to include
# in the meta data, descriptions of the markers and a metadata file.
#
# Version 1
# July 2020 (Emily Combe / Pablo Moreno)

suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(optparse))

# Print a message on stderr and terminate the script with `status`.
# The numeric codes (10-17) are the ones used throughout this script to
# distinguish the different input problems.
fail <- function(status, ...) {
  message(...)
  quit(save = "no", status = status, runLast = FALSE)
}

# Match user supplied column names against the column names of the flowtext.
#
# read.table() sanitises header names (e.g. "FSC-A" becomes "FSC.A"), so a
# requested name is accepted either verbatim or in its make.names() form.
#
# Returns a list with:
#   selected - matching entries of `available`, in `available` order
#   missing  - requested names that match no available column
resolve_columns <- function(requested, available) {
  requested <- trimws(as.character(unlist(requested)))
  sanitised <- make.names(requested)
  found <- requested %in% available | sanitised %in% available
  list(
    selected = available[available %in% c(requested, sanitised)],
    missing = requested[!found]
  )
}

# Resolve `requested` against `available`, or exit with `status` when more
# names than columns were given or when a requested column does not exist.
select_columns <- function(requested, available, status, what) {
  if (length(requested) > length(available)) {
    fail(status, "More columns requested for the ", what,
         " than exist in the flowtext file.")
  }
  resolved <- resolve_columns(requested, available)
  if (length(resolved$missing) > 0) {
    fail(status, "Column(s) not found in the flowtext file for the ", what,
         ": ", paste(resolved$missing, collapse = ", "))
  }
  resolved$selected
}

# Build a SingleCellExperiment from a tab separated flowtext file.
#
# Arguments:
#   input       path of the flowtext/tabular file (with header line).
#   fl_cols     column names to use as assay rows; when empty, every column is
#               used except scatter (FSC/SSC), time, population/cluster,
#               live/dead and sample columns.
#   mtd_cols    column names to store as per-cell metadata; when empty, the
#               sample and population/cluster columns are used.
#   marker_type one label per assay marker ("l" -> "lineage",
#               "f" -> "functional"); stored in rowData when supplied.
#   meta_data   optional data.frame with per-sample metadata; must share
#               exactly one column name (case-insensitive) with the per-cell
#               metadata.
#
# Returns a SingleCellExperiment whose `counts` and `logcounts` assays both
# hold the (untransformed) marker values, features in rows and cells in
# columns, with the per-cell metadata in colData.
#
# Terminates the R session with status 13-17 on invalid column selections,
# metadata keys or marker type counts.
sce <- function(input, fl_cols = list(), mtd_cols = list(),
                marker_type = list(), meta_data = NULL) {

  #---------------------#
  # reading in flowtext #
  #---------------------#

  flowtext <- read.table(input, sep = "\t", header = TRUE)
  all_cols <- colnames(flowtext)

  #----------------------------------#
  # extract-marker-fluorescence data #
  #----------------------------------#

  if (length(fl_cols) > 0) {
    fl_cols_assay <- select_columns(fl_cols, all_cols, 13, "assay")
  } else {
    # A logical mask is used on purpose: negative indexing with an empty
    # index vector (x[-integer(0)]) would drop every column.
    is_scatter <- grepl("FSC|SSC", all_cols)
    is_annotation <- grepl(
      "time|population|flowsom|cluster|som|pop|live_dead|live|dead|sample",
      all_cols,
      ignore.case = TRUE
    )
    fl_cols_assay <- all_cols[!(is_scatter | is_annotation)]
  }

  counts <- as.matrix(flowtext[, fl_cols_assay, drop = FALSE])

  # transpose data into assay as columns = cells and rows = features.
  counts <- base::t(counts)
  colnames(counts) <- seq_len(ncol(counts))

  #-----------------#
  #coldata/meta data#
  #-----------------#

  # by default any columns with sample names or cluster results are
  # extracted - to override this the user must provide mtd_cols
  if (length(mtd_cols) > 0) {
    mtd_cols_assay <- select_columns(mtd_cols, all_cols, 14, "metadata")
  } else {
    is_metadata <- grepl(
      "sample|population|flowsom|cluster|pop|som",
      all_cols,
      ignore.case = TRUE
    )
    mtd_cols_assay <- all_cols[is_metadata]
  }

  md <- flowtext[, mtd_cols_assay, drop = FALSE]

  # if a metadata table is available it is merged into the per-cell metadata
  if (!is.null(meta_data)) {

    # match column names case-insensitively
    md_col <- tolower(colnames(md))
    mtd_col <- tolower(colnames(meta_data))
    shared <- intersect(md_col, mtd_col)

    # quit if < 1 or > 1 column names match
    if (length(shared) == 0) {
      fail(15, "No column name is shared by the flowtext and metadata file.")
    }
    if (length(shared) > 1) {
      fail(16, "More than one column name is shared by the flowtext and ",
           "metadata file: ", paste(shared, collapse = ", "))
    }

    # give the key the same spelling on both sides, otherwise merge() finds
    # no common column and returns the Cartesian product
    key <- colnames(md)[match(shared, md_col)]
    colnames(meta_data)[match(shared, mtd_col)] <- key

    # merge() does not keep row order; remember it so that the rows can be
    # put back in the order of the cells in the assay
    md$.cell_order <- seq_len(nrow(md))
    md <- merge(x = md, y = meta_data, by = key, all.x = TRUE, sort = FALSE)
    if (nrow(md) != ncol(counts)) {
      stop("The metadata file must contain at most one row per value of '",
           key, "'.", call. = FALSE)
    }
    md <- md[order(md$.cell_order), , drop = FALSE]
    md$.cell_order <- NULL
  }

  # create Single Cell experiment object. SCOPE requires both counts and
  # logcounts assays - for FLOW both assays contain the same data
  sce_obj <- SingleCellExperiment(
    assays = list(counts = counts, logcounts = counts)
  )
  if (ncol(md) > 0) {
    rownames(md) <- NULL
    cell_data <- DataFrame(md, check.names = FALSE)
    rownames(cell_data) <- colnames(counts)
    colData(sce_obj) <- cell_data
  }

  #-----------------#
  # row/marker data #
  #-----------------#

  if (length(marker_type) > 0) {
    marker_type <- as.character(unlist(marker_type))
    if (length(marker_type) != nrow(counts)) {
      fail(17, length(marker_type), " marker type(s) supplied for ",
           nrow(counts), " marker(s) in the assay.")
    }
    marker_type[marker_type == "l"] <- "lineage"
    marker_type[marker_type == "f"] <- "functional"

    rowData(sce_obj)$marker_type <- marker_type
  }

  sce_obj
}

option_list <- list(
  make_option(
    c("-i", "--input"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name for FCS txt file with sample information."
  ),
  make_option(
    c("-o", "--output"),
    action = "store",
    default = NA,
    type = "character",
    help = "File name for output SCE R RDS Object."
  ),
  make_option(
    c("-f", "--fl_cols"),
    action = "store",
    default = NA,
    type = "character",
    help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
  ),
  make_option(
    c("-m", "--metadata_columns"),
    action = "store",
    default = NA,
    type = "character",
    help = "Columns to be included in the metadata of the Single Cell Experiment."
  ),
  make_option(
    c("--metadata_file"),
    action = "store",
    default = NA,
    type = "character",
    help = "Optional meta data txt file to include in Single Cell Experiment."
  ),
  make_option(
    c("--marker_type"),
    action = "store",
    default = NA,
    type = "character",
    help = "Marker type"
  )
)

opt <- parse_args(OptionParser(option_list = option_list))

if (is.na(opt$input) || is.na(opt$output)) {
  stop("Both --input and --output must be provided.", call. = FALSE)
}

# Split a comma separated option value into a character vector.
# An option that was not given (NA) yields an empty list, which sce() treats
# as "use the default". Empty entries (e.g. "a,,b") exit with `status`.
split_option <- function(value, status) {
  if (is.na(value)) {
    return(list())
  }
  items <- trimws(strsplit(value, ",", fixed = TRUE)[[1]])
  if (anyNA(items) || any(!nzchar(items))) {
    fail(status, "Empty entry in comma separated list: '", value, "'")
  }
  items
}

# fluorescence markers to include in the assay
fl_channels <- split_option(opt$fl_cols, 10)

# meta data columns to go into colData in SCE
mt_channels <- split_option(opt$metadata_columns, 11)

# metadata file to add to the coldata in SCE. Must have column matching the
# sample column in the flowtext file
metadata_table <- NULL
if (!is.na(opt$metadata_file)) {
  metadata_table <- read.table(
    opt$metadata_file,
    header = TRUE,
    sep = "\t",
    check.names = FALSE,
    as.is = FALSE
  )
}

# comma separated list of values to define the markers included in the assay
mark_type <- split_option(opt$marker_type, 12)

# The result gets its own name so that the function `sce` is not overwritten.
sce_object <- sce(
  input = opt$input,
  fl_cols = fl_channels,
  mtd_cols = mt_channels,
  meta_data = metadata_table,
  marker_type = mark_type
)

saveRDS(sce_object, file = opt$output)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/convertFCStxtToSCE.xml Thu Jul 22 21:44:59 2021 +0000 @@ -0,0 +1,128 @@ +<tool id="convert_fcstxt_to_sce" name="Convert flowtext file to SCE" version="1.0+galaxy0" profile="18.01"> + <description>Single Cell Experiment</description> + <requirements> + <requirement type="package" version="1.10.1">bioconductor-singlecellexperiment</requirement> + <requirement type="package" version="1.6.6">r-optparse</requirement> + </requirements> + <stdio> + <exit_code range="1:9" level="fatal" description="See stderr for more details." /> + <exit_code range="10" level="fatal" description="Please provide comma separated character strings, of the columns to include in the assay" /> + <exit_code range="11" level="fatal" description="Please provide comma separated character strings, of the columns to include in the meta data." /> + <exit_code range="12" level="fatal" description="Please provide comma separated character strings, defining the marker type (included in the assay)." /> + <exit_code range="13" level="fatal" description="Provided column names for the assay are out of range in the flowtext file given as input." /> + <exit_code range="14" level="fatal" description="Provided column names for the metadata are out of range in the flowtext file given as input." /> + <exit_code range="15" level="fatal" description="Please provide a metadata file with the 'sample' column name matching the 'sample' column name in the flowtext file." /> + <exit_code range="16" level="fatal" description="Please make sure that only one of the column names in the flowtext file and metadata file match (the sample column)." /> + <exit_code range="17" level="fatal" description="Please provide the same number of marker types, as there are markers in the assay." 
/> + <exit_code range="18:"/> + </stdio> + <command><![CDATA[ + Rscript '$__tool_directory__'/FCStxtConvertSCE.R -i '${input}' -o '${output_sce}' + #if $fl_cols + --fl_cols '${fl_cols}' + #end if + #if $mtd_cols + --metadata_columns '${mtd_cols}' + #end if + #if $meta_data + --metadata_file '${meta_data}' + #end if + #if $marker_type + --marker_type '${marker_type}' + #end if + ]]> + </command> + <inputs> + <param name="input" format="flowtext,tabular" type="data" label="Flow text file to be converted into a Single Cell Experiment object"/> + <param name="meta_data" format="txt" type="data" label="Metadata file" optional="true" help="Optional meta data txt file to include in Single Cell Experiment. Please see below for formatting."/> + <param name="fl_cols" type="text" optional="true" label="Columns with markers to be included in the Single Cell Experiment assay" help="Comma-separated list of columns (i.e.:CD8,CD4,CD8). By default, all columns except for FSC, SSC, time and Live-dead will be taken into account."/> + <param name="mtd_cols" type="text" optional="true" label="Columns to be included in the metadata of the Single Cell Experiment" help="Comma-separated list of columns (i.e.:Sample,Population). By default, the 'Population' and 'Sample' columns will be included in the colData. The colData stores metadata for each cell in the dataset."/> + <param name="marker_type" type="text" optional="true" value="" label="Marker type" help="Comma-separated list of marker types (i.e: lineage,lineage,functional). This section labels the markers included in the assay. Labels must be in order of the markers in flowtext file or in order of the markers selected for the assay. Proteins labelled as 'l' will be replaced with 'lineage' and those labelled as 'f' will be replaced with 'functional'. 
See below for more details."> + </param> + </inputs> + <outputs> + <data name="output_sce" format="rdata.sce" from_work_dir="output.rds" label="SCE ${input.name}"/> + </outputs> + <tests> + <test> + <param name="input" value="input.flowtext"/> + <output name="output_sce" file="output.rds" ftype="rdata.sce" compare="sim_size"/> + </test> + <test> + <param name="input" value="input.flowtext"/> + <param name="fl_cols" value="Marker1,Marker2,Marker3,Marker4"/> + <param name="mtd_cols" value="Population,Sample"/> + <param name="marker_type" value="f,l,f,l"/> + <output name="output_sce" file="output.rds" ftype="rdata.sce" compare="sim_size"/> + </test> + <test> + <param name="input" value="input.flowtext"/> + <param name="meta_data" value="metadata.txt"/> + <param name="fl_cols" value="Marker1,Marker2,Marker3,Marker4"/> + <param name="mtd_cols" value="Population,Sample"/> + <param name="marker_type" value="f,l,f,l"/> + <output name="output_sce" file="output.rds" ftype="rdata.sce" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ + This tool converts flow text files into a Single Cell Experiment object, with no transformation or compensation. + +----- + +**Input files** + +This tool requires a single valid flow text file. If you have multiple flowtext files please use the *Add sample information and merge tool* first. + +If metadata is supplied and there are multiple samples in the flowtext file, there must be a sample column in both the metadata and the flowtext file, with the same column name. The sample names within the column must also match. +For example, if there is a column in the flow text file labelled 'Sample' with 2 samples labelled '1' and '2', there must also be a column in the metadata file labelled 'Sample' containing one row for each of the samples '1' and '2', with the relevant additional metadata columns. + +By default the forward scatter, side scatter, time, live/dead stain, cluster info and sample columns will be excluded. 
+To override please supply a comma separated list of columns to include in the assay of the Single Cell Experiment. + +By default the columns which will be included in the meta data of the SCE will be the sample and cluster results. +To override this please supply a comma separated list of columns to include in the meta data. + +By default this tool does not assign marker types; to label the markers please supply a comma separated list of marker types. + +**Output file** + +Support the following conversion: + * flowtext file -> SingleCellExperiment + * tabular file -> SingleCellExperiment + +The output of this tool is a Single Cell Experiment object. This is an R object which contains slots for fluorescence data (assay) and metaData (colData). Please see the image below. +Converting to a SCE object will allow you to run tools for single cell RNAseq on your cytometry data. + +For more information on Single Cell Experiment objects please refer to the picture below or the `bioconductor page <https://bioconductor.org/packages/release/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html>`_. + +----- + +**Example** + +*Input*: flow text file + + FSC SSC Marker1 Marker2 Marker3 Marker4 Population Sample + 449 157 551 129 169 292 1 sample1 + 894 1023 199 277 320 227 3 sample1 + 262 73 437 69 0 146 6 sample1 + 340 115 509 268 0 74 1 sample1 + ... ... ... ... ... ... ... ... + 523 354 554 176 213 185 2 sample2 + 678 8096 98 74 417 267 6 sample2 + 226 89 467 123 0 324 2 sample2 + 660 175 589 178 1 89 7 sample2 + ... ... ... ... ... ... ... ... + +*Input*: Metadata Table + Sample Patient ID Sex Age ... + sample1 7245 female 54 ... + sample2 1423 male 36 ... + ... ... ... ... ... + +*Output*: Single Cell Experiment object + +.. image:: ./images/sce_modified.png + +]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.flowtext Thu Jul 22 21:44:59 2021 +0000 @@ -0,0 +1,10 @@ +FSC SSC Marker1 Marker2 Marker3 Marker4 Population Sample +449 157 551 129 169 292 1 sample1 +894 1023 199 277 320 227 3 sample1 +262 73 437 69 0 146 6 sample1 +340 115 509 268 0 74 1 sample1 +523 354 554 176 213 185 2 sample2 +678 8096 98 74 417 267 6 sample2 +226 89 467 123 0 324 2 sample2 +660 175 589 178 1 89 7 sample2 +