Mercurial > repos > azomics > convert_fcstxt_to_sce
comparison FCStxtConvertSCE.R @ 0:4dc004880972 draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/develop/flowtools/convert_fcstxt_to_sce commit 611788da04fbda0b2735de1395d4407ecb75e068"
author | azomics |
---|---|
date | Thu, 22 Jul 2021 21:44:59 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4dc004880972 |
---|---|
1 #!/usr/bin/env Rscript | |
2 # GECO flow text conversion tool | |
3 # Authors: Emily Combe and Pablo Moreno | |
4 # | |
5 # This tool converts a flowtext file (or tabular file) into a SingleCellExperiment object | |
6 # The tool was written by Emily Combe and edited by Pablo Moreno | |
7 # | |
8 # There are the options to choose: the columns/markers to include in the assay, the columns to include in the meta data, descriptions of the markers and a metadata file. | |
9 # | |
10 # | |
11 # | |
12 # Version 1 | |
13 # July 2020 (Emily Combe / Pablo Moreno) | |
14 | |
15 | |
16 suppressPackageStartupMessages(library(SingleCellExperiment)) | |
17 suppressPackageStartupMessages(library(optparse)) | |
18 | |
# Build a SingleCellExperiment object from a flow text (tab-separated) file.
#
# Arguments:
#   input        Path to a tab-separated file with a header row, one row per cell.
#   fl_cols      Column names to place in the assay. When empty, every column is
#                used except those whose names match the scatter, time,
#                population/cluster or live/dead patterns below.
#   mtd_cols     Column names to use as per-cell metadata. When empty, columns
#                whose names match the sample or population/cluster patterns
#                below are used.
#   marker_type  One entry per assay row; "l" and "f" are expanded to "lineage"
#                and "functional".
#   meta_data    Optional data frame that shares exactly one column name
#                (compared case-insensitively) with the selected metadata columns.
#
# Returns the SingleCellExperiment object (counts and logcounts hold the same data).
#
# Terminates the R session with one of these exit statuses on invalid arguments:
#   13  more fl_cols than columns in the file
#   14  more mtd_cols than columns in the file
#   15  meta_data shares no column with the selected metadata columns
#   16  meta_data shares more than one column with the selected metadata columns
#   17  length of marker_type differs from the number of assay rows
sce <- function(input, fl_cols = list(), mtd_cols = list(), marker_type = list(), meta_data = NULL) {

  #---------------------#
  # reading in flowtext #
  #---------------------#

  flowtext <- read.table(input, sep = "\t", header = TRUE)
  all_columns <- colnames(flowtext)

  #----------------------------------#
  # extract-marker-fluorescence data #
  #----------------------------------#

  if (length(fl_cols) > 0) {
    if (length(fl_cols) > ncol(flowtext)) {
      quit(save = "no", status = 13, runLast = FALSE)
    }
    fl_cols_assay <- all_columns[all_columns %in% fl_cols]
  } else {
    # A logical mask is used instead of negative indices: x[-integer(0)] would
    # drop every column when no name matches any of the exclusion patterns.
    non_marker <- grepl("FSC|SSC", all_columns) |
      grepl("Time", all_columns, ignore.case = TRUE) |
      grepl("Population|flowSOM|cluster|SOM|pop", all_columns, ignore.case = TRUE) |
      grepl("Live_Dead|live|dead", all_columns, ignore.case = TRUE)
    fl_cols_assay <- all_columns[!non_marker]
  }

  # transpose data into the assay layout: columns = cells and rows = features.
  counts <- base::t(as.matrix(flowtext[, fl_cols_assay, drop = FALSE]))
  colnames(counts) <- seq_len(ncol(counts))

  #-------------------#
  # coldata/meta data #
  #-------------------#

  # By default any column whose name looks like a sample name or a cluster
  # result is extracted. To override this the user must provide the column
  # names explicitly (mtd_cols).
  if (length(mtd_cols) > 0) {
    if (length(mtd_cols) > ncol(flowtext)) {
      quit(save = "no", status = 14, runLast = FALSE)
    }
    mtd_cols_assay <- all_columns[all_columns %in% mtd_cols]
  } else {
    # The patterns are matched against the column names of the flow text file.
    is_label <- grepl("sample", all_columns, ignore.case = TRUE) |
      grepl("population|flowsom|cluster|pop|som", all_columns, ignore.case = TRUE)
    mtd_cols_assay <- all_columns[is_label]
  }

  md <- flowtext[, mtd_cols_assay, drop = FALSE]

  # Per-cell labels: the flow text metadata, extended with the external
  # metadata table when one is supplied.
  cell_labels <- NULL
  if (!is.null(meta_data)) {

    # match column names case-insensitively
    md_col <- tolower(colnames(md))
    mtd_col <- tolower(colnames(meta_data))
    shared <- intersect(md_col, mtd_col)

    # quit if < 1 or > 1 column names match
    if (length(shared) == 0) {
      quit(save = "no", status = 15, runLast = FALSE)
    }
    if (length(shared) > 1) {
      quit(save = "no", status = 16, runLast = FALSE)
    }

    # Join on the actual (case-preserved) names of the matched column.
    md_key <- colnames(md)[match(shared, md_col)]
    mtd_key <- colnames(meta_data)[match(shared, mtd_col)]

    # merge() does not keep the row order of x, so remember the cell order and
    # restore it afterwards. A left join (all.x) keeps one row per cell as long
    # as the key is unique in meta_data.
    md$.cell_order <- seq_len(nrow(md))
    merged <- merge(x = md, y = meta_data, by.x = md_key, by.y = mtd_key,
                    all.x = TRUE, sort = FALSE)
    merged <- merged[order(merged$.cell_order), , drop = FALSE]
    merged$.cell_order <- NULL
    rownames(merged) <- NULL
    cell_labels <- merged

  } else if (ncol(md) > 0) {
    cell_labels <- md
  }

  # create Single Cell experiment object. SCOPE requires both counts and
  # logcounts assays - for FLOW both assays contain the same data
  experiment <- SingleCellExperiment(assays = list(counts = counts, logcounts = counts))
  if (!is.null(cell_labels)) {
    colLabels(experiment) <- cell_labels
  }

  #-----------------#
  # row/marker data #
  #-----------------#

  if (length(marker_type) > 0) {
    if (length(marker_type) != nrow(rowData(experiment))) {
      quit(save = "no", status = 17, runLast = FALSE)
    }
    marker_type[marker_type == "l"] <- "lineage"
    marker_type[marker_type == "f"] <- "functional"

    rowData(experiment)$marker_type <- marker_type
  }

  experiment
}
127 | |
# Command-line options. Every option stores a single character value and is NA
# when it is not supplied, so all of them are built from a (flags, help) table.
option_list <- lapply(
  list(
    list(
      flags = c("-i", "--input"),
      help = "File name for FCS txt file with sample information."
    ),
    list(
      flags = c("-o", "--output"),
      help = "File name for output SCE R RDS Object."
    ),
    list(
      flags = c("-f", "--fl_cols"),
      help = "Comma separated list of Columns with markers to be included in the Single Cell Experiment assay"
    ),
    list(
      flags = c("-m", "--metadata_columns"),
      help = "Columns to be included in the metadata of the Single Cell Experiment."
    ),
    list(
      flags = c("--metadata_file"),
      help = "Optional meta data txt file to include in Single Cell Experiment."
    ),
    list(
      flags = c("--marker_type"),
      help = "Marker type"
    )
  ),
  function(spec) {
    make_option(
      spec$flags,
      action = "store",
      default = NA,
      type = "character",
      help = spec$help
    )
  }
)
172 | |
opt <- parse_args(OptionParser(option_list = option_list))

# Fail early with a readable message instead of an obscure read/write error.
if (is.na(opt$input) || is.na(opt$output)) {
  stop("Both --input and --output must be supplied.", call. = FALSE)
}

# Split a comma separated option value into a character vector.
#   value        the raw option value, or NA when the option was not supplied
#   exit_status  exit status used if any entry turns out to be NA
# Returns an empty list when the option was not supplied, otherwise the entries
# with surrounding whitespace removed (so "CD3, CD4" behaves like "CD3,CD4").
split_option <- function(value, exit_status) {
  if (is.na(value)) {
    return(list())
  }
  entries <- trimws(strsplit(value, ",", fixed = TRUE)[[1]])
  if (anyNA(entries)) {
    quit(save = "no", status = exit_status, runLast = FALSE)
  }
  entries
}

# fluorescence markers to include in the assay
fl_channels <- split_option(opt$fl_cols, 10)

# meta data columns to go into colData in SCE
mt_channels <- split_option(opt$metadata_columns, 11)

# metadata file to add to the coldata in SCE. Must have a column matching the
# sample column in the flowtext file
md <- NULL
if (!is.na(opt$metadata_file)) {
  md <- read.table(opt$metadata_file, header = TRUE, sep = "\t", check.names = FALSE, as.is = FALSE)
}

# comma separated list of values to define the markers included in the assay
mark_type <- split_option(opt$marker_type, 12)

# Note: from here on the name `sce` refers to the resulting object rather than
# to the conversion function.
sce <- sce(input = opt$input, fl_cols = fl_channels, mtd_cols = mt_channels, meta_data = md, marker_type = mark_type)

saveRDS(sce, file = opt$output)