comparison scater-create-qcmetric-ready-sce.R @ 2:7a365ec81b52 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 154318f74839a4481c7c68993c4fb745842c4cce"
author iuc
date Thu, 09 Sep 2021 12:24:17 +0000
parents e6ca62ac65c6
children
comparison
equal deleted inserted replaced
1:b7ea9f09c02f 2:7a365ec81b52
6 library(scater) 6 library(scater)
7 library(LoomExperiment) 7 library(LoomExperiment)
8 8
9 # parse options 9 # parse options
10 #SCE-specific options 10 #SCE-specific options
11 option_list = list( 11 option_list <- list(
12 make_option( 12 make_option(
13 c("-a", "--counts"), 13 c("-a", "--counts"),
14 action = "store", 14 action = "store",
15 default = NA, 15 default = NA,
16 type = 'character', 16 type = "character",
17 help = "A tab-delimited expression matrix. The first column of all files is assumed to be feature names and the first row is assumed to be sample names." 17 help = "A tab-delimited expression matrix. The first column of all files is assumed to be feature names and the first row is assumed to be sample names."
18 ), 18 ),
19 make_option( 19 make_option(
20 c("-r", "--row-data"), 20 c("-r", "--row-data"),
21 action = "store", 21 action = "store",
22 default = NULL, 22 default = NULL,
23 type = 'character', 23 type = "character",
24 help = "Path to TSV (tab-delimited) format file describing the features. Row names from the expression matrix (-a), if present, become the row names of the SingleCellExperiment." 24 help = "Path to TSV (tab-delimited) format file describing the features. Row names from the expression matrix (-a), if present, become the row names of the SingleCellExperiment."
25 ), 25 ),
26 make_option( 26 make_option(
27 c("-c", "--col-data"), 27 c("-c", "--col-data"),
28 action = "store", 28 action = "store",
29 default = NULL, 29 default = NULL,
30 type = 'character', 30 type = "character",
31 help = "Path to TSV format file describing the samples (annotation). The number of rows (samples) must equal the number of columns in the expression matrix." 31 help = "Path to TSV format file describing the samples (annotation). The number of rows (samples) must equal the number of columns in the expression matrix."
32 ), 32 ),
33 #The scater-specific options 33 #The scater-specific options
34 make_option( 34 make_option(
35 c("--assay-name"),
36 action = "store",
37 default = 'counts',
38 type = 'character',
39 help= "String specifying the name of the 'assay' of the 'object' that should be used to define expression."
40 ),
41 make_option(
42 c("-f", "--mt-controls"), 35 c("-f", "--mt-controls"),
43 action = "store", 36 action = "store",
44 default = NULL, 37 default = NULL,
45 type = 'character', 38 type = "character",
46 help = "Path to file containing a list of the mitochondrial control genes" 39 help = "Path to file containing a list of the mitochondrial control genes"
47 ), 40 ),
48 make_option( 41 make_option(
49 c("-p", "--ercc-controls"), 42 c("-p", "--ercc-controls"),
50 action = "store", 43 action = "store",
51 default = NULL, 44 default = NULL,
52 type = 'character', 45 type = "character",
53 help = "Path to file containing a list of the ERCC controls" 46 help = "Path to file containing a list of the ERCC controls"
54 ), 47 ),
55 make_option( 48 make_option(
56 c("-l", "--cell-controls"), 49 c("-l", "--cell-controls"),
57 action = "store", 50 action = "store",
58 default = NULL, 51 default = NULL,
59 type = 'character', 52 type = "character",
60 help = "Path to file (one cell per line) to be used to derive a vector of cell (sample) names used to identify cell controls (for example, blank wells or bulk controls)." 53 help = "Path to file (one cell per line) to be used to derive a vector of cell (sample) names used to identify cell controls (for example, blank wells or bulk controls)."
61 ), 54 ),
62 make_option( 55 make_option(
63 c("-o", "--output-loom"), 56 c("-o", "--output-loom"),
64 action = "store", 57 action = "store",
65 default = NA, 58 default = NA,
66 type = 'character', 59 type = "character",
67 help = "File name in which to store the SingleCellExperiment object in Loom format." 60 help = "File name in which to store the SingleCellExperiment object in Loom format."
68 ) 61 )
69 ) 62 )
70 63
71 opt <- wsc_parse_args(option_list, mandatory = c('counts', 'output_loom')) 64 opt <- wsc_parse_args(option_list, mandatory = c("counts", "output_loom"))
72 65
73 # Read the expression matrix 66 # Read the expression matrix
74 67
75 counts <- wsc_split_string(opt$counts) 68 counts <- wsc_split_string(opt$counts)
76 reads <- read.table(counts) 69 reads <- read.table(counts)
77 70
78 # Read row and column annotations 71 # Read row and column annotations
79 72
80 rowdata <- opt$row_data 73 rowdata <- opt$row_data
81 74
82 if ( ! is.null(opt$row_data) ){ 75 if (! is.null(opt$row_data)) {
83 rowdata <- read.delim(opt$row_data) 76 rowdata <- read.delim(opt$row_data)
84 } 77 }
85 78
86 coldata <- opt$col_data 79 coldata <- opt$col_data
87 80
88 if ( ! is.null(opt$col_data) ){ 81 if (! is.null(opt$col_data)) {
89 coldata <- read.delim(opt$col_data) 82 coldata <- read.delim(opt$col_data)
90 } 83 }
91 84
92 # Now build the object 85 # Now build the object
93 assays <- list(as.matrix(reads))
94 names(assays) <- c(opt$assay_name)
95 scle <- SingleCellLoomExperiment(assays = assays, colData = coldata, rowData = rowdata)
96 # Define spikes (if supplied)
97 86
98 87 sce <- SingleCellLoomExperiment(assays = list(counts = as.matrix(reads)), colData = coldata)
99 #Scater options 88 #Scater options
100 89
101 # Check feature_controls (only mitochondrial and ERCC used for now) 90 # Check feature_controls (only mitochondrial and ERCC used for now)
102 feature_controls_list = list() 91
103 if (! is.null(opt$mt_controls) && opt$mt_controls != 'NULL'){ 92 if (! is.null(opt$mt_controls)) {
104 if (! file.exists(opt$mt_controls)){ 93 if (! file.exists(opt$mt_controls)) {
105 stop((paste('Supplied feature_controls file', opt$mt_controls, 'does not exist'))) 94 stop((paste("Supplied feature_controls file", opt$mt_controls, "does not exist")))
106 } else { 95 } else {
107 mt_controls <- readLines(opt$mt_controls) 96 mts <- readLines(opt$mt_controls)
108 feature_controls_list[["MT"]] <- mt_controls
109 } 97 }
98 } else {
99 mts <- NULL
110 } 100 }
111 101
112 if (! is.null(opt$ercc_controls) && opt$ercc_controls != 'NULL'){ 102 if (! is.null(opt$ercc_controls)) {
113 if (! file.exists(opt$ercc_controls)){ 103 if (! file.exists(opt$ercc_controls)) {
114 stop((paste('Supplied feature_controls file', opt$ercc_controls, 'does not exist'))) 104 stop((paste("Supplied feature_controls file", opt$ercc_controls, "does not exist")))
115 } else { 105 } else {
116 ercc_controls <- readLines(opt$ercc_controls) 106 ercc_controls <- readLines(opt$ercc_controls)
117 feature_controls_list[["ERCC"]] <- ercc_controls
118 } 107 }
119 } else { 108 } else {
120 ercc_controls <- character() 109 ercc_controls <- NULL
121 } 110 }
122 111
123 # Check cell_controls 112 # Check cell_controls
124 cell_controls_list <- list() 113
125 if (! is.null(opt$cell_controls) && opt$cell_controls != 'NULL'){ 114 if (! is.null(opt$cell_controls)) {
126 if (! file.exists(opt$cell_controls)){ 115 if (! file.exists(opt$cell_controls)) {
127 stop((paste('Supplied feature_controls file', opt$cell_controls, 'does not exist'))) 116 stop((paste("Supplied feature_controls file", opt$cell_controls, "does not exist")))
128 } else { 117 } else {
129 cell_controls <- readLines(opt$cell_controls) 118 cell_controls <- readLines(opt$cell_controls)
130 cell_controls_list[["empty"]] <- cell_controls
131 } 119 }
120 } else {
121 cell_controls <- NULL
132 } 122 }
133 123
124 # calculate QCMs
134 125
135 # calculate QCMs 126 sce <- addPerCellQC(sce, subsets = list(Mito = mts, ERCC = ercc_controls, cell_controls = cell_controls))
136 scle <- calculateQCMetrics(scle, exprs_values = opt$assay_name, feature_controls = feature_controls_list, cell_controls = cell_controls_list)
137 127
138 # Output to a Loom file 128 # Output to a Loom file
139 if (file.exists(opt$output_loom)) { 129 if (file.exists(opt$output_loom)) {
140 file.remove(opt$output_loom) 130 file.remove(opt$output_loom)
141 } 131 }
142 export(scle, opt$output_loom, format='loom') 132 export(sce, opt$output_loom, format = "loom")