comparison scater-manual-filter.R @ 0:2d455a7e8a3d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 5fdcafccb6c645d301db040dfeed693d7b6b4278
author iuc
date Thu, 18 Jul 2019 11:14:06 -0400
parents
children fd808de478b1
comparison
equal deleted inserted replaced
-1:000000000000 0:2d455a7e8a3d
1 #!/usr/bin/env Rscript
2 # Manually filter SingleCellExperiment with user-defined parameters
3
4 # Load optparse we need to check inputs
5 library(optparse)
6 library(workflowscriptscommon)
7 library(LoomExperiment)
8 library(scater)
9
10 # parse options
11 option_list = list(
12 make_option(
13 c("-i", "--input-loom"),
14 action = "store",
15 default = NA,
16 type = 'character',
17 help = "A SingleCellExperiment object file in Loom format."
18 ),
19 make_option(
20 c("-d", "--detection-limit"),
21 action = "store",
22 default = 0,
23 type = 'numeric',
24 help = "Numeric scalar providing the value above which observations are deemed to be expressed"
25 ),
26 make_option(
27 c("-l", "--library-size"),
28 action = "store",
29 default = 0,
30 type = 'numeric',
31 help = "Minimum library size (mapped reads) to filter cells on"
32 ),
33 make_option(
34 c("-m", "--percent-counts-MT"),
35 action = "store",
36 default = 100,
37 type = 'numeric',
38 help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out."
39 ),
40 make_option(
41 c("-o", "--output-loom"),
42 action = "store",
43 default = NA,
44 type = 'character',
45 help = "File name in which to store the SingleCellExperiment object in Loom format."
46 )
47 )
48
49 opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_loom'))
50
51 # Check parameter values
52
53 if ( ! file.exists(opt$input_loom)){
54 stop((paste('File', opt$input_loom, 'does not exist')))
55 }
56
57 # Filter out unexpressed features
58
59 scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment')
60 print(paste("Starting with", ncol(scle), "cells and", nrow(scle), "features."))
61
62 # Create a logical vector of features that are expressed (above detection_limit)
63 feature_expressed <- nexprs(scle, detection_limit = opt$detection_limit, exprs_values = 1, byrow=TRUE) > 0
64 scle <- scle[feature_expressed, ]
65
66 print(paste("After filtering out unexpressed features: ", ncol(scle), "cells and", nrow(scle), "features."))
67
68 # Filter low library sizes
69 to_keep <- scle$total_counts > opt$library_size
70 scle <- scle[, to_keep]
71
72 print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features."))
73
74 # Filter out high MT counts
75 to_keep <- scle$pct_counts_MT < opt$percent_counts_MT
76 scle <- scle[, to_keep]
77
78 print(paste("After filtering out high MT gene counts: ", ncol(scle), "cells and", nrow(scle), "features."))
79
80 # Output to a Loom file
81 if (file.exists(opt$output_loom)) {
82 file.remove(opt$output_loom)
83 }
84 export(scle, opt$output_loom, format='loom')