comparison scater-manual-filter.R @ 2:81e5bdff4853 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 154318f74839a4481c7c68993c4fb745842c4cce"
author iuc
date Thu, 09 Sep 2021 12:23:33 +0000
parents 2e41b35b5bdd
children
comparison
equal deleted inserted replaced
1:2e41b35b5bdd 2:81e5bdff4853
6 library(workflowscriptscommon) 6 library(workflowscriptscommon)
7 library(LoomExperiment) 7 library(LoomExperiment)
8 library(scater) 8 library(scater)
9 9
10 # parse options 10 # parse options
11 option_list = list( 11 option_list <- list(
12 make_option( 12 make_option(
13 c("-i", "--input-loom"), 13 c("-i", "--input-loom"),
14 action = "store", 14 action = "store",
15 default = NA, 15 default = NA,
16 type = 'character', 16 type = "character",
17 help = "A SingleCellExperiment object file in Loom format." 17 help = "A SingleCellExperiment object file in Loom format"
18 ),
19 make_option(
20 c("-l", "--library-size"),
21 action = "store",
22 default = 0,
23 type = "numeric",
24 help = "Minimum library size (mapped reads) to filter cells on"
25 ),
26 make_option(
27 c("-m", "--percent-counts-MT"),
28 action = "store",
29 default = 100,
30 type = "numeric",
31 help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out"
32 ),
33 make_option(
34 c("-f", "--expressed-features"),
35 action = "store",
36 default = 100,
37 type = "numeric",
38 help = "Remove cells that have less than the given number of expressed features"
18 ), 39 ),
19 make_option( 40 make_option(
20 c("-d", "--detection-limit"), 41 c("-d", "--detection-limit"),
21 action = "store", 42 action = "store",
22 default = 0, 43 default = 0,
23 type = 'numeric', 44 type = "numeric",
24 help = "Numeric scalar providing the value above which observations are deemed to be expressed" 45 help = "Number of reads mapped to a feature above which it to be deemed as expressed"
25 ), 46 ),
26 make_option( 47 make_option(
27 c("-l", "--library-size"), 48 c("-e", "--min-cells-expressed"),
28 action = "store", 49 action = "store",
29 default = 0, 50 default = 0,
30 type = 'numeric', 51 type = "numeric",
31 help = "Minimum library size (mapped reads) to filter cells on" 52 help = "Remove features that occur in less than the given number of cells"
32 ),
33 make_option(
34 c("-e", "--expressed-genes"),
35 action = "store",
36 default = 0,
37 type = 'numeric',
38 help = "Minimum number of expressed genes to filter cells on"
39 ),
40 make_option(
41 c("-m", "--percent-counts-MT"),
42 action = "store",
43 default = 100,
44 type = 'numeric',
45 help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out."
46 ), 53 ),
47 make_option( 54 make_option(
48 c("-o", "--output-loom"), 55 c("-o", "--output-loom"),
49 action = "store", 56 action = "store",
50 default = NA, 57 default = NA,
51 type = 'character', 58 type = "character",
52 help = "File name in which to store the SingleCellExperiment object in Loom format." 59 help = "File name in which to store the SingleCellExperiment object in Loom format"
53 ) 60 )
54 ) 61 )
55 62
56 opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_loom')) 63 opt <- wsc_parse_args(option_list, mandatory = c("input_loom", "output_loom"))
57 64
58 # Check parameter values 65 # Check parameter values
59 66
60 if ( ! file.exists(opt$input_loom)){ 67 if (! file.exists(opt$input_loom)) {
61 stop((paste('File', opt$input_loom, 'does not exist'))) 68 stop((paste("File", opt$input_loom, "does not exist")))
62 } 69 }
63 70
64 # Filter out unexpressed features 71 # Filter out unexpressed features
65 72
66 scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment') 73 sce <- import(opt$input_loom, format = "loom", type = "SingleCellLoomExperiment")
67 print(paste("Starting with", ncol(scle), "cells and", nrow(scle), "features.")) 74 print(paste("Starting with", ncol(sce), "cells and", nrow(sce), "features."))
75
76 # Filter out low quality cells
77
78 # Filter low library sizes
79 passing_total <- sce$total > opt$library_size
80 sce <- sce[, passing_total]
81 print(paste("After filtering out low library counts: ", ncol(sce), "cells and", nrow(sce), "features."))
82
83 # Filter out high MT counts
84 passing_mt_counts <- sce$subsets_Mito_percent < opt$percent_counts_MT
85 sce <- sce[, passing_mt_counts]
86 print(paste("After filtering out high MT gene counts: ", ncol(sce), "cells and", nrow(sce), "features."))
87
88 expr_features <- sce$detected > opt$expressed_features
89 sce <- sce[, expr_features]
90 print(paste("After filtering out cells with low feature counts: ", ncol(sce), "cells and", nrow(sce), "features."))
68 91
69 # Create a logical vector of features that are expressed (above detection_limit) 92 # Create a logical vector of features that are expressed (above detection_limit)
70 feature_expressed <- nexprs(scle, detection_limit = opt$detection_limit, exprs_values = 1, byrow=TRUE) > 0 93 feature_expressed <- nexprs(sce, detection_limit = opt$detection_limit, byrow = TRUE) > opt$min_cells_expressed
71 scle <- scle[feature_expressed, ] 94 sce <- sce[feature_expressed, ]
72 95 print(paste("After filtering out rare features: ", ncol(sce), "cells and", nrow(sce), "features."))
73 print(paste("After filtering out unexpressed features: ", ncol(scle), "cells and", nrow(scle), "features."))
74
75 # Filter low library sizes
76 to_keep <- scle$total_counts > opt$library_size
77 scle <- scle[, to_keep]
78
79 print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features."))
80
81
82 # Filter low expressed genes
83 to_keep <- scle$total_features_by_counts > opt$expressed_genes
84 scle <- scle[, to_keep]
85
86 print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features."))
87
88
89 # Filter out high MT counts
90 to_keep <- scle$pct_counts_MT < opt$percent_counts_MT
91 scle <- scle[, to_keep]
92
93 print(paste("After filtering out high MT gene counts: ", ncol(scle), "cells and", nrow(scle), "features."))
94 96
95 # Output to a Loom file 97 # Output to a Loom file
96 if (file.exists(opt$output_loom)) { 98 if (file.exists(opt$output_loom)) {
97 file.remove(opt$output_loom) 99 file.remove(opt$output_loom)
98 } 100 }
99 export(scle, opt$output_loom, format='loom') 101 export(sce, opt$output_loom, format = "loom")