Mercurial > repos > iuc > scater_filter
diff scater-manual-filter.R @ 2:7a365ec81b52 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 154318f74839a4481c7c68993c4fb745842c4cce"
author | iuc |
---|---|
date | Thu, 09 Sep 2021 12:24:17 +0000 |
parents | b7ea9f09c02f |
children |
line wrap: on
line diff
--- a/scater-manual-filter.R Tue Sep 03 14:27:39 2019 -0400 +++ b/scater-manual-filter.R Thu Sep 09 12:24:17 2021 +0000 @@ -8,92 +8,94 @@ library(scater) # parse options -option_list = list( +option_list <- list( make_option( c("-i", "--input-loom"), action = "store", default = NA, - type = 'character', - help = "A SingleCellExperiment object file in Loom format." + type = "character", + help = "A SingleCellExperiment object file in Loom format" + ), + make_option( + c("-l", "--library-size"), + action = "store", + default = 0, + type = "numeric", + help = "Minimum library size (mapped reads) to filter cells on" + ), + make_option( + c("-m", "--percent-counts-MT"), + action = "store", + default = 100, + type = "numeric", + help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out" + ), + make_option( + c("-f", "--expressed-features"), + action = "store", + default = 100, + type = "numeric", + help = "Remove cells that have less than the given number of expressed features" ), make_option( c("-d", "--detection-limit"), action = "store", default = 0, - type = 'numeric', - help = "Numeric scalar providing the value above which observations are deemed to be expressed" - ), - make_option( - c("-l", "--library-size"), - action = "store", - default = 0, - type = 'numeric', - help = "Minimum library size (mapped reads) to filter cells on" + type = "numeric", + help = "Number of reads mapped to a feature above which it to be deemed as expressed" ), make_option( - c("-e", "--expressed-genes"), + c("-e", "--min-cells-expressed"), action = "store", default = 0, - type = 'numeric', - help = "Minimum number of expressed genes to filter cells on" - ), - make_option( - c("-m", "--percent-counts-MT"), - action = "store", - default = 100, - type = 'numeric', - help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out." + type = "numeric", + help = "Remove features that occur in less than the given number of cells" ), make_option( c("-o", "--output-loom"), action = "store", default = NA, - type = 'character', - help = "File name in which to store the SingleCellExperiment object in Loom format." + type = "character", + help = "File name in which to store the SingleCellExperiment object in Loom format" ) ) -opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_loom')) +opt <- wsc_parse_args(option_list, mandatory = c("input_loom", "output_loom")) # Check parameter values -if ( ! file.exists(opt$input_loom)){ - stop((paste('File', opt$input_loom, 'does not exist'))) +if (! file.exists(opt$input_loom)) { + stop((paste("File", opt$input_loom, "does not exist"))) } # Filter out unexpressed features -scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment') -print(paste("Starting with", ncol(scle), "cells and", nrow(scle), "features.")) +sce <- import(opt$input_loom, format = "loom", type = "SingleCellLoomExperiment") +print(paste("Starting with", ncol(sce), "cells and", nrow(sce), "features.")) -# Create a logical vector of features that are expressed (above detection_limit) -feature_expressed <- nexprs(scle, detection_limit = opt$detection_limit, exprs_values = 1, byrow=TRUE) > 0 -scle <- scle[feature_expressed, ] - -print(paste("After filtering out unexpressed features: ", ncol(scle), "cells and", nrow(scle), "features.")) +# Filter out low quality cells # Filter low library sizes -to_keep <- scle$total_counts > opt$library_size -scle <- scle[, to_keep] - -print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features.")) - - -# Filter low expressed genes -to_keep <- scle$total_features_by_counts > opt$expressed_genes -scle <- scle[, to_keep] - -print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features.")) - +passing_total <- sce$total > opt$library_size +sce <- sce[, passing_total] +print(paste("After filtering out low library counts: ", ncol(sce), "cells and", nrow(sce), "features.")) # Filter out high MT counts -to_keep <- scle$pct_counts_MT < opt$percent_counts_MT -scle <- scle[, to_keep] +passing_mt_counts <- sce$subsets_Mito_percent < opt$percent_counts_MT +sce <- sce[, passing_mt_counts] +print(paste("After filtering out high MT gene counts: ", ncol(sce), "cells and", nrow(sce), "features.")) -print(paste("After filtering out high MT gene counts: ", ncol(scle), "cells and", nrow(scle), "features.")) +expr_features <- sce$detected > opt$expressed_features +sce <- sce[, expr_features] +print(paste("After filtering out cells with low feature counts: ", ncol(sce), "cells and", nrow(sce), "features.")) + +# Create a logical vector of features that are expressed (above detection_limit) +feature_expressed <- nexprs(sce, detection_limit = opt$detection_limit, byrow = TRUE) > opt$min_cells_expressed +sce <- sce[feature_expressed, ] +print(paste("After filtering out rare features: ", ncol(sce), "cells and", nrow(sce), "features.")) # Output to a Loom file if (file.exists(opt$output_loom)) { file.remove(opt$output_loom) } -export(scle, opt$output_loom, format='loom') +export(sce, opt$output_loom, format = "loom")