diff scater-manual-filter.R @ 2:7a365ec81b52 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 154318f74839a4481c7c68993c4fb745842c4cce"
author iuc
date Thu, 09 Sep 2021 12:24:17 +0000
parents b7ea9f09c02f
children
line wrap: on
line diff
--- a/scater-manual-filter.R	Tue Sep 03 14:27:39 2019 -0400
+++ b/scater-manual-filter.R	Thu Sep 09 12:24:17 2021 +0000
@@ -8,92 +8,94 @@
 library(scater)
 
 # parse options
-option_list = list(
+option_list <- list(
   make_option(
     c("-i", "--input-loom"),
     action = "store",
     default = NA,
-    type = 'character',
-    help = "A SingleCellExperiment object file in Loom format."
+    type = "character",
+    help = "A SingleCellExperiment object file in Loom format"
+  ),
+  make_option(
+    c("-l", "--library-size"),
+    action = "store",
+    default = 0,
+    type = "numeric",
+    help = "Minimum library size (mapped reads) to filter cells on"
+  ),
+  make_option(
+    c("-m", "--percent-counts-MT"),
+    action = "store",
+    default = 100,
+    type = "numeric",
+    help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out"
+  ),
+  make_option(
+    c("-f", "--expressed-features"),
+    action = "store",
+    default = 100,
+    type = "numeric",
+    help = "Remove cells that have less than the given number of expressed features"
   ),
   make_option(
     c("-d", "--detection-limit"),
     action = "store",
     default = 0,
-    type = 'numeric',
-    help = "Numeric scalar providing the value above which observations are deemed to be expressed"
-  ),
-  make_option(
-    c("-l", "--library-size"),
-    action = "store",
-    default = 0,
-    type = 'numeric',
-    help = "Minimum library size (mapped reads) to filter cells on"
+    type = "numeric",
+    help = "Number of reads mapped to a feature above which it to be deemed as expressed"
   ),
   make_option(
-    c("-e", "--expressed-genes"),
+    c("-e", "--min-cells-expressed"),
     action = "store",
     default = 0,
-    type = 'numeric',
-    help = "Minimum number of expressed genes to filter cells on"
-  ),
-  make_option(
-    c("-m", "--percent-counts-MT"),
-    action = "store",
-    default = 100,
-    type = 'numeric',
-    help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out."
+    type = "numeric",
+    help = "Remove features that occur in less than the given number of cells"
   ),
   make_option(
     c("-o", "--output-loom"),
     action = "store",
     default = NA,
-    type = 'character',
-    help = "File name in which to store the SingleCellExperiment object in Loom format."
+    type = "character",
+    help = "File name in which to store the SingleCellExperiment object in Loom format"
   )
 )
 
-opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_loom'))
+opt <- wsc_parse_args(option_list, mandatory = c("input_loom", "output_loom"))
 
 # Check parameter values
 
-if ( ! file.exists(opt$input_loom)){
-  stop((paste('File', opt$input_loom, 'does not exist')))
+if (! file.exists(opt$input_loom)) {
+  stop((paste("File", opt$input_loom, "does not exist")))
 }
 
 # Filter out unexpressed features
 
-scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment')
-print(paste("Starting with", ncol(scle), "cells and", nrow(scle), "features."))
+sce <- import(opt$input_loom, format = "loom", type = "SingleCellLoomExperiment")
+print(paste("Starting with", ncol(sce), "cells and", nrow(sce), "features."))
 
-# Create a logical vector of features that are expressed (above detection_limit)
-feature_expressed <- nexprs(scle, detection_limit = opt$detection_limit, exprs_values = 1, byrow=TRUE) > 0
-scle <- scle[feature_expressed, ]
-
-print(paste("After filtering out unexpressed features: ", ncol(scle), "cells and", nrow(scle), "features."))
+# Filter out low quality cells
 
 # Filter low library sizes
-to_keep <- scle$total_counts > opt$library_size
-scle <- scle[, to_keep]
-
-print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features."))
-
-
-# Filter low expressed genes
-to_keep <- scle$total_features_by_counts > opt$expressed_genes
-scle <- scle[, to_keep]
-
-print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features."))
-
+passing_total <- sce$total > opt$library_size
+sce <- sce[, passing_total]
+print(paste("After filtering out low library counts: ", ncol(sce), "cells and", nrow(sce), "features."))
 
 # Filter out high MT counts
-to_keep <- scle$pct_counts_MT < opt$percent_counts_MT
-scle <- scle[, to_keep]
+passing_mt_counts <- sce$subsets_Mito_percent < opt$percent_counts_MT
+sce <- sce[, passing_mt_counts]
+print(paste("After filtering out high MT gene counts: ", ncol(sce), "cells and", nrow(sce), "features."))
 
-print(paste("After filtering out high MT gene counts: ", ncol(scle), "cells and", nrow(scle), "features."))
+expr_features <- sce$detected > opt$expressed_features
+sce <- sce[, expr_features]
+print(paste("After filtering out cells with low feature counts: ", ncol(sce), "cells and", nrow(sce), "features."))
+
+# Create a logical vector of features that are expressed (above detection_limit)
+feature_expressed <- nexprs(sce, detection_limit = opt$detection_limit, byrow = TRUE) > opt$min_cells_expressed
+sce <- sce[feature_expressed, ]
+print(paste("After filtering out rare features: ", ncol(sce), "cells and", nrow(sce), "features."))
 
 # Output to a Loom file
 if (file.exists(opt$output_loom)) {
   file.remove(opt$output_loom)
 }
-export(scle, opt$output_loom, format='loom')
+export(sce, opt$output_loom, format = "loom")