Mercurial > repos > iuc > scater_plot_pca
comparison scater-manual-filter.R @ 2:9e5c0bb18d08 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 154318f74839a4481c7c68993c4fb745842c4cce"
| author | iuc |
|---|---|
| date | Thu, 09 Sep 2021 12:23:55 +0000 |
| parents | 46fc6751d746 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:46fc6751d746 | 2:9e5c0bb18d08 |
|---|---|
| 6 library(workflowscriptscommon) | 6 library(workflowscriptscommon) |
| 7 library(LoomExperiment) | 7 library(LoomExperiment) |
| 8 library(scater) | 8 library(scater) |
| 9 | 9 |
| 10 # parse options | 10 # parse options |
| 11 option_list = list( | 11 option_list <- list( |
| 12 make_option( | 12 make_option( |
| 13 c("-i", "--input-loom"), | 13 c("-i", "--input-loom"), |
| 14 action = "store", | 14 action = "store", |
| 15 default = NA, | 15 default = NA, |
| 16 type = 'character', | 16 type = "character", |
| 17 help = "A SingleCellExperiment object file in Loom format." | 17 help = "A SingleCellExperiment object file in Loom format" |
| 18 ), | |
| 19 make_option( | |
| 20 c("-l", "--library-size"), | |
| 21 action = "store", | |
| 22 default = 0, | |
| 23 type = "numeric", | |
| 24 help = "Minimum library size (mapped reads) to filter cells on" | |
| 25 ), | |
| 26 make_option( | |
| 27 c("-m", "--percent-counts-MT"), | |
| 28 action = "store", | |
| 29 default = 100, | |
| 30 type = "numeric", | |
| 31 help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out" | |
| 32 ), | |
| 33 make_option( | |
| 34 c("-f", "--expressed-features"), | |
| 35 action = "store", | |
| 36 default = 100, | |
| 37 type = "numeric", | |
| 38 help = "Remove cells that have less than the given number of expressed features" | |
| 18 ), | 39 ), |
| 19 make_option( | 40 make_option( |
| 20 c("-d", "--detection-limit"), | 41 c("-d", "--detection-limit"), |
| 21 action = "store", | 42 action = "store", |
| 22 default = 0, | 43 default = 0, |
| 23 type = 'numeric', | 44 type = "numeric", |
| 24 help = "Numeric scalar providing the value above which observations are deemed to be expressed" | 45 help = "Number of reads mapped to a feature above which it to be deemed as expressed" |
| 25 ), | 46 ), |
| 26 make_option( | 47 make_option( |
| 27 c("-l", "--library-size"), | 48 c("-e", "--min-cells-expressed"), |
| 28 action = "store", | 49 action = "store", |
| 29 default = 0, | 50 default = 0, |
| 30 type = 'numeric', | 51 type = "numeric", |
| 31 help = "Minimum library size (mapped reads) to filter cells on" | 52 help = "Remove features that occur in less than the given number of cells" |
| 32 ), | |
| 33 make_option( | |
| 34 c("-e", "--expressed-genes"), | |
| 35 action = "store", | |
| 36 default = 0, | |
| 37 type = 'numeric', | |
| 38 help = "Minimum number of expressed genes to filter cells on" | |
| 39 ), | |
| 40 make_option( | |
| 41 c("-m", "--percent-counts-MT"), | |
| 42 action = "store", | |
| 43 default = 100, | |
| 44 type = 'numeric', | |
| 45 help = "Maximum % of mitochondrial genes expressed per cell. Cells that exceed this value will be filtered out." | |
| 46 ), | 53 ), |
| 47 make_option( | 54 make_option( |
| 48 c("-o", "--output-loom"), | 55 c("-o", "--output-loom"), |
| 49 action = "store", | 56 action = "store", |
| 50 default = NA, | 57 default = NA, |
| 51 type = 'character', | 58 type = "character", |
| 52 help = "File name in which to store the SingleCellExperiment object in Loom format." | 59 help = "File name in which to store the SingleCellExperiment object in Loom format" |
| 53 ) | 60 ) |
| 54 ) | 61 ) |
| 55 | 62 |
| 56 opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_loom')) | 63 opt <- wsc_parse_args(option_list, mandatory = c("input_loom", "output_loom")) |
| 57 | 64 |
| 58 # Check parameter values | 65 # Check parameter values |
| 59 | 66 |
| 60 if ( ! file.exists(opt$input_loom)){ | 67 if (! file.exists(opt$input_loom)) { |
| 61 stop((paste('File', opt$input_loom, 'does not exist'))) | 68 stop((paste("File", opt$input_loom, "does not exist"))) |
| 62 } | 69 } |
| 63 | 70 |
| 64 # Filter out unexpressed features | 71 # Filter out unexpressed features |
| 65 | 72 |
| 66 scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment') | 73 sce <- import(opt$input_loom, format = "loom", type = "SingleCellLoomExperiment") |
| 67 print(paste("Starting with", ncol(scle), "cells and", nrow(scle), "features.")) | 74 print(paste("Starting with", ncol(sce), "cells and", nrow(sce), "features.")) |
| 75 | |
| 76 # Filter out low quality cells | |
| 77 | |
| 78 # Filter low library sizes | |
| 79 passing_total <- sce$total > opt$library_size | |
| 80 sce <- sce[, passing_total] | |
| 81 print(paste("After filtering out low library counts: ", ncol(sce), "cells and", nrow(sce), "features.")) | |
| 82 | |
| 83 # Filter out high MT counts | |
| 84 passing_mt_counts <- sce$subsets_Mito_percent < opt$percent_counts_MT | |
| 85 sce <- sce[, passing_mt_counts] | |
| 86 print(paste("After filtering out high MT gene counts: ", ncol(sce), "cells and", nrow(sce), "features.")) | |
| 87 | |
| 88 expr_features <- sce$detected > opt$expressed_features | |
| 89 sce <- sce[, expr_features] | |
| 90 print(paste("After filtering out cells with low feature counts: ", ncol(sce), "cells and", nrow(sce), "features.")) | |
| 68 | 91 |
| 69 # Create a logical vector of features that are expressed (above detection_limit) | 92 # Create a logical vector of features that are expressed (above detection_limit) |
| 70 feature_expressed <- nexprs(scle, detection_limit = opt$detection_limit, exprs_values = 1, byrow=TRUE) > 0 | 93 feature_expressed <- nexprs(sce, detection_limit = opt$detection_limit, byrow = TRUE) > opt$min_cells_expressed |
| 71 scle <- scle[feature_expressed, ] | 94 sce <- sce[feature_expressed, ] |
| 72 | 95 print(paste("After filtering out rare features: ", ncol(sce), "cells and", nrow(sce), "features.")) |
| 73 print(paste("After filtering out unexpressed features: ", ncol(scle), "cells and", nrow(scle), "features.")) | |
| 74 | |
| 75 # Filter low library sizes | |
| 76 to_keep <- scle$total_counts > opt$library_size | |
| 77 scle <- scle[, to_keep] | |
| 78 | |
| 79 print(paste("After filtering out low library counts: ", ncol(scle), "cells and", nrow(scle), "features.")) | |
| 80 | |
| 81 | |
| 82 # Filter low expressed genes | |
| 83 to_keep <- scle$total_features_by_counts > opt$expressed_genes | |
| 84 scle <- scle[, to_keep] | |
| 85 | |
| 86 print(paste("After filtering out low expressed: ", ncol(scle), "cells and", nrow(scle), "features.")) | |
| 87 | |
| 88 | |
| 89 # Filter out high MT counts | |
| 90 to_keep <- scle$pct_counts_MT < opt$percent_counts_MT | |
| 91 scle <- scle[, to_keep] | |
| 92 | |
| 93 print(paste("After filtering out high MT gene counts: ", ncol(scle), "cells and", nrow(scle), "features.")) | |
| 94 | 96 |
| 95 # Output to a Loom file | 97 # Output to a Loom file |
| 96 if (file.exists(opt$output_loom)) { | 98 if (file.exists(opt$output_loom)) { |
| 97 file.remove(opt$output_loom) | 99 file.remove(opt$output_loom) |
| 98 } | 100 } |
| 99 export(scle, opt$output_loom, format='loom') | 101 export(sce, opt$output_loom, format = "loom") |
