diff scater-plot-dist-scatter.R @ 2:b834074a9aff draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scater commit 154318f74839a4481c7c68993c4fb745842c4cce"
author iuc
date Thu, 09 Sep 2021 12:23:11 +0000
parents fd808de478b1
children
line wrap: on
line diff
--- a/scater-plot-dist-scatter.R	Tue Sep 03 14:26:31 2019 -0400
+++ b/scater-plot-dist-scatter.R	Thu Sep 09 12:23:11 2021 +0000
@@ -13,45 +13,45 @@
 
 # parse options
 
-option_list = list(
+option_list <- list(
   make_option(
     c("-i", "--input-loom"),
     action = "store",
     default = NA,
-    type = 'character',
+    type = "character",
     help = "A SingleCellExperiment object file in Loom format."
   ),
   make_option(
     c("-o", "--output-plot-file"),
     action = "store",
     default = NA,
-    type = 'character',
+    type = "character",
     help = "Path of the PDF output file to save plot to."
   ),
   make_option(
     c("-l", "--log-scale"),
-    action="store_true",
-    default=FALSE,
-    type = 'logical',
+    action = "store_true",
+    default = FALSE,
+    type = "logical",
     help = "Plot on log scale (recommended for large datasets)."
   )
 )
 
-opt <- wsc_parse_args(option_list, mandatory = c('input_loom', 'output_plot_file', 'log_scale'))
+opt <- wsc_parse_args(option_list, mandatory = c("input_loom", "output_plot_file"))
 
 # Check parameter values
 
-if ( ! file.exists(opt$input_loom)){
-  stop((paste('File', opt$input_loom, 'does not exist')))
+if (! file.exists(opt$input_loom)) {
+  stop((paste("File", opt$input_loom, "does not exist")))
 }
 
-# Input from Loom format
+# Filter out unexpressed features
 
-scle <- import(opt$input_loom, format='loom', type='SingleCellLoomExperiment')
+sce <- import(opt$input_loom, format = "loom", type = "SingleCellLoomExperiment")
 
-#do the scatter plot of reads vs genes
-total_counts <- scle$total_counts
-total_features <- scle$total_features_by_counts
+# Do the scatter plot of reads vs genes
+total_counts <- sce$total
+total_features <- sce$detected
 count_feats <- cbind(total_counts, total_features)
 cf_dm <- as.data.frame(count_feats)
 
@@ -59,21 +59,20 @@
 read_bins <- max(total_counts / 1e6) / 20
 feat_bins <- max(total_features) / 20
 
-# Make the plots
-plot <- ggplot(cf_dm, aes(x=total_counts / 1e6, y=total_features)) + geom_point(shape=1) + geom_smooth() + xlab("Read count (millions)") +
-   ylab("Feature count") + ggtitle("Scatterplot of reads vs features")
-plot1 <- qplot(total_counts / 1e6, geom="histogram", binwidth = read_bins, ylab="Number of cells", xlab = "Read counts (millions)", fill=I("darkseagreen3")) + ggtitle("Read counts per cell")
-plot2 <- qplot(total_features, geom="histogram", binwidth = feat_bins, ylab="Number of cells", xlab = "Feature counts", fill=I("darkseagreen3")) + ggtitle("Feature counts per cell")
-plot3 <- plotColData(scle, y="pct_counts_MT", x="total_features_by_counts") + ggtitle("% MT genes") + geom_point(shape=1) + theme(text = element_text(size=15)) + theme(plot.title = element_text(size=15))
+plot1 <- qplot(total_counts / 1e6, geom = "histogram", binwidth = read_bins, ylab = "Number of cells", xlab = "Read counts (millions)", fill = I("darkseagreen3")) + ggtitle("Read counts per cell")
+plot2 <- qplot(total_features, geom = "histogram", binwidth = feat_bins, ylab = "Number of cells", xlab = "Feature counts", fill = I("darkseagreen3")) + ggtitle("Feature counts per cell")
+plot3 <- ggplot(cf_dm, aes(x = total_counts / 1e6, y = total_features)) + geom_point(shape = 1) + geom_smooth() + xlab("Read count (millions)") +
+  ylab("Feature count") + ggtitle("Scatterplot of reads vs features")
+plot4 <- plotColData(sce, y = "subsets_Mito_percent", x = "detected") + ggtitle("% MT genes") + geom_point(shape = 1) + theme(text = element_text(size = 15)) + theme(plot.title = element_text(size = 15)) + xlab("Total features") + ylab("% MT")
 
-if (! opt$log_scale){
-  final_plot <- ggarrange(plot1, plot2, plot, plot3, ncol=2, nrow=2)
-  ggsave(opt$output_plot_file, final_plot, device="pdf")
+if (! opt$log_scale) {
+  final_plot <- ggarrange(plot1, plot2, plot3, plot4, ncol = 2, nrow = 2)
+  ggsave(opt$output_plot_file, final_plot, device = "pdf")
 } else {
-  plot_log_both <- plot + scale_x_continuous(trans = 'log10') + scale_y_continuous(trans = 'log10')
-  plot1_log <- plot1 + scale_y_continuous(trans = 'log10')
-  plot2_log <- plot2 + scale_y_continuous(trans = 'log10')
-  plot3_log <- plot3 + scale_y_log10(labels=number)
-  final_plot_log <- ggarrange(plot1_log, plot2_log, plot_log_both, plot3_log, ncol=2, nrow=2)
-  ggsave(opt$output_plot_file, final_plot_log, device="pdf")
+  plot1_log <- plot1 + scale_x_continuous(trans = "log10") + scale_y_continuous(trans = "log10")
+  plot2_log <- plot2 + scale_y_continuous(trans = "log10")
+  plot3_log <- plot3 + scale_y_continuous(trans = "log10")
+  plot4_log <- plot4 + scale_y_log10(labels = number)
+  final_plot_log <- ggarrange(plot1_log, plot2_log, plot3_log, plot4_log, ncol = 2, nrow = 2)
+  ggsave(opt$output_plot_file, final_plot_log, device = "pdf")
 }