Mercurial > repos > artbio > gsc_filter_genes

diff filter_genes.R @ 2:afe949d332b3 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_filter_genes commit b184054ad6d4230ab0a714c13f9ef32449faa327
author: artbio
date: Mon, 16 Oct 2023 23:26:20 +0000
parents: 5d2304b09f58
--- a/filter_genes.R	Mon Jun 24 18:07:05 2019 -0400
+++ b/filter_genes.R	Mon Oct 16 23:26:20 2023 +0000
@@ -1,94 +1,98 @@
-# ########################
-#      filter genes     #
-# ########################
-
 # Filter out low expressed genes
 
 # Example of command (used for generate output file) :
 # Rscript filter_genes.R -f <input file> -o <output file>
 
 # load packages that are provided in the conda env
-options( show.error.messages=F,
-       error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(show.error.messages = FALSE,
+  error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
+  }
+)
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 library(optparse)
 
 # Arguments
-option_list = list(
+option_list <- list(
   make_option(
     c("-f", "--input"),
     default = NA,
-    type = 'character',
+    type = "character",
     help = "Input file that contains count values to filter"
   ),
   make_option(
     c("-s", "--sep"),
-    default = '\t',
-    type = 'character',
+    default = "\t",
+    type = "character",
     help = "File separator [default : '%default' ]"
   ),
   make_option(
     c("-c", "--colnames"),
     default = TRUE,
-    type = 'logical',
+    type = "logical",
     help = "first line is a header [default : '%default' ]"
   ),
   make_option(
     "--percentile_detection",
     default = 0,
-    type = 'numeric',
+    type = "numeric",
     help = "Include genes with detected expression in at least \
     this fraction of cells [default : '%default' ]"
   ),
   make_option(
     "--absolute_detection",
     default = 0,
-    type = 'numeric',
+    type = "numeric",
     help = "Include genes with detected expression in at least \
     this number of cells [default : '%default' ]"
   ),
   make_option(
     c("-o", "--output"),
     default = NA,
-    type = 'character',
+    type = "character",
     help = "Output name [default : '%default' ]"
   )
 )
 
-opt = parse_args(OptionParser(option_list = option_list),
-                 args = commandArgs(trailingOnly = TRUE))
-if (opt$sep == "tab") {opt$sep = "\t"}
-if (opt$sep == "comma") {opt$sep = ","}
+opt <- parse_args(OptionParser(option_list = option_list),
+                  args = commandArgs(trailingOnly = TRUE))
+if (opt$sep == "tab") {
+  opt$sep <- "\t"
+}
+if (opt$sep == "comma") {
+  opt$sep <- ","
+}
 
 # Open files
-data.counts <- read.table(
+data.counts <- read.delim(
   opt$input,
   h = opt$colnames,
   row.names = 1,
   sep = opt$sep,
-  check.names = F
+  check.names = FALSE
 )
 
 # note the [if else] below, to handle percentile_detection=absolute_detection=0
 # Search for genes that are expressed in a certain percent of cells
 if (opt$percentile_detection > 0) {
-kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
+  kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
 } else {
 
-# Search for genes that are expressed in more than an absolute number of cells
-kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
+  # Search for genes that are expressed in more than an absolute number of cells
+  kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
 }
 
 # Filter matrix
-data.counts <- data.counts[kept_genes,]
-data.counts <- cbind(Genes=rownames(data.counts), data.counts)
+data.counts <- data.counts[kept_genes, ]
+data.counts <- cbind(Genes = rownames(data.counts), data.counts)
 
 # Save filtered matrix
 write.table(
   data.counts,
   opt$output,
   sep = "\t",
-  quote = F,
-  col.names = T,
-  row.names = F
-)
\ No newline at end of file
+  quote = FALSE,
+  col.names = TRUE,
+  row.names = FALSE
+)
author	artbio
date	Mon, 16 Oct 2023 23:26:20 +0000
parents	5d2304b09f58
children