comparison filter_genes.R @ 0:f689c4ea8c43 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_genes commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author artbio
date Mon, 24 Jun 2019 13:38:10 -0400
parents
children 5d2304b09f58
comparison
equal deleted inserted replaced
-1:000000000000 0:f689c4ea8c43
# ########################
#      filter genes      #
# ########################

# Filter out lowly expressed genes.

# Example command (used to generate the output file):
# Rscript filter_genes.R -f <input file> -o <output file>

# Error handling: R's own error printing is switched off and replaced by a
# handler that writes the error message to stderr and quits with exit
# status 1 without saving the workspace.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# Request English messages; assigning the result keeps it from being echoed.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# load packages that are provided in the conda env
library(optparse)
# Arguments
# Command-line interface:
#   -f/--input               path of the count table to filter (required)
#   -s/--sep                 field separator; the words "tab" and "comma" are
#                            accepted as aliases for "\t" and ","
#   -c/--colnames            whether the first line of the input is a header
#   --percentile_detection   minimum fraction of cells expressing a gene
#   --absolute_detection     minimum number of cells expressing a gene
#   -o/--output              path of the filtered table to write (required)
option_list <- list(
  make_option(
    c("-f", "--input"),
    default = NA,
    type = "character",
    help = "Input file that contains count values to filter"
  ),
  make_option(
    c("-s", "--sep"),
    default = "\t",
    type = "character",
    help = "File separator [default : '%default' ]"
  ),
  make_option(
    c("-c", "--colnames"),
    default = TRUE,
    type = "logical",
    help = "first line is a header [default : '%default' ]"
  ),
  make_option(
    "--percentile_detection",
    default = 0,
    type = "numeric",
    help = paste0(
      "Include genes with detected expression in at least \n",
      "this fraction of cells [default : '%default' ]"
    )
  ),
  make_option(
    "--absolute_detection",
    default = 0,
    type = "numeric",
    help = paste0(
      "Include genes with detected expression in at least \n",
      "this number of cells [default : '%default' ]"
    )
  ),
  make_option(
    c("-o", "--output"),
    default = NA,
    type = "character",
    help = "Output name [default : '%default' ]"
  )
)

opt <- parse_args(
  OptionParser(option_list = option_list),
  args = commandArgs(trailingOnly = TRUE)
)

# --input and --output default to NA; fail early with a clear message instead
# of letting the later read/write calls choke on a missing path.
if (is.na(opt$input)) {
  stop("No input file provided (use -f/--input)", call. = FALSE)
}
if (is.na(opt$output)) {
  stop("No output file provided (use -o/--output)", call. = FALSE)
}

# Translate the separator aliases; any other value is used verbatim.
opt$sep <- switch(opt$sep, tab = "\t", comma = ",", opt$sep)
# Open files
# Read the count table. The first column supplies the row names (the gene
# identifiers), and check.names = FALSE keeps the column labels exactly as
# they appear in the file instead of turning them into syntactic R names.
data.counts <- read.table(
  opt$input,
  header = opt$colnames,
  row.names = 1,
  sep = opt$sep,
  check.names = FALSE
)
# Number of cells (columns) in which each gene (row) has a non-zero count.
# na.rm = TRUE treats a missing count as "not detected"; without it a single
# NA would make the gene's total NA and corrupt the row selection below.
detected_cells <- rowSums(data.counts != 0, na.rm = TRUE)

# Note the if/else: the fractional threshold takes precedence when it is set;
# otherwise the absolute threshold applies. This also covers the case
# percentile_detection = absolute_detection = 0, where every gene is kept.
if (opt$percentile_detection > 0) {
  # Genes expressed in at least this fraction of the cells
  min_cells <- opt$percentile_detection * ncol(data.counts)
} else {
  # Genes expressed in at least this absolute number of cells
  min_cells <- opt$absolute_detection
}
kept_genes <- detected_cells >= min_cells

# Filter matrix
# drop = FALSE keeps the result a data frame (with its row names) even when
# the table has a single cell column.
data.counts <- data.counts[kept_genes, , drop = FALSE]
# Save filtered matrix
# Written tab-separated and unquoted, with both row names and column names.
# With these settings write.table emits no header entry for the row-name
# column, so the header line has one field fewer than the data lines.
write.table(
  data.counts,
  file = opt$output,
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = TRUE
)