Mercurial > repos > artbio > gsc_filter_genes
comparison filter_genes.R @ 0:f689c4ea8c43 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_filter_genes commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:38:10 -0400 |
parents | |
children | 5d2304b09f58 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f689c4ea8c43 |
---|---|
# ########################
#      filter genes      #
# ########################

# Filter out low expressed genes from a count table.
#
# The input is read with its first column as row names; each remaining column
# is treated as one cell. A gene (row) is "detected" in a cell when its count
# is non-zero. Genes detected in too few cells are removed and the remaining
# rows are written as a tab-separated table.
#
# Threshold selection:
#   * --percentile_detection > 0 : keep genes detected in at least this
#     fraction of the cells (--absolute_detection is then ignored);
#   * otherwise                  : keep genes detected in at least
#     --absolute_detection cells (default 0, i.e. keep everything).
#
# Example of command (used to generate the output file):
# Rscript filter_genes.R -f <input file> -o <output file>

# On any error: print the message on stderr and exit with status 1
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)
# The result is assigned so that it is not echoed on stdout
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# load packages that are provided in the conda env
library(optparse)

# Arguments
# NOTE: the two multi-line help strings below use a backslash-newline inside
# the literal; the continuation line is part of the string, so it is left
# unindented on purpose.
option_list <- list(
  make_option(
    c("-f", "--input"),
    default = NA,
    type = "character",
    help = "Input file that contains count values to filter"
  ),
  make_option(
    c("-s", "--sep"),
    default = "\t",
    type = "character",
    help = "File separator [default : '%default' ]"
  ),
  make_option(
    c("-c", "--colnames"),
    default = TRUE,
    type = "logical",
    help = "first line is a header [default : '%default' ]"
  ),
  make_option(
    "--percentile_detection",
    default = 0,
    type = "numeric",
    help = "Include genes with detected expression in at least \
this fraction of cells [default : '%default' ]"
  ),
  make_option(
    "--absolute_detection",
    default = 0,
    type = "numeric",
    help = "Include genes with detected expression in at least \
this number of cells [default : '%default' ]"
  ),
  make_option(
    c("-o", "--output"),
    default = NA,
    type = "character",
    help = "Output name [default : '%default' ]"
  )
)

opt <- parse_args(
  OptionParser(option_list = option_list),
  args = commandArgs(trailingOnly = TRUE)
)

# --input and --output have no usable default: fail early with an explicit
# message instead of a cryptic read.table()/write.table() error later on.
if (is.null(opt$input) || is.na(opt$input)) {
  stop("an input file must be provided with -f/--input", call. = FALSE)
}
if (is.null(opt$output) || is.na(opt$output)) {
  stop("an output file must be provided with -o/--output", call. = FALSE)
}

# The separator may be given by name ("tab", "comma") or as the literal
# character itself.
sep_aliases <- c(tab = "\t", comma = ",")
if (opt$sep %in% names(sep_aliases)) {
  opt$sep <- sep_aliases[[opt$sep]]
}

# Open files
data_counts <- read.table(
  opt$input,
  header = opt$colnames,
  row.names = 1,
  sep = opt$sep,
  check.names = FALSE
)

# Number of cells in which each gene has a non-zero count.
# na.rm = TRUE: a missing count is not counted as a detection; without it a
# single NA would turn the whole gene into an NA row in the output.
n_detected <- rowSums(data_counts != 0, na.rm = TRUE)

# note the [if else] below, to handle percentile_detection=absolute_detection=0
if (opt$percentile_detection > 0) {
  # Genes expressed in at least a given fraction of the cells.
  # The product is rounded up with a small tolerance because it can land just
  # above the intended integer in floating point (e.g. 0.07 * 100), which
  # would wrongly exclude genes sitting exactly on the threshold.
  min_cells <- ceiling(opt$percentile_detection * ncol(data_counts) - 1e-8)
} else {
  # Genes expressed in at least an absolute number of cells
  min_cells <- opt$absolute_detection
}
kept_genes <- n_detected >= min_cells

# Filter matrix
# drop = FALSE keeps a data frame (with gene names and column header) even
# when the table has a single cell column.
data_counts <- data_counts[kept_genes, , drop = FALSE]

# Save filtered matrix
write.table(
  data_counts,
  opt$output,
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = TRUE
)