comparison cpm_tpm_rpk.R @ 0:ce3d027ec26b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_cpm_tpm_rpk commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author artbio
date Mon, 24 Jun 2019 13:37:16 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ce3d027ec26b
1 if (length(commandArgs(TRUE)) == 0) {
2 system("Rscript cpm_tpm_rpk.R -h", intern = F)
3 q("no")
4 }
5
6
7 # load packages that are provided in the conda env
8 options( show.error.messages=F,
9 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
11 warnings()
12 library(optparse)
13
14 #Arguments
15 option_list = list(
16 make_option(
17 c("-d", "--data"),
18 default = NA,
19 type = 'character',
20 help = "Input file that contains count values to transform"
21 ),
22 make_option(
23 c("-t", "--type"),
24 default = 'cpm',
25 type = 'character',
26 help = "Transformation type, either cpm, tpm, rpk or none[default : '%default' ]"
27 ),
28 make_option(
29 c("-s", "--sep"),
30 default = '\t',
31 type = 'character',
32 help = "File separator [default : '%default' ]"
33 ),
34 make_option(
35 c("-c", "--colnames"),
36 default = TRUE,
37 type = 'logical',
38 help = "Consider first line as header ? [default : '%default' ]"
39 ),
40 make_option(
41 c("-f", "--gene"),
42 default = NA,
43 type = 'character',
44 help = "Two column of gene length file"
45 ),
46 make_option(
47 c("-a", "--gene_sep"),
48 default = '\t',
49 type = 'character',
50 help = "Gene length file separator [default : '%default' ]"
51 ),
52 make_option(
53 c("-b", "--gene_header"),
54 default = TRUE,
55 type = 'logical',
56 help = "Consider first line of gene length as header ? [default : '%default' ]"
57 ),
58 make_option(
59 c("-l", "--log"),
60 default = FALSE,
61 type = 'logical',
62 help = "Should be log transformed as well ? (log2(data +1)) [default : '%default' ]"
63 ),
64 make_option(
65 c("-o", "--out"),
66 default = "res.tab",
67 type = 'character',
68 help = "Output name [default : '%default' ]"
69 )
70 )
71
72 opt = parse_args(OptionParser(option_list = option_list),
73 args = commandArgs(trailingOnly = TRUE))
74
75 if (opt$sep == "tab") {opt$sep = "\t"}
76 if (opt$gene_sep == "tab") {opt$gene_sep = "\t"}
77
78 cpm <- function(count) {
79 t(t(count) / colSums(count)) * 1000000
80 }
81
82
83 rpk <- function(count, length) {
84 count / (length / 1000)
85 }
86
87 tpm <- function(count, length) {
88 RPK = rpk(count, length)
89 perMillion_factor = colSums(RPK) / 1000000
90 TPM = RPK / perMillion_factor
91 return(TPM)
92 }
93
94 data = read.table(
95 opt$data,
96 check.names = FALSE,
97 header = opt$colnames,
98 row.names = 1,
99 sep = opt$sep
100 )
101
102 if (opt$type == "tpm" | opt$type == "rpk") {
103 gene_length = as.data.frame(
104 read.table(
105 opt$gene,
106 h = opt$gene_header,
107 row.names = 1,
108 sep = opt$gene_sep
109 )
110 )
111 gene_length = as.data.frame(gene_length[match(rownames(data), rownames(gene_length)), ], rownames(data))
112 }
113
114
115 if (opt$type == "cpm")
116 res = cpm(data)
117 if (opt$type == "tpm")
118 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data))
119 if (opt$type == "rpk")
120 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data))
121 if (opt$type == "none")
122 res = data
123 colnames(res) = colnames(data)
124
125
126 if (opt$log == TRUE) {
127 res = log2(res + 1)
128 }
129
130 write.table(
131 cbind(Genes = rownames(res), res),
132 opt$out,
133 col.names = opt$colnames,
134 row.names = F,
135 quote = F,
136 sep = "\t"
137 )
138
139
140
141
142
143
144
145
146
147
148
149