Mercurial > repos > artbio > gsc_cpm_tpm_rpk
comparison cpm_tpm_rpk.R @ 0:ce3d027ec26b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_cpm_tpm_rpk commit 09dcd74dbc01f448518cf3db3e646afb0675a6fe
author | artbio |
---|---|
date | Mon, 24 Jun 2019 13:37:16 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ce3d027ec26b |
---|---|
1 if (length(commandArgs(TRUE)) == 0) { | |
2 system("Rscript cpm_tpm_rpk.R -h", intern = F) | |
3 q("no") | |
4 } | |
5 | |
6 | |
7 # load packages that are provided in the conda env | |
8 options( show.error.messages=F, | |
9 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
10 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
11 warnings() | |
12 library(optparse) | |
13 | |
14 #Arguments | |
15 option_list = list( | |
16 make_option( | |
17 c("-d", "--data"), | |
18 default = NA, | |
19 type = 'character', | |
20 help = "Input file that contains count values to transform" | |
21 ), | |
22 make_option( | |
23 c("-t", "--type"), | |
24 default = 'cpm', | |
25 type = 'character', | |
26 help = "Transformation type, either cpm, tpm, rpk or none[default : '%default' ]" | |
27 ), | |
28 make_option( | |
29 c("-s", "--sep"), | |
30 default = '\t', | |
31 type = 'character', | |
32 help = "File separator [default : '%default' ]" | |
33 ), | |
34 make_option( | |
35 c("-c", "--colnames"), | |
36 default = TRUE, | |
37 type = 'logical', | |
38 help = "Consider first line as header ? [default : '%default' ]" | |
39 ), | |
40 make_option( | |
41 c("-f", "--gene"), | |
42 default = NA, | |
43 type = 'character', | |
44 help = "Two column of gene length file" | |
45 ), | |
46 make_option( | |
47 c("-a", "--gene_sep"), | |
48 default = '\t', | |
49 type = 'character', | |
50 help = "Gene length file separator [default : '%default' ]" | |
51 ), | |
52 make_option( | |
53 c("-b", "--gene_header"), | |
54 default = TRUE, | |
55 type = 'logical', | |
56 help = "Consider first line of gene length as header ? [default : '%default' ]" | |
57 ), | |
58 make_option( | |
59 c("-l", "--log"), | |
60 default = FALSE, | |
61 type = 'logical', | |
62 help = "Should be log transformed as well ? (log2(data +1)) [default : '%default' ]" | |
63 ), | |
64 make_option( | |
65 c("-o", "--out"), | |
66 default = "res.tab", | |
67 type = 'character', | |
68 help = "Output name [default : '%default' ]" | |
69 ) | |
70 ) | |
71 | |
72 opt = parse_args(OptionParser(option_list = option_list), | |
73 args = commandArgs(trailingOnly = TRUE)) | |
74 | |
75 if (opt$sep == "tab") {opt$sep = "\t"} | |
76 if (opt$gene_sep == "tab") {opt$gene_sep = "\t"} | |
77 | |
78 cpm <- function(count) { | |
79 t(t(count) / colSums(count)) * 1000000 | |
80 } | |
81 | |
82 | |
83 rpk <- function(count, length) { | |
84 count / (length / 1000) | |
85 } | |
86 | |
87 tpm <- function(count, length) { | |
88 RPK = rpk(count, length) | |
89 perMillion_factor = colSums(RPK) / 1000000 | |
90 TPM = RPK / perMillion_factor | |
91 return(TPM) | |
92 } | |
93 | |
94 data = read.table( | |
95 opt$data, | |
96 check.names = FALSE, | |
97 header = opt$colnames, | |
98 row.names = 1, | |
99 sep = opt$sep | |
100 ) | |
101 | |
102 if (opt$type == "tpm" | opt$type == "rpk") { | |
103 gene_length = as.data.frame( | |
104 read.table( | |
105 opt$gene, | |
106 h = opt$gene_header, | |
107 row.names = 1, | |
108 sep = opt$gene_sep | |
109 ) | |
110 ) | |
111 gene_length = as.data.frame(gene_length[match(rownames(data), rownames(gene_length)), ], rownames(data)) | |
112 } | |
113 | |
114 | |
115 if (opt$type == "cpm") | |
116 res = cpm(data) | |
117 if (opt$type == "tpm") | |
118 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data)) | |
119 if (opt$type == "rpk") | |
120 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data)) | |
121 if (opt$type == "none") | |
122 res = data | |
123 colnames(res) = colnames(data) | |
124 | |
125 | |
126 if (opt$log == TRUE) { | |
127 res = log2(res + 1) | |
128 } | |
129 | |
130 write.table( | |
131 cbind(Genes = rownames(res), res), | |
132 opt$out, | |
133 col.names = opt$colnames, | |
134 row.names = F, | |
135 quote = F, | |
136 sep = "\t" | |
137 ) | |
138 | |
139 | |
140 | |
141 | |
142 | |
143 | |
144 | |
145 | |
146 | |
147 | |
148 | |
149 |