Mercurial > repos > artbio > cpm_tpm_rpk
comparison cpm_tpm_rpk.R @ 0:35d032c46a4e draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit cc0fd23c039cc4a39c5e4e320b50666b7d9b6f65
author | artbio |
---|---|
date | Wed, 25 Jul 2018 13:05:17 -0400 |
parents | |
children | b74bab5157c4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:35d032c46a4e |
---|---|
1 if (length(commandArgs(TRUE)) == 0) { | |
2 system("Rscript cpm_tpm_rpk.R -h", intern = F) | |
3 q("no") | |
4 } | |
5 | |
6 # Load necessary packages (install them if it's not the case) | |
7 requiredPackages = c('optparse') | |
8 for (p in requiredPackages) { | |
9 if (!require(p, character.only = TRUE, quietly = T)) { | |
10 install.packages(p) | |
11 } | |
12 suppressPackageStartupMessages(suppressMessages(library(p, character.only = TRUE))) | |
13 } | |
14 | |
15 | |
16 #Arguments | |
17 option_list = list( | |
18 make_option( | |
19 c("-d", "--data"), | |
20 default = NA, | |
21 type = 'character', | |
22 help = "Input file that contains count values to transform" | |
23 ), | |
24 make_option( | |
25 c("-t", "--type"), | |
26 default = 'cpm', | |
27 type = 'character', | |
28 help = "Transformation type, either cpm, tpm or rpk [default : '%default' ]" | |
29 ), | |
30 make_option( | |
31 c("-s", "--sep"), | |
32 default = '\t', | |
33 type = 'character', | |
34 help = "File separator [default : '%default' ]" | |
35 ), | |
36 make_option( | |
37 c("-c", "--colnames"), | |
38 default = TRUE, | |
39 type = 'logical', | |
40 help = "Consider first line as header ? [default : '%default' ]" | |
41 ), | |
42 make_option( | |
43 c("-f", "--gene"), | |
44 default = NA, | |
45 type = 'character', | |
46 help = "Two column of gene length file" | |
47 ), | |
48 make_option( | |
49 c("-a", "--gene_sep"), | |
50 default = '\t', | |
51 type = 'character', | |
52 help = "Gene length file separator [default : '%default' ]" | |
53 ), | |
54 make_option( | |
55 c("-b", "--gene_header"), | |
56 default = TRUE, | |
57 type = 'logical', | |
58 help = "Consider first line of gene length as header ? [default : '%default' ]" | |
59 ), | |
60 make_option( | |
61 c("-l", "--log"), | |
62 default = FALSE, | |
63 type = 'logical', | |
64 help = "Should be log transformed as well ? (log2(data +1)) [default : '%default' ]" | |
65 ), | |
66 make_option( | |
67 c("-o", "--out"), | |
68 default = "res.tab", | |
69 type = 'character', | |
70 help = "Output name [default : '%default' ]" | |
71 ) | |
72 ) | |
73 | |
74 | |
75 opt = parse_args(OptionParser(option_list = option_list), | |
76 args = commandArgs(trailingOnly = TRUE)) | |
77 | |
78 if (opt$data == "" & !(opt$help)) { | |
79 stop("At least one argument must be supplied (count data).\n", | |
80 call. = FALSE) | |
81 } else if ((opt$type == "tpm" | opt$type == "rpk") & opt$gene == "") { | |
82 stop("At least two arguments must be supplied (count data and gene length file).\n", | |
83 call. = FALSE) | |
84 } else if (opt$type != "tpm" & opt$type != "rpk" & opt$type != "cpm") { | |
85 stop("Wrong transformation requested (--type option) must be : cpm, tpm or rpk.\n", | |
86 call. = FALSE) | |
87 } | |
88 | |
89 if (opt$sep == "tab") {opt$sep = "\t"} | |
90 if (opt$gene_sep == "tab") {opt$gene_sep = "\t"} | |
91 | |
92 cpm <- function(count) { | |
93 t(t(count) / colSums(count)) * 1000000 | |
94 } | |
95 | |
96 | |
97 rpk <- function(count, length) { | |
98 count / (length / 1000) | |
99 } | |
100 | |
101 tpm <- function(count, length) { | |
102 RPK = rpk(count, length) | |
103 perMillion_factor = colSums(RPK) / 1000000 | |
104 TPM = RPK / perMillion_factor | |
105 return(TPM) | |
106 } | |
107 | |
108 data = read.table( | |
109 opt$data, | |
110 h = opt$colnames, | |
111 row.names = 1, | |
112 sep = opt$sep | |
113 ) | |
114 | |
115 if (opt$type == "tpm" | opt$type == "rpk") { | |
116 gene_length = as.data.frame( | |
117 read.table( | |
118 opt$gene, | |
119 h = opt$gene_header, | |
120 row.names = 1, | |
121 sep = opt$gene_sep | |
122 ) | |
123 ) | |
124 gene_length = as.data.frame(gene_length[match(rownames(data), rownames(gene_length)), ], rownames(data)) | |
125 } | |
126 | |
127 | |
128 if (opt$type == "cpm") | |
129 res = cpm(data) | |
130 if (opt$type == "tpm") | |
131 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data)) | |
132 if (opt$type == "rpk") | |
133 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data)) | |
134 colnames(res) = colnames(data) | |
135 | |
136 | |
137 if (opt$log == TRUE) { | |
138 res = log2(res + 1) | |
139 } | |
140 | |
141 write.table( | |
142 cbind(Features = rownames(res), res), | |
143 opt$out, | |
144 col.names = opt$colnames, | |
145 row.names = F, | |
146 quote = F, | |
147 sep = "\t" | |
148 ) | |
149 |