Mercurial > repos > artbio > cpm_tpm_rpk
diff cpm_tpm_rpk.R @ 0:35d032c46a4e draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit cc0fd23c039cc4a39c5e4e320b50666b7d9b6f65
author | artbio |
---|---|
date | Wed, 25 Jul 2018 13:05:17 -0400 |
parents | |
children | b74bab5157c4 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpm_tpm_rpk.R Wed Jul 25 13:05:17 2018 -0400 @@ -0,0 +1,149 @@ +if (length(commandArgs(TRUE)) == 0) { + system("Rscript cpm_tpm_rpk.R -h", intern = F) + q("no") +} + +# Load necessary packages (install them if it's not the case) +requiredPackages = c('optparse') +for (p in requiredPackages) { + if (!require(p, character.only = TRUE, quietly = T)) { + install.packages(p) + } + suppressPackageStartupMessages(suppressMessages(library(p, character.only = TRUE))) +} + + +#Arguments +option_list = list( + make_option( + c("-d", "--data"), + default = NA, + type = 'character', + help = "Input file that contains count values to transform" + ), + make_option( + c("-t", "--type"), + default = 'cpm', + type = 'character', + help = "Transformation type, either cpm, tpm or rpk [default : '%default' ]" + ), + make_option( + c("-s", "--sep"), + default = '\t', + type = 'character', + help = "File separator [default : '%default' ]" + ), + make_option( + c("-c", "--colnames"), + default = TRUE, + type = 'logical', + help = "Consider first line as header ? [default : '%default' ]" + ), + make_option( + c("-f", "--gene"), + default = NA, + type = 'character', + help = "Two column of gene length file" + ), + make_option( + c("-a", "--gene_sep"), + default = '\t', + type = 'character', + help = "Gene length file separator [default : '%default' ]" + ), + make_option( + c("-b", "--gene_header"), + default = TRUE, + type = 'logical', + help = "Consider first line of gene length as header ? [default : '%default' ]" + ), + make_option( + c("-l", "--log"), + default = FALSE, + type = 'logical', + help = "Should be log transformed as well ? (log2(data +1)) [default : '%default' ]" + ), + make_option( + c("-o", "--out"), + default = "res.tab", + type = 'character', + help = "Output name [default : '%default' ]" + ) +) + + +opt = parse_args(OptionParser(option_list = option_list), + args = commandArgs(trailingOnly = TRUE)) + +if (opt$data == "" & !(opt$help)) { + stop("At least one argument must be supplied (count data).\n", + call. = FALSE) +} else if ((opt$type == "tpm" | opt$type == "rpk") & opt$gene == "") { + stop("At least two arguments must be supplied (count data and gene length file).\n", + call. = FALSE) +} else if (opt$type != "tpm" & opt$type != "rpk" & opt$type != "cpm") { + stop("Wrong transformation requested (--type option) must be : cpm, tpm or rpk.\n", + call. = FALSE) +} + +if (opt$sep == "tab") {opt$sep = "\t"} +if (opt$gene_sep == "tab") {opt$gene_sep = "\t"} + +cpm <- function(count) { + t(t(count) / colSums(count)) * 1000000 +} + + +rpk <- function(count, length) { + count / (length / 1000) +} + +tpm <- function(count, length) { + RPK = rpk(count, length) + perMillion_factor = colSums(RPK) / 1000000 + TPM = RPK / perMillion_factor + return(TPM) +} + +data = read.table( + opt$data, + h = opt$colnames, + row.names = 1, + sep = opt$sep +) + +if (opt$type == "tpm" | opt$type == "rpk") { + gene_length = as.data.frame( + read.table( + opt$gene, + h = opt$gene_header, + row.names = 1, + sep = opt$gene_sep + ) + ) + gene_length = as.data.frame(gene_length[match(rownames(data), rownames(gene_length)), ], rownames(data)) +} + + +if (opt$type == "cpm") + res = cpm(data) +if (opt$type == "tpm") + res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data)) +if (opt$type == "rpk") + res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data)) +colnames(res) = colnames(data) + + +if (opt$log == TRUE) { + res = log2(res + 1) +} + +write.table( + cbind(Features = rownames(res), res), + opt$out, + col.names = opt$colnames, + row.names = F, + quote = F, + sep = "\t" +) +