comparison cpm_tpm_rpk.R @ 3:8b1020c25f0f draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit 9149001c65de633ddfd2f91cf208074e40482ce3
author artbio
date Fri, 12 Apr 2019 12:01:35 -0400
parents 563337e780ce
children be358a1ebf67
comparison
equal deleted inserted replaced
2:563337e780ce 3:8b1020c25f0f
27 ), 27 ),
28 make_option( 28 make_option(
29 c("-t", "--type"), 29 c("-t", "--type"),
30 default = 'cpm', 30 default = 'cpm',
31 type = 'character', 31 type = 'character',
32 help = "Transformation type, either cpm, tpm or rpk [default : '%default' ]" 32 help = "Transformation type, either cpm, tpm, rpk or none[default : '%default' ]"
33 ), 33 ),
34 make_option( 34 make_option(
35 c("-s", "--sep"), 35 c("-s", "--sep"),
36 default = '\t', 36 default = '\t',
37 type = 'character', 37 type = 'character',
125 stop("At least one argument must be supplied (count data).\n", 125 stop("At least one argument must be supplied (count data).\n",
126 call. = FALSE) 126 call. = FALSE)
127 } else if ((opt$type == "tpm" | opt$type == "rpk") & opt$gene == "") { 127 } else if ((opt$type == "tpm" | opt$type == "rpk") & opt$gene == "") {
128 stop("At least two arguments must be supplied (count data and gene length file).\n", 128 stop("At least two arguments must be supplied (count data and gene length file).\n",
129 call. = FALSE) 129 call. = FALSE)
130 } else if (opt$type != "tpm" & opt$type != "rpk" & opt$type != "cpm") { 130 } else if (opt$type != "tpm" & opt$type != "rpk" & opt$type != "cpm" & opt$type != "none") {
131 stop("Wrong transformation requested (--type option) must be : cpm, tpm or rpk.\n", 131 stop("Wrong transformation requested (--type option) must be : cpm, tpm or rpk.\n",
132 call. = FALSE) 132 call. = FALSE)
133 } 133 }
134 134
135 if (opt$sep == "tab") {opt$sep = "\t"} 135 if (opt$sep == "tab") {opt$sep = "\t"}
151 return(TPM) 151 return(TPM)
152 } 152 }
153 153
154 data = read.table( 154 data = read.table(
155 opt$data, 155 opt$data,
156 check.names = FALSE,
156 header = opt$colnames, 157 header = opt$colnames,
157 row.names = 1, 158 row.names = 1,
158 sep = opt$sep 159 sep = opt$sep
159 ) 160 )
160 161
175 res = cpm(data) 176 res = cpm(data)
176 if (opt$type == "tpm") 177 if (opt$type == "tpm")
177 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data)) 178 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data))
178 if (opt$type == "rpk") 179 if (opt$type == "rpk")
179 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data)) 180 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data))
181 if (opt$type == "none")
182 res = data
180 colnames(res) = colnames(data) 183 colnames(res) = colnames(data)
181 184
182 185
183 if (opt$log == TRUE) { 186 if (opt$log == TRUE) {
184 res = log2(res + 1) 187 res = log2(res + 1)
204 tsne_out <- Rtsne(tdf, perplexity=opt$perp, theta=opt$theta) # 207 tsne_out <- Rtsne(tdf, perplexity=opt$perp, theta=opt$theta) #
205 embedding <- as.data.frame(tsne_out$Y) 208 embedding <- as.data.frame(tsne_out$Y)
206 embedding$Class <- as.factor(sub("Class_", "", rownames(tdf))) 209 embedding$Class <- as.factor(sub("Class_", "", rownames(tdf)))
207 gg_legend = theme(legend.position="none") 210 gg_legend = theme(legend.position="none")
208 ggplot(embedding, aes(x=V1, y=V2)) + 211 ggplot(embedding, aes(x=V1, y=V2)) +
209 geom_point(size=1.25, color='red') + 212 geom_point(size=1, color='red') +
210 gg_legend + 213 gg_legend +
211 xlab("") + 214 xlab("") +
212 ylab("") + 215 ylab("") +
213 ggtitle('t-SNE') + 216 ggtitle('t-SNE') +
214 if (opt$tsne_labels == TRUE) { 217 if (opt$tsne_labels == TRUE) {
215 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=2.5, color='darkblue') 218 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=2.5, color='darkblue')
216 } 219 }
217 ggsave(file=opt$tsne_out, device="pdf") 220 ggsave(file=opt$tsne_out, device="pdf")
218 # make PCA and plot result with ggfortify 221 # make PCA and plot result with ggfortify (autoplot)
219 tdf.pca <- prcomp(tdf, center = TRUE, scale. = T) 222 tdf.pca <- prcomp(tdf, center = TRUE, scale. = T)
220 if (opt$tsne_labels == TRUE) { 223 if (opt$tsne_labels == TRUE) {
221 autoplot(tdf.pca, shape=F, label=T, label.size=2.5, colour="darkred") + 224 autoplot(tdf.pca, shape=F, label=T, label.size=2.5, label.vjust=1.2,
225 label.hjust=1.2,
226 colour="darkblue") +
227 geom_point(size=1, color='red') +
222 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) + 228 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) +
223 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) + 229 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) +
224 ggtitle('PCA') 230 ggtitle('PCA')
225 ggsave(file=opt$pca_out, device="pdf") 231 ggsave(file=opt$pca_out, device="pdf")
226 } else { 232 } else {
227 autoplot(tdf.pca, shape=T, colour="red") + 233 autoplot(tdf.pca, shape=T, colour="darkblue") +
234 geom_point(size=1, color='red') +
228 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) + 235 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) +
229 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) + 236 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) +
230 ggtitle('PCA') 237 ggtitle('PCA')
231 ggsave(file=opt$pca_out, device="pdf") 238 ggsave(file=opt$pca_out, device="pdf")
232 } 239 }