Mercurial > repos > artbio > cpm_tpm_rpk
comparison cpm_tpm_rpk.R @ 3:8b1020c25f0f draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit 9149001c65de633ddfd2f91cf208074e40482ce3
author | artbio |
---|---|
date | Fri, 12 Apr 2019 12:01:35 -0400 |
parents | 563337e780ce |
children | be358a1ebf67 |
comparison
equal
deleted
inserted
replaced
2:563337e780ce | 3:8b1020c25f0f |
---|---|
27 ), | 27 ), |
28 make_option( | 28 make_option( |
29 c("-t", "--type"), | 29 c("-t", "--type"), |
30 default = 'cpm', | 30 default = 'cpm', |
31 type = 'character', | 31 type = 'character', |
32 help = "Transformation type, either cpm, tpm or rpk [default : '%default' ]" | 32 help = "Transformation type, either cpm, tpm, rpk or none[default : '%default' ]" |
33 ), | 33 ), |
34 make_option( | 34 make_option( |
35 c("-s", "--sep"), | 35 c("-s", "--sep"), |
36 default = '\t', | 36 default = '\t', |
37 type = 'character', | 37 type = 'character', |
125 stop("At least one argument must be supplied (count data).\n", | 125 stop("At least one argument must be supplied (count data).\n", |
126 call. = FALSE) | 126 call. = FALSE) |
127 } else if ((opt$type == "tpm" | opt$type == "rpk") & opt$gene == "") { | 127 } else if ((opt$type == "tpm" | opt$type == "rpk") & opt$gene == "") { |
128 stop("At least two arguments must be supplied (count data and gene length file).\n", | 128 stop("At least two arguments must be supplied (count data and gene length file).\n", |
129 call. = FALSE) | 129 call. = FALSE) |
130 } else if (opt$type != "tpm" & opt$type != "rpk" & opt$type != "cpm") { | 130 } else if (opt$type != "tpm" & opt$type != "rpk" & opt$type != "cpm" & opt$type != "none") { |
131 stop("Wrong transformation requested (--type option) must be : cpm, tpm or rpk.\n", | 131 stop("Wrong transformation requested (--type option) must be : cpm, tpm or rpk.\n", |
132 call. = FALSE) | 132 call. = FALSE) |
133 } | 133 } |
134 | 134 |
135 if (opt$sep == "tab") {opt$sep = "\t"} | 135 if (opt$sep == "tab") {opt$sep = "\t"} |
151 return(TPM) | 151 return(TPM) |
152 } | 152 } |
153 | 153 |
154 data = read.table( | 154 data = read.table( |
155 opt$data, | 155 opt$data, |
156 check.names = FALSE, | |
156 header = opt$colnames, | 157 header = opt$colnames, |
157 row.names = 1, | 158 row.names = 1, |
158 sep = opt$sep | 159 sep = opt$sep |
159 ) | 160 ) |
160 | 161 |
175 res = cpm(data) | 176 res = cpm(data) |
176 if (opt$type == "tpm") | 177 if (opt$type == "tpm") |
177 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data)) | 178 res = as.data.frame(apply(data, 2, tpm, length = gene_length), row.names = rownames(data)) |
178 if (opt$type == "rpk") | 179 if (opt$type == "rpk") |
179 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data)) | 180 res = as.data.frame(apply(data, 2, rpk, length = gene_length), row.names = rownames(data)) |
181 if (opt$type == "none") | |
182 res = data | |
180 colnames(res) = colnames(data) | 183 colnames(res) = colnames(data) |
181 | 184 |
182 | 185 |
183 if (opt$log == TRUE) { | 186 if (opt$log == TRUE) { |
184 res = log2(res + 1) | 187 res = log2(res + 1) |
204 tsne_out <- Rtsne(tdf, perplexity=opt$perp, theta=opt$theta) # | 207 tsne_out <- Rtsne(tdf, perplexity=opt$perp, theta=opt$theta) # |
205 embedding <- as.data.frame(tsne_out$Y) | 208 embedding <- as.data.frame(tsne_out$Y) |
206 embedding$Class <- as.factor(sub("Class_", "", rownames(tdf))) | 209 embedding$Class <- as.factor(sub("Class_", "", rownames(tdf))) |
207 gg_legend = theme(legend.position="none") | 210 gg_legend = theme(legend.position="none") |
208 ggplot(embedding, aes(x=V1, y=V2)) + | 211 ggplot(embedding, aes(x=V1, y=V2)) + |
209 geom_point(size=1.25, color='red') + | 212 geom_point(size=1, color='red') + |
210 gg_legend + | 213 gg_legend + |
211 xlab("") + | 214 xlab("") + |
212 ylab("") + | 215 ylab("") + |
213 ggtitle('t-SNE') + | 216 ggtitle('t-SNE') + |
214 if (opt$tsne_labels == TRUE) { | 217 if (opt$tsne_labels == TRUE) { |
215 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=2.5, color='darkblue') | 218 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=2.5, color='darkblue') |
216 } | 219 } |
217 ggsave(file=opt$tsne_out, device="pdf") | 220 ggsave(file=opt$tsne_out, device="pdf") |
218 # make PCA and plot result with ggfortify | 221 # make PCA and plot result with ggfortify (autoplot) |
219 tdf.pca <- prcomp(tdf, center = TRUE, scale. = T) | 222 tdf.pca <- prcomp(tdf, center = TRUE, scale. = T) |
220 if (opt$tsne_labels == TRUE) { | 223 if (opt$tsne_labels == TRUE) { |
221 autoplot(tdf.pca, shape=F, label=T, label.size=2.5, colour="darkred") + | 224 autoplot(tdf.pca, shape=F, label=T, label.size=2.5, label.vjust=1.2, |
225 label.hjust=1.2, | |
226 colour="darkblue") + | |
227 geom_point(size=1, color='red') + | |
222 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) + | 228 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) + |
223 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) + | 229 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) + |
224 ggtitle('PCA') | 230 ggtitle('PCA') |
225 ggsave(file=opt$pca_out, device="pdf") | 231 ggsave(file=opt$pca_out, device="pdf") |
226 } else { | 232 } else { |
227 autoplot(tdf.pca, shape=T, colour="red") + | 233 autoplot(tdf.pca, shape=T, colour="darkblue") + |
234 geom_point(size=1, color='red') + | |
228 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) + | 235 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) + |
229 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) + | 236 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) + |
230 ggtitle('PCA') | 237 ggtitle('PCA') |
231 ggsave(file=opt$pca_out, device="pdf") | 238 ggsave(file=opt$pca_out, device="pdf") |
232 } | 239 } |