comparison cpm_tpm_rpk.R @ 2:563337e780ce draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/cpm_tpm_rpk commit 4ade64ddb1b4e2c62cd153bee13c7ce4ff2d249d
author artbio
date Wed, 06 Feb 2019 19:31:57 -0500
parents b74bab5157c4
children 8b1020c25f0f
comparison
equal deleted inserted replaced
1:b74bab5157c4 2:563337e780ce
11 warnings() 11 warnings()
12 library(optparse) 12 library(optparse)
13 library(ggplot2) 13 library(ggplot2)
14 library(reshape2) 14 library(reshape2)
15 library(Rtsne) 15 library(Rtsne)
16 library(ggfortify)
16 17
17 18
18 19
19 #Arguments 20 #Arguments
20 option_list = list( 21 option_list = list(
71 default = "res.tab", 72 default = "res.tab",
72 type = 'character', 73 type = 'character',
73 help = "Output name [default : '%default' ]" 74 help = "Output name [default : '%default' ]"
74 ), 75 ),
75 make_option( 76 make_option(
76 "--tsne", 77 "--visu",
77 default = FALSE, 78 default = FALSE,
78 type = 'logical', 79 type = 'logical',
79 help = "performs T-SNE [default : '%default' ]" 80 help = "performs T-SNE [default : '%default' ]"
81 ),
82 make_option(
83 "--tsne_labels",
84 default = FALSE,
85 type = 'logical',
86 help = "add labels to t-SNE plot [default : '%default' ]"
80 ), 87 ),
81 make_option( 88 make_option(
82 "--seed", 89 "--seed",
83 default = 42, 90 default = 42,
84 type = 'integer', 91 type = 'integer',
99 make_option( 106 make_option(
100 c("-D", "--tsne_out"), 107 c("-D", "--tsne_out"),
101 default = "tsne.pdf", 108 default = "tsne.pdf",
102 type = 'character', 109 type = 'character',
103 help = "T-SNE pdf [default : '%default' ]" 110 help = "T-SNE pdf [default : '%default' ]"
111 ),
112 make_option(
113 "--pca_out",
114 default = "pca.pdf",
115 type = 'character',
116 help = "PCA pdf [default : '%default' ]"
104 ) 117 )
118
105 ) 119 )
106 120
107 opt = parse_args(OptionParser(option_list = option_list), 121 opt = parse_args(OptionParser(option_list = option_list),
108 args = commandArgs(trailingOnly = TRUE)) 122 args = commandArgs(trailingOnly = TRUE))
109 123
178 quote = F, 192 quote = F,
179 sep = "\t" 193 sep = "\t"
180 ) 194 )
181 195
182 ## 196 ##
183 if (opt$tsne == TRUE) { 197 if (opt$visu == TRUE) {
184 df = cpm(data) 198 df = res
185 # filter and transpose df for tsne 199 # filter and transpose df for tsne and pca
186 df = df[rowSums(df) != 0,] # remove lines without information (with only 0 counts) 200 df = df[rowSums(df) != 0,] # remove lines without information (with only 0 counts)
187 tdf = t(df) 201 tdf = t(df)
188 tdf = log2(tdf + 1)
189 # make tsne and plot results 202 # make tsne and plot results
190 set.seed(opt$seed) ## Sets seed for reproducibility 203 set.seed(opt$seed) ## Sets seed for reproducibility
191 # Run TSNE
192 tsne_out <- Rtsne(tdf, perplexity=opt$perp, theta=opt$theta) # 204 tsne_out <- Rtsne(tdf, perplexity=opt$perp, theta=opt$theta) #
193 embedding <- as.data.frame(tsne_out$Y) 205 embedding <- as.data.frame(tsne_out$Y)
194 embedding$Class <- as.factor(sub("Class_", "", rownames(tdf))) 206 embedding$Class <- as.factor(sub("Class_", "", rownames(tdf)))
195 gg_legend = theme(legend.position="none") 207 gg_legend = theme(legend.position="none")
196 ggplot(embedding, aes(x=V1, y=V2)) + 208 ggplot(embedding, aes(x=V1, y=V2)) +
197 geom_point(size=1.25, color='red') + 209 geom_point(size=1.25, color='red') +
198 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=2.5, color='darkblue') +
199 gg_legend + 210 gg_legend +
200 xlab("") + 211 xlab("") +
201 ylab("") + 212 ylab("") +
202 ggtitle('t-SNE of data (log2CPM transformed)') 213 ggtitle('t-SNE') +
214 if (opt$tsne_labels == TRUE) {
215 geom_text(aes(label=Class),hjust=-0.2, vjust=-0.5, size=2.5, color='darkblue')
216 }
203 ggsave(file=opt$tsne_out, device="pdf") 217 ggsave(file=opt$tsne_out, device="pdf")
218 # make PCA and plot result with ggfortify
219 tdf.pca <- prcomp(tdf, center = TRUE, scale. = T)
220 if (opt$tsne_labels == TRUE) {
221 autoplot(tdf.pca, shape=F, label=T, label.size=2.5, colour="darkred") +
222 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) +
223 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) +
224 ggtitle('PCA')
225 ggsave(file=opt$pca_out, device="pdf")
226 } else {
227 autoplot(tdf.pca, shape=T, colour="red") +
228 xlab(paste("PC1",summary(tdf.pca)$importance[2,1]*100, "%")) +
229 ylab(paste("PC2",summary(tdf.pca)$importance[2,2]*100, "%")) +
230 ggtitle('PCA')
231 ggsave(file=opt$pca_out, device="pdf")
232 }
204 } 233 }
205 234
206 235
207 236
208 237