Mercurial > repos > drosofff > repenrich
comparison edgeR_repenrich.R @ 1:54a3f3a195d6 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 114b47cc624e39b4f485c8623458fc98494c564d
author | drosofff |
---|---|
date | Mon, 29 May 2017 13:11:57 -0400 |
parents | |
children | 1805b262c12d |
comparison
equal
deleted
inserted
replaced
0:1435d142041b | 1:54a3f3a195d6 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 # A command-line interface to edgeR for use with Galaxy edger-repenrich | |
4 # written by Christophe Antoniewski drosofff@gmail.com 2017.05.30 | |
5 | |
6 | |
7 # setup R error handling to go to stderr | |
8 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
9 | |
10 # To not crash galaxy with an UTF8 error with not-US LC settings. | |
11 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
12 | |
13 library("getopt") | |
14 library("tools") | |
15 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) | |
16 args <- commandArgs(trailingOnly = TRUE) | |
17 | |
18 # get options, using the spec as defined by the enclosed list. | |
19 # we read the options from the default: commandArgs(TRUE). | |
20 spec <- matrix(c( | |
21 "quiet", "q", 0, "logical", | |
22 "help", "h", 0, "logical", | |
23 "outfile", "o", 1, "character", | |
24 "countsfile", "n", 1, "character", | |
25 "factorName", "N", 1, "character", | |
26 "levelNameA", "A", 1, "character", | |
27 "levelNameB", "B", 1, "character", | |
28 "levelAfiles", "a", 1, "character", | |
29 "levelBfiles", "b", 1, "character", | |
30 "alignmentA", "i", 1, "character", | |
31 "alignmentB", "j", 1, "character", | |
32 "plots" , "p", 1, "character"), | |
33 byrow=TRUE, ncol=4) | |
34 opt <- getopt(spec) | |
35 | |
36 # if help was asked for print a friendly message | |
37 # and exit with a non-zero error code | |
38 if (!is.null(opt$help)) { | |
39 cat(getopt(spec, usage=TRUE)) | |
40 q(status=1) | |
41 } | |
42 | |
43 # enforce the following required arguments | |
44 if (is.null(opt$outfile)) { | |
45 cat("'outfile' is required\n") | |
46 q(status=1) | |
47 } | |
48 if (is.null(opt$levelAfiles) | is.null(opt$levelBfiles)) { | |
49 cat("input count files are required for both levels\n") | |
50 q(status=1) | |
51 } | |
52 if (is.null(opt$alignmentA) | is.null(opt$alignmentB)) { | |
53 cat("total aligned read files are required for both levels\n") | |
54 q(status=1) | |
55 } | |
56 | |
57 verbose <- if (is.null(opt$quiet)) { | |
58 TRUE | |
59 } else { | |
60 FALSE | |
61 } | |
62 | |
63 suppressPackageStartupMessages({ | |
64 library("edgeR") | |
65 library("limma") | |
66 }) | |
67 | |
68 # build levels A and B file lists | |
69 | |
70 library("rjson") | |
71 filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error") | |
72 filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error") | |
73 listA <- list() | |
74 indice = 0 | |
75 listA[["level"]] <- opt$levelNameA | |
76 for (file in filesA) { | |
77 indice = indice +1 | |
78 listA[[paste0(opt$levelNameA,"_",indice)]] <- read.delim(file, header=FALSE) | |
79 } | |
80 listB <- list() | |
81 indice = 0 | |
82 listB[["level"]] <- opt$levelNameB | |
83 for (file in filesB) { | |
84 indice = indice +1 | |
85 listB[[paste0(opt$levelNameB,"_",indice)]] <- read.delim(file, header=FALSE) | |
86 } | |
87 | |
88 # build a counts table | |
89 counts <- data.frame(row.names=listA[[2]][,1]) | |
90 for (element in names(listA[-1])) { | |
91 counts<-cbind(counts, listA[[element]][,4]) | |
92 } | |
93 for (element in names(listB[-1])) { | |
94 counts<-cbind(counts, listB[[element]][,4]) | |
95 } | |
96 colnames(counts)=c(names(listA[-1]), names(listB[-1])) | |
97 | |
98 # build aligned counts vector | |
99 | |
100 filesi <- fromJSON(opt$alignmentA, method = "C", unexpected.escape = "error") | |
101 filesj <- fromJSON(opt$alignmentB, method = "C", unexpected.escape = "error") | |
102 sizes <- c() | |
103 for (file in filesi) { | |
104 sizes <- c(sizes, read.delim(file, header=FALSE)[1,1]) | |
105 } | |
106 for (file in filesj) { | |
107 sizes <- c(sizes, read.delim(file, header=FALSE)[1,1]) | |
108 } | |
109 | |
110 # build a meta data object | |
111 | |
112 meta <- data.frame( | |
113 row.names=colnames(counts), | |
114 condition=c(rep(opt$levelNameA,length(filesA)), rep(opt$levelNameB,length(filesB)) ), | |
115 libsize=sizes | |
116 ) | |
117 | |
118 | |
119 # Define the library size and conditions for the GLM | |
120 libsize <- meta$libsize | |
121 condition <- factor(meta$condition) | |
122 design <- model.matrix(~0+condition) | |
123 colnames(design) <- levels(meta$condition) | |
124 | |
125 | |
126 # Build a DGE object for the GLM | |
127 y <- DGEList(counts=counts, lib.size=libsize) | |
128 | |
129 # Normalize the data | |
130 y <- calcNormFactors(y) | |
131 y$samples | |
132 # plotMDS(y) latter | |
133 | |
134 # Estimate the variance | |
135 y <- estimateGLMCommonDisp(y, design) | |
136 y <- estimateGLMTrendedDisp(y, design) | |
137 y <- estimateGLMTagwiseDisp(y, design) | |
138 # plotBCV(y) latter | |
139 | |
140 # Builds and outputs an object to contain the normalized read abundance in counts per million of reads | |
141 cpm <- cpm(y, log=FALSE, lib.size=libsize) | |
142 cpm <- as.data.frame(cpm) | |
143 colnames(cpm) <- colnames(counts) | |
144 if (!is.null(opt$countsfile)) { | |
145 normalizedAbundance <- data.frame(Tag=rownames(cpm)) | |
146 normalizedAbundance <- cbind(normalizedAbundance, cpm) | |
147 write.table(normalizedAbundance, file=opt$countsfile, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE) | |
148 } | |
149 | |
150 # test | |
151 print(counts) | |
152 print(cpm) | |
153 | |
154 # Conduct fitting of the GLM | |
155 yfit <- glmFit(y, design) | |
156 | |
157 # Initialize result matrices to contain the results of the GLM | |
158 results <- matrix(nrow=dim(counts)[1],ncol=0) | |
159 logfc <- matrix(nrow=dim(counts)[1],ncol=0) | |
160 | |
161 # Make the comparisons for the GLM | |
162 my.contrasts <- makeContrasts( | |
163 paste0(opt$levelNameB,"_",opt$levelNameA," = ", opt$levelNameB, " - ", opt$levelNameA), | |
164 levels = design | |
165 ) | |
166 | |
167 # Define the contrasts used in the comparisons | |
168 allcontrasts = paste0(opt$levelNameB," vs ",opt$levelNameA) | |
169 | |
170 # Conduct a for loop that will do the fitting of the GLM for each comparison | |
171 # Put the results into the results objects | |
172 lrt <- glmLRT(yfit, contrast=my.contrasts[,1]) | |
173 plotSmear(lrt, de.tags=rownames(y)) | |
174 title(allcontrasts) | |
175 res <- topTags(lrt,n=dim(c)[1],sort.by="none")$table | |
176 results <- cbind(results,res[,c(1,5)]) | |
177 logfc <- cbind(logfc,res[c(1)]) | |
178 | |
179 # Add the repeat types back into the results. | |
180 # We should still have the same order as the input data | |
181 results$class <- listA[[2]][,2] | |
182 results$type <- listA[[2]][,3] | |
183 | |
184 # Sort the results table by the FDR | |
185 results <- results[with(results, order(FDR)), ] | |
186 | |
187 # Save the results | |
188 write.table(results, opt$outfile, quote=FALSE, sep="\t", col.names=FALSE) | |
189 | |
190 # Plot Fold Changes for repeat classes and types | |
191 | |
192 # open the device and plots | |
193 if (!is.null(opt$plots)) { | |
194 if (verbose) cat("creating plots\n") | |
195 pdf(opt$plots) | |
196 plotMDS(y, main="Multidimensional Scaling Plot Of Distances Between Samples") | |
197 plotBCV(y, xlab="Gene abundance (Average log CPM)", main="Biological Coefficient of Variation Plot") | |
198 logFC <- results[, "logFC"] | |
199 # Plot the repeat classes | |
200 classes <- with(results, reorder(class, -logFC, median)) | |
201 par(mar=c(6,10,4,1)) | |
202 boxplot(logFC ~ classes, data=results, outline=FALSE, horizontal=TRUE, | |
203 las=2, xlab="log(Fold Change)", main=paste0(allcontrasts, ", by Class")) | |
204 abline(v=0) | |
205 # Plot the repeat types | |
206 types <- with(results, reorder(type, -logFC, median)) | |
207 boxplot(logFC ~ types, data=results, outline=FALSE, horizontal=TRUE, | |
208 las=2, xlab="log(Fold Change)", main=paste0(allcontrasts, ", by Type")) | |
209 abline(v=0) | |
210 } | |
211 | |
212 # close the plot device | |
213 if (!is.null(opt$plots)) { | |
214 cat("closing plot device\n") | |
215 dev.off() | |
216 } | |
217 | |
218 cat("Session information:\n\n") | |
219 | |
220 sessionInfo() | |
221 |