Mercurial > repos > artbio > repenrich
comparison edgeR_repenrich.R @ 0:f6f0f1e5e940 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
author | artbio |
---|---|
date | Wed, 02 Aug 2017 05:17:29 -0400 |
parents | |
children | 15e3e29f310e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f6f0f1e5e940 |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 # A command-line interface to edgeR for use with Galaxy edger-repenrich | |
4 # written by Christophe Antoniewski drosofff@gmail.com 2017.05.30 | |
5 | |
6 | |
7 # setup R error handling to go to stderr | |
8 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
9 | |
10 # To not crash galaxy with an UTF8 error with not-US LC settings. | |
11 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
12 | |
13 library("getopt") | |
14 library("tools") | |
15 options(stringAsFactors = FALSE, useFancyQuotes = FALSE) | |
16 args <- commandArgs(trailingOnly = TRUE) | |
17 | |
18 # get options, using the spec as defined by the enclosed list. | |
19 # we read the options from the default: commandArgs(TRUE). | |
20 spec <- matrix(c( | |
21 "quiet", "q", 0, "logical", | |
22 "help", "h", 0, "logical", | |
23 "outfile", "o", 1, "character", | |
24 "countsfile", "n", 1, "character", | |
25 "factorName", "N", 1, "character", | |
26 "levelNameA", "A", 1, "character", | |
27 "levelNameB", "B", 1, "character", | |
28 "levelAfiles", "a", 1, "character", | |
29 "levelBfiles", "b", 1, "character", | |
30 "alignmentA", "i", 1, "character", | |
31 "alignmentB", "j", 1, "character", | |
32 "plots" , "p", 1, "character"), | |
33 byrow=TRUE, ncol=4) | |
34 opt <- getopt(spec) | |
35 | |
36 # if help was asked for print a friendly message | |
37 # and exit with a non-zero error code | |
38 if (!is.null(opt$help)) { | |
39 cat(getopt(spec, usage=TRUE)) | |
40 q(status=1) | |
41 } | |
42 | |
43 # enforce the following required arguments | |
44 if (is.null(opt$outfile)) { | |
45 cat("'outfile' is required\n") | |
46 q(status=1) | |
47 } | |
48 if (is.null(opt$levelAfiles) | is.null(opt$levelBfiles)) { | |
49 cat("input count files are required for both levels\n") | |
50 q(status=1) | |
51 } | |
52 if (is.null(opt$alignmentA) | is.null(opt$alignmentB)) { | |
53 cat("total aligned read files are required for both levels\n") | |
54 q(status=1) | |
55 } | |
56 | |
57 verbose <- if (is.null(opt$quiet)) { | |
58 TRUE | |
59 } else { | |
60 FALSE | |
61 } | |
62 | |
63 suppressPackageStartupMessages({ | |
64 library("edgeR") | |
65 library("limma") | |
66 }) | |
67 | |
68 # build levels A and B file lists | |
69 | |
70 library("rjson") | |
71 filesA <- fromJSON(opt$levelAfiles, method = "C", unexpected.escape = "error") | |
72 filesB <- fromJSON(opt$levelBfiles, method = "C", unexpected.escape = "error") | |
73 listA <- list() | |
74 indice = 0 | |
75 listA[["level"]] <- opt$levelNameA | |
76 for (file in filesA) { | |
77 indice = indice +1 | |
78 listA[[paste0(opt$levelNameA,"_",indice)]] <- read.delim(file, header=FALSE) | |
79 } | |
80 listB <- list() | |
81 indice = 0 | |
82 listB[["level"]] <- opt$levelNameB | |
83 for (file in filesB) { | |
84 indice = indice +1 | |
85 listB[[paste0(opt$levelNameB,"_",indice)]] <- read.delim(file, header=FALSE) | |
86 } | |
87 | |
88 # build a counts table | |
89 counts <- data.frame(row.names=listA[[2]][,1]) | |
90 for (element in names(listA[-1])) { | |
91 counts<-cbind(counts, listA[[element]][,4]) | |
92 } | |
93 for (element in names(listB[-1])) { | |
94 counts<-cbind(counts, listB[[element]][,4]) | |
95 } | |
96 colnames(counts)=c(names(listA[-1]), names(listB[-1])) | |
97 | |
98 # build aligned counts vector | |
99 | |
100 filesi <- fromJSON(opt$alignmentA, method = "C", unexpected.escape = "error") | |
101 filesj <- fromJSON(opt$alignmentB, method = "C", unexpected.escape = "error") | |
102 sizes <- c() | |
103 for (file in filesi) { | |
104 sizes <- c(sizes, read.delim(file, header=FALSE)[1,1]) | |
105 } | |
106 for (file in filesj) { | |
107 sizes <- c(sizes, read.delim(file, header=FALSE)[1,1]) | |
108 } | |
109 | |
110 # build a meta data object | |
111 | |
112 meta <- data.frame( | |
113 row.names=colnames(counts), | |
114 condition=c(rep(opt$levelNameA,length(filesA)), rep(opt$levelNameB,length(filesB)) ), | |
115 libsize=sizes | |
116 ) | |
117 | |
118 | |
119 # Define the library size and conditions for the GLM | |
120 libsize <- meta$libsize | |
121 condition <- factor(meta$condition) | |
122 design <- model.matrix(~0+condition) | |
123 colnames(design) <- levels(meta$condition) | |
124 | |
125 | |
126 # Build a DGE object for the GLM | |
127 y <- DGEList(counts=counts, lib.size=libsize) | |
128 | |
129 # Normalize the data | |
130 y <- calcNormFactors(y) | |
131 y$samples | |
132 # plotMDS(y) latter | |
133 | |
134 # Estimate the variance | |
135 y <- estimateGLMCommonDisp(y, design) | |
136 y <- estimateGLMTrendedDisp(y, design) | |
137 y <- estimateGLMTagwiseDisp(y, design) | |
138 # plotBCV(y) latter | |
139 | |
140 # Builds and outputs an object to contain the normalized read abundance in counts per million of reads | |
141 cpm <- cpm(y, log=FALSE, lib.size=libsize) | |
142 cpm <- as.data.frame(cpm) | |
143 colnames(cpm) <- colnames(counts) | |
144 if (!is.null(opt$countsfile)) { | |
145 normalizedAbundance <- data.frame(Tag=rownames(cpm)) | |
146 normalizedAbundance <- cbind(normalizedAbundance, cpm) | |
147 write.table(normalizedAbundance, file=opt$countsfile, sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE) | |
148 } | |
149 | |
150 # Conduct fitting of the GLM | |
151 yfit <- glmFit(y, design) | |
152 | |
153 # Initialize result matrices to contain the results of the GLM | |
154 results <- matrix(nrow=dim(counts)[1],ncol=0) | |
155 logfc <- matrix(nrow=dim(counts)[1],ncol=0) | |
156 | |
157 # Make the comparisons for the GLM | |
158 my.contrasts <- makeContrasts( | |
159 paste0(opt$levelNameA,"_",opt$levelNameB," = ", opt$levelNameA, " - ", opt$levelNameB), | |
160 levels = design | |
161 ) | |
162 | |
163 # Define the contrasts used in the comparisons | |
164 allcontrasts = paste0(opt$levelNameA," vs ",opt$levelNameB) | |
165 | |
166 # Conduct a for loop that will do the fitting of the GLM for each comparison | |
167 # Put the results into the results objects | |
168 lrt <- glmLRT(yfit, contrast=my.contrasts[,1]) | |
169 plotSmear(lrt, de.tags=rownames(y)) | |
170 title(allcontrasts) | |
171 res <- topTags(lrt,n=dim(c)[1],sort.by="none")$table | |
172 results <- cbind(results,res[,c(1,5)]) | |
173 logfc <- cbind(logfc,res[c(1)]) | |
174 | |
175 # Add the repeat types back into the results. | |
176 # We should still have the same order as the input data | |
177 results$class <- listA[[2]][,2] | |
178 results$type <- listA[[2]][,3] | |
179 | |
180 # Sort the results table by the FDR | |
181 results <- results[with(results, order(FDR)), ] | |
182 | |
183 # Save the results | |
184 write.table(results, opt$outfile, quote=FALSE, sep="\t", col.names=FALSE) | |
185 | |
186 # Plot Fold Changes for repeat classes and types | |
187 | |
188 # open the device and plots | |
189 if (!is.null(opt$plots)) { | |
190 if (verbose) cat("creating plots\n") | |
191 pdf(opt$plots) | |
192 plotMDS(y, main="Multidimensional Scaling Plot Of Distances Between Samples") | |
193 plotBCV(y, xlab="Gene abundance (Average log CPM)", main="Biological Coefficient of Variation Plot") | |
194 logFC <- results[, "logFC"] | |
195 # Plot the repeat classes | |
196 classes <- with(results, reorder(class, -logFC, median)) | |
197 par(mar=c(6,10,4,1)) | |
198 boxplot(logFC ~ classes, data=results, outline=FALSE, horizontal=TRUE, | |
199 las=2, xlab="log(Fold Change)", main=paste0(allcontrasts, ", by Class")) | |
200 abline(v=0) | |
201 # Plot the repeat types | |
202 types <- with(results, reorder(type, -logFC, median)) | |
203 boxplot(logFC ~ types, data=results, outline=FALSE, horizontal=TRUE, | |
204 las=2, xlab="log(Fold Change)", main=paste0(allcontrasts, ", by Type")) | |
205 abline(v=0) | |
206 } | |
207 | |
208 # close the plot device | |
209 if (!is.null(opt$plots)) { | |
210 cat("closing plot device\n") | |
211 dev.off() | |
212 } | |
213 | |
214 cat("Session information:\n\n") | |
215 | |
216 sessionInfo() | |
217 |