Mercurial > repos > nikhil-joshi > deseq_and_sam2counts
comparison deseq/deseq.R @ 0:d7f27b43b8ff draft
Uploaded
author | nikhil-joshi |
---|---|
date | Thu, 05 Jul 2012 21:02:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d7f27b43b8ff |
---|---|
1 ## Run DESeq (oversimplified) in Galaxy. | |
2 ## Format: Rscript deseq3.R tab-delimited-counts.txt comma,delimited,col,names,except,first,gene,id,col comparison,here outputfile | |
3 ## | |
4 ## The incoming data must have the first column be the gene names, and | |
5 ## the rest raw counts. The next argument is the list of groups | |
6 ## (i.e. liver,liver,kidney,kidney), and the final argument is the | |
7 ## comparison, i.e. kidney,liver. This produces a top-table called | |
8 ## top-table.txt ordered by p-value. | |
9 | |
10 cargs <- commandArgs() | |
11 cargs <- cargs[(which(cargs == "--args")+1):length(cargs)] | |
12 | |
13 countstable <- cargs[1] | |
14 conds <- unlist(strsplit(cargs[2], ',')) | |
15 comparison <- unlist(strsplit(cargs[3], ',')) | |
16 outputfile <- cargs[4] | |
17 diag.html = cargs[5] | |
18 temp.files.path = cargs[6] | |
19 counts.name = cargs[7] | |
20 | |
21 # the comparison must only have two values and the conds must | |
22 # be a vector from those values, at least one of each. | |
23 if (length(unique(conds)) != 2) { | |
24 stop("You can only have two columns types: ", cargs[2]) | |
25 } | |
26 | |
27 if (length(comparison) != 2) { | |
28 stop("Comparison type must be a tuple: ", cargs[3]) | |
29 } | |
30 | |
31 if (!identical(sort(comparison), sort(unique(conds)))) { | |
32 stop("Column types must use the two names from Comparison type, and vice versa. Must have at least one of each in the Column types.\nColumn types: ", cargs[2], "\n", "Comparison type: ", cargs[3]) | |
33 } | |
34 | |
35 sink("/dev/null") | |
36 dir.create(temp.files.path, recursive=TRUE) | |
37 library(DESeq) | |
38 | |
39 d <- read.table(countstable, sep="\t", header=TRUE, row.names=1) | |
40 | |
41 if (length(d) != length(conds)) { | |
42 stop("Number of total sample columns in counts file must correspond to the columns types field. E.g. if column types is 'kidney,kidney,liver,liver' then number of sample columns in counts file must be 4 as well.") | |
43 } | |
44 | |
45 cds <- newCountDataSet(d, conds) | |
46 cds <- estimateSizeFactors(cds) | |
47 | |
48 cdsBlind <- estimateDispersions( cds, method="blind" ) | |
49 | |
50 if (length(conds) != 2) { | |
51 cds <- estimateDispersions( cds ) | |
52 norep = FALSE | |
53 } | |
54 | |
55 if (length(conds) == 2) { | |
56 cds <- estimateDispersions( cds, method="blind", sharingMode="fit-only" ) | |
57 norep = TRUE | |
58 } | |
59 | |
60 plotDispEsts <- function( cds ) | |
61 { | |
62 plot( | |
63 rowMeans( counts( cds, normalized=TRUE ) ), | |
64 fitInfo(cds)$perGeneDispEsts, | |
65 pch = '.', log="xy" ) | |
66 xg <- 10^seq( -.5, 5, length.out=300 ) | |
67 lines( xg, fitInfo(cds)$dispFun( xg ), col="red" ) | |
68 } | |
69 | |
70 temp.disp.est.plot = file.path( temp.files.path, "DispersionEstimatePlot.png" ) | |
71 png( temp.disp.est.plot, width=500, height=500 ) | |
72 plotDispEsts( cds ) | |
73 dev.off() | |
74 | |
75 res <- nbinomTest(cds, comparison[1], comparison[2]) | |
76 | |
77 write.table(res[order(res$padj), ], file=outputfile, quote=FALSE, row.names=FALSE, sep="\t") | |
78 | |
79 | |
80 plotDE <- function( res ) | |
81 plot( | |
82 res$baseMean, | |
83 res$log2FoldChange, | |
84 log="x", pch=20, cex=.3, | |
85 col = ifelse( res$padj < .1, "red", "black" ) ) | |
86 | |
87 temp.de.plot = file.path( temp.files.path, "DiffExpPlot.png") | |
88 png( temp.de.plot, width=500, height=500 ) | |
89 plotDE( res ) | |
90 dev.off() | |
91 | |
92 | |
93 temp.pval.plot = file.path( temp.files.path, "PvalHist.png") | |
94 png( temp.pval.plot, width=500, height=500 ) | |
95 hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="") | |
96 dev.off() | |
97 | |
98 vsd <- getVarianceStabilizedData( cdsBlind ) | |
99 | |
100 temp.heatmap.plot = file.path( temp.files.path, "heatmap.png" ) | |
101 png( temp.heatmap.plot, width=500, height=500 ) | |
102 select <- order(res$pval)[1:100] | |
103 colors <- colorRampPalette(c("white","darkblue"))(100) | |
104 heatmap( vsd[select,], col = colors, scale = "none", Rowv=NULL, main="") | |
105 dev.off() | |
106 | |
107 | |
108 mod_lfc <- (rowMeans( vsd[, conditions(cds)==comparison[2], drop=FALSE] ) - rowMeans( vsd[, conditions(cds)==comparison[1], drop=FALSE] )) | |
109 lfc <- res$log2FoldChange | |
110 finite <- is.finite(lfc) | |
111 table(as.character(lfc[!finite]), useNA="always") | |
112 largeNumber <- 10 | |
113 lfc <- ifelse(finite, lfc, sign(lfc) * largeNumber) | |
114 | |
115 temp.modlfc.plot = file.path( temp.files.path, "modlfc.png" ) | |
116 png( temp.modlfc.plot, width=500, height=500 ) | |
117 plot( lfc, mod_lfc, pch=20, cex=.3, col = ifelse( finite, "#80808040", "red" ) ) | |
118 abline( a=0, b=1, col="#40404040" ) | |
119 dev.off() | |
120 | |
121 temp.sampclust.plot = file.path( temp.files.path, "sampclust.png" ) | |
122 png( temp.sampclust.plot, width=500, height=500 ) | |
123 dists <- dist( t( vsd ) ) | |
124 heatmap( as.matrix( dists ), | |
125 symm=TRUE, scale="none", margins=c(10,10), | |
126 col = colorRampPalette(c("darkblue","white"))(100), | |
127 labRow = paste( pData(cdsBlind)$condition, pData(cdsBlind)$type ) ) | |
128 dev.off() | |
129 | |
130 | |
131 library(aroma.light) | |
132 library(lattice) | |
133 | |
134 mdsPlot <- function(x, conds=NULL, cex=1, ...) { | |
135 d <- dist(x) | |
136 | |
137 mds.fit <- cmdscale(d, eig=TRUE, k=2) | |
138 | |
139 mds.d <- data.frame(x1=mds.fit$points[, 1], | |
140 x2=mds.fit$points[, 2], | |
141 labels=rownames(x)) | |
142 if (!is.null(conds)) | |
143 mds.d$treatment <- as.factor(conds) | |
144 | |
145 if (!is.null(conds)) { | |
146 p <- xyplot(x2 ~ x1, group=treatment, data=mds.d, panel=function(x, y, ..., groups, subscripts) { | |
147 panel.text(x, y, mds.d$labels[subscripts], cex=cex, col=trellis.par.get()$superpose.line$col[groups]) | |
148 }, ...) | |
149 } else { | |
150 p <- xyplot(x2 ~ x1, data=mds.d, panel=function(x, y, ..., groups, subscripts) { | |
151 panel.text(x, y, mds.d$labels[subscripts], cex=cex) | |
152 }, ...) | |
153 } | |
154 return(p) | |
155 } | |
156 | |
157 if (!norep) { | |
158 dn = normalizeQuantileRank(as.matrix(d)) | |
159 p = mdsPlot(t(log10(dn+1)), conds=conds, xlab="dimension 1", ylab="dimension 2", main="") | |
160 | |
161 temp.mds.plot = file.path( temp.files.path, "mds.png" ) | |
162 png( temp.mds.plot, width=500, height=500 ) | |
163 print(p) | |
164 dev.off() | |
165 } | |
166 | |
167 file.conn = file(diag.html, open="w") | |
168 writeLines( c("<html><body bgcolor='lightgray'>"), file.conn) | |
169 writeLines( c("<h2><u>Diagnostic Plots for ", counts.name, "</u></h2>"), file.conn) | |
170 writeLines( c("<h3>Dispersion Estimates</h3>"), file.conn) | |
171 writeLines( c("<img src='DispersionEstimatePlot.png'><br/><br/>"), file.conn) | |
172 writeLines( c("<h3>Differential Expression - Base Mean vs. Log2 Fold Change</h3>"), file.conn) | |
173 writeLines( c("<img src='DiffExpPlot.png'><br/><br/>"), file.conn) | |
174 writeLines( c("<h3>P-value histogram</h3>"), file.conn) | |
175 writeLines( c("<img src='PvalHist.png'><br/><br/>"), file.conn) | |
176 writeLines( c("<h3>Top 100 Genes/Transcripts by P-value</h3>"), file.conn) | |
177 writeLines( c("<img src='heatmap.png'><br/><br/>"), file.conn) | |
178 writeLines( c("<h3>Moderated LFC vs. LFC</h3>"), file.conn) | |
179 writeLines( c("<img src='modlfc.png'><br/><br/>"), file.conn) | |
180 writeLines( c("<h3>VST Sample Clustering</h3>"), file.conn) | |
181 writeLines( c("<img src='sampclust.png'><br/><br/>"), file.conn) | |
182 writeLines( c("<h3>MDS Plot</h3>"), file.conn) | |
183 | |
184 if (!norep) { | |
185 writeLines( c("<img src='mds.png'><br/><br/>"), file.conn) | |
186 } | |
187 | |
188 if (norep) { | |
189 writeLines( c("<h4>MDS plot not produced for unreplicated data</h4>"), file.conn) | |
190 } | |
191 | |
192 writeLines( c("</body></html>"), file.conn) | |
193 close(file.conn) | |
194 | |
195 sink(NULL) |