Mercurial > repos > iuc > mageck_test
comparison test-data/output_countsummary.Rnw @ 2:81bbbddcf285 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2
author | iuc |
---|---|
date | Wed, 04 Apr 2018 11:03:59 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:5e2a28bee02d | 2:81bbbddcf285 |
---|---|
1 % This is a template file for Sweave used in MAGeCK | |
2 % Author: Wei Li, Shirley Liu lab | |
3 % Do not modify lines beginning with "#__". | |
4 \documentclass{article} | |
5 | |
6 \usepackage{amsmath} | |
7 \usepackage{amscd} | |
8 \usepackage[tableposition=top]{caption} | |
9 \usepackage{ifthen} | |
10 \usepackage{fullpage} | |
11 \usepackage[utf8]{inputenc} | |
12 % \usepackage{longtable} | |
13 | |
14 \begin{document} | |
15 \setkeys{Gin}{width=0.9\textwidth} | |
16 | |
17 \title{MAGeCK Count Report} | |
18 \author{Wei Li} | |
19 | |
20 \maketitle | |
21 | |
22 | |
23 \tableofcontents | |
24 | |
25 \section{Summary} | |
26 | |
27 %Function definition | |
28 <<label=funcdef,include=FALSE,echo=FALSE>>= | |
# Build the read-mapping summary table (one row per sample).
#
# Args:
#   filelist:    accepted for signature parity with the other table builders;
#                not used here.
#   labellist:   sample labels.
#   reads:       total read count per sample.
#   mappedreads: mapped read count per sample.
#
# Returns:
#   A data.frame with columns "Label", "Reads", "Mapped" and "Percentage".
#   Note that "Percentage" holds mappedreads / reads, i.e. a fraction that is
#   not multiplied by 100.
genreporttable <- function(filelist, labellist, reads, mappedreads) {
  data.frame(
    Label = labellist,
    Reads = reads,
    Mapped = mappedreads,
    Percentage = mappedreads / reads
  )
}
# Build the sgRNA statistics table (one row per sample).
#
# Args:
#   filelist:   accepted for signature parity; not used here.
#   labellist:  sample labels.
#   sgrnas:     value shown in the "TotalsgRNA" column.
#   zerocounts: value shown in the "ZeroCounts" column.
#   gini:       value shown in the "GiniIndex" column.
#
# Returns:
#   A data.frame with columns "Label", "TotalsgRNA", "ZeroCounts", "GiniIndex".
genreporttable2 <- function(filelist, labellist, sgrnas, zerocounts, gini) {
  data.frame(
    Label = labellist,
    TotalsgRNA = sgrnas,
    ZeroCounts = zerocounts,
    GiniIndex = gini
  )
}
# Build the file-to-label lookup table (one row per sample).
#
# Args:
#   filelist:  input file names.
#   labellist: sample labels, parallel to filelist.
#
# Returns:
#   A data.frame with columns "File" and "Label".
genreporttable3 <- function(filelist, labellist) {
  data.frame(File = filelist, Label = labellist)
}
44 | |
45 | |
# Palette of 28 hex colours; genhistplot() and genpcaplot() index into it to
# assign one colour per sample.
colors <- c(
  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF",
  "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F",
  "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3",
  "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"
)
50 | |
51 | |
52 | |
# Draw one box per sample of log2(count + 1).
#
# Args:
#   filename: path to a whitespace-delimited count table with a header row.
#             The first two columns are discarded (presumably the sgRNA and
#             gene identifiers -- confirm against the file format); every
#             remaining column is treated as one sample.
#   ...:      forwarded to boxplot().
#
# Returns:
#   The value of boxplot().
genboxplot <- function(filename, ...) {
  count_tbl <- read.table(filename, header = TRUE)
  log_counts <- log2(as.matrix(count_tbl[, -(1:2)]) + 1)

  boxplot(
    log_counts,
    pch = ".",
    las = 2,
    ylab = "log2(read counts)",
    cex.axis = 0.8,
    ...
  )
}
61 | |
62 | |
# Overlay the log2(count + 1) histograms of all samples as line plots.
#
# Args:
#   filename: either a path to a whitespace-delimited count table with a
#             header row (isfile = TRUE), or an already loaded table
#             (isfile = FALSE). The first two columns are discarded; every
#             remaining column is treated as one sample.
#   isfile:   whether `filename` is a path that must be read from disk.
#   ...:      forwarded to the plot() call that draws the first sample.
#
# Returns:
#   The value of legend() (invisible).
#
# Stops with an explicit message when the table has no sample columns.
genhistplot <- function(filename, isfile = TRUE, ...) {
  if (isfile) {
    slmed <- read.table(filename, header = TRUE)
  } else {
    slmed <- filename
  }

  # drop = FALSE keeps a single-sample table two-dimensional, so the column
  # name survives without having to be re-attached afterwards.
  tabsmat <- as.matrix(log2(slmed[, c(-1, -2), drop = FALSE] + 1))
  nsample <- ncol(tabsmat)
  if (nsample < 1) {
    stop("genhistplot: the count table has no sample columns", call. = FALSE)
  }

  # Cycle through the palette with 1-based wrap-around. A plain
  # `index %% length(colors)` yields 0 for every multiple of the palette
  # length, and R silently drops zero indices, which shifts the colours of
  # all later samples.
  samplecol <- colors[((seq_len(nsample) - 1) %% length(colors)) + 1]

  histlist <- lapply(seq_len(nsample), function(j) {
    hist(tabsmat[, j], plot = FALSE, breaks = 40)
  })
  xrange <- range(unlist(lapply(histlist, function(h) h$mids)))
  yrange <- range(unlist(lapply(histlist, function(h) h$counts)))

  # The first sample sets up the axes, with 20% headroom on both of them.
  hst1 <- histlist[[1]]
  plot(hst1$mids, hst1$counts, type = "b", pch = 20,
       xlim = c(0, xrange[2] * 1.2), ylim = c(0, yrange[2] * 1.2),
       xlab = "log2(counts)", ylab = "Frequency",
       main = "Distribution of read counts", col = samplecol[1], ...)

  # Remaining samples are added on top of the existing axes.
  for (i in seq_len(nsample)[-1]) {
    hstn <- histlist[[i]]
    lines(hstn$mids, hstn$counts, type = "b", pch = 20, col = samplecol[i])
  }

  legend("topright", colnames(tabsmat), pch = 20, lwd = 1, col = samplecol)
}
87 | |
88 | |
89 | |
# Draw a heatmap of the sample-by-sample correlation of log2(count + 1).
#
# Args:
#   filename: path to a whitespace-delimited count table with a header row.
#             The first two columns are discarded; every remaining column is
#             treated as one sample.
#   ...:      forwarded to heatmap() in the fallback path only.
#
# Returns:
#   Invisibly, whatever the heatmap call that ran returned.
genclustering <- function(filename, ...) {
  count_tbl <- read.table(filename, header = TRUE)
  log_counts <- log2(as.matrix(count_tbl[, -(1:2)]) + 1)

  # Fallback used whenever loading gplots or drawing with heatmap.2() raises
  # an error.
  draw_with_base <- function(e) {
    heatmap(cor(log_counts), scale = "none", cexRow = 0.8, cexCol = 0.8,
            cex.axis = 0.8, ...)
  }

  invisible(tryCatch(
    {
      library(gplots)
      heatmap.2(cor(log_counts), trace = "none", density.info = "none",
                cexRow = 0.8, cexCol = 0.8, offsetRow = -0.2, offsetCol = -0.2)
    },
    error = draw_with_base
  ))
}
103 | |
# Placeholder for the PCA fit: genpcaplot() overwrites it via `<<-`, and
# genpcavar() reads it afterwards.
ctfit_tx <- 0
105 | |
106 | |
# Panel function passed to pairs() by genpcaplot(): plots the points and
# writes a text label at each of them.
#
# Args:
#   x, y:      coordinates of the points for this panel.
#   textnames: labels written at the points; defaults to names(x).
#   ...:       forwarded to both plot() and text().
#
# Returns:
#   The value of text().
panel.plot <- function(x, y, textnames = names(x), ...) {
  # Let the plot() below draw into the current panel instead of opening a
  # fresh page.
  par(new = TRUE)
  # Axis limits are the data range scaled by 1.1.
  plot(cbind(x, y), pch = 20, xlim = 1.1 * range(x), ylim = 1.1 * range(y), ...)
  text(x, y, textnames, ...)
}
113 | |
114 | |
# Run a PCA over the samples and draw pairwise scatter plots of the leading
# principal components.
#
# Args:
#   filename: path to a whitespace-delimited count table with a header row.
#             The first two columns are discarded; every remaining column is
#             treated as one sample.
#   ...:      accepted for signature parity with the other plot helpers; not
#             used.
#
# Side effects:
#   Stores the prcomp() fit in the file-level variable `ctfit_tx`, which
#   genpcavar() reads afterwards.
#
# Nothing is drawn when fewer than two principal components are available.
genpcaplot <- function(filename, ...) {
  slmed <- read.table(filename, header = TRUE)
  slmat <- as.matrix(slmed[, c(-1, -2)])
  slmat_log <- log2(slmat + 1)
  # Samples become the observations (rows) of the PCA.
  ctfit_tx <<- prcomp(t(slmat_log), center = TRUE)

  # Cycle through the palette with 1-based wrap-around. A plain
  # `index %% length(colors)` yields 0 for every multiple of the palette
  # length, and R silently drops zero indices, which shifts the colours of
  # all later samples.
  nsample <- ncol(slmat)
  samplecol <- colors[((seq_len(nsample) - 1) %% length(colors)) + 1]

  # Show up to three components, but never more than the fit actually
  # produced, so the column subset below cannot run out of bounds.
  npc <- min(3L, ncol(ctfit_tx$x))
  if (npc >= 2L) {
    pairs(ctfit_tx$x[, seq_len(npc)], panel = panel.plot,
          textnames = rownames(ctfit_tx$x),
          main = sprintf("First %d principal components", npc),
          col = samplecol)
  }
}
138 | |
# Plot the percentage of variance explained by each principal component of
# the fit stored in the file-level variable `ctfit_tx` (written by
# genpcaplot()). At most the first 10 components are shown.
#
# Returns:
#   The value of plot().
genpcavar <- function() {
  pc_variance <- ctfit_tx$sdev^2
  pct_explained <- pc_variance / sum(pc_variance) * 100
  plot(head(pct_explained, 10), type = "b", lwd = 2, pch = 20,
       xlab = "PCs", ylab = "% Variance explained")
}
148 | |
149 @ | |
150 | |
151 %__FILE_SUMMARY__ | |
152 | |
153 The statistics of comparisons are listed in Table 1 and Table 2. | |
154 The corresponding fastq files in each row are listed in Table 3. | |
155 | |
156 <<label=tab1,echo=FALSE,results=tex>>= | |
library(xtable)

# Per-sample inputs of the report. The tab2 and tab3 chunks reuse these
# variables, so their names must stay as they are.
filelist <- c("input_0.gz")
labellist <- c("test1_fastq_gz")
reads <- c(2500)
mappedreads <- c(1453)
totalsgrnas <- c(2550)
zerocounts <- c(1276)
giniindex <- c(0.5266899931488773)

cptable <- genreporttable(filelist, labellist, reads, mappedreads)
# table.placement and caption.placement are print.xtable() arguments;
# xtable() itself silently ignores them, so they are given to print().
print(
  xtable(cptable, caption = "Summary of comparisons", label = "tab:one",
         digits = c(0, 0, 0, 0, 2),
         align = c("c", "c", "c", "c", "c")),
  table.placement = "tbp",
  caption.placement = "top"
)
172 @ | |
173 | |
174 <<label=tab2,echo=FALSE,results=tex>>= | |
library(xtable)

# Uses filelist, labellist, totalsgrnas, zerocounts and giniindex as defined
# in the tab1 chunk.
cptable <- genreporttable2(filelist, labellist, totalsgrnas, zerocounts, giniindex)
# table.placement and caption.placement are print.xtable() arguments;
# xtable() itself silently ignores them, so they are given to print().
print(
  xtable(cptable, caption = "Summary of comparisons", label = "tab:two",
         digits = c(0, 0, 0, 0, 2),
         align = c("c", "c", "c", "c", "c")),
  table.placement = "tbp",
  caption.placement = "top"
)
182 @ | |
183 | |
184 | |
185 | |
186 | |
187 | |
188 <<label=tab3,echo=FALSE,results=tex>>= | |
library(xtable)

# Uses filelist and labellist as defined in the tab1 chunk.
cptable <- genreporttable3(filelist, labellist)
# table.placement and caption.placement are print.xtable() arguments;
# xtable() itself silently ignores them, so they are given to print().
print(
  xtable(cptable, caption = "Summary of samples", label = "tab:three",
         digits = c(0, 0, 0),
         align = c("c", "p{9cm}", "c")),
  table.placement = "tbp",
  caption.placement = "top"
)
196 @ | |
197 | |
198 | |
199 | |
200 | |
201 The meanings of the columns are as follows. | |
202 | |
203 \begin{itemize} | |
204 \item \textbf{Row}: The row number in the table; | |
205 \item \textbf{File}: The filename of the fastq file; | |
206 \item \textbf{Label}: Assigned label; | |
207 \item \textbf{Reads}: The total read count in the fastq file; | |
208 \item \textbf{Mapped}: The number of reads that can be mapped to the sgRNA library; | |
209 \item \textbf{Percentage}: The fraction of reads that are mapped (Mapped divided by Reads); | |
210 \item \textbf{TotalsgRNA}: The number of sgRNAs in the library; | |
211 \item \textbf{ZeroCounts}: The number of sgRNAs with zero read counts; | |
212 \item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts. | |
213 \end{itemize} | |
214 | |
215 | |
216 | |
217 \newpage\section{Normalized read count distribution of all samples} | |
218 The following figure shows the distribution of median-normalized read counts in all samples. | |
219 | |
220 | |
221 <<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= | |
# Box plot of the normalized counts, one box per sample.
genboxplot("output.count_normalized.txt")
223 @ | |
224 | |
225 The following figure shows the histogram of median-normalized read counts in all samples. | |
226 | |
227 | |
228 <<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= | |
# Overlaid per-sample histograms of the normalized counts.
genhistplot("output.count_normalized.txt")
230 @ | |
231 | |
232 %__INDIVIDUAL_PAGE__ | |
233 | |
234 | |
235 | |
236 \end{document} | |
237 |