Mercurial > repos > iuc > mageck_gsea
diff test-data/out.count.Rnw @ 2:82a180e6b582 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2
author | iuc |
---|---|
date | Wed, 04 Apr 2018 11:03:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out.count.Rnw Wed Apr 04 11:03:05 2018 -0400 @@ -0,0 +1,237 @@ +% This is a template file for Sweave used in MAGeCK +% Author: Wei Li, Shirley Liu lab +% Do not modify lines beginning with "#__". +\documentclass{article} + +\usepackage{amsmath} +\usepackage{amscd} +\usepackage[tableposition=top]{caption} +\usepackage{ifthen} +\usepackage{fullpage} +\usepackage[utf8]{inputenc} +% \usepackage{longtable} + +\begin{document} +\setkeys{Gin}{width=0.9\textwidth} + +\title{MAGeCK Count Report} +\author{Wei Li} + +\maketitle + + +\tableofcontents + +\section{Summary} + +%Function definition +<<label=funcdef,include=FALSE,echo=FALSE>>= +genreporttable<-function(filelist,labellist,reads,mappedreads){ + xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads); + colnames(xtb)=c("Label","Reads","Mapped","Percentage"); + return (xtb); +} +genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){ + xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini); + colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex"); + return (xtb); +} +genreporttable3<-function(filelist,labellist){ + xtb=data.frame(File=filelist,Label=labellist); + colnames(xtb)=c("File","Label"); + return (xtb); +} + + +colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF", + "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", + "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", + "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"); + + + +genboxplot<-function(filename,...){ + #slmed=read.table(filename,header=T) + slmed=read.table(filename,header=T) + slmat=as.matrix(slmed[,c(-1,-2)]) + slmat_log=log2(slmat+1) + + boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...) +} + + +genhistplot<-function(filename,isfile=T,...){ + if(isfile){ + slmed=read.table(filename,header=T) + }else{ + slmed=filename; + } + tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1)) + colnames(tabsmat)=colnames(slmed)[c(-1,-2)] + samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ] + if(ncol(tabsmat)>=1){ + histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) }) + xrange=range(unlist(lapply(histlist,function(X){X$mids}))) + yrange=range(unlist(lapply(histlist,function(X){X$counts}))) + hst1=histlist[[1]] + plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... ) + } + if(ncol(tabsmat)>=2){ + for(i in 2:ncol(tabsmat)){ + hstn=histlist[[i]] + lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i]) + } + } + legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol) +} + + + +genclustering<-function(filename,...){ + #slmed=read.table(filename,header=T) + slmed=read.table(filename,header=T) + slmat=as.matrix(slmed[,c(-1,-2)]) + slmat_log=log2(slmat+1) + + result=tryCatch({ + library(gplots); + heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2) + }, error=function(e){ + heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...) + }); +} + +ctfit_tx=0; + + +panel.plot<-function(x,y,textnames=names(x),...){ + par(new=TRUE) + m<-cbind(x,y) + plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...) + text(x,y,textnames,...) +} + + +genpcaplot<-function(filename,...){ + #slmed=read.table(filename,header=T) + slmed=read.table(filename,header=T) + slmat=as.matrix(slmed[,c(-1,-2)]) + slmat_log=log2(slmat+1) + ctfit_tx<<-prcomp(t(slmat_log),center=TRUE) + + # par(mfrow=c(2,1)); + samplecol=colors[((1:ncol(slmat)) %% length(colors)) ] + # first 2 PCA + #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2])); + #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol); + # par(mfrow=c(1,1)); + if(length(samplecol)>2){ + pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol) + }else{ + if(length(samplecol)>1){ + pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol) + } + } + + +} + +genpcavar<-function(){ + # % variance + varpca=ctfit_tx$sdev^2 + varpca=varpca/sum(varpca)*100; + if(length(varpca)>10){ + varpca=varpca[1:10]; + } + plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained'); +} + +@ + +%__FILE_SUMMARY__ + +The statistics of comparisons are listed in Table 1 and Table 2. +The corresponding fastq files in each row are listed in Table 3. + +<<label=tab1,echo=FALSE,results=tex>>= +library(xtable) +filelist=c("input_0.gz"); +labellist=c("test1_fastq_gz"); +reads=c(2500); +mappedreads=c(1453); +totalsgrnas=c(2550); +zerocounts=c(1276); +giniindex=c(0.5266899931488773); + +cptable=genreporttable(filelist,labellist,reads,mappedreads); +print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one", + digits = c(0, 0, 0, 0,2), + align=c('c', 'c','c', 'c', 'c'), + table.placement = "tbp", + caption.placement = "top")) +@ + +<<label=tab2,echo=FALSE,results=tex>>= +library(xtable) +cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex); +print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two", + digits = c(0, 0,0, 0,2), + align=c('c', 'c','c', 'c', 'c'), + table.placement = "tbp", + caption.placement = "top")) +@ + + + + + +<<label=tab3,echo=FALSE,results=tex>>= +library(xtable) +cptable=genreporttable3(filelist,labellist); +print(xtable(cptable, caption = "Summary of samples", label = "tab:three", + digits = c(0,0, 0), + align=c('c', 'p{9cm}', 'c'), + table.placement = "tbp", + caption.placement = "top")) +@ + + + + +The meanings of the columns are as follows. + +\begin{itemize} +\item \textbf{Row}: The row number in the table; +\item \textbf{File}: The filename of fastq file; +\item \textbf{Label}: Assigned label; +\item \textbf{Reads}: The total read count in the fastq file; +\item \textbf{Mapped}: Reads that can be mapped to gRNA library; +\item \textbf{Percentage}: The percentage of mapped reads; +\item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; +\item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts; +\item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts. +\end{itemize} + + + +\newpage\section{Normalized read count distribution of all samples} +The following figure shows the distribution of median-normalized read counts in all samples. + + +<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= +genboxplot("output.count_normalized.txt"); +@ + +The following figure shows the histogram of median-normalized read counts in all samples. + + +<<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= +genhistplot("output.count_normalized.txt"); +@ + +%__INDIVIDUAL_PAGE__ + + + +\end{document} +