Mercurial > repos > iuc > mageck_pathway
view test-data/output_countsummary.Rnw @ 2:ff3716b505d3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 49e456dda49db1f52fc876f406a10273a408b1a2
author | iuc |
---|---|
date | Wed, 04 Apr 2018 11:02:50 -0400 |
parents | |
children |
line wrap: on
line source
% This is a template file for Sweave used in MAGeCK % Author: Wei Li, Shirley Liu lab % Do not modify lines beginning with "#__". \documentclass{article} \usepackage{amsmath} \usepackage{amscd} \usepackage[tableposition=top]{caption} \usepackage{ifthen} \usepackage{fullpage} \usepackage[utf8]{inputenc} % \usepackage{longtable} \begin{document} \setkeys{Gin}{width=0.9\textwidth} \title{MAGeCK Count Report} \author{Wei Li} \maketitle \tableofcontents \section{Summary} %Function definition <<label=funcdef,include=FALSE,echo=FALSE>>= genreporttable<-function(filelist,labellist,reads,mappedreads){ xtb=data.frame(Label=labellist,Reads=reads,MappedReads=mappedreads,MappedPercentage=mappedreads/reads); colnames(xtb)=c("Label","Reads","Mapped","Percentage"); return (xtb); } genreporttable2<-function(filelist,labellist,sgrnas,zerocounts,gini){ xtb=data.frame(Label=labellist,TotalsgRNAs=sgrnas,ZeroCounts=zerocounts,GiniIndex=gini); colnames(xtb)=c("Label","TotalsgRNA","ZeroCounts","GiniIndex"); return (xtb); } genreporttable3<-function(filelist,labellist){ xtb=data.frame(File=filelist,Label=labellist); colnames(xtb)=c("File","Label"); return (xtb); } colors=c( "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#A65628", "#F781BF", "#999999", "#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3", "#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F"); genboxplot<-function(filename,...){ #slmed=read.table(filename,header=T) slmed=read.table(filename,header=T) slmat=as.matrix(slmed[,c(-1,-2)]) slmat_log=log2(slmat+1) boxplot(slmat_log,pch='.',las=2,ylab='log2(read counts)',cex.axis=0.8,...) } genhistplot<-function(filename,isfile=T,...){ if(isfile){ slmed=read.table(filename,header=T) }else{ slmed=filename; } tabsmat=as.matrix(log2(slmed[,c(-1,-2)]+1)) colnames(tabsmat)=colnames(slmed)[c(-1,-2)] samplecol=colors[((1:ncol(tabsmat)) %% length(colors)) ] if(ncol(tabsmat)>=1){ histlist=lapply(1:ncol(tabsmat),function(X){ return (hist(tabsmat[,X],plot=F,breaks=40)) }) xrange=range(unlist(lapply(histlist,function(X){X$mids}))) yrange=range(unlist(lapply(histlist,function(X){X$counts}))) hst1=histlist[[1]] plot(hst1$mids,hst1$counts,type='b',pch=20,xlim=c(0,xrange[2]*1.2),ylim=c(0,yrange[2]*1.2),xlab='log2(counts)',ylab='Frequency',main='Distribution of read counts',col = samplecol[1], ... ) } if(ncol(tabsmat)>=2){ for(i in 2:ncol(tabsmat)){ hstn=histlist[[i]] lines(hstn$mids,hstn$counts,type='b',pch=20,col=samplecol[i]) } } legend('topright',colnames(tabsmat),pch=20,lwd=1,col=samplecol) } genclustering<-function(filename,...){ #slmed=read.table(filename,header=T) slmed=read.table(filename,header=T) slmat=as.matrix(slmed[,c(-1,-2)]) slmat_log=log2(slmat+1) result=tryCatch({ library(gplots); heatmap.2(cor(slmat_log),trace = 'none',density.info = 'none',cexRow = 0.8,cexCol = 0.8,offsetRow = -0.2,offsetCol = -0.2) }, error=function(e){ heatmap(cor(slmat_log),scale='none',cexRow = 0.8,cexCol = 0.8,cex.axis=0.8,...) }); } ctfit_tx=0; panel.plot<-function(x,y,textnames=names(x),...){ par(new=TRUE) m<-cbind(x,y) plot(m,pch=20,xlim = range(x)*1.1,ylim=range(y)*1.1,...) text(x,y,textnames,...) } genpcaplot<-function(filename,...){ #slmed=read.table(filename,header=T) slmed=read.table(filename,header=T) slmat=as.matrix(slmed[,c(-1,-2)]) slmat_log=log2(slmat+1) ctfit_tx<<-prcomp(t(slmat_log),center=TRUE) # par(mfrow=c(2,1)); samplecol=colors[((1:ncol(slmat)) %% length(colors)) ] # first 2 PCA #plot(ctfit_tx$x[,1],ctfit_tx$x[,2],xlab='PC1',ylab='PC2',main='First 2 PCs',col=samplecol,xlim=1.1*range(ctfit_tx$x[,1]),ylim=1.1*range(ctfit_tx$x[,2])); #text(ctfit_tx$x[,1],ctfit_tx$x[,2],rownames(ctfit_tx$x),col=samplecol); # par(mfrow=c(1,1)); if(length(samplecol)>2){ pairs(ctfit_tx$x[,1:3],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 3 principle components',col=samplecol) }else{ if(length(samplecol)>1){ pairs(ctfit_tx$x[,1:2],panel=panel.plot,textnames=rownames(ctfit_tx$x),main='First 2 principle components',col=samplecol) } } } genpcavar<-function(){ # % variance varpca=ctfit_tx$sdev^2 varpca=varpca/sum(varpca)*100; if(length(varpca)>10){ varpca=varpca[1:10]; } plot(varpca,type='b',lwd=2,pch=20,xlab='PCs',ylab='% Variance explained'); } @ %__FILE_SUMMARY__ The statistics of comparisons are listed in Table 1 and Table 2. The corresponding fastq files in each row are listed in Table 3. <<label=tab1,echo=FALSE,results=tex>>= library(xtable) filelist=c("input_0.gz"); labellist=c("test1_fastq_gz"); reads=c(2500); mappedreads=c(1453); totalsgrnas=c(2550); zerocounts=c(1276); giniindex=c(0.5266899931488773); cptable=genreporttable(filelist,labellist,reads,mappedreads); print(xtable(cptable, caption = "Summary of comparisons", label = "tab:one", digits = c(0, 0, 0, 0,2), align=c('c', 'c','c', 'c', 'c'), table.placement = "tbp", caption.placement = "top")) @ <<label=tab2,echo=FALSE,results=tex>>= library(xtable) cptable=genreporttable2(filelist,labellist,totalsgrnas,zerocounts,giniindex); print(xtable(cptable, caption = "Summary of comparisons", label = "tab:two", digits = c(0, 0,0, 0,2), align=c('c', 'c','c', 'c', 'c'), table.placement = "tbp", caption.placement = "top")) @ <<label=tab3,echo=FALSE,results=tex>>= library(xtable) cptable=genreporttable3(filelist,labellist); print(xtable(cptable, caption = "Summary of samples", label = "tab:three", digits = c(0,0, 0), align=c('c', 'p{9cm}', 'c'), table.placement = "tbp", caption.placement = "top")) @ The meanings of the columns are as follows. \begin{itemize} \item \textbf{Row}: The row number in the table; \item \textbf{File}: The filename of fastq file; \item \textbf{Label}: Assigned label; \item \textbf{Reads}: The total read count in the fastq file; \item \textbf{Mapped}: Reads that can be mapped to gRNA library; \item \textbf{Percentage}: The percentage of mapped reads; \item \textbf{TotalsgRNAs}: The number of sgRNAs in the library; \item \textbf{ZeroCounts}: The number of sgRNA with 0 read counts; \item \textbf{GiniIndex}: The Gini Index of the read count distribution. Gini index can be used to measure the evenness of the read counts, and a smaller value means a more even distribution of the read counts. \end{itemize} \newpage\section{Normalized read count distribution of all samples} The following figure shows the distribution of median-normalized read counts in all samples. <<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= genboxplot("output.count_normalized.txt"); @ The following figure shows the histogram of median-normalized read counts in all samples. <<fig=TRUE,echo=FALSE,width=4.5,height=4.5>>= genhistplot("output.count_normalized.txt"); @ %__INDIVIDUAL_PAGE__ \end{document}