Mercurial > repos > petr-novak > re_utils
comparison RM_html_report.R @ 0:a4cd8608ef6b draft
Uploaded
| author | petr-novak | 
|---|---|
| date | Mon, 01 Apr 2019 07:56:36 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:a4cd8608ef6b | 
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 ### This script is expected to be run from the clustering directory! ###### | |
| 3 | |
| 4 ## Assumes the file RM-custom_output_tablesummary.csv is present in the working directory. | |
| 5 | |
| 6 suppressPackageStartupMessages(library(R2HTML)) | |
| 7 ## htmlheader: opening <html>/<head> markup with the table stylesheet; it is written verbatim as the first content of the report file. | |
| 8 htmlheader=" | |
| 9 <html xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"> | |
| 10 <head> | |
| 11 <title> Clustering summary </title> | |
| 12 <style> | |
| 13 <!-- | |
| 14 table { background:#FFFFFF; | |
| 15 border:1px solid gray; | |
| 16 border-collapse:collapse; | |
| 17 color:#fff; | |
| 18 font:normal 10px verdana, arial, helvetica, sans-serif; | |
| 19 } | |
| 20 caption { border:1px solid #5C443A; | |
| 21 color:#5C443A; | |
| 22 font-weight:bold; | |
| 23 font-size:20pt; | |
| 24 padding:6px 4px 8px 0px; | |
| 25 text-align:center; | |
| 26 | |
| 27 } | |
| 28 td, th { color:#363636; | |
| 29 padding:.4em; | |
| 30 } | |
| 31 tr { border:1px dotted gray; | |
| 32 } | |
| 33 thead th, tfoot th { background:#5C443A; | |
| 34 color:#FFFFFF; | |
| 35 padding:3px 10px 3px 10px; | |
| 36 text-align:left; | |
| 37 text-transform:uppercase; | |
| 38 } | |
| 39 tbody td a { color:#3636FF; | |
| 40 text-decoration:underline; | |
| 41 } | |
| 42 tbody td a:visited { color:gray; | |
| 43 text-decoration:line-through; | |
| 44 } | |
| 45 tbody td a:hover { text-decoration:underline; | |
| 46 } | |
| 47 tbody th a { color:#3636FF; | |
| 48 font-weight:normal; | |
| 49 text-decoration:none; | |
| 50 } | |
| 51 tbody th a:hover { color:#363636; | |
| 52 } | |
| 53 tbody td+td+td+td a { background-image:url('bullet_blue.png'); | |
| 54 background-position:left center; | |
| 55 background-repeat:no-repeat; | |
| 56 color:#FFFFFF; | |
| 57 padding-left:15px; | |
| 58 } | |
| 59 tbody td+td+td+td a:visited { background-image:url('bullet_white.png'); | |
| 60 background-position:left center; | |
| 61 background-repeat:no-repeat; | |
| 62 } | |
| 63 tbody th, tbody td { text-align:left; | |
| 64 vertical-align:top; | |
| 65 } | |
| 66 tfoot td { background:#5C443A; | |
| 67 color:#FFFFFF; | |
| 68 padding-top:3px; | |
| 69 } | |
| 70 .odd { background:#fff; | |
| 71 } | |
| 72 tbody tr:hover { background:#EEEEEE; | |
| 73 border:1px solid #03476F; | |
| 74 color:#000000; | |
| 75 } | |
| 76 --> | |
| 77 </style> | |
| 78 | |
| 79 </head> | |
| 80 | |
| 81 " | |
| 82 ###################################################################################### | |
| 83 ###################################################################################### | |
| 84 | |
| 85 | |
| 86 | |
| 87 # Basic statistics: | |
| 88 # read the RepeatMasker summary table (tab-separated, with header; column names kept as written) | |
| 89 | |
| 90 RM <- read.table("RM-custom_output_tablesummary.csv", sep = "\t", header = TRUE, as.is = TRUE, check.names = FALSE) | |
| 91 | |
| 92 # N is the divisor of the genome-proportion columns computed further down; it is left as NA here | |
| 93 N <- NA | |
| 94 | |
| 95 # Readable per-cluster totals, taken from every third row starting with row 1: | |
| 96 RM2 <- data.frame( | |
| 97 "total length [bp]" = RM$All_Reads_Length[c(TRUE, FALSE, FALSE)], | |
| 98 "number of reads" = RM$All_Reads_Number[c(TRUE, FALSE, FALSE)], | |
| 99 check.names = FALSE, stringsAsFactors = FALSE | |
| 100 ) | |
| 101 | |
| 102 RMpart1 <- RM[c(TRUE, FALSE, FALSE), -(1:3), drop = FALSE] # counts | |
| 103 RMpart2 <- RM[c(FALSE, TRUE, FALSE), -(1:3), drop = FALSE] # percent | |
| 104 | |
| 105 # One "<count>hits, <percent>%" label vector per hit column, keyed by column name | |
| 106 RMjoined <- list() | |
| 107 for (hit_column in colnames(RMpart1)) { | |
| 108 RMjoined[[hit_column]] <- paste0(RMpart1[, hit_column], "hits, ", signif(RMpart2[, hit_column], 3), "%") | |
| 109 } | |
| 110 | |
| 111 # Extend RM2 with cluster ids and build RMstring: one HTML snippet per cluster listing its RepeatMasker hits. | |
| 112 # NOTE(review): N is NA at this point, so both genome-proportion columns evaluate to NA. | |
| 113 if (ncol(RM)>3){ # not empty: hit columns follow the three leading columns | |
| 114 RM2=cbind(cluster=paste("CL",1:nrow(RM2),sep=''), | |
| 115 RM2, | |
| 116 "Genome proportion[%]"=signif(RM2$'number of reads'/N*100,3), | |
| 117 "cumulative GP [%]"=signif(cumsum(RM2$'number of reads'/N*100),3), | |
| 118 as.data.frame(RMjoined,stringsAsFactors=FALSE)) | |
| 119 | |
| 120 ##### RM2 formatting for html output: ##### | |
| 121 # hits above 3 % are wrapped in <b>; hit columns start at column 6 of RM2, hence the i-5 offset into bold | |
| 122 bold=RMpart2>3 | |
| 123 for (i in 6:ncol(RM2)){ | |
| 124 rmcol=RM2[,i] | |
| 125 RM2[,i]=ifelse(bold[,i-5],paste("<b>",rmcol,"</b>",sep=''),rmcol) | |
| 126 } | |
| 127 | |
| 128 # join hits to one column | |
| 129 RMstring=character(nrow(RM2)) | |
| 130 for (i in seq_len(nrow(RM2))){ | |
| 131 x=ifelse(RMpart2[i,]>0,paste(colnames(RM2[,-(1:5),drop=FALSE])," (",RM2[i,-(1:5),drop=FALSE],")",sep=''),"") | |
| 132 # reorder by percentage, largest first; order a plain numeric vector instead of a one-row data frame | |
| 133 x=x[order(as.numeric(unlist(RMpart2[i,])),decreasing=TRUE)] | |
| 134 x=x[x!=''] | |
| 135 RMstring[i]=paste(x,collapse="<br />") | |
| 136 if (nchar(RMstring[i])>240){ | |
| 137 # too long: keep only whole leading entries (joined length <= 220) so that no <b> or <br /> tag is cut in half | |
| 138 RMstring[i]=paste(paste(x[cumsum(nchar(x)+nchar("<br />"))-nchar("<br />")<=220],collapse="<br />"),"......",sep='') | |
| 139 } | |
| 140 } | |
| 141 }else{ # no RM hits | |
| 142 RM2=cbind(cluster=paste("CL",1:nrow(RM2),sep=''), | |
| 143 RM2, | |
| 144 "Genome proportion[%]"=signif(RM2$'number of reads'/N*100,3), | |
| 145 "cumulative GP [%]"=signif(cumsum(RM2$'number of reads'/N*100),3)) | |
| 146 RMstring=rep("",nrow(RM)/3) | |
| 147 } | |
| 148 | |
| 149 | |
| 150 # Final report table: the first three RM2 columns (cluster, total length, number of reads) plus the joined hit text | |
| 151 | |
| 152 | |
| 153 RM2=data.frame(RM2[,1:3],'Repeat Masker'=RMstring,check.names=FALSE) | |
| 154 | |
| 155 | |
| 156 ################################################################################################## | |
| 157 #################### HTML output ##### | |
| 158 ################################################################################################## | |
| 159 | |
| 160 # the output file is the first trailing command-line argument; fail early with a usage hint when it is missing | |
| 161 htmlout=commandArgs(TRUE)[1] # full absolute path | |
| 162 if (is.na(htmlout)) stop("usage: RM_html_report.R <output.html>",call.=FALSE) | |
| 163 cat(htmlheader,file=htmlout) | |
| 164 | |
| 165 HTML.title("RepeatMasker search against custom database",file=htmlout,HR=1) | |
| 166 | |
| 167 HTML(RM2,file=htmlout,align='left',caption="",captionalign='') | |
| 168 HTMLEndFile(htmlout) | |
| 169 | 
