comparison fastqc_site_render.R @ 11:507eec497730 draft

update fastqc site
author mingchen0919
date Tue, 07 Nov 2017 16:52:24 -0500
parents d820be692d74
children a6f8382f852c
comparison
equal deleted inserted replaced
10:600c39b11913 11:507eec497730
1 ##======= Handle arguments from command line ======== 1 library(getopt)
2 # setup R error handline to go to stderr
3 options(show.error.messages=FALSE,
4 error=function(){
5 cat(geterrmessage(), file=stderr())
6 quit("no", 1, F)
7 })
8
9 # we need that to not crash galaxy with an UTF8 error on German LC settings.
10 loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
11
12 # suppress warning
13 options(warn = -1)
14
15 options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
16 args = commandArgs(trailingOnly=TRUE)
17
18 suppressPackageStartupMessages({
19 library(getopt)
20 library(tools)
21 })
22
23 # column 1: the long flag name
24 # column 2: the short flag alias. A SINGLE character string
25 # column 3: argument mask
26 # 0: no argument
27 # 1: argument required
28 # 2: argument is optional
29 # column 4: date type to which the flag's argument shall be cast.
30 # possible values: logical, integer, double, complex, character.
31 spec_list=list()
32
33 ##------- 1. input data ---------------------
34 spec_list$READS = c('reads', 'r', '1', 'character')
35 spec_list$ECHO = c('echo', 'e', '1', 'character')
36
37 ##--------2. output report and report site directory --------------
38 spec_list$FASTQC_SITE = c('fastqc_site', 'o', '1', 'character')
39 spec_list$FASTQC_SITE_DIR = c('fastqc_site_dir', 'd', '1', 'character')
40
41 ##--------3. Rmd templates sitting in the tool directory ----------
42
43 ## _site.yml and index.Rmd files
44 spec_list$SITE_YML = c('site_yml', 's', 1, 'character')
45 spec_list$INDEX_Rmd = c('index_rmd', 'i', 1, 'character')
46
47 ## other Rmd body template files
48 spec_list$x01 = c('x01_evaluation_overview', 'p', '1', 'character')
49 spec_list$x02 = c('x02_fastqc_original_reports', 'a', '1', 'character')
50 spec_list$x1 = c('x1_per_base_quality_scores', 'b', '1', 'character')
51 spec_list$x2 = c('x2_per_base_N_content', 'c', '1', 'character')
52 spec_list$x3 = c('x3_per_sequence_quality_scores', 'f', '1', 'character')
53 spec_list$x4 = c('x4_per_sequence_GC_content', 'g', '1', 'character')
54 spec_list$x5 = c('x5_per_base_sequence_content', 'h', '1', 'character')
55
56 ##------------------------------------------------------------------
57
58 spec = t(as.data.frame(spec_list))
59 opt = getopt(spec)
60 # arguments are accessed by long flag name (the first column in the spec matrix)
61 # NOT by element name in the spec_list
62 # example: opt$help, opt$expression_file
63 ##====== End of arguments handling ==========
64
65 #------ Load libraries ---------
66 library(rmarkdown) 2 library(rmarkdown)
3 library(htmltools)
67 library(plyr) 4 library(plyr)
5 library(dplyr)
68 library(stringr) 6 library(stringr)
69 library(dplyr)
70 library(highcharter) 7 library(highcharter)
71 library(DT) 8 library(DT)
72 library(reshape2) 9 library(reshape2)
73 library(plotly) 10 library(plotly)
74 library(formattable) 11 library(formattable)
75 library(htmltools) 12 options(stringsAsFactors=FALSE, useFancyQuotes=FALSE)
76 13
77 14 ##============ Sink warnings and errors to a file ==============
78 #----- 1. create the report directory ------------------------ 15 ## use the sink() function to wrap all code within it.
79 paste0('mkdir -p ', opt$fastqc_site_dir) %>% 16 ##==============================================================
80 system() 17 zz = file('warnings_and_errors.txt')
81 18 sink(zz)
82 #----- 2. generate Rmd files with Rmd templates -------------- 19 sink(zz, type = 'message')
83 # a. templates without placeholder variables: 20 ##---------below is the code for rendering .Rmd templates-----
84 # copy templates from tool directory to the working directory. 21
85 # b. templates with placeholder variables: 22 ##=============STEP 1: handle command line arguments==========
86 # substitute variables with user input values and place them in the working directory. 23 ##
87 24 ##============================================================
88 25 # column 1: the long flag name
89 #----- Copy index.Rmd and _site.yml files to job working direcotry ----- 26 # column 2: the short flag alias. A SINGLE character string
90 file.copy(opt$index_rmd, 'index.Rmd', recursive=TRUE) 27 # column 3: argument mask
91 file.copy(opt$site_yml, '_site.yml', recursive=TRUE) 28 # 0: no argument
92 #--------------------------------------------------------- 29 # 1: argument required
93 30 # 2: argument is optional
94 #----- 01_evaluation_overview.Rmd ----------------------- 31 # column 4: date type to which the flag's argument shall be cast.
95 readLines(opt$x01_evaluation_overview) %>% 32 # possible values: logical, integer, double, complex, character.
96 (function(x) { 33 #-------------------------------------------------------------
97 gsub('ECHO', opt$echo, x) 34 #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
98 }) %>% 35 # 1. short flag alias should match the flag in the command section in the XML file.
99 (function(x) { 36 # 2. long flag name can be any legal R variable names
100 gsub('READS', opt$reads, x) 37 # 3. two names in args_list can have common string but one name should not be a part of another name.
101 }) %>% 38 # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
102 (function(x) { 39 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
103 gsub('REPORT_OUTPUT_DIR', opt$fastqc_site_dir, x) 40 args_list=list()
104 }) %>% 41 ##------- 1. input data ---------------------
105 (function(x) { 42 args_list$ECHO = c('echo', 'e', '1', 'character')
106 fileConn = file('01_evaluation_overview.Rmd') 43 args_list$READS_1 = c('reads_1', 'r', '1', 'character')
107 writeLines(x, con=fileConn) 44 args_list$NAME_1 = c('name_1', 'n', '1', 'character')
108 close(fileConn) 45 args_list$READS_2 = c('reads_2', 'R', '1', 'character')
109 }) 46 args_list$NAME_2 = c('name_2', 'N', '1', 'character')
110 47 args_list$CONTAMINANTS = c('contaminants', 'c', '1', 'character')
111 #----- 1_per_base_quality_scores.Rmd -------------------- 48 args_list$LIMITS = c('limits', 'l', '1', 'character')
112 readLines(opt$x1_per_base_quality_scores) %>% 49 ##--------2. output report and outputs --------------
113 (function(x) { 50 args_list$REPORT_HTML = c('report_html', 'o', '1', 'character')
114 gsub('ECHO', opt$echo, x) 51 args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
115 }) %>% 52 args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
116 (function(x) { 53 ##--------3. .Rmd templates in the tool directory ----------
117 fileConn = file('1_per_base_quality_scores.Rmd') 54 args_list$SITE_YML = c('site_yml', 'S', '1', 'character')
118 writeLines(x, con=fileConn) 55 args_list$INDEX_RMD = c('index_rmd', 'I', '1', 'character')
119 close(fileConn) 56 args_list$X01_EVALUATION_OVERVIEW = c('x01_evaluation_overview', 'A', '1', 'character')
120 }) 57 args_list$X02_PER_BASE_SEQUENCE_QUALITY = c('x02_per_base_sequence_quality', 'B', '1', 'character')
121 58 args_list$X03_PER_TILE_SEQUENCE_QUALITY = c('x03_per_tile_sequence_quality', 'C', '1', 'character')
122 #----- 2_per_base_N_content.Rmd ------------------------- 59 args_list$X04_PER_SEQUENCE_QUALITY_SCORE = c('x04_per_sequence_quality_score', 'D', '1', 'character')
123 readLines(opt$x2_per_base_N_content) %>% 60 args_list$X05_PER_BASE_SEQUENCE_CONTENT = c('x05_per_base_sequence_content', 'E', '1', 'character')
124 (function(x) { 61 args_list$X06_PER_SEQUENCE_GC_CONTENT = c('x06_per_sequence_gc_content', 'F', '1', 'character')
125 gsub('ECHO', opt$echo, x) 62 args_list$X07_PER_BASE_N_CONTENT = c('x07_per_base_n_content', 'G', '1', 'character')
126 }) %>% 63 args_list$X08_SEQUENCE_LENGTH_DISTRIBUTION = c('x08_sequence_length_distribution', 'H', '1', 'character')
127 (function(x) { 64 args_list$X09_SEQUENCE_DUPLICATION_LEVELS = c('x09_sequence_duplication_levels', 'J', '1', 'character')
128 fileConn = file('2_per_base_N_content.Rmd') 65 args_list$X10_ADAPTER_CONTENT = c('x10_adapter_content', 'K', '1', 'character')
129 writeLines(x, con=fileConn) 66 args_list$X11_KMER_CONTENT = c('x11_kmer_content', 'L', '1', 'character')
130 close(fileConn) 67 ##-----------------------------------------------------------
131 }) 68 opt = getopt(t(as.data.frame(args_list)))
132 69
133 #----- 3_per_sequence_quality_scores.Rmd ---------------- 70
134 readLines(opt$x3_per_sequence_quality_scores) %>% 71
135 (function(x) { 72 ##=======STEP 2: create report directory (optional)==========
136 gsub('ECHO', opt$echo, x) 73 ##
137 }) %>% 74 ##===========================================================
138 (function(x) { 75 dir.create(opt$report_dir)
139 fileConn = file('3_per_sequence_quality_scores.Rmd') 76
140 writeLines(x, con=fileConn) 77 ##==STEP 3: copy index.Rmd and _site.yml to job working directory======
141 close(fileConn) 78 ##
142 }) 79 ##=====================================================================
143 80 file.copy(opt$index_rmd, 'index.Rmd')
144 81 file.copy(opt$site_yml, '_site.yml')
145 #----- 4_per_sequence_GC_content.Rmd -------------------- 82
146 readLines(opt$x4_per_sequence_GC_content) %>% 83 ##=STEP 4: replace placeholders in .Rmd files with argument values=
147 (function(x) { 84 ##
148 gsub('ECHO', opt$echo, x) 85 ##=================================================================
149 }) %>% 86 #++ need to replace placeholders with args values one by one+
150 (function(x) { 87
151 fileConn = file('4_per_sequence_GC_content.Rmd') 88 # 01_evaluation_overview.Rmd
152 writeLines(x, con=fileConn) 89 readLines(opt$x01_evaluation_overview) %>%
153 close(fileConn) 90 (function(x) {
154 }) 91 gsub('ECHO', opt$echo, x)
155 92 }) %>%
156 93 (function(x) {
157 #----- 5_per_base_sequence_content.Rmd ------------------ 94 gsub('READS_1', opt$reads_1, x)
158 readLines(opt$x5_per_base_sequence_content) %>% 95 }) %>%
159 (function(x) { 96 (function(x) {
160 gsub('ECHO', opt$echo, x) 97 gsub('NAME_1', opt$name_1, x)
161 }) %>% 98 }) %>%
162 (function(x) { 99 (function(x) {
163 fileConn = file('5_per_base_sequence_content.Rmd') 100 gsub('READS_2', opt$reads_2, x)
164 writeLines(x, con=fileConn) 101 }) %>%
165 close(fileConn) 102 (function(x) {
166 }) 103 gsub('NAME_2', opt$name_1, x)
167 104 }) %>%
168 #----- 02_fastqc_original_reports.Rmd ------------------- 105 (function(x) {
169 readLines(opt$x02_fastqc_original_reports) %>% 106 gsub('CONTAMINANTS', opt$contaminants, x)
170 (function(x) { 107 }) %>%
171 gsub('ECHO', opt$echo, x) 108 (function(x) {
172 }) %>% 109 gsub('LIMITS', opt$limits, x)
173 (function(x) { 110 }) %>%
174 gsub('REPORT_OUTPUT_DIR', opt$fastqc_site_dir, x) 111 (function(x) {
175 }) %>% 112 gsub('REPORT_DIR', opt$report_dir, x)
176 (function(x) { 113 }) %>%
177 fileConn = file('02_fastqc_original_reports.Rmd') 114 (function(x) {
178 writeLines(x, con=fileConn) 115 fileConn = file('x01_evaluation_overview.Rmd')
179 close(fileConn) 116 writeLines(x, con=fileConn)
180 }) 117 close(fileConn)
181 118 })
182 119
183 120 # 02_per_base_sequence_quality.Rmd
184 #------ 3. render all Rmd files with render_site() -------- 121 readLines(opt$x02_per_base_sequence_quality) %>%
185 render_site() 122 (function(x) {
186 123 gsub('ECHO', opt$echo, x)
187 124 }) %>%
188 #-------4. manipulate outputs ----------------------------- 125 (function(x) {
189 # a. copy index.html to the report output path 126 gsub('REPORT_DIR', opt$report_dir, x)
190 # b. copy all files in 'my_site' to the report output directory 127 }) %>%
191 file.copy('my_site/index.html', opt$fastqc_site, recursive=TRUE) 128 (function(x) {
192 paste0('cp -r my_site/* ', opt$fastqc_site_dir) %>% 129 fileConn = file('x02_per_base_sequence_quality.Rmd')
193 system() 130 writeLines(x, con=fileConn)
194 131 close(fileConn)
195 132 })
133
134 # 03_per_tile_sequence_quality.Rmd
135 readLines(opt$x03_per_tile_sequence_quality) %>%
136 (function(x) {
137 gsub('ECHO', opt$echo, x)
138 }) %>%
139 (function(x) {
140 gsub('REPORT_DIR', opt$report_dir, x)
141 }) %>%
142 (function(x) {
143 fileConn = file('x03_per_tile_sequence_quality.Rmd')
144 writeLines(x, con=fileConn)
145 close(fileConn)
146 })
147
148 # 04_per_sequence_quality_score.Rmd
149 readLines(opt$x04_per_sequence_quality_score) %>%
150 (function(x) {
151 gsub('ECHO', opt$echo, x)
152 }) %>%
153 (function(x) {
154 gsub('REPORT_DIR', opt$report_dir, x)
155 }) %>%
156 (function(x) {
157 fileConn = file('x04_per_sequence_quality_score.Rmd')
158 writeLines(x, con=fileConn)
159 close(fileConn)
160 })
161
162 # 05_per_base_sequence_content.Rmd
163 readLines(opt$x05_per_base_sequence_content) %>%
164 (function(x) {
165 gsub('ECHO', opt$echo, x)
166 }) %>%
167 (function(x) {
168 gsub('REPORT_DIR', opt$report_dir, x)
169 }) %>%
170 (function(x) {
171 fileConn = file('x05_per_base_sequence_content.Rmd')
172 writeLines(x, con=fileConn)
173 close(fileConn)
174 })
175
176 # 06_per_sequence_gc_content.Rmd
177 readLines(opt$x06_per_sequence_gc_content) %>%
178 (function(x) {
179 gsub('ECHO', opt$echo, x)
180 }) %>%
181 (function(x) {
182 gsub('REPORT_DIR', opt$report_dir, x)
183 }) %>%
184 (function(x) {
185 fileConn = file('x06_per_sequence_gc_content.Rmd')
186 writeLines(x, con=fileConn)
187 close(fileConn)
188 })
189
190 # 07_per_base_n_content.Rmd
191 readLines(opt$x07_per_base_n_content) %>%
192 (function(x) {
193 gsub('ECHO', opt$echo, x)
194 }) %>%
195 (function(x) {
196 gsub('REPORT_DIR', opt$report_dir, x)
197 }) %>%
198 (function(x) {
199 fileConn = file('x07_per_base_n_content.Rmd')
200 writeLines(x, con=fileConn)
201 close(fileConn)
202 })
203
204 # 08_sequence_length_distribution.Rmd
205 readLines(opt$x08_sequence_length_distribution) %>%
206 (function(x) {
207 gsub('ECHO', opt$echo, x)
208 }) %>%
209 (function(x) {
210 gsub('REPORT_DIR', opt$report_dir, x)
211 }) %>%
212 (function(x) {
213 fileConn = file('x08_sequence_length_distribution.Rmd')
214 writeLines(x, con=fileConn)
215 close(fileConn)
216 })
217
218 # 09_sequence_duplication_levels.Rmd
219 readLines(opt$x09_sequence_duplication_levels) %>%
220 (function(x) {
221 gsub('ECHO', opt$echo, x)
222 }) %>%
223 (function(x) {
224 gsub('REPORT_DIR', opt$report_dir, x)
225 }) %>%
226 (function(x) {
227 fileConn = file('x09_sequence_duplication_levels.Rmd')
228 writeLines(x, con=fileConn)
229 close(fileConn)
230 })
231
232 # 10_adapter_content.Rmd
233 readLines(opt$x10_adapter_content) %>%
234 (function(x) {
235 gsub('ECHO', opt$echo, x)
236 }) %>%
237 (function(x) {
238 gsub('REPORT_DIR', opt$report_dir, x)
239 }) %>%
240 (function(x) {
241 fileConn = file('x10_adapter_content.Rmd')
242 writeLines(x, con=fileConn)
243 close(fileConn)
244 })
245
246 # 11_kmer_content.Rmd
247 readLines(opt$x11_kmer_content) %>%
248 (function(x) {
249 gsub('ECHO', opt$echo, x)
250 }) %>%
251 (function(x) {
252 gsub('REPORT_DIR', opt$report_dir, x)
253 }) %>%
254 (function(x) {
255 fileConn = file('x11_kmer_content.Rmd')
256 writeLines(x, con=fileConn)
257 close(fileConn)
258 })
259
260 ##=============STEP 5: render all .Rmd templates=================
261 ##
262 ##===========================================================
263 extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
264 f = readLines(fastqc_data)
265 start_line = grep(module_name, f)
266 end_module_lines = grep('END_MODULE', f)
267 end_line = end_module_lines[which(end_module_lines > start_line)[1]]
268 module_data = f[(start_line+1):(end_line-1)]
269 writeLines(module_data, 'temp.txt')
270 read.csv('temp.txt', sep = '\t', header = header, comment.char = comment.char)
271 }
272 render_site()
273
274 ##=============STEP 6: manipulate outputs====================
275 ##
276 ##===========================================================
277 file.copy('my_site/index.html', opt$report_html, recursive = TRUE)
278 system(paste0('cp -r my_site/* ', opt$report_dir))
279
280
281 ##--------end of code rendering .Rmd templates----------------
282 sink()
283 ##=========== End of sinking output=============================