Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
comparison fastqc_site_render.R @ 11:507eec497730 draft
update fastqc site
author | mingchen0919 |
---|---|
date | Tue, 07 Nov 2017 16:52:24 -0500 |
parents | d820be692d74 |
children | a6f8382f852c |
comparison
equal
deleted
inserted
replaced
10:600c39b11913 | 11:507eec497730 |
---|---|
1 ##======= Handle arguments from command line ======== | 1 library(getopt) |
2 # setup R error handline to go to stderr | |
3 options(show.error.messages=FALSE, | |
4 error=function(){ | |
5 cat(geterrmessage(), file=stderr()) | |
6 quit("no", 1, F) | |
7 }) | |
8 | |
9 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
10 loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
11 | |
12 # suppress warning | |
13 options(warn = -1) | |
14 | |
15 options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) | |
16 args = commandArgs(trailingOnly=TRUE) | |
17 | |
18 suppressPackageStartupMessages({ | |
19 library(getopt) | |
20 library(tools) | |
21 }) | |
22 | |
23 # column 1: the long flag name | |
24 # column 2: the short flag alias. A SINGLE character string | |
25 # column 3: argument mask | |
26 # 0: no argument | |
27 # 1: argument required | |
28 # 2: argument is optional | |
29 # column 4: date type to which the flag's argument shall be cast. | |
30 # possible values: logical, integer, double, complex, character. | |
31 spec_list=list() | |
32 | |
33 ##------- 1. input data --------------------- | |
34 spec_list$READS = c('reads', 'r', '1', 'character') | |
35 spec_list$ECHO = c('echo', 'e', '1', 'character') | |
36 | |
37 ##--------2. output report and report site directory -------------- | |
38 spec_list$FASTQC_SITE = c('fastqc_site', 'o', '1', 'character') | |
39 spec_list$FASTQC_SITE_DIR = c('fastqc_site_dir', 'd', '1', 'character') | |
40 | |
41 ##--------3. Rmd templates sitting in the tool directory ---------- | |
42 | |
43 ## _site.yml and index.Rmd files | |
44 spec_list$SITE_YML = c('site_yml', 's', 1, 'character') | |
45 spec_list$INDEX_Rmd = c('index_rmd', 'i', 1, 'character') | |
46 | |
47 ## other Rmd body template files | |
48 spec_list$x01 = c('x01_evaluation_overview', 'p', '1', 'character') | |
49 spec_list$x02 = c('x02_fastqc_original_reports', 'a', '1', 'character') | |
50 spec_list$x1 = c('x1_per_base_quality_scores', 'b', '1', 'character') | |
51 spec_list$x2 = c('x2_per_base_N_content', 'c', '1', 'character') | |
52 spec_list$x3 = c('x3_per_sequence_quality_scores', 'f', '1', 'character') | |
53 spec_list$x4 = c('x4_per_sequence_GC_content', 'g', '1', 'character') | |
54 spec_list$x5 = c('x5_per_base_sequence_content', 'h', '1', 'character') | |
55 | |
56 ##------------------------------------------------------------------ | |
57 | |
58 spec = t(as.data.frame(spec_list)) | |
59 opt = getopt(spec) | |
60 # arguments are accessed by long flag name (the first column in the spec matrix) | |
61 # NOT by element name in the spec_list | |
62 # example: opt$help, opt$expression_file | |
63 ##====== End of arguments handling ========== | |
64 | |
65 #------ Load libraries --------- | |
66 library(rmarkdown) | 2 library(rmarkdown) |
3 library(htmltools) | |
67 library(plyr) | 4 library(plyr) |
5 library(dplyr) | |
68 library(stringr) | 6 library(stringr) |
69 library(dplyr) | |
70 library(highcharter) | 7 library(highcharter) |
71 library(DT) | 8 library(DT) |
72 library(reshape2) | 9 library(reshape2) |
73 library(plotly) | 10 library(plotly) |
74 library(formattable) | 11 library(formattable) |
75 library(htmltools) | 12 options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) |
76 | 13 |
77 | 14 ##============ Sink warnings and errors to a file ============== |
78 #----- 1. create the report directory ------------------------ | 15 ## use the sink() function to wrap all code within it. |
79 paste0('mkdir -p ', opt$fastqc_site_dir) %>% | 16 ##============================================================== |
80 system() | 17 zz = file('warnings_and_errors.txt') |
81 | 18 sink(zz) |
82 #----- 2. generate Rmd files with Rmd templates -------------- | 19 sink(zz, type = 'message') |
83 # a. templates without placeholder variables: | 20 ##---------below is the code for rendering .Rmd templates----- |
84 # copy templates from tool directory to the working directory. | 21 |
85 # b. templates with placeholder variables: | 22 ##=============STEP 1: handle command line arguments========== |
86 # substitute variables with user input values and place them in the working directory. | 23 ## |
87 | 24 ##============================================================ |
88 | 25 # column 1: the long flag name |
89 #----- Copy index.Rmd and _site.yml files to job working direcotry ----- | 26 # column 2: the short flag alias. A SINGLE character string |
90 file.copy(opt$index_rmd, 'index.Rmd', recursive=TRUE) | 27 # column 3: argument mask |
91 file.copy(opt$site_yml, '_site.yml', recursive=TRUE) | 28 # 0: no argument |
92 #--------------------------------------------------------- | 29 # 1: argument required |
93 | 30 # 2: argument is optional |
94 #----- 01_evaluation_overview.Rmd ----------------------- | 31 # column 4: date type to which the flag's argument shall be cast. |
95 readLines(opt$x01_evaluation_overview) %>% | 32 # possible values: logical, integer, double, complex, character. |
96 (function(x) { | 33 #------------------------------------------------------------- |
97 gsub('ECHO', opt$echo, x) | 34 #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ |
98 }) %>% | 35 # 1. short flag alias should match the flag in the command section in the XML file. |
99 (function(x) { | 36 # 2. long flag name can be any legal R variable names |
100 gsub('READS', opt$reads, x) | 37 # 3. two names in args_list can have common string but one name should not be a part of another name. |
101 }) %>% | 38 # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. |
102 (function(x) { | 39 #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
103 gsub('REPORT_OUTPUT_DIR', opt$fastqc_site_dir, x) | 40 args_list=list() |
104 }) %>% | 41 ##------- 1. input data --------------------- |
105 (function(x) { | 42 args_list$ECHO = c('echo', 'e', '1', 'character') |
106 fileConn = file('01_evaluation_overview.Rmd') | 43 args_list$READS_1 = c('reads_1', 'r', '1', 'character') |
107 writeLines(x, con=fileConn) | 44 args_list$NAME_1 = c('name_1', 'n', '1', 'character') |
108 close(fileConn) | 45 args_list$READS_2 = c('reads_2', 'R', '1', 'character') |
109 }) | 46 args_list$NAME_2 = c('name_2', 'N', '1', 'character') |
110 | 47 args_list$CONTAMINANTS = c('contaminants', 'c', '1', 'character') |
111 #----- 1_per_base_quality_scores.Rmd -------------------- | 48 args_list$LIMITS = c('limits', 'l', '1', 'character') |
112 readLines(opt$x1_per_base_quality_scores) %>% | 49 ##--------2. output report and outputs -------------- |
113 (function(x) { | 50 args_list$REPORT_HTML = c('report_html', 'o', '1', 'character') |
114 gsub('ECHO', opt$echo, x) | 51 args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') |
115 }) %>% | 52 args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') |
116 (function(x) { | 53 ##--------3. .Rmd templates in the tool directory ---------- |
117 fileConn = file('1_per_base_quality_scores.Rmd') | 54 args_list$SITE_YML = c('site_yml', 'S', '1', 'character') |
118 writeLines(x, con=fileConn) | 55 args_list$INDEX_RMD = c('index_rmd', 'I', '1', 'character') |
119 close(fileConn) | 56 args_list$X01_EVALUATION_OVERVIEW = c('x01_evaluation_overview', 'A', '1', 'character') |
120 }) | 57 args_list$X02_PER_BASE_SEQUENCE_QUALITY = c('x02_per_base_sequence_quality', 'B', '1', 'character') |
121 | 58 args_list$X03_PER_TILE_SEQUENCE_QUALITY = c('x03_per_tile_sequence_quality', 'C', '1', 'character') |
122 #----- 2_per_base_N_content.Rmd ------------------------- | 59 args_list$X04_PER_SEQUENCE_QUALITY_SCORE = c('x04_per_sequence_quality_score', 'D', '1', 'character') |
123 readLines(opt$x2_per_base_N_content) %>% | 60 args_list$X05_PER_BASE_SEQUENCE_CONTENT = c('x05_per_base_sequence_content', 'E', '1', 'character') |
124 (function(x) { | 61 args_list$X06_PER_SEQUENCE_GC_CONTENT = c('x06_per_sequence_gc_content', 'F', '1', 'character') |
125 gsub('ECHO', opt$echo, x) | 62 args_list$X07_PER_BASE_N_CONTENT = c('x07_per_base_n_content', 'G', '1', 'character') |
126 }) %>% | 63 args_list$X08_SEQUENCE_LENGTH_DISTRIBUTION = c('x08_sequence_length_distribution', 'H', '1', 'character') |
127 (function(x) { | 64 args_list$X09_SEQUENCE_DUPLICATION_LEVELS = c('x09_sequence_duplication_levels', 'J', '1', 'character') |
128 fileConn = file('2_per_base_N_content.Rmd') | 65 args_list$X10_ADAPTER_CONTENT = c('x10_adapter_content', 'K', '1', 'character') |
129 writeLines(x, con=fileConn) | 66 args_list$X11_KMER_CONTENT = c('x11_kmer_content', 'L', '1', 'character') |
130 close(fileConn) | 67 ##----------------------------------------------------------- |
131 }) | 68 opt = getopt(t(as.data.frame(args_list))) |
132 | 69 |
133 #----- 3_per_sequence_quality_scores.Rmd ---------------- | 70 |
134 readLines(opt$x3_per_sequence_quality_scores) %>% | 71 |
135 (function(x) { | 72 ##=======STEP 2: create report directory (optional)========== |
136 gsub('ECHO', opt$echo, x) | 73 ## |
137 }) %>% | 74 ##=========================================================== |
138 (function(x) { | 75 dir.create(opt$report_dir) |
139 fileConn = file('3_per_sequence_quality_scores.Rmd') | 76 |
140 writeLines(x, con=fileConn) | 77 ##==STEP 3: copy index.Rmd and _site.yml to job working directory====== |
141 close(fileConn) | 78 ## |
142 }) | 79 ##===================================================================== |
143 | 80 file.copy(opt$index_rmd, 'index.Rmd') |
144 | 81 file.copy(opt$site_yml, '_site.yml') |
145 #----- 4_per_sequence_GC_content.Rmd -------------------- | 82 |
146 readLines(opt$x4_per_sequence_GC_content) %>% | 83 ##=STEP 4: replace placeholders in .Rmd files with argument values= |
147 (function(x) { | 84 ## |
148 gsub('ECHO', opt$echo, x) | 85 ##================================================================= |
149 }) %>% | 86 #++ need to replace placeholders with args values one by one+ |
150 (function(x) { | 87 |
151 fileConn = file('4_per_sequence_GC_content.Rmd') | 88 # 01_evaluation_overview.Rmd |
152 writeLines(x, con=fileConn) | 89 readLines(opt$x01_evaluation_overview) %>% |
153 close(fileConn) | 90 (function(x) { |
154 }) | 91 gsub('ECHO', opt$echo, x) |
155 | 92 }) %>% |
156 | 93 (function(x) { |
157 #----- 5_per_base_sequence_content.Rmd ------------------ | 94 gsub('READS_1', opt$reads_1, x) |
158 readLines(opt$x5_per_base_sequence_content) %>% | 95 }) %>% |
159 (function(x) { | 96 (function(x) { |
160 gsub('ECHO', opt$echo, x) | 97 gsub('NAME_1', opt$name_1, x) |
161 }) %>% | 98 }) %>% |
162 (function(x) { | 99 (function(x) { |
163 fileConn = file('5_per_base_sequence_content.Rmd') | 100 gsub('READS_2', opt$reads_2, x) |
164 writeLines(x, con=fileConn) | 101 }) %>% |
165 close(fileConn) | 102 (function(x) { |
166 }) | 103 gsub('NAME_2', opt$name_1, x) |
167 | 104 }) %>% |
168 #----- 02_fastqc_original_reports.Rmd ------------------- | 105 (function(x) { |
169 readLines(opt$x02_fastqc_original_reports) %>% | 106 gsub('CONTAMINANTS', opt$contaminants, x) |
170 (function(x) { | 107 }) %>% |
171 gsub('ECHO', opt$echo, x) | 108 (function(x) { |
172 }) %>% | 109 gsub('LIMITS', opt$limits, x) |
173 (function(x) { | 110 }) %>% |
174 gsub('REPORT_OUTPUT_DIR', opt$fastqc_site_dir, x) | 111 (function(x) { |
175 }) %>% | 112 gsub('REPORT_DIR', opt$report_dir, x) |
176 (function(x) { | 113 }) %>% |
177 fileConn = file('02_fastqc_original_reports.Rmd') | 114 (function(x) { |
178 writeLines(x, con=fileConn) | 115 fileConn = file('x01_evaluation_overview.Rmd') |
179 close(fileConn) | 116 writeLines(x, con=fileConn) |
180 }) | 117 close(fileConn) |
181 | 118 }) |
182 | 119 |
183 | 120 # 02_per_base_sequence_quality.Rmd |
184 #------ 3. render all Rmd files with render_site() -------- | 121 readLines(opt$x02_per_base_sequence_quality) %>% |
185 render_site() | 122 (function(x) { |
186 | 123 gsub('ECHO', opt$echo, x) |
187 | 124 }) %>% |
188 #-------4. manipulate outputs ----------------------------- | 125 (function(x) { |
189 # a. copy index.html to the report output path | 126 gsub('REPORT_DIR', opt$report_dir, x) |
190 # b. copy all files in 'my_site' to the report output directory | 127 }) %>% |
191 file.copy('my_site/index.html', opt$fastqc_site, recursive=TRUE) | 128 (function(x) { |
192 paste0('cp -r my_site/* ', opt$fastqc_site_dir) %>% | 129 fileConn = file('x02_per_base_sequence_quality.Rmd') |
193 system() | 130 writeLines(x, con=fileConn) |
194 | 131 close(fileConn) |
195 | 132 }) |
133 | |
134 # 03_per_tile_sequence_quality.Rmd | |
135 readLines(opt$x03_per_tile_sequence_quality) %>% | |
136 (function(x) { | |
137 gsub('ECHO', opt$echo, x) | |
138 }) %>% | |
139 (function(x) { | |
140 gsub('REPORT_DIR', opt$report_dir, x) | |
141 }) %>% | |
142 (function(x) { | |
143 fileConn = file('x03_per_tile_sequence_quality.Rmd') | |
144 writeLines(x, con=fileConn) | |
145 close(fileConn) | |
146 }) | |
147 | |
148 # 04_per_sequence_quality_score.Rmd | |
149 readLines(opt$x04_per_sequence_quality_score) %>% | |
150 (function(x) { | |
151 gsub('ECHO', opt$echo, x) | |
152 }) %>% | |
153 (function(x) { | |
154 gsub('REPORT_DIR', opt$report_dir, x) | |
155 }) %>% | |
156 (function(x) { | |
157 fileConn = file('x04_per_sequence_quality_score.Rmd') | |
158 writeLines(x, con=fileConn) | |
159 close(fileConn) | |
160 }) | |
161 | |
162 # 05_per_base_sequence_content.Rmd | |
163 readLines(opt$x05_per_base_sequence_content) %>% | |
164 (function(x) { | |
165 gsub('ECHO', opt$echo, x) | |
166 }) %>% | |
167 (function(x) { | |
168 gsub('REPORT_DIR', opt$report_dir, x) | |
169 }) %>% | |
170 (function(x) { | |
171 fileConn = file('x05_per_base_sequence_content.Rmd') | |
172 writeLines(x, con=fileConn) | |
173 close(fileConn) | |
174 }) | |
175 | |
176 # 06_per_sequence_gc_content.Rmd | |
177 readLines(opt$x06_per_sequence_gc_content) %>% | |
178 (function(x) { | |
179 gsub('ECHO', opt$echo, x) | |
180 }) %>% | |
181 (function(x) { | |
182 gsub('REPORT_DIR', opt$report_dir, x) | |
183 }) %>% | |
184 (function(x) { | |
185 fileConn = file('x06_per_sequence_gc_content.Rmd') | |
186 writeLines(x, con=fileConn) | |
187 close(fileConn) | |
188 }) | |
189 | |
190 # 07_per_base_n_content.Rmd | |
191 readLines(opt$x07_per_base_n_content) %>% | |
192 (function(x) { | |
193 gsub('ECHO', opt$echo, x) | |
194 }) %>% | |
195 (function(x) { | |
196 gsub('REPORT_DIR', opt$report_dir, x) | |
197 }) %>% | |
198 (function(x) { | |
199 fileConn = file('x07_per_base_n_content.Rmd') | |
200 writeLines(x, con=fileConn) | |
201 close(fileConn) | |
202 }) | |
203 | |
204 # 08_sequence_length_distribution.Rmd | |
205 readLines(opt$x08_sequence_length_distribution) %>% | |
206 (function(x) { | |
207 gsub('ECHO', opt$echo, x) | |
208 }) %>% | |
209 (function(x) { | |
210 gsub('REPORT_DIR', opt$report_dir, x) | |
211 }) %>% | |
212 (function(x) { | |
213 fileConn = file('x08_sequence_length_distribution.Rmd') | |
214 writeLines(x, con=fileConn) | |
215 close(fileConn) | |
216 }) | |
217 | |
218 # 09_sequence_duplication_levels.Rmd | |
219 readLines(opt$x09_sequence_duplication_levels) %>% | |
220 (function(x) { | |
221 gsub('ECHO', opt$echo, x) | |
222 }) %>% | |
223 (function(x) { | |
224 gsub('REPORT_DIR', opt$report_dir, x) | |
225 }) %>% | |
226 (function(x) { | |
227 fileConn = file('x09_sequence_duplication_levels.Rmd') | |
228 writeLines(x, con=fileConn) | |
229 close(fileConn) | |
230 }) | |
231 | |
232 # 10_adapter_content.Rmd | |
233 readLines(opt$x10_adapter_content) %>% | |
234 (function(x) { | |
235 gsub('ECHO', opt$echo, x) | |
236 }) %>% | |
237 (function(x) { | |
238 gsub('REPORT_DIR', opt$report_dir, x) | |
239 }) %>% | |
240 (function(x) { | |
241 fileConn = file('x10_adapter_content.Rmd') | |
242 writeLines(x, con=fileConn) | |
243 close(fileConn) | |
244 }) | |
245 | |
246 # 11_kmer_content.Rmd | |
247 readLines(opt$x11_kmer_content) %>% | |
248 (function(x) { | |
249 gsub('ECHO', opt$echo, x) | |
250 }) %>% | |
251 (function(x) { | |
252 gsub('REPORT_DIR', opt$report_dir, x) | |
253 }) %>% | |
254 (function(x) { | |
255 fileConn = file('x11_kmer_content.Rmd') | |
256 writeLines(x, con=fileConn) | |
257 close(fileConn) | |
258 }) | |
259 | |
260 ##=============STEP 5: render all .Rmd templates================= | |
261 ## | |
262 ##=========================================================== | |
263 extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") { | |
264 f = readLines(fastqc_data) | |
265 start_line = grep(module_name, f) | |
266 end_module_lines = grep('END_MODULE', f) | |
267 end_line = end_module_lines[which(end_module_lines > start_line)[1]] | |
268 module_data = f[(start_line+1):(end_line-1)] | |
269 writeLines(module_data, 'temp.txt') | |
270 read.csv('temp.txt', sep = '\t', header = header, comment.char = comment.char) | |
271 } | |
272 render_site() | |
273 | |
274 ##=============STEP 6: manipulate outputs==================== | |
275 ## | |
276 ##=========================================================== | |
277 file.copy('my_site/index.html', opt$report_html, recursive = TRUE) | |
278 system(paste0('cp -r my_site/* ', opt$report_dir)) | |
279 | |
280 | |
281 ##--------end of code rendering .Rmd templates---------------- | |
282 sink() | |
283 ##=========== End of sinking output============================= |