Mercurial > repos > mingchen0919 > rmarkdown_fastqc_report
comparison fastqc_report.Rmd @ 16:1710b0e874f1 draft
fix file name issue
author | mingchen0919 |
---|---|
date | Sat, 21 Oct 2017 09:25:49 -0400 |
parents | d1d20f341632 |
children | ac5c618e4d97 |
comparison
equal
deleted
inserted
replaced
15:d1d20f341632 | 16:1710b0e874f1 |
---|---|
8 highlight: tango | 8 highlight: tango |
9 --- | 9 --- |
10 | 10 |
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | 11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} |
12 knitr::opts_chunk$set( | 12 knitr::opts_chunk$set( |
13 echo = ECHO | 13 echo = ECHO, |
14 error = TRUE | |
14 ) | 15 ) |
15 ``` | 16 ``` |
16 | 17 |
17 | 18 |
18 # Fastqc Analysis | 19 # Fastqc Evaluation |
19 | 20 |
20 * Copy fastq files to job working directory | 21 ## Evaluation of reads before trimming |
21 | 22 |
22 ```{bash 'copy files'} | 23 ```{r} |
23 for f in $(echo READS | sed "s/,/ /g") | 24 if ('READS_1' == 'None') { |
24 do | 25 stop("No pre-trimming reads provided!") |
25 cp $f ./ | 26 } else { |
26 done | 27 ## run fastqc evaluation |
27 ``` | 28 fastqc_command = paste0('fastqc ') %>% |
28 | 29 (function(x) { |
29 * Run fastqc | 30 ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x) |
30 | 31 }) %>% |
31 ```{bash 'run fastqc'} | 32 (function(x) { |
32 for r in $(ls *.dat) | 33 ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x) |
33 do | 34 }) %>% |
34 fastqc -o REPORT_DIR $r > /dev/null 2>&1 | 35 (function(x) { |
35 done | 36 paste0(x, '-o REPORT_DIR ') |
36 ``` | 37 }) |
37 | 38 fastqc_command_reads_1 = paste0(fastqc_command, 'READS_1 > /dev/null 2>&1') |
38 ## Evaluation results | 39 system(fastqc_command_reads_1, intern = TRUE) |
39 | 40 |
40 ```{r 'html report links'} | 41 # Original html report |
41 html_file = list.files('REPORT_DIR', pattern = '.*html') | 42 reads_1_base = tail(strsplit('READS_1', '/')[[1]], 1) |
42 tags$ul(tags$a(href=html_file, paste0('HTML report', opt$name))) | 43 original_html = tags$a(href=paste0(reads_1_base, '_fastqc.html'), paste0('HTML report: ', opt$name_1)) |
44 | |
45 unzip(paste0('REPORT_DIR/', reads_1_base, '_fastqc.zip'), exdir = 'REPORT_DIR') | |
46 reads_1_unzip = paste0('REPORT_DIR/', reads_1_base, '_fastqc/') | |
47 # fastqc_data.txt | |
48 file.copy(paste0(reads_1_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_1_fastqc_data.txt') | |
49 fastqc_data = tags$a(href='reads_1_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_1)) | |
50 # summary.txt | |
51 file.copy(paste0(reads_1_unzip, 'summary.txt'), 'REPORT_DIR/reads_1_summary.txt') | |
52 summary_data = tags$a(href='reads_1_summary.txt', paste0('summary.txt: ', opt$name_1)) | |
53 | |
54 tags$ul( | |
55 tags$li(original_html), | |
56 tags$li(fastqc_data), | |
57 tags$li(summary_data) | |
58 ) | |
59 } | |
43 ``` | 60 ``` |
44 | 61 |
45 | 62 |
46 ```{r 'extract fastqc_data.txt and summary.txt'} | 63 ## Evaluation of reads after trimming |
47 # list all zip files | |
48 zip_file = list.files(path = 'REPORT_DIR', pattern = '.zip') | |
49 unzip(paste0('REPORT_DIR/', zip_file), exdir = 'REPORT_DIR') | |
50 | 64 |
51 unzip_directory = paste0(tail(strsplit(opt$reads, '/')[[1]], 1), '_fastqc/') | 65 ```{r} |
52 fastqc_data_txt_path = paste0('REPORT_DIR/', unzip_directory, 'fastqc_data.txt') | 66 if ('READS_2' == 'None') { |
53 summary_txt_path = paste0('REPORT_DIR/', unzip_directory, 'summary.txt') | 67 stop("No pre-trimming reads provided!") |
68 } else { | |
69 ## run fastqc evaluation | |
70 fastqc_command = paste0('fastqc ') %>% | |
71 (function(x) { | |
72 ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x) | |
73 }) %>% | |
74 (function(x) { | |
75 ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x) | |
76 }) %>% | |
77 (function(x) { | |
78 paste0(x, '-o REPORT_DIR ') | |
79 }) | |
80 fastqc_command_reads_2 = paste0(fastqc_command, 'READS_2 > /dev/null 2>&1') | |
81 system(fastqc_command_reads_2, intern = TRUE) | |
82 | |
83 # Original html report | |
84 reads_2_base = tail(strsplit('READS_2', '/')[[1]], 1) | |
85 original_html = tags$a(href=paste0(reads_2_base, '_fastqc.html'), paste0('HTML report: ', opt$name_2)) | |
86 | |
87 unzip(paste0('REPORT_DIR/', reads_2_base, '_fastqc.zip'), exdir = 'REPORT_DIR') | |
88 reads_2_unzip = paste0('REPORT_DIR/', reads_2_base, '_fastqc/') | |
89 # fastqc_data.txt | |
90 file.copy(paste0(reads_2_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_2_fastqc_data.txt') | |
91 fastqc_data = tags$a(href='reads_2_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_2)) | |
92 # summary.txt | |
93 file.copy(paste0(reads_2_unzip, 'summary.txt'), 'REPORT_DIR/reads_2_summary.txt') | |
94 summary_data = tags$a(href='reads_2_summary.txt', paste0('summary.txt: ', opt$name_2)) | |
95 | |
96 tags$ul( | |
97 tags$li(original_html), | |
98 tags$li(fastqc_data), | |
99 tags$li(summary_data) | |
100 ) | |
101 } | |
54 ``` | 102 ``` |
55 | 103 |
56 | |
57 ```{r 'summary.txt'} | |
58 tags$ul(tags$a(href=paste0(unzip_directory, 'summary.txt'), 'summary.txt')) | |
59 ``` | |
60 | |
61 | |
62 ```{r 'fastqc_data.txt'} | |
63 tags$ul(tags$a(href=paste0(unzip_directory, 'fastqc_data.txt'), 'fastqc_data.txt')) | |
64 ``` | |
65 | 104 |
66 | 105 |
67 # Fastqc output visualization | 106 # Fastqc output visualization |
68 | 107 |
69 ## Overview | 108 ## Overview |
70 | 109 |
71 ```{r} | 110 ```{r} |
72 # read.table(fastqc_data_txt_path) | 111 reads_1_summary = read.csv('REPORT_DIR/reads_1_summary.txt', header = FALSE, sep = '\t')[, 2:1] |
73 summary_txt = read.csv(summary_txt_path, header = FALSE, sep = '\t')[, 2:1] | 112 reads_2_summary = read.csv('REPORT_DIR/reads_1_summary.txt', header = FALSE, sep = '\t')[, 1] |
74 names(summary_txt) = c('MODULE', 'PASS/FAIL') | 113 combined_summary = cbind(reads_1_summary, reads_2_summary) |
75 knitr::kable(summary_txt) | 114 names(combined_summary) = c('MODULE', paste0(opt$name_1, '(before)'), paste0(opt$name_2, '(after)')) |
115 knitr::kable(combined_summary) | |
76 ``` | 116 ``` |
77 | 117 |
78 ## Summary by module {.tabset} | 118 ## Visualization by module {.tabset} |
79 | 119 |
80 * Define a function to extract outputs for each module from fastqc output | 120 * Define a function to extract outputs for each module from fastqc output |
81 | 121 |
82 ```{r 'function definition'} | 122 ```{r 'function definition'} |
83 extract_data_module = function(fastqc_data, module_name) { | 123 extract_data_module = function(fastqc_data, module_name) { |
91 } | 131 } |
92 ``` | 132 ``` |
93 | 133 |
94 ### Per base sequence quality | 134 ### Per base sequence quality |
95 | 135 |
96 ```{r} | 136 ```{r 'per base sequence quality', fig.width=10} |
97 pbsq = extract_data_module(fastqc_data_txt_path, 'Per base sequence quality') | 137 ## reads 1 |
98 knitr::kable(pbsq) | 138 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality') |
139 pbsq_1$id = 1:length(pbsq_1$X.Base) | |
140 | |
141 melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable != 'X90th.Percentile' & variable != 'X10th.Percentile') | |
142 melt_pbsq_1$trim = 'before' | |
143 | |
144 | |
145 ## reads 2 | |
146 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality') | |
147 pbsq_2$id = 1:length(pbsq_2$X.Base) | |
148 | |
149 melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable != 'X90th.Percentile' & variable != 'X10th.Percentile') | |
150 melt_pbsq_2$trim = 'after' | |
151 | |
152 comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2) | |
153 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) | |
154 p = ggplot(data = comb_pbsq) + | |
155 geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) + | |
156 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + | |
157 facet_grid(. ~ trim) + | |
158 theme(axis.text.x = element_text(angle=45)) | |
159 ggplotly(p) | |
160 | |
99 ``` | 161 ``` |
100 | 162 |
101 ### Per tile sequence quality | 163 ### Per tile sequence quality |
102 | 164 |
103 ```{r} | 165 ```{r 'per tile sequence quality'} |
104 ptsq = extract_data_module(fastqc_data_txt_path, 'Per tile sequence quality') | 166 ## reads 1 |
105 knitr::kable(ptsq) | 167 ptsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per tile sequence quality') |
168 ptsq_1$trim = 'before' | |
169 | |
170 ## reads 2 | |
171 ptsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per tile sequence quality') | |
172 ptsq_2$trim = 'after' | |
173 | |
174 comb_ptsq = rbind(ptsq_1, ptsq_2) | |
175 comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim) | |
176 comb_pbsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base) | |
177 | |
178 p = ggplot(data = comb_ptsq, aes(x = Base, y = X.Tile, fill = Mean)) + | |
179 geom_raster() + | |
180 facet_grid(. ~ trim) + | |
181 theme(axis.text.x = element_text(angle=45)) | |
182 ggplotly(p) | |
106 ``` | 183 ``` |
107 | 184 |
108 | 185 |
109 | 186 |
110 # Session Info | 187 # Session Info |