annotate 01_evaluation_overview.Rmd @ 12:68ea2ebbf866 draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:23:43 -0500
parents 507eec497730
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
1 ---
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
3 output:
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
4 html_document:
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
5 number_sections: true
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
6 toc: true
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
7 theme: cosmo
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
8 highlight: tango
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
9 ---
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
10
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
12 knitr::opts_chunk$set(
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
13 echo = ECHO,
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
14 error = TRUE
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
15 )
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
16 ```
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
17
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
18
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
19 # FastQC Evaluation
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
20
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
21 ## Evaluation of reads before trimming
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
22
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
23 ```{r}
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
24 if ('READS_1' == 'None') {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
25 stop("No pre-trimming reads provided!")
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
26 } else {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
27 ## run fastqc evaluation
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
28 fastqc_command = paste0('fastqc ') %>%
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
29 (function(x) {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
30 ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
31 }) %>%
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
32 (function(x) {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
33 ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
34 }) %>%
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
35 (function(x) {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
36 paste0(x, '-o REPORT_DIR ')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
37 })
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
38 fastqc_command_reads_1 = paste0(fastqc_command, 'READS_1 > /dev/null 2>&1')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
39 system(fastqc_command_reads_1, intern = TRUE)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
40
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
41 # Original html report
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
42 reads_1_base = tail(strsplit('READS_1', '/')[[1]], 1)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
43 original_html = tags$a(href=paste0(reads_1_base, '_fastqc.html'), paste0('HTML report: ', opt$name_1))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
44
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
45 unzip(paste0('REPORT_DIR/', reads_1_base, '_fastqc.zip'), exdir = 'REPORT_DIR')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
46 reads_1_unzip = paste0('REPORT_DIR/', reads_1_base, '_fastqc/')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
47 # fastqc_data.txt
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
48 file.copy(paste0(reads_1_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_1_fastqc_data.txt')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
49 fastqc_data = tags$a(href='reads_1_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_1))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
50 # summary.txt
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
51 file.copy(paste0(reads_1_unzip, 'summary.txt'), 'REPORT_DIR/reads_1_summary.txt')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
52 summary_data = tags$a(href='reads_1_summary.txt', paste0('summary.txt: ', opt$name_1))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
53
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
54 tags$ul(
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
55 tags$li(original_html),
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
56 tags$li(fastqc_data),
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
57 tags$li(summary_data)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
58 )
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
59 }
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
60 ```
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
61
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
62
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
63 ## Evaluation of reads after trimming
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
64
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
65 ```{r}
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
66 if ('READS_2' == 'None') {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
67 stop("No post-trimming reads provided!")  # this chunk checks READS_2, the "after trimming" input
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
68 } else {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
69 ## run fastqc evaluation
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
70 fastqc_command = paste0('fastqc ') %>%
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
71 (function(x) {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
72 ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
73 }) %>%
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
74 (function(x) {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
75 ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
76 }) %>%
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
77 (function(x) {
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
78 paste0(x, '-o REPORT_DIR ')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
79 })
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
80 fastqc_command_reads_2 = paste0(fastqc_command, 'READS_2 > /dev/null 2>&1')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
81 system(fastqc_command_reads_2, intern = TRUE)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
82
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
83 # Original html report
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
84 reads_2_base = tail(strsplit('READS_2', '/')[[1]], 1)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
85 original_html = tags$a(href=paste0(reads_2_base, '_fastqc.html'), paste0('HTML report: ', opt$name_2))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
86
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
87 unzip(paste0('REPORT_DIR/', reads_2_base, '_fastqc.zip'), exdir = 'REPORT_DIR')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
88 reads_2_unzip = paste0('REPORT_DIR/', reads_2_base, '_fastqc/')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
89 # fastqc_data.txt
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
90 file.copy(paste0(reads_2_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_2_fastqc_data.txt')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
91 fastqc_data = tags$a(href='reads_2_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_2))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
92 # summary.txt
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
93 file.copy(paste0(reads_2_unzip, 'summary.txt'), 'REPORT_DIR/reads_2_summary.txt')
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
94 summary_data = tags$a(href='reads_2_summary.txt', paste0('summary.txt: ', opt$name_2))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
95
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
96 tags$ul(
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
97 tags$li(original_html),
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
98 tags$li(fastqc_data),
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
99 tags$li(summary_data)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
100 )
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
101 }
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
102 ```
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
103
11
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
104
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
105
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
106 # FastQC output visualization
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
107
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
108 ## Overview
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
109
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
110 ```{r}
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
111 reads_1_summary = read.csv('REPORT_DIR/reads_1_summary.txt', header = FALSE, sep = '\t')[, 2:1]
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
112 reads_2_summary = read.csv('REPORT_DIR/reads_2_summary.txt', header = FALSE, sep = '\t')[, 1]
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
113 combined_summary = cbind(reads_1_summary, reads_2_summary)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
114 names(combined_summary) = c('MODULE', paste0(opt$name_1, '(before)'), paste0(opt$name_2, '(after)'))
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
115 combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
116 combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
117 knitr::kable(combined_summary)
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
118 ```
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
119
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
120 # Session Info
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
121
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
122 ```{r 'session info'}
507eec497730 update fastqc site
mingchen0919
parents: 0
diff changeset
123 sessionInfo()
0
d732d4526c6d planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit ddb1f6aca7619aea2e660b1729367841b56ba4c9-dirty
mingchen0919
parents:
diff changeset
124 ```