annotate fastqc_report.Rmd @ 17:ac5c618e4d97 draft

compare evaluation before and after trimming
author mingchen0919
date Mon, 06 Nov 2017 16:53:14 -0500
parents 1710b0e874f1
children 8635a4cee6dd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
1 ---
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
3 output:
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
4 html_document:
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
5 number_sections: true
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
6 toc: true
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
7 theme: cosmo
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
8 highlight: tango
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
9 ---
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
10
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
12 knitr::opts_chunk$set(
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
13 echo = ECHO,
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
14 error = TRUE
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
15 )
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
16 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
17
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
18
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
19 # FastQC Evaluation
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
20
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
21 ## Evaluation of reads before trimming
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
22
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
23 ```{r}
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
24 if ('READS_1' == 'None') {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
25 stop("No pre-trimming reads provided!")
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
26 } else {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
27 ## run fastqc evaluation
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
28 fastqc_command = paste0('fastqc ') %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
29 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
30 ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
31 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
32 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
33 ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
34 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
35 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
36 paste0(x, '-o REPORT_DIR ')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
37 })
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
38 fastqc_command_reads_1 = paste0(fastqc_command, 'READS_1 > /dev/null 2>&1')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
39 system(fastqc_command_reads_1, intern = TRUE)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
40
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
41 # Original html report
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
42 reads_1_base = tail(strsplit('READS_1', '/')[[1]], 1)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
43 original_html = tags$a(href=paste0(reads_1_base, '_fastqc.html'), paste0('HTML report: ', opt$name_1))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
44
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
45 unzip(paste0('REPORT_DIR/', reads_1_base, '_fastqc.zip'), exdir = 'REPORT_DIR')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
46 reads_1_unzip = paste0('REPORT_DIR/', reads_1_base, '_fastqc/')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
47 # fastqc_data.txt
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
48 file.copy(paste0(reads_1_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_1_fastqc_data.txt')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
49 fastqc_data = tags$a(href='reads_1_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_1))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
50 # summary.txt
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
51 file.copy(paste0(reads_1_unzip, 'summary.txt'), 'REPORT_DIR/reads_1_summary.txt')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
52 summary_data = tags$a(href='reads_1_summary.txt', paste0('summary.txt: ', opt$name_1))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
53
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
54 tags$ul(
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
55 tags$li(original_html),
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
56 tags$li(fastqc_data),
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
57 tags$li(summary_data)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
58 )
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
59 }
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
60 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
61
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
62
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
63 ## Evaluation of reads after trimming
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
64
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
65 ```{r}
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
66 if ('READS_2' == 'None') {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
67 stop("No post-trimming reads provided!")  # this chunk checks READS_2, the after-trimming input
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
68 } else {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
69 ## run fastqc evaluation
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
70 fastqc_command = paste0('fastqc ') %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
71 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
72 ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
73 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
74 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
75 ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
76 }) %>%
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
77 (function(x) {
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
78 paste0(x, '-o REPORT_DIR ')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
79 })
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
80 fastqc_command_reads_2 = paste0(fastqc_command, 'READS_2 > /dev/null 2>&1')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
81 system(fastqc_command_reads_2, intern = TRUE)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
82
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
83 # Original html report
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
84 reads_2_base = tail(strsplit('READS_2', '/')[[1]], 1)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
85 original_html = tags$a(href=paste0(reads_2_base, '_fastqc.html'), paste0('HTML report: ', opt$name_2))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
86
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
87 unzip(paste0('REPORT_DIR/', reads_2_base, '_fastqc.zip'), exdir = 'REPORT_DIR')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
88 reads_2_unzip = paste0('REPORT_DIR/', reads_2_base, '_fastqc/')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
89 # fastqc_data.txt
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
90 file.copy(paste0(reads_2_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_2_fastqc_data.txt')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
91 fastqc_data = tags$a(href='reads_2_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_2))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
92 # summary.txt
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
93 file.copy(paste0(reads_2_unzip, 'summary.txt'), 'REPORT_DIR/reads_2_summary.txt')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
94 summary_data = tags$a(href='reads_2_summary.txt', paste0('summary.txt: ', opt$name_2))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
95
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
96 tags$ul(
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
97 tags$li(original_html),
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
98 tags$li(fastqc_data),
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
99 tags$li(summary_data)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
100 )
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
101 }
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
102 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
103
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
104
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
105
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
106 # FastQC output visualization
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
107
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
108 ## Overview
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
109
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
110 ```{r}
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
111 reads_1_summary = read.csv('REPORT_DIR/reads_1_summary.txt', header = FALSE, sep = '\t')[, 2:1]
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
112 reads_2_summary = read.csv('REPORT_DIR/reads_2_summary.txt', header = FALSE, sep = '\t')[, 1]
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
113 combined_summary = cbind(reads_1_summary, reads_2_summary)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
114 names(combined_summary) = c('MODULE', paste0(opt$name_1, '(before)'), paste0(opt$name_2, '(after)'))
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
115 combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
116 combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
117 knitr::kable(combined_summary)
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
118 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
119
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
120 ## Visualization by data module {.tabset}
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
121
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
122 * Define a function to extract outputs for each module from fastqc output
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
123
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
124 ```{r 'function definition'}
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
125 extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
126 f = readLines(fastqc_data)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
127 start_line = grep(module_name, f)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
128 end_module_lines = grep('END_MODULE', f)
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
129 end_line = end_module_lines[which(end_module_lines > start_line)[1]]
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
130 module_data = f[(start_line+1):(end_line-1)]
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
131 writeLines(module_data, 'temp.txt')
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
132 read.csv('temp.txt', sep = '\t', header = header, comment.char = comment.char)
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
133 }
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
134 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
135
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
136 ### Per base sequence quality
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
137
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
138 ```{r 'per base sequence quality', fig.width=10}
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
139 ## reads 1
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
140 pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
141 pbsq_1$id = 1:length(pbsq_1$X.Base)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
142
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
143 melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable == 'Mean')
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
144 melt_pbsq_1$trim = 'before'
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
145
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
146
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
147 ## reads 2
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
148 pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality')
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
149 pbsq_2$id = 1:length(pbsq_2$X.Base)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
150
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
151 melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable == 'Mean')
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
152 melt_pbsq_2$trim = 'after'
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
153
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
154 comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
155 comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
156
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
157 p = ggplot(data = comb_pbsq) +
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
158 geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) +
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
159 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
160 facet_grid(. ~ trim) +
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
161 ylim(0, max(comb_pbsq$value) + 5) +
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
162 theme(axis.text.x = element_text(angle=45))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
163 ggplotly(p)
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
164
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
165 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
166
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
167 ### Per tile sequence quality
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
168
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
169 ```{r 'per tile sequence quality', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
170 ## check whether the 'per tile sequence quality' module exists or not
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
171 check_ptsq = grep('Per tile sequence quality', readLines('REPORT_DIR/reads_1_fastqc_data.txt'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
172 if (length(check_ptsq) > 0) {
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
173 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
174 ptsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per tile sequence quality')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
175 ptsq_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
176
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
177 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
178 ptsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per tile sequence quality')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
179 ptsq_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
180
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
181 comb_ptsq = rbind(ptsq_1, ptsq_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
182 comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
183 comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
184
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
185 # convert integers to characters
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
186 comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
187
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
188 p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
189 geom_raster() +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
190 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
191 xlab('Position in read (bp)') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
192 ylab('') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
193 theme(axis.text.x = element_text(angle=45))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
194 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
195 } else {
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
196 print('No "per tile sequence quality" data')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
197 }
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
198
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
199
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
200 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
201
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
202 ### Per sequence quality scores
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
203
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
204 ```{r 'Per sequence quality score', fig.width=10}
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
205 ## reads 1
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
206 psqs_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence quality scores')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
207 psqs_1$trim = 'before'
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
208
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
209 ## reads 2
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
210 psqs_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence quality scores')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
211 psqs_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
212
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
213 comb_psqs = rbind(psqs_1, psqs_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
214 comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
215
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
216 p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
217 geom_line(color = 'red') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
218 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
219 xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
220 xlab('Mean Sequence Quality (Phred Score)') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
221 ylab('')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
222 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
223 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
224
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
225
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
226 ### Per base sequence content
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
227
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
228 ```{r 'Per base sequence content', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
229 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
230 pbsc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
231 pbsc_1$id = 1:length(pbsc_1$X.Base)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
232
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
233 melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
234 melt_pbsc_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
235
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
236
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
237 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
238 pbsc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
239 pbsc_2$id = 1:length(pbsc_2$X.Base)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
240
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
241 melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
242 melt_pbsc_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
243
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
244 comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
245 comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
246
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
247 p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
248 geom_line() +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
249 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
250 xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
251 ylim(0, 100) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
252 xlab('Position in read (bp)') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
253 ylab('')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
254 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
255 ```
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
256
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
257 ### Per sequence GC content
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
258
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
259 ```{r 'Per sequence GC content', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
260 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
261 psGCc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per sequence GC content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
262 psGCc_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
263
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
264 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
265 psGCc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per sequence GC content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
266 psGCc_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
267
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
268 comb_psGCc = rbind(psGCc_1, psGCc_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
269 comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
270
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
271 p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
272 geom_line(color = 'red') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
273 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
274 xlab('Mean GC Content (%)') + # x is X.GC.Content, not a Phred quality score
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
275 ylab('')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
276 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
277 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
278
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
279
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
280 ### Per base N content
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
281
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
282 ```{r 'Per base N content', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
283 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
284 pbNc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base N content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
285 pbNc_1$id = seq_along(pbNc_1$X.Base) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
286 pbNc_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
287
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
288 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
289 pbNc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base N content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
290 pbNc_2$id = seq_along(pbNc_2$X.Base) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
291 pbNc_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
292
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
293 comb_pbNc = rbind(pbNc_1, pbNc_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
294 comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
295
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
296 p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
297 geom_line(color = 'red') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
298 scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) + # NOTE(review): tick labels come from reads 2 only but apply to both facets; assumes reads 1 has the same X.Base rows -- confirm
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
299 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
300 ylim(0, 1) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
301 xlab('Position in read (bp)') + # x is the per-base row index labelled with X.Base; N.Count is the y variable
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
302 ylab('') +
16
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
303 theme(axis.text.x = element_text(angle=45))
1710b0e874f1 fix file name issue
mingchen0919
parents: 15
diff changeset
304 ggplotly(p)
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
305 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
306
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
307
17
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
308 ### Sequence Length Distribution
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
309
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
310 ```{r 'Sequence Length Distribution', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
311 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
312 sld_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Length Distribution')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
313 sld_1$id = seq_along(sld_1$X.Length) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
314 sld_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
315
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
316 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
317 sld_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Length Distribution')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
318 sld_2$id = seq_along(sld_2$X.Length) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
319 sld_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
320
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
321 comb_sld = rbind(sld_1, sld_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
322 comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
323
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
324 p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
325 geom_line(color = 'red') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
326 scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) + # NOTE(review): ids are per-file row indices and labels come from reads 2 only; the 'before' facet is mislabelled unless both files list the same X.Length rows -- confirm
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
327 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
328 xlab('Sequence Length (bp)') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
329 ylab('') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
330 theme(axis.text.x = element_text(angle=45))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
331 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
332 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
333
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
334 ### Sequence Duplication Levels
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
335
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
336 ```{r 'Sequence Duplication Levels', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
337 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
338 sdl_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
339 names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
340 sdl_1$id = seq_along(sdl_1$Duplication_Level) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
341
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
342 melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
343 melt_sdl_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
344
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
345
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
346 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
347 sdl_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
348 names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
349 sdl_2$id = seq_along(sdl_2$Duplication_Level) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
350
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
351 melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
352 melt_sdl_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
353
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
354 comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
355 comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
356
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
357 p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
358 geom_line() +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
359 scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) + # NOTE(review): tick labels come from reads 2 only but apply to both facets; assumes both files list the same Duplication_Level rows -- confirm
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
360 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
361 xlab('Sequence Duplication Level') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
362 ylab('') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
363 theme(axis.text.x = element_text(angle=45))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
364 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
365 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
366
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
367 ### Adapter Content
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
368
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
369 ```{r 'Adapter Content', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
370 ## reads 1
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
371 ac_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Adapter Content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
372 ac_1$id = seq_along(ac_1$X.Position) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
373
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
374 melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
375 melt_ac_1$trim = 'before'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
376
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
377 ## reads 2
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
378 ac_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Adapter Content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
379 ac_2$id = seq_along(ac_2$X.Position) # row index used as numeric x; seq_along() is empty for a zero-row module, whereas 1:length() would yield c(1, 0)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
380
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
381 melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
382 melt_ac_2$trim = 'after'
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
383
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
384 comb_ac = rbind(melt_ac_1, melt_ac_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
385 comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
386
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
387 p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
388 geom_line() +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
389 facet_grid(. ~ trim) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
390 xlim(min(comb_ac$id), max(comb_ac$id)) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
391 ylim(0, 1) +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
392 xlab('Position in read (bp)') +
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
393 ylab('')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
394 ggplotly(p)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
395 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
396
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
397 ### Kmer Content {.tabset}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
398
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
399 #### Before
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
400
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
401 ```{r 'Kmer Content (before)', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
402 kc_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Kmer Content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
403 knitr::kable(kc_1)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
404 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
405
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
406 #### After
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
407 ```{r 'Kmer Content (after)', fig.width=10}
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
408 kc_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Kmer Content')
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
409 knitr::kable(kc_2)
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
410 ```
ac5c618e4d97 compare evaluation before and after trimming
mingchen0919
parents: 16
diff changeset
411
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
412
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
413 # Session Info
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
414
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
415 ```{r 'session info'}
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
416 sessionInfo()
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
417 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
418