annotate 02_per_base_sequence_quality.Rmd @ 0:b7c115edd970 draft

planemo upload
author mingchen0919
date Tue, 27 Feb 2018 10:37:12 -0500
parents
children c64267b9f754
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
1 ---
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
2 output: html_document
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
3 ---
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
4
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
5 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
# Report-wide chunk defaults: whether code is echoed is taken from the X_e
# option, errors are kept in the output (error = TRUE) and every chunk is
# evaluated.
knitr::opts_chunk$set(echo = as.logical(opt$X_e), error = TRUE, eval = TRUE)
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
11 ```
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
12
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
13
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
14 ```{r 'function definition', echo=FALSE}
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
# Extract the data table of a single module from a FastQC data file.
#
# Arguments:
#   fastqc_data   Path (or anything readLines() accepts) of the FastQC data
#                 file to read.
#   module_name   Pattern handed to grep() to locate the module's header line.
#                 If several lines match, the first one is used.
#   header        Forwarded to read.csv(): does the first module line hold the
#                 column names?
#   comment.char  Forwarded to read.csv(). The default "" keeps lines that
#                 start with '#' instead of discarding them as comments.
#
# Returns:
#   A data.frame parsed from the tab-separated lines found strictly between
#   the module header line and the next line containing 'END_MODULE'. A module
#   with no lines in between yields an empty data.frame.
#
# Errors:
#   Stops with an explicit message when no line matches `module_name`, or when
#   no 'END_MODULE' line follows the match.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f = readLines(fastqc_data)

  # Take the first match only so every later comparison is scalar.
  start_line = grep(module_name, f)[1]
  if (is.na(start_line)) {
    stop("no line matching '", module_name, "' found in FastQC data", call. = FALSE)
  }

  end_module_lines = grep('END_MODULE', f)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE line found after '", module_name, "'", call. = FALSE)
  }

  # seq_len() instead of (start + 1):(end - 1): the colon form counts
  # backwards when the module is empty and would pick up the wrong lines.
  n_lines = end_line - start_line - 1
  if (n_lines < 1) {
    return(data.frame())
  }
  module_data = f[start_line + seq_len(n_lines)]

  # Parse straight from memory; no shared scratch file on disk is needed.
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
25 ```
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
26
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
27 # Per base sequence quality
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
28
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
29 ```{r 'per base sequence quality', fig.width=10}
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
## reads 1: table from <X_d>/read_1_fastqc, labelled 'before'
## NOTE(review): 'before'/'after' presumably mean pre-/post-trimming; confirm
## against the tool wrapper that creates the read_1/read_2 directories.
pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality')
# Row index used as the numeric x position; the X.Base values become the
# tick labels further down.
pbsq_1$id = seq_len(nrow(pbsq_1))
pbsq_1$trim = 'before'

## reads 2: table from <X_d>/read_2_fastqc, labelled 'after'
pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality')
pbsq_2$id = seq_len(nrow(pbsq_2))
pbsq_2$trim = 'after'

comb_pbsq = rbind(pbsq_1, pbsq_2)
# Fix the level order so the 'before' panel is drawn left of 'after'.
comb_pbsq$trim = factor(comb_pbsq$trim, levels = c('before', 'after'))

# Both panels share one x scale: label it from whichever table has more rows
# so no position is left without a tick (ties keep using pbsq_2).
axis_src = if (nrow(pbsq_1) > nrow(pbsq_2)) pbsq_1 else pbsq_2

# The whiskers extend to the 90th percentile, so the upper y limit has to
# cover it; values outside ylim() would otherwise be dropped from the plot.
y_top = max(comb_pbsq$Upper.Quartile, comb_pbsq$X90th.Percentile, na.rm = TRUE) + 5

# Box statistics are precomputed in the table, hence stat = 'identity'.
# Constant colours are set outside aes(), so no identity scales are needed.
p = ggplot(data = comb_pbsq) +
  geom_boxplot(mapping = aes(x = id,
                             lower = Lower.Quartile,
                             upper = Upper.Quartile,
                             middle = Median,
                             ymin = X10th.Percentile,
                             ymax = X90th.Percentile),
               fill = "yellow",
               stat = 'identity') +
  geom_line(mapping = aes(x = id, y = Mean), color = "red") +
  scale_x_continuous(breaks = axis_src$id, labels = axis_src$X.Base) +
  ylim(0, y_top) +
  xlab('Position in read (bp)') +
  facet_grid(. ~ trim) +
  theme(axis.text.x = element_text(angle=45))
p
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
61
b7c115edd970 planemo upload
mingchen0919
parents:
diff changeset
62 ```