comparison 02_per_base_sequence_quality.Rmd @ 2:c64267b9f754 draft default tip

planemo upload commit 841d8b22bf9f1aaed6bfe8344b60617f45b275b2-dirty
author mingchen0919
date Sun, 30 Dec 2018 12:48:14 -0500
parents b7c115edd970
children
comparison
equal deleted inserted replaced
1:92e3de11b9f8 2:c64267b9f754
1 --- 1 ---
2 output: html_document 2 output: html_document
3 --- 3 ---
4 4
5 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} 5 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
6 knitr::opts_chunk$set( 6 knitr::opts_knit$set(progress = FALSE)
7 echo = as.logical(opt$X_e), 7 knitr::opts_chunk$set(error = TRUE, echo = FALSE)
8 error = TRUE,
9 eval = TRUE
10 )
11 ``` 8 ```
12 9
13 10 ### Per base sequence quality
14 ```{r 'function definition', echo=FALSE}
15 # Define a function to extract outputs for each module from fastqc output
16 extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
17 f = readLines(fastqc_data)
18 start_line = grep(module_name, f)
19 end_module_lines = grep('END_MODULE', f)
20 end_line = end_module_lines[which(end_module_lines > start_line)[1]]
21 module_data = f[(start_line+1):(end_line-1)]
22 writeLines(module_data, '/tmp/temp.txt')
23 read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char)
24 }
25 ```
26
27 # Per base sequence quality
28 11
29 ```{r 'per base sequence quality', fig.width=10} 12 ```{r 'per base sequence quality', fig.width=10}
30 ## reads 1 13 ## reads 1
31 pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality') 14 pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality')
32 pbsq_1$id = 1:length(pbsq_1$X.Base) 15 pbsq_1$id = 1:length(pbsq_1$X.Base)
48 ymin = X10th.Percentile, 31 ymin = X10th.Percentile,
49 ymax = X90th.Percentile, 32 ymax = X90th.Percentile,
50 fill = "yellow"), 33 fill = "yellow"),
51 stat = 'identity') + 34 stat = 'identity') +
52 geom_line(mapping = aes(x = id, y = Mean, color = "red")) + 35 geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
53 scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + 36 scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
37 scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) +
54 scale_fill_identity() + 38 scale_fill_identity() +
55 scale_color_identity() + 39 scale_color_identity() +
56 ylim(0, max(comb_pbsq$Upper.Quartile) + 5) +
57 xlab('Position in read (bp)') +
58 facet_grid(. ~ trim) + 40 facet_grid(. ~ trim) +
59 theme(axis.text.x = element_text(angle=45)) 41 theme(axis.text.x = element_text(size = 5),
42 panel.background = element_rect(fill = NA),
43 panel.grid.major.y = element_line(color = 'blue', size = 0.1))
60 p 44 p
45 ```
61 46
62 ```