Mercurial > repos > mingchen0919 > aurora_fastqc_site
comparison 02_per_base_sequence_quality.Rmd @ 0:b7c115edd970 draft
planemo upload
author | mingchen0919 |
---|---|
date | Tue, 27 Feb 2018 10:37:12 -0500 |
parents | |
children | c64267b9f754 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b7c115edd970 |
---|---|
1 --- | |
2 output: html_document | |
3 --- | |
4 | |
5 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | |
# Global knitr chunk options for this report.
# NOTE(review): `opt` is not defined in this file; presumably it is the parsed
# command-line option list supplied by the wrapper script -- confirm.
knitr::opts_chunk$set(
  # as.logical() returns NA for text it cannot parse and logical(0) for NULL;
  # isTRUE() collapses both to FALSE so `echo` is always a single TRUE/FALSE.
  echo = isTRUE(as.logical(opt$X_e)),
  # Keep rendering the report when a chunk raises an error.
  error = TRUE,
  eval = TRUE
)
11 ``` | |
12 | |
13 | |
14 ```{r 'function definition', echo=FALSE} | |
# Extract the data table of one module from a FastQC `fastqc_data.txt` file.
#
# The module body is every line strictly between the first line matching
# `module_name` and the next line containing 'END_MODULE'.
#
# Arguments:
#   fastqc_data   path of the file to read.
#   module_name   pattern identifying the module's first line (used by grep()).
#   header        forwarded to read.csv().
#   comment.char  forwarded to read.csv(); the default "" keeps lines that
#                 start with '#', so such a line can serve as the header row.
#
# Returns a data.frame parsed from the tab-separated module body.
# Stops with an error when the module, its END_MODULE marker, or its body
# cannot be found.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f = readLines(fastqc_data)

  start_line = grep(module_name, f)
  if (length(start_line) == 0) {
    stop("Module '", module_name, "' not found in ", fastqc_data, call. = FALSE)
  }
  # Several lines may match the pattern; the module starts at the first one.
  start_line = start_line[1]

  end_module_lines = grep('END_MODULE', f)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("No END_MODULE marker after module '", module_name, "' in ",
         fastqc_data, call. = FALSE)
  }
  # Guard against (start_line + 1):(end_line - 1) becoming a descending range.
  if (end_line - start_line < 2) {
    stop("Module '", module_name, "' in ", fastqc_data, " has no data lines",
         call. = FALSE)
  }

  module_data = f[(start_line + 1):(end_line - 1)]
  # Parse in memory rather than through a fixed, shared temporary file.
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
25 ``` | |
26 | |
27 # Per base sequence quality | |
28 | |
29 ```{r 'per base sequence quality', fig.width=10} | |
# Plot the FastQC "Per base sequence quality" module of reads 1 and reads 2
# side by side: one box per position (10th/90th percentile whiskers, quartile
# box, median) plus the mean as a line.
# NOTE(review): `opt$X_d` is defined outside this file; from the paths built
# below it is the directory holding read_1_fastqc/ and read_2_fastqc/ -- confirm.

# Load one read set's module table and tag it with a row index and facet label.
load_pbsq = function(fastqc_dir, trim_label) {
  tbl = extract_data_module(
    file.path(opt$X_d, fastqc_dir, 'fastqc_data.txt'),
    'Per base sequence quality'
  )
  # seq_len() stays empty for an empty table, unlike 1:length(...).
  tbl$id = seq_len(nrow(tbl))
  tbl$trim = trim_label
  tbl
}

## reads 1
pbsq_1 = load_pbsq('read_1_fastqc', 'before')

## reads 2
pbsq_2 = load_pbsq('read_2_fastqc', 'after')

comb_pbsq = rbind(pbsq_1, pbsq_2)
comb_pbsq$trim = factor(comb_pbsq$trim, levels = c('before', 'after'))

# Take axis breaks/labels from the table with more positions so no panel ends
# up with unlabeled positions; ties keep using reads 2.
axis_ref = if (nrow(pbsq_1) > nrow(pbsq_2)) pbsq_1 else pbsq_2

# Whiskers extend to the 90th percentile, so the y limit must cover it;
# values outside ylim() are dropped from the plot.
y_top = max(comb_pbsq$Upper.Quartile, comb_pbsq$X90th.Percentile, na.rm = TRUE) + 5

p = ggplot(data = comb_pbsq) +
  geom_boxplot(
    # group = id: with precomputed statistics each row is its own box; without
    # an explicit group a continuous x puts all rows of a panel in one group.
    mapping = aes(x = id,
                  group = id,
                  lower = Lower.Quartile,
                  upper = Upper.Quartile,
                  middle = Median,
                  ymin = X10th.Percentile,
                  ymax = X90th.Percentile),
    stat = 'identity',
    fill = 'yellow'
  ) +
  geom_line(mapping = aes(x = id, y = Mean), color = 'red') +
  scale_x_continuous(breaks = axis_ref$id, labels = as.character(axis_ref$X.Base)) +
  ylim(0, y_top) +
  xlab('Position in read (bp)') +
  facet_grid(. ~ trim) +
  theme(axis.text.x = element_text(angle = 45))
p
61 | |
62 ``` |