Mercurial > repos > mingchen0919 > aurora_fastqc_site
diff 02_per_base_sequence_quality.Rmd @ 0:b7c115edd970 draft
planemo upload
author | mingchen0919 |
---|---|
date | Tue, 27 Feb 2018 10:37:12 -0500 |
parents | |
children | c64267b9f754 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/02_per_base_sequence_quality.Rmd Tue Feb 27 10:37:12 2018 -0500 @@ -0,0 +1,62 @@ +--- +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE, + eval = TRUE +) +``` + + +```{r 'function definition', echo=FALSE} +# Define a function to extract outputs for each module from fastqc output +extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") { + f = readLines(fastqc_data) + start_line = grep(module_name, f) + end_module_lines = grep('END_MODULE', f) + end_line = end_module_lines[which(end_module_lines > start_line)[1]] + module_data = f[(start_line+1):(end_line-1)] + writeLines(module_data, '/tmp/temp.txt') + read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char) +} +``` + +# Per base sequence quality + +```{r 'per base sequence quality', fig.width=10} +## reads 1 +pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality') +pbsq_1$id = 1:length(pbsq_1$X.Base) +pbsq_1$trim = 'before' + +## reads 2 +pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality') +pbsq_2$id = 1:length(pbsq_2$X.Base) +pbsq_2$trim = 'after' + +comb_pbsq = rbind(pbsq_1, pbsq_2) +comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) + +p = ggplot(data = comb_pbsq) + + geom_boxplot(mapping = aes(x = id, + lower = Lower.Quartile, + upper = Upper.Quartile, + middle = Median, + ymin = X10th.Percentile, + ymax = X90th.Percentile, + fill = "yellow"), + stat = 'identity') + + geom_line(mapping = aes(x = id, y = Mean, color = "red")) + + scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + + scale_fill_identity() + + scale_color_identity() + + ylim(0, max(comb_pbsq$Upper.Quartile) + 5) + + xlab('Position in read (bp)') + + facet_grid(. ~ trim) + + theme(axis.text.x = element_text(angle=45)) +p + +```