Mercurial > repos > mingchen0919 > rmarkdown_fastqc_report
diff fastqc_report.Rmd @ 16:1710b0e874f1 draft
fix file name issue
author | mingchen0919 |
---|---|
date | Sat, 21 Oct 2017 09:25:49 -0400 |
parents | d1d20f341632 |
children | ac5c618e4d97 |
line wrap: on
line diff
--- a/fastqc_report.Rmd Thu Oct 19 00:11:14 2017 -0400 +++ b/fastqc_report.Rmd Sat Oct 21 09:25:49 2017 -0400 @@ -10,72 +10,112 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO + echo = ECHO, + error = TRUE ) ``` -# Fastqc Analysis - -* Copy fastq files to job working directory +# Fastqc Evaluation -```{bash 'copy files'} -for f in $(echo READS | sed "s/,/ /g") -do - cp $f ./ -done -``` - -* Run fastqc +## Evaluation of reads before trimming -```{bash 'run fastqc'} -for r in $(ls *.dat) -do - fastqc -o REPORT_DIR $r > /dev/null 2>&1 -done -``` - -## Evaluation results - -```{r 'html report links'} -html_file = list.files('REPORT_DIR', pattern = '.*html') -tags$ul(tags$a(href=html_file, paste0('HTML report', opt$name))) +```{r} +if ('READS_1' == 'None') { + stop("No pre-trimming reads provided!") +} else { + ## run fastqc evaluation + fastqc_command = paste0('fastqc ') %>% + (function(x) { + ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x) + }) %>% + (function(x) { + ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x) + }) %>% + (function(x) { + paste0(x, '-o REPORT_DIR ') + }) + fastqc_command_reads_1 = paste0(fastqc_command, 'READS_1 > /dev/null 2>&1') + system(fastqc_command_reads_1, intern = TRUE) + + # Original html report + reads_1_base = tail(strsplit('READS_1', '/')[[1]], 1) + original_html = tags$a(href=paste0(reads_1_base, '_fastqc.html'), paste0('HTML report: ', opt$name_1)) + + unzip(paste0('REPORT_DIR/', reads_1_base, '_fastqc.zip'), exdir = 'REPORT_DIR') + reads_1_unzip = paste0('REPORT_DIR/', reads_1_base, '_fastqc/') + # fastqc_data.txt + file.copy(paste0(reads_1_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_1_fastqc_data.txt') + fastqc_data = tags$a(href='reads_1_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_1)) + # summary.txt + file.copy(paste0(reads_1_unzip, 'summary.txt'), 'REPORT_DIR/reads_1_summary.txt') + summary_data = tags$a(href='reads_1_summary.txt', paste0('summary.txt: ', opt$name_1)) + + tags$ul( + tags$li(original_html), + tags$li(fastqc_data), + tags$li(summary_data) + ) +} ``` -```{r 'extract fastqc_data.txt and summary.txt'} -# list all zip files -zip_file = list.files(path = 'REPORT_DIR', pattern = '.zip') -unzip(paste0('REPORT_DIR/', zip_file), exdir = 'REPORT_DIR') +## Evaluation of reads after trimming -unzip_directory = paste0(tail(strsplit(opt$reads, '/')[[1]], 1), '_fastqc/') -fastqc_data_txt_path = paste0('REPORT_DIR/', unzip_directory, 'fastqc_data.txt') -summary_txt_path = paste0('REPORT_DIR/', unzip_directory, 'summary.txt') +```{r} +if ('READS_2' == 'None') { + stop("No pre-trimming reads provided!") +} else { + ## run fastqc evaluation + fastqc_command = paste0('fastqc ') %>% + (function(x) { + ifelse('CONTAMINANTS' != 'None', paste0(x, '-c CONTAMINANTS '), x) + }) %>% + (function(x) { + ifelse('LIMITS' != 'None', paste0(x, '-l LIMITS '), x) + }) %>% + (function(x) { + paste0(x, '-o REPORT_DIR ') + }) + fastqc_command_reads_2 = paste0(fastqc_command, 'READS_2 > /dev/null 2>&1') + system(fastqc_command_reads_2, intern = TRUE) + + # Original html report + reads_2_base = tail(strsplit('READS_2', '/')[[1]], 1) + original_html = tags$a(href=paste0(reads_2_base, '_fastqc.html'), paste0('HTML report: ', opt$name_2)) + + unzip(paste0('REPORT_DIR/', reads_2_base, '_fastqc.zip'), exdir = 'REPORT_DIR') + reads_2_unzip = paste0('REPORT_DIR/', reads_2_base, '_fastqc/') + # fastqc_data.txt + file.copy(paste0(reads_2_unzip, 'fastqc_data.txt'), 'REPORT_DIR/reads_2_fastqc_data.txt') + fastqc_data = tags$a(href='reads_2_fastqc_data.txt', paste0('fastqc_data.txt: ', opt$name_2)) + # summary.txt + file.copy(paste0(reads_2_unzip, 'summary.txt'), 'REPORT_DIR/reads_2_summary.txt') + summary_data = tags$a(href='reads_2_summary.txt', paste0('summary.txt: ', opt$name_2)) + + tags$ul( + tags$li(original_html), + tags$li(fastqc_data), + tags$li(summary_data) + ) +} ``` -```{r 'summary.txt'} -tags$ul(tags$a(href=paste0(unzip_directory, 'summary.txt'), 'summary.txt')) -``` - - -```{r 'fastqc_data.txt'} -tags$ul(tags$a(href=paste0(unzip_directory, 'fastqc_data.txt'), 'fastqc_data.txt')) -``` - # Fastqc output visualization ## Overview ```{r} -# read.table(fastqc_data_txt_path) -summary_txt = read.csv(summary_txt_path, header = FALSE, sep = '\t')[, 2:1] -names(summary_txt) = c('MODULE', 'PASS/FAIL') -knitr::kable(summary_txt) +reads_1_summary = read.csv('REPORT_DIR/reads_1_summary.txt', header = FALSE, sep = '\t')[, 2:1] +reads_2_summary = read.csv('REPORT_DIR/reads_1_summary.txt', header = FALSE, sep = '\t')[, 1] +combined_summary = cbind(reads_1_summary, reads_2_summary) +names(combined_summary) = c('MODULE', paste0(opt$name_1, '(before)'), paste0(opt$name_2, '(after)')) +knitr::kable(combined_summary) ``` -## Summary by module {.tabset} +## Visualization by module {.tabset} * Define a function to extract outputs for each module from fastqc output @@ -93,16 +133,53 @@ ### Per base sequence quality -```{r} -pbsq = extract_data_module(fastqc_data_txt_path, 'Per base sequence quality') -knitr::kable(pbsq) +```{r 'per base sequence quality', fig.width=10} +## reads 1 +pbsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per base sequence quality') +pbsq_1$id = 1:length(pbsq_1$X.Base) + +melt_pbsq_1 = filter(melt(pbsq_1, id=c('X.Base', 'id')), variable != 'X90th.Percentile' & variable != 'X10th.Percentile') +melt_pbsq_1$trim = 'before' + + +## reads 2 +pbsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per base sequence quality') +pbsq_2$id = 1:length(pbsq_2$X.Base) + +melt_pbsq_2 = filter(melt(pbsq_2, id=c('X.Base', 'id')), variable != 'X90th.Percentile' & variable != 'X10th.Percentile') +melt_pbsq_2$trim = 'after' + +comb_pbsq = rbind(melt_pbsq_1, melt_pbsq_2) +comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) +p = ggplot(data = comb_pbsq) + + geom_line(mapping = aes(x = id, y = value, group = variable, color = variable)) + + scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + + facet_grid(. ~ trim) + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) + ``` ### Per tile sequence quality -```{r} -ptsq = extract_data_module(fastqc_data_txt_path, 'Per tile sequence quality') -knitr::kable(ptsq) +```{r 'per tile sequence quality'} +## reads 1 +ptsq_1 = extract_data_module('REPORT_DIR/reads_1_fastqc_data.txt', 'Per tile sequence quality') +ptsq_1$trim = 'before' + +## reads 2 +ptsq_2 = extract_data_module('REPORT_DIR/reads_2_fastqc_data.txt', 'Per tile sequence quality') +ptsq_2$trim = 'after' + +comb_ptsq = rbind(ptsq_1, ptsq_2) +comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim) +comb_pbsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base) + +p = ggplot(data = comb_ptsq, aes(x = Base, y = X.Tile, fill = Mean)) + + geom_raster() + + facet_grid(. ~ trim) + + theme(axis.text.x = element_text(angle=45)) +ggplotly(p) ```