view 03_per_tile_sequence_quality.Rmd @ 12:68ea2ebbf866 draft

add boxplot for per base sequence quality
author mingchen0919
date Thu, 09 Nov 2017 09:23:43 -0500 (2017-11-09)
parents 507eec497730
children
line wrap: on
line source
---
title: 'Per Tile Sequence Quality'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Report-wide chunk defaults: with error = TRUE a failing chunk shows its
# error in the rendered output instead of aborting the knit. The echo value
# is presumably a template placeholder filled in before rendering -- confirm.
knitr::opts_chunk$set(error = TRUE, echo = ECHO)
```

### Per tile sequence quality

```{r 'per tile sequence quality', fig.width=10}
## Per tile sequence quality: draw one heat map per read set (labelled
## 'before' and 'after'), or print a notice when the FastQC module is absent.
ptsq_module = 'Per tile sequence quality'
ptsq_file_1 = 'REPORT_DIR/reads_1_fastqc_data.txt'
ptsq_file_2 = 'REPORT_DIR/reads_2_fastqc_data.txt'

## Check that the module exists in BOTH data files. Previously only the
## reads 1 file was inspected, so a reads 2 file without the module reached
## the extraction step unguarded. fixed = TRUE matches the name literally.
check_ptsq = grep(ptsq_module, readLines(ptsq_file_1), fixed = TRUE)
check_ptsq_2 = grep(ptsq_module, readLines(ptsq_file_2), fixed = TRUE)

if (length(check_ptsq) > 0 && length(check_ptsq_2) > 0) {
  ## reads 1
  ptsq_1 = extract_data_module(ptsq_file_1, ptsq_module)
  ptsq_1$trim = 'before'

  ## reads 2
  ptsq_2 = extract_data_module(ptsq_file_2, ptsq_module)
  ptsq_2$trim = 'after'

  comb_ptsq = rbind(ptsq_1, ptsq_2)
  ## explicit levels so the 'before' facet is drawn left of 'after'
  comb_ptsq$trim = factor(comb_ptsq$trim, levels = c('before', 'after'))
  ## keep positions in order of first appearance rather than sorted order
  comb_ptsq$Base = factor(comb_ptsq$Base, levels = unique(comb_ptsq$Base))

  ## convert tile ids to characters so they are plotted as discrete rows
  ## (X.Tile is presumably the '#Tile' column after name mangling -- confirm)
  comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)

  p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
    geom_raster() +
    facet_grid(. ~ trim) +
    xlab('Position in read (bp)') +
    ylab('') +
    theme(axis.text.x = element_text(angle = 45))
  ## last expression of the branch: its value is what the chunk displays
  ggplotly(p)
} else {
  print('No "per tile sequence quality" data')
}