Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site

---
title: "Per Base Sequence Content"
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Set the default `echo` option for every chunk in this document.
# NOTE(review): `ECHO` is not defined anywhere in this file; presumably it is
# a placeholder replaced with TRUE/FALSE before rendering -- confirm.
knitr::opts_chunk$set(echo = ECHO)
```

## Per Base Sequence Content

```{r}
# Read one tab-separated per-base sequence content table and expand base
# ranges into individual rows.
#
# The `X.Base` column holds either a single position ("7") or a range
# ("10-14"). Every input row is emitted once with `Base` set to the start of
# the range; rows that carry a range are emitted a second time with `Base`
# set to the end of the range, so a line plot spans the whole interval.
#
# file_path: path of the table to read (used exactly as given).
# sample_id: identifier stored in the `sample_id` column of every output row.
# Returns a data.frame with the original columns plus `Base` and `sample_id`,
# ordered by `Base`.
read_pbsc_table = function(file_path, sample_id) {
  pbsc = read.csv(file_path,
                  sep = '\t', header = TRUE, stringsAsFactors = FALSE)

  # Split once; column 2 is "" for single positions, which as.numeric() turns
  # into NA. as.character() guards against X.Base being parsed as integer
  # when a file contains no ranges at all.
  base_bounds = str_split_fixed(as.character(pbsc$X.Base), '-', 2)

  range_starts = mutate(pbsc, Base = as.numeric(base_bounds[, 1]))
  # Keep only rows that really have a range end.
  range_ends = mutate(pbsc, Base = as.numeric(base_bounds[, 2])) %>%
    filter(!is.na(Base))

  res = rbind(range_starts, range_ends) %>% arrange(Base)
  res$sample_id = rep(sample_id, nrow(res))
  res
}

# 'PBSC_file_paths.txt' is a CSV with a header row; column 1 is the sample id
# and column 2 is the path of that sample's table.
PBSC_file_paths = read.csv('PBSC_file_paths.txt',
                           header = TRUE, stringsAsFactors = FALSE)

# seq_len() yields an empty sequence when there are no rows, unlike 1:nrow(),
# which would iterate over c(1, 0) and try to read an NA path.
pbsc_tables = lapply(seq_len(nrow(PBSC_file_paths)), function(i) {
  read_pbsc_table(PBSC_file_paths[i, 2], PBSC_file_paths[i, 1])
})

# Bind all samples in one call instead of growing PBSC_df inside a loop.
# The leading empty data.frame keeps the result a data.frame even when no
# files are listed.
PBSC_df = do.call(rbind, c(list(data.frame()), pbsc_tables))
```


```{r out.width="100%"}
# Long format: one row per (Base, sample_id, melted column); the raw range
# label X.Base is dropped because Base already carries the position.
PBSC_df_2 = PBSC_df %>%
  select(-X.Base) %>%
  melt(id.vars = c('Base', 'sample_id'), value.name = 'base_percentage')

# One coloured line per melted column, one facet per sample.
line_aes = aes(x = Base,
               y = base_percentage,
               group = variable,
               color = variable)
p = ggplot(data = PBSC_df_2, mapping = line_aes)
p = p + geom_line()
p = p + facet_wrap(~ sample_id)

# Render the static ggplot as an interactive plotly widget.
ggplotly(p)
```
author	mingchen0919
date	Tue, 08 Aug 2017 14:33:43 -0400
parents	d732d4526c6d
children