Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
diff 5_per_base_sequence_content.Rmd @ 7:d820be692d74 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastqc_site commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 12:36:13 -0400 |
parents | d732d4526c6d |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/5_per_base_sequence_content.Rmd Tue Aug 08 12:36:13 2017 -0400
@@ -0,0 +1,79 @@
+---
+title: "Per Base Sequence Content"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# ECHO is not defined in this file; presumably a template placeholder that is
+# replaced with TRUE/FALSE before knitting -- TODO confirm against the wrapper.
+knitr::opts_chunk$set(echo = ECHO)
+```
+
+## Per Base Sequence Content
+
+```{r}
+# Read one tab-separated per-base sequence content table for a single sample.
+#
+# X.Base is split on '-' into a start and an optional end position (e.g. "7"
+# or "10-14"). Every row is kept once at its start position; rows that have an
+# end position are repeated at that end position, so the same values appear at
+# both ends of a range.
+#
+# file_path: path of the table to read.
+# sample_id: label written to the sample_id column of every returned row.
+# Returns the table with added numeric Base and sample_id columns, sorted by
+# Base.
+read_pbsc_table <- function(file_path, sample_id) {
+  pbsc <- read.csv(file_path,
+                   sep = '\t', header = TRUE, stringsAsFactors = FALSE)
+  # read.csv may type X.Base as integer when no row contains '-', so coerce
+  # before splitting. The second piece is "" when there is no '-', and
+  # as.numeric("") is NA.
+  bounds <- str_split_fixed(as.character(pbsc$X.Base), '-', 2)
+  range_start <- as.numeric(bounds[, 1])
+  range_end <- as.numeric(bounds[, 2])
+  has_end <- !is.na(range_end)
+
+  starts <- pbsc
+  starts$Base <- range_start
+  ends <- pbsc[has_end, , drop = FALSE]
+  ends$Base <- range_end[has_end]
+
+  res <- arrange(rbind(starts, ends), Base)
+  # rep() keeps the assignment valid even when the table has no rows.
+  res$sample_id <- rep(sample_id, nrow(res))
+  res
+}
+
+# PBSC_file_paths.txt is a CSV with a header; column 1 is used as the sample id
+# and column 2 as the path of that sample's table.
+PBSC_file_paths <- read.csv('PBSC_file_paths.txt',
+                            header = TRUE, stringsAsFactors = FALSE)
+# 1:nrow() would iterate over c(1, 0) for an empty listing; stop with a clear
+# message instead.
+if (nrow(PBSC_file_paths) == 0) {
+  stop('PBSC_file_paths.txt lists no files', call. = FALSE)
+}
+pbsc_tables <- lapply(seq_len(nrow(PBSC_file_paths)), function(i) {
+  # file_path = paste0('REPORT_OUTPUT_DIR/', PBSC_file_paths[i,2])
+  read_pbsc_table(PBSC_file_paths[i, 2], PBSC_file_paths[i, 1])
+})
+# Bind all samples in one call instead of growing a data frame inside a loop.
+PBSC_df <- do.call(rbind, pbsc_tables)
+```
+
+
+```{r out.width="100%"}
+# Long format: one row per (Base, sample_id, variable), where variable is the
+# name of each remaining column and base_percentage its value.
+PBSC_df_2 <- select(PBSC_df, -X.Base) %>%
+  melt(id.vars = c('Base', 'sample_id'), value.name = 'base_percentage')
+# One line per variable, one panel per sample.
+p <- ggplot(data = PBSC_df_2,
+            aes(x = Base, y = base_percentage,
+                group = variable, color = variable)) +
+  geom_line() +
+  facet_wrap(~ sample_id)
+ggplotly(p)
+```
+