comparison fastqc_report.Rmd @ 15:d1d20f341632 draft

fastqc_report v2.0.0
author mingchen0919
date Thu, 19 Oct 2017 00:11:14 -0400
parents 2efa46ce2c4c
children 1710b0e874f1
comparison
equal deleted inserted replaced
14:2efa46ce2c4c 15:d1d20f341632
1 --- 1 ---
2 title: 'HTML report title' 2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
3 output: 3 output:
4 html_document: 4 html_document:
5 number_sections: true 5 number_sections: true
6 toc: true 6 toc: true
7 theme: cosmo 7 theme: cosmo
33 do 33 do
34 fastqc -o REPORT_DIR $r > /dev/null 2>&1 34 fastqc -o REPORT_DIR $r > /dev/null 2>&1
35 done 35 done
36 ``` 36 ```
37 37
38 * Create links to original HTML reports 38 ## Evaluation results
39 39
40 ```{r 'html report links'} 40 ```{r 'html report links'}
41 html_report_list = list() 41 html_file = list.files('REPORT_DIR', pattern = '.*html')
42 html_files = list.files('REPORT_DIR', pattern = '.*html') 42 tags$ul(tags$a(href=html_file, paste0('HTML report', opt$name)))
43 for (i in html_files) {
44 html_report_list[[i]] = tags$li(tags$a(href=i, i))
45 }
46 tags$ul(html_report_list)
47 ``` 43 ```
48 44
49 # Fastqc output summary 45
46 ```{r 'extract fastqc_data.txt and summary.txt'}
47 # list all zip files
48 zip_file = list.files(path = 'REPORT_DIR', pattern = '.zip')
49 unzip(paste0('REPORT_DIR/', zip_file), exdir = 'REPORT_DIR')
50
51 unzip_directory = paste0(tail(strsplit(opt$reads, '/')[[1]], 1), '_fastqc/')
52 fastqc_data_txt_path = paste0('REPORT_DIR/', unzip_directory, 'fastqc_data.txt')
53 summary_txt_path = paste0('REPORT_DIR/', unzip_directory, 'summary.txt')
54 ```
55
56
57 ```{r 'summary.txt'}
58 tags$ul(tags$a(href=paste0(unzip_directory, 'summary.txt'), 'summary.txt'))
59 ```
60
61
62 ```{r 'fastqc_data.txt'}
63 tags$ul(tags$a(href=paste0(unzip_directory, 'fastqc_data.txt'), 'fastqc_data.txt'))
64 ```
65
66
67 # FastQC output visualization
68
69 ## Overview
70
71 ```{r}
72 # read.table(fastqc_data_txt_path)
73 summary_txt = read.csv(summary_txt_path, header = FALSE, sep = '\t')[, 2:1]
74 names(summary_txt) = c('MODULE', 'PASS/FAIL')
75 knitr::kable(summary_txt)
76 ```
77
78 ## Summary by module {.tabset}
50 79
51 * Define a function to extract outputs for each module from fastqc output 80 * Define a function to extract outputs for each module from fastqc output
52 81
53 ```{r 'function definition'} 82 ```{r 'function definition'}
54 extract_data_module = function(fastqc_data, module_name) { 83 extract_data_module = function(fastqc_data, module_name) {
60 writeLines(module_data, 'temp.txt') 89 writeLines(module_data, 'temp.txt')
61 read.csv('temp.txt', sep = '\t') 90 read.csv('temp.txt', sep = '\t')
62 } 91 }
63 ``` 92 ```
64 93
65 ## 94 ### Per base sequence quality
95
96 ```{r}
97 pbsq = extract_data_module(fastqc_data_txt_path, 'Per base sequence quality')
98 knitr::kable(pbsq)
99 ```
100
101 ### Per tile sequence quality
102
103 ```{r}
104 ptsq = extract_data_module(fastqc_data_txt_path, 'Per tile sequence quality')
105 knitr::kable(ptsq)
106 ```
107
108
66 109
67 # Session Info 110 # Session Info
68 111
69 ```{r 'session info'} 112 ```{r 'session info'}
70 sessionInfo() 113 sessionInfo()