annotate fastqc_report.Rmd @ 15:d1d20f341632 draft

fastqc_report v2.0.0
author mingchen0919
date Thu, 19 Oct 2017 00:11:14 -0400
parents 2efa46ce2c4c
children 1710b0e874f1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
1 ---
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
3 output:
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
4 html_document:
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
5 number_sections: true
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
6 toc: true
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
7 theme: cosmo
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
8 highlight: tango
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
9 ---
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
10
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
# Use ECHO as the default `echo` option for every chunk in this document.
# NOTE(review): ECHO is not defined in this file; presumably it is substituted
# or defined by whatever renders the template -- confirm.
knitr::opts_chunk$set(echo = ECHO)
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
15 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
16
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
17
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
18 # FastQC Analysis
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
19
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
20 * Copy FASTQ files to the job working directory
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
21
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
22 ```{bash 'copy files'}
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
# Copy every input file into the current directory.
# NOTE(review): READS is not defined in this document; presumably it is replaced
# with a comma-separated list of paths before the chunk runs -- confirm.
# The list is split on commas only, so paths containing spaces or glob
# characters are passed to cp intact instead of being word-split by the shell.
IFS=',' read -r -a read_files <<< "READS"
for f in "${read_files[@]}"
do
  cp -- "$f" ./
done
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
27 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
28
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
29 * Run FastQC
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
30
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
31 ```{bash 'run fastqc'}
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
# Run fastqc on every .dat file in the current directory, writing results to
# REPORT_DIR and discarding fastqc's stdout/stderr.
# The glob is iterated directly (rather than parsing `ls` output) and "$r" is
# quoted so that file names containing spaces are handled as single arguments.
for r in *.dat
do
  # When nothing matches, the loop sees the literal pattern; skip it.
  [ -e "$r" ] || continue
  fastqc -o REPORT_DIR "$r" > /dev/null 2>&1
done
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
36 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
37
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
38 ## Evaluation results
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
39
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
40 ```{r 'html report links'}
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# List the HTML report(s) in REPORT_DIR. The pattern is escaped and anchored so
# that only file names ending in ".html" are picked up.
html_file = list.files('REPORT_DIR', pattern = '\\.html$')
# Emit one <li> per report: <ul> may only contain list items, and each link
# gets exactly one href even when several reports are present.
tags$ul(lapply(html_file, function(report) {
  tags$li(tags$a(href = report, paste0('HTML report', opt$name)))
}))
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
43 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
44
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
45
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
46 ```{r 'extract fastqc_data.txt and summary.txt'}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# List the zip archives in REPORT_DIR. The pattern is escaped and anchored so
# that only file names ending in ".zip" match.
zip_file = list.files(path = 'REPORT_DIR', pattern = '\\.zip$')
if (length(zip_file) == 0) {
  # Fail here with a clear message instead of later when the text files are read.
  stop('No zip archive found in REPORT_DIR', call. = FALSE)
}
# unzip() takes a single archive, so extract them one at a time.
for (archive in zip_file) {
  unzip(file.path('REPORT_DIR', archive), exdir = 'REPORT_DIR')
}

# Expected name of the unpacked directory: "<file name of opt$reads>_fastqc/".
# The trailing slash is kept because the paths below (and the links in later
# chunks) are assembled with paste0().
unzip_directory = paste0(basename(opt$reads), '_fastqc/')
fastqc_data_txt_path = paste0('REPORT_DIR/', unzip_directory, 'fastqc_data.txt')
summary_txt_path = paste0('REPORT_DIR/', unzip_directory, 'summary.txt')
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
54 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
55
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
56
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
57 ```{r 'summary.txt'}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# Link to summary.txt under unzip_directory. The anchor is wrapped in <li>
# because <ul> may only contain list items.
tags$ul(tags$li(tags$a(href = paste0(unzip_directory, 'summary.txt'), 'summary.txt')))
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
59 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
60
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
61
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
62 ```{r 'fastqc_data.txt'}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# Link to fastqc_data.txt under unzip_directory. The anchor is wrapped in <li>
# because <ul> may only contain list items.
tags$ul(tags$li(tags$a(href = paste0(unzip_directory, 'fastqc_data.txt'), 'fastqc_data.txt')))
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
64 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
65
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
66
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
67 # FastQC output visualization
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
68
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
69 ## Overview
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
70
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
71 ```{r}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# Read the tab-separated summary file (no header row), keep its second and
# first columns in that order, label them, and render the result as a table.
summary_txt = setNames(
  read.delim(summary_txt_path, header = FALSE)[, c(2L, 1L)],
  c('MODULE', 'PASS/FAIL')
)
knitr::kable(summary_txt)
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
76 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
77
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
78 ## Summary by module {.tabset}
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
79
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
80 * Define a function to extract the output of each module from the FastQC output
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
81
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
82 ```{r 'function definition'}
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
# Extract the table of one module from a FastQC data file.
#
# Arguments:
#   fastqc_data  File to read; passed straight to readLines().
#   module_name  Regular expression identifying the module's header line.
#                When several lines match, the first one is used.
# Returns:
#   A data.frame parsed from the tab-separated lines that lie between the
#   matched line and the next line containing 'END_MODULE'; the first of those
#   lines supplies the column names. An empty data.frame is returned when no
#   lines lie between the two markers.
# Errors:
#   Stops when no line matches `module_name`, or when no 'END_MODULE' line
#   follows the match.
extract_data_module = function(fastqc_data, module_name) {
  f = readLines(fastqc_data)

  matches = grep(module_name, f)
  if (length(matches) == 0) {
    stop("Module '", module_name, "' not found in FastQC data", call. = FALSE)
  }
  # Use a single start line so the comparisons below stay scalar.
  start_line = matches[1]

  end_module_lines = grep('END_MODULE', f, fixed = TRUE)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("No END_MODULE marker after module '", module_name, "'", call. = FALSE)
  }

  # Nothing between the markers: (start_line + 1):(end_line - 1) would count
  # downwards and pick up the marker lines themselves, so return early.
  if (end_line - start_line < 2) {
    return(data.frame())
  }

  module_data = f[seq.int(start_line + 1, end_line - 1)]
  # Parse in memory; no 'temp.txt' scratch file is left in the working directory.
  read.csv(text = module_data, sep = '\t')
}
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
92 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
93
15
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
94 ### Per base sequence quality
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
95
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
96 ```{r}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# Pull the 'Per base sequence quality' module out of the FastQC data file and
# render it as a table.
pbsq = extract_data_module(
  fastqc_data = fastqc_data_txt_path,
  module_name = 'Per base sequence quality'
)
knitr::kable(pbsq)
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
99 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
100
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
101 ### Per tile sequence quality
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
102
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
103 ```{r}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
# Render the 'Per tile sequence quality' module as a table when it is present.
# NOTE(review): per the FastQC documentation this module is only produced for
# reads whose identifiers carry tile information -- confirm for the inputs used
# here. Without the guard, a missing module makes the extraction fail and
# aborts the whole report.
tile_module = 'Per tile sequence quality'
if (any(grepl(tile_module, readLines(fastqc_data_txt_path), fixed = TRUE))) {
  ptsq = extract_data_module(fastqc_data_txt_path, tile_module)
  knitr::kable(ptsq)
} else {
  cat('Per tile sequence quality is not available for this data set.\n')
}
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
106 ```
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
107
d1d20f341632 fastqc_report v2.0.0
mingchen0919
parents: 14
diff changeset
108
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
109
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
110 # Session Info
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
111
14
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
112 ```{r 'session info'}
2efa46ce2c4c upgrade fastqc_report
mingchen0919
parents: 6
diff changeset
# Record the R version and the attached/loaded packages used for this report.
utils::sessionInfo()
2
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
114 ```
0374e090e38e Uploaded
mingchen0919
parents:
diff changeset
115