2
|
1 ---
|
15
|
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
|
14
|
3 output:
|
|
4 html_document:
|
|
5 number_sections: true
|
|
6 toc: true
|
|
7 theme: cosmo
|
|
8 highlight: tango
|
2
|
9 ---
|
|
10
|
14
|
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
|
|
12 knitr::opts_chunk$set(
|
|
13 echo = ECHO
|
|
14 )
|
2
|
15 ```
|
|
16
|
|
17
|
14
|
18 # Fastqc Analysis
|
2
|
19
|
14
|
20 * Copy fastq files to job working directory
|
|
21
|
|
22 ```{bash 'copy files'}
|
2
|
23 for f in $(echo READS | sed "s/,/ /g")
|
|
24 do
|
|
25 cp $f ./
|
|
26 done
|
|
27 ```
|
|
28
|
14
|
29 * Run fastqc
|
2
|
30
|
14
|
31 ```{bash 'run fastqc'}
|
2
|
32 for r in $(ls *.dat)
|
|
33 do
|
14
|
34 fastqc -o REPORT_DIR $r > /dev/null 2>&1
|
2
|
35 done
|
|
36 ```
|
|
37
|
15
|
38 ## Evaluation results
|
2
|
39
|
|
40 ```{r 'html report links'}
|
15
|
41 html_file = list.files('REPORT_DIR', pattern = '.*html')
|
|
42 tags$ul(tags$a(href=html_file, paste0('HTML report', opt$name)))
|
|
43 ```
|
|
44
|
|
45
|
|
46 ```{r 'extract fastqc_data.txt and summary.txt'}
|
|
47 # list all zip files
|
|
48 zip_file = list.files(path = 'REPORT_DIR', pattern = '.zip')
|
|
49 unzip(paste0('REPORT_DIR/', zip_file), exdir = 'REPORT_DIR')
|
|
50
|
|
51 unzip_directory = paste0(tail(strsplit(opt$reads, '/')[[1]], 1), '_fastqc/')
|
|
52 fastqc_data_txt_path = paste0('REPORT_DIR/', unzip_directory, 'fastqc_data.txt')
|
|
53 summary_txt_path = paste0('REPORT_DIR/', unzip_directory, 'summary.txt')
|
2
|
54 ```
|
|
55
|
15
|
56
|
|
57 ```{r 'summary.txt'}
|
|
58 tags$ul(tags$a(href=paste0(unzip_directory, 'summary.txt'), 'summary.txt'))
|
|
59 ```
|
|
60
|
|
61
|
|
62 ```{r 'fastqc_data.txt'}
|
|
63 tags$ul(tags$a(href=paste0(unzip_directory, 'fastqc_data.txt'), 'fastqc_data.txt'))
|
|
64 ```
|
|
65
|
|
66
|
|
67 # Fastqc output visualization
|
|
68
|
|
69 ## Overview
|
|
70
|
|
71 ```{r}
|
|
72 # read.table(fastqc_data_txt_path)
|
|
73 summary_txt = read.csv(summary_txt_path, header = FALSE, sep = '\t')[, 2:1]
|
|
74 names(summary_txt) = c('MODULE', 'PASS/FAIL')
|
|
75 knitr::kable(summary_txt)
|
|
76 ```
|
|
77
|
|
78 ## Summary by module {.tabset}
|
2
|
79
|
14
|
80 * Define a function to extract outputs for each module from fastqc output
|
2
|
81
|
14
|
82 ```{r 'function definition'}
|
|
83 extract_data_module = function(fastqc_data, module_name) {
|
|
84 f = readLines(fastqc_data)
|
|
85 start_line = grep(module_name, f)
|
|
86 end_module_lines = grep('END_MODULE', f)
|
|
87 end_line = end_module_lines[which(end_module_lines > start_line)[1]]
|
|
88 module_data = f[(start_line+1):(end_line-1)]
|
|
89 writeLines(module_data, 'temp.txt')
|
|
90 read.csv('temp.txt', sep = '\t')
|
2
|
91 }
|
|
92 ```
|
|
93
|
15
|
94 ### Per base sequence quality
|
|
95
|
|
96 ```{r}
|
|
97 pbsq = extract_data_module(fastqc_data_txt_path, 'Per base sequence quality')
|
|
98 knitr::kable(pbsq)
|
|
99 ```
|
|
100
|
|
101 ### Per tile sequence quality
|
|
102
|
|
103 ```{r}
|
|
104 ptsq = extract_data_module(fastqc_data_txt_path, 'Per tile sequence quality')
|
|
105 knitr::kable(ptsq)
|
|
106 ```
|
|
107
|
|
108
|
2
|
109
|
14
|
110 # Session Info
|
2
|
111
|
14
|
112 ```{r 'session info'}
|
|
113 sessionInfo()
|
2
|
114 ```
|
|
115
|