annotate rmarkdown_deseq2_count_matrix.Rmd @ 1:629323b5fc0c draft

update tool
author mingchen0919
date Sat, 30 Dec 2017 16:39:39 -0500
parents c1f718dd6c7a
children 8ceda5896765
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
1 ---
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
2 title: 'DESeq2 analysis'
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
3 output:
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
4 html_document:
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
5 number_sections: true
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
6 toc: true
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
7 theme: cosmo
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
8 highlight: tango
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
9 ---
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
10
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
12 knitr::opts_chunk$set(
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
13 echo = opt$echo,
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
14 error = TRUE
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
15 )
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
16 ```
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
17
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
18
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
19 # User input
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
20
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
21 ```{r 'user input'}
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
22 df = data.frame(name = names(opt)[-1],
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
23 value = unlist(opt))
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
24 datatable(df, rownames = FALSE)
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
25 ```
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
26
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
27
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
28 # Count Matrix
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
29
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
30 Display the first 100 rows of the count data matrix.
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
31
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
32 ```{r 'count matrix'}
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
33 count_data = read.table(opt$count_data)
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
34 col_names = trimws(strsplit(opt$count_matrix_column_names, ',')[[1]])[1:ncol(count_data)]
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
35 col_names = col_names[!is.na(col_names)]
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
36 colnames(count_data)[seq_along(col_names)] = col_names  # seq_along() makes this a no-op instead of an error when col_names is empty
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
37 datatable(head(count_data, 100))
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
38 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
39
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
40 # Column Data
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
41
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
42 ```{r 'column data'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
43 col_data = read.table(opt$col_data,
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
44 stringsAsFactors = FALSE, sep=',', header = TRUE, row.names = 1)
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
45 datatable(col_data)
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
46 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
47
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
48 # Match sample names
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
49
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
50 The goal of this step is to rearrange the rows of the column data matrix so that the sample rows in the column data matrix and the sample columns in the count data matrix are in the same order.
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
51
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
52 ```{r 'match sample names'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
53 col_data = col_data[col_names, , drop = FALSE]  # drop = FALSE keeps a data.frame (with row names) even when col_data has a single column
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
54 datatable(col_data)
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
55 ```
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
56
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
57 # DESeqDataSet
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
58
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
59 ```{r 'DeseqDataSet'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
60 dds = DESeqDataSetFromMatrix(countData = count_data,
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
61 colData = col_data,
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
62 design = formula(opt$design_formula))
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
63 dds
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
64 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
65
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
66 Pre-filter low count genes
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
67
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
68 ```{r 'pre-filtering'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
69 keep = rowSums(counts(dds)) >= 10
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
70 dds = dds[keep, ]
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
71 dds
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
72 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
73
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
74 # Differential expression analysis
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
75
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
76 ```{r 'differential expression analysis'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
77 dds = DESeq(dds)
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
78 # res = results(dds, contrast = c(opt$contrast_condition, opt$treatment, opt$control))
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
79 res = results(dds)
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
80 resultsNames(dds)
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
81 if(nrow(res) > 500) {
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
82 cat('The result table has more than 500 rows. Only 500 rows are randomly selected to display.')  # notice printed before the sampled table
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
83 datatable(as.data.frame(res)[sample(1:nrow(res), 500), ])
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
84 } else {
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
85 datatable(as.data.frame(res))
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
86 }
0
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
87 ```
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
88
c1f718dd6c7a planemo upload
mingchen0919
parents:
diff changeset
89
1
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
90
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
91 ```{r 'write results into csv'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
92 #Write results into a CSV file.
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
93 write.csv(res, 'differential_genes.csv')
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
94 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
95
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
96 # MAplot
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
97
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
98 ```{r}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
99 plotMA(res)
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
100 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
101
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
102
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
103 ```{r 'save R objects'}
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
104 # Save R objects to a file
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
105 save(dds, opt, file = 'deseq2.RData')
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
106 ```
629323b5fc0c update tool
mingchen0919
parents: 0
diff changeset
107