comparison wgcna_construct_network.Rmd @ 0:4275479ada3a draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author mingchen0919
date Tue, 08 Aug 2017 12:35:50 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4275479ada3a
1 ---
2 title: 'WGCNA: construct network'
3 output:
4 html_document:
5 number_sections: true
6 toc: true
7 theme: cosmo
8 highlight: tango
9 ---
10
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
12 knitr::opts_chunk$set(
13 echo = ECHO
14 )
15 ```
16
17 # Import workspace
18
19 This step imports workspace from the **WGCNA: preprocessing** step.
20
21 ```{r}
22 fcp = file.copy("PREPROCESSING_WORKSPACE", "deseq.RData")
23 load("deseq.RData")
24 ```
25
26
27 # Processing outliers {.tabset}
28
29 ## Before removing outliers
30
31 ```{r}
32 plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
33 cex.axis = 1, cex.main = 1, cex = 0.5)
34 if(!is.na(HEIGHT_CUT)) {
35 # plot a line to show the cut
36 abline(h = HEIGHT_CUT, col = "red")
37 # determine cluster under the line
38 clust = cutreeStatic(sampleTree, cutHeight = HEIGHT_CUT, minSize = 10)
39 keepSamples = (clust==1)
40 expression_data = expression_data[keepSamples, ]
41 }
42 ```
43
44 ## After removing outliers
45
46 ```{r}
47 sampleTree = hclust(dist(expression_data), method = "average");
48 plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="",
49 cex.axis = 1, cex.main = 1, cex = 0.5)
50 ```
51
52
53 # Trait data {.tabeset}
54
55 If trait data is provided, the first 100 rows from the data will be displayed here. A plot consisting of sample cluster dendrogram and trait heatmap will also be gerenated.
56
57 ## Trait data table
58
59 ```{r}
60 trait_data = data.frame()
61 if ("TRAIT_DATA" != 'None') {
62 trait_data = read.csv("TRAIT_DATA", header = TRUE, row.names = 1)
63 # form a data frame analogous to expression data that will hold the traits.
64 sample_names = rownames(expression_data)
65 trait_rows = match(sample_names, rownames(trait_data))
66 trait_data = trait_data[trait_rows, ]
67 datatable(head(trait_data, 100), style="bootstrap", filter = 'top',
68 class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
69 }
70 ```
71
72 ## Dendrogram and heatmap
73
74 ```{r fig.align='center', fig.width=8, fig.height=9}
75 if (nrow(trait_data) != 0) {
76 traitColors = numbers2colors(trait_data, signed = FALSE)
77 plotDendroAndColors(sampleTree, traitColors,
78 groupLabels = names(trait_data),
79 main = "Sample dendrogram and trait heatmap",
80 cex.dendroLabels = 0.5)
81 }
82 ```
83
84
85 # The thresholding power
86
87 ```{r}
88 powers = c(1:10, seq(12, 20, 2))
89 soft_threshold = pickSoftThreshold(expression_data, powerVector = powers, verbose = 5)
90 ```
91
92 ```{r fig.align='center'}
93 par(mfrow=c(1,2))
94 plot(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2],
95 xlab="Soft Threshold (power)",
96 ylab="Scale Free Topology Model Fit,signed R^2",type="n",
97 main = paste("Scale independence"),
98 cex.lab = 0.5);
99 text(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2],
100 labels=powers,cex=0.5,col="red");
101
102 # calculate soft threshold power
103 y = -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2]
104 r2_cutoff = 0.9
105 for(i in 1:length(powers)) {
106 if(y[i] > r2_cutoff) {
107 soft_threshold_power = soft_threshold$fitIndices[,1][i]
108 r2_cutoff_new = y[i]
109 break
110 }
111 soft_threshold_power = soft_threshold$fitIndices[,1][length(powers)]
112 }
113 abline(h=r2_cutoff, col="red")
114 abline(v=soft_threshold_power, col="blue")
115 text(soft_threshold_power+1, r2_cutoff-0.1,
116 paste0('R^2 cutoff = ', round(r2_cutoff_new,2)),
117 cex = 0.5, col = "red")
118
119 plot(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5],
120 xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
121 main = paste("Mean connectivity"),
122 cex.lab = 0.5)
123 text(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], labels=powers, cex=0.5,col="red")
124 par(mfrow=c(1,1))
125 ```
126
127
128 # Construct network
129
130 The gene network is constructed based on **soft threshold power = `r soft_threshold_power`**
131
132 ```{r}
133 gene_network = blockwiseModules(expression_data, power = soft_threshold_power,
134 TOMType = "unsigned", minModuleSize = 30,
135 reassignThreshold = 0, mergeCutHeight = 0.25,
136 numericLabels = TRUE, pamRespectsDendro = FALSE,
137 verbose = 3)
138 ```
139
140
141 # Gene modules {.tabset}
142
143 ## Idenfity gene modules
144
145 ```{r}
146 modules = table(gene_network$colors)
147 n_modules = length(modules) - 1
148 module_size_upper = modules[2]
149 module_size_lower = modules[length(modules)]
150
151 module_table = data.frame(model_label = c(0, 1:n_modules),
152 gene_size = as.vector(modules))
153 datatable(t(module_table))
154 ```
155
156 The results above indicates that there are **`r n_modules` gene modules**, labeled 1 through `r length(n_modules)` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules.
157
158
159 ## Dendrogram and module plot
160
161 ```{r}
162 # Convert labels to colors for plotting
163 module_colors = labels2colors(gene_network$colors)
164 # Plot the dendrogram and the module colors underneath
165 plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]],
166 "Module colors",
167 dendroLabels = FALSE, hang = 0.03,
168 addGuide = TRUE, guideHang = 0.05)
169 ```
170
171
172 ```{r echo=FALSE}
173 # save workspace
174 rm("opt")
175 save(list=ls(all.names = TRUE), file='CONSTRUCT_NETWORK_WORKSPACE')
176 ```
177
178