Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
comparison wgcna_construct_network.Rmd @ 6:2f4df2be0572 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 12:35:11 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:0ac073bef19d | 6:2f4df2be0572 |
---|---|
1 --- | |
2 title: 'WGCNA: construct network' | |
3 output: | |
4 html_document: | |
5 number_sections: true | |
6 toc: true | |
7 theme: cosmo | |
8 highlight: tango | |
9 --- | |
10 | |
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | |
# NOTE(review): ECHO is not defined in this file; it looks like a template placeholder replaced before rendering — confirm | |
12 knitr::opts_chunk$set( | |
13 echo = ECHO | |
14 ) | |
15 ``` | |
16 | |
17 # Import workspace | |
18 | |
19 This step imports the workspace saved by the **WGCNA: preprocessing** step. | |
20 | |
21 ```{r} | |
# Copy the preprocessing workspace to the local file deseq.RData, then load its objects into this session. | |
# NOTE(review): file.copy() does not overwrite an existing file by default, and the TRUE/FALSE result kept in fcp is never checked. | |
22 fcp = file.copy("PREPROCESSING_WORKSPACE", "deseq.RData") | |
23 load("deseq.RData") | |
24 ``` | |
25 | |
26 | |
27 # Processing outliers {.tabset} | |
28 | |
29 ## Before removing outliers | |
30 | |
31 ```{r} | |
32 plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, | |
33 cex.axis = 1, cex.main = 1, cex = 0.5) | |
34 if(!is.na(HEIGHT_CUT)) { | |
35 # mark the cut height on the dendrogram with a horizontal red line | |
36 abline(h = HEIGHT_CUT, col = "red") | |
37 # cut the tree at HEIGHT_CUT (minSize = 10) and keep only the samples labelled as cluster 1 | |
38 clust = cutreeStatic(sampleTree, cutHeight = HEIGHT_CUT, minSize = 10) | |
39 keepSamples = (clust==1) | |
40 expression_data = expression_data[keepSamples, ] | |
41 } | |
42 ``` | |
43 | |
44 ## After removing outliers | |
45 | |
46 ```{r} | |
# rebuild the sample tree from the current expression_data using average-linkage clustering, then plot it | |
47 sampleTree = hclust(dist(expression_data), method = "average"); | |
48 plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", | |
49 cex.axis = 1, cex.main = 1, cex = 0.5) | |
50 ``` | |
51 | |
52 | |
53 # Trait data {.tabset} | |
54 | |
55 If trait data is provided, the first 100 rows of the data are displayed here. A plot consisting of the sample cluster dendrogram and a trait heatmap is also generated. | |
56 | |
57 ## Trait data table | |
58 | |
59 ```{r} | |
60 trait_data = data.frame() | |
61 if ("TRAIT_DATA" != 'None') { | |
62 trait_data = read.csv("TRAIT_DATA", header = TRUE, row.names = 1) | |
63 # reorder trait rows to follow the sample order of expression_data; match() yields NA for samples absent from the trait table | |
64 sample_names = rownames(expression_data) | |
65 trait_rows = match(sample_names, rownames(trait_data)) | |
66 trait_data = trait_data[trait_rows, ] | |
67 datatable(head(trait_data, 100), style="bootstrap", filter = 'top', | |
68 class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) | |
69 } | |
70 ``` | |
71 | |
72 ## Dendrogram and heatmap | |
73 | |
74 ```{r fig.align='center', fig.width=8, fig.height=9} | |
# only drawn when trait data was loaded, i.e. trait_data has at least one row | |
75 if (nrow(trait_data) != 0) { | |
76 traitColors = numbers2colors(trait_data, signed = FALSE) | |
77 plotDendroAndColors(sampleTree, traitColors, | |
78 groupLabels = names(trait_data), | |
79 main = "Sample dendrogram and trait heatmap", | |
80 cex.dendroLabels = 0.5) | |
81 } | |
82 ``` | |
83 | |
84 | |
85 # The thresholding power | |
86 | |
87 ```{r} | |
# candidate powers: 1 to 10, then 12 to 20 in steps of 2 | |
88 powers = c(1:10, seq(12, 20, 2)) | |
89 soft_threshold = pickSoftThreshold(expression_data, powerVector = powers, verbose = 5) | |
90 ``` | |
91 | |
92 ```{r fig.align='center'} | |
93 par(mfrow=c(1,2)) # two panels side by side: scale independence and mean connectivity | |
94 plot(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2], | |
95 xlab="Soft Threshold (power)", | |
96 ylab="Scale Free Topology Model Fit,signed R^2",type="n", | |
97 main = paste("Scale independence"), | |
98 cex.lab = 0.5); | |
99 text(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2], | |
100 labels=powers,cex=0.5,col="red"); | |
101 | |
102 # calculate soft threshold power: the lowest tested power whose signed R^2 exceeds r2_cutoff | |
103 y = -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2] | |
104 r2_cutoff = 0.9 | |
105 passing = which(y > r2_cutoff) # which() skips NA fits instead of failing on them | |
106 if (length(passing) > 0) { | |
107 chosen = passing[1] | |
108 } else { | |
109 chosen = length(powers) # no power passes: fall back to the largest power tested | |
110 } | |
111 soft_threshold_power = soft_threshold$fitIndices[,1][chosen] | |
112 r2_cutoff_new = y[chosen] # signed R^2 at the chosen power; always defined so the label below cannot fail | |
113 abline(h=r2_cutoff, col="red") | |
114 abline(v=soft_threshold_power, col="blue") | |
115 text(soft_threshold_power+1, r2_cutoff-0.1, | |
116 paste0('R^2 cutoff = ', round(r2_cutoff_new,2)), | |
117 cex = 0.5, col = "red") | |
118 | |
119 plot(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], | |
120 xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", | |
121 main = paste("Mean connectivity"), | |
122 cex.lab = 0.5) | |
123 text(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], labels=powers, cex=0.5,col="red") | |
124 par(mfrow=c(1,1)) # back to a single-panel layout | |
125 ``` | |
126 | |
127 | |
128 # Construct network | |
129 | |
130 The gene network is constructed based on **soft threshold power = `r soft_threshold_power`** | |
131 | |
132 ```{r} | |
# call blockwiseModules() with the soft threshold power chosen in the previous section; the result is kept in gene_network | |
133 gene_network = blockwiseModules(expression_data, power = soft_threshold_power, | |
134 TOMType = "unsigned", minModuleSize = 30, | |
135 reassignThreshold = 0, mergeCutHeight = 0.25, | |
136 numericLabels = TRUE, pamRespectsDendro = FALSE, | |
137 verbose = 3) | |
138 ``` | |
139 | |
140 | |
141 # Gene modules {.tabset} | |
142 | |
143 ## Identify gene modules | |
144 | |
145 ```{r} | |
146 modules = table(gene_network$colors) # gene count per module label | |
147 module_ids = as.integer(names(modules)) # labels actually present; 0 = genes outside all modules | |
148 module_sizes = as.vector(modules)[module_ids != 0] # sizes of the real modules only | |
149 n_modules = length(module_sizes) | |
150 module_size_upper = if (n_modules > 0) max(module_sizes) else 0 # 0 when no module was detected | |
151 module_size_lower = if (n_modules > 0) min(module_sizes) else 0 | |
152 module_table = data.frame(model_label = module_ids, gene_size = as.vector(modules)) | |
153 datatable(t(module_table)) | |
154 ``` | |
155 | |
156 The results above indicate that there are **`r n_modules` gene modules**, labeled 1 through `r n_modules` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules. | |
157 | |
158 | |
159 ## Dendrogram and module plot | |
160 | |
161 ```{r} | |
162 # Convert the module labels in gene_network$colors to colors for plotting | |
163 module_colors = labels2colors(gene_network$colors) | |
164 # Plot the dendrogram of the first block only ([[1]]), with that block's module colors underneath | |
165 plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]], | |
166 "Module colors", | |
167 dendroLabels = FALSE, hang = 0.03, | |
168 addGuide = TRUE, guideHang = 0.05) | |
169 ``` | |
170 | |
171 | |
172 ```{r echo=FALSE} | |
173 # remove opt, then save all remaining workspace objects (all.names = TRUE also lists names starting with a dot) | |
174 rm("opt") | |
175 save(list=ls(all.names = TRUE), file='CONSTRUCT_NETWORK_WORKSPACE') | |
176 ``` | |
177 | |
178 |