annotate aurora_wgcna_trait.Rmd @ 13:53fc3fdf5e9a draft

Uploaded
author spficklin
date Mon, 09 Dec 2019 12:09:10 -0500
parents f2d2ec70b7d8
children e67fcdf09e39
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
1 ---
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
2 title: 'Aurora Galaxy WGCNA Tool: Gene Co-Expression Network Construction & Analysis. Part 2'
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
3 output:
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
4 pdf_document:
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
5 number_sections: false
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
6 ---
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
7
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
8 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
9 knitr::opts_chunk$set(error = FALSE, echo = FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
10 ```
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
11 ```{r, include=FALSE}
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
12 options(tinytex.verbose = TRUE)
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
13 ```
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
14 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
15 # Load the data from the previous step.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
16 load(file=opt$r_data)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
17 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
18 # Introduction
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
19 This report is part two of step-by-step results from use of the [Aurora Galaxy](https://github.com/statonlab/aurora-galaxy-tools) Weighted Gene Co-expression Network Analysis [WGCNA](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559) tool. It is generated when trait or phenotype data is provided.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
20
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
21 This report was generated on:
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
22 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
23 format(Sys.time(), "%a %b %d %X %Y")
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
24 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
25
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
26 # Trait/Phenotype Data
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
27
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
28 The contents below show the first 10 rows and 6 columns of trait/phenotype data provided. However, any columns that were indicated should be removed, were removed and any categorical columns specified were converte to a one-hot enconding (e.g. 0 when present 1 when not present). The updated trait/phenotype data matrix has been saved into a comma-separated file named `updated_trait_matrix.csv`.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
29
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
30 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
31 # Load the trait data file.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
32 trait_data = data.frame()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
33 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col, na.strings = opt$missing_value2)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
34 sample_names = rownames(gemt)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
35 trait_rows = match(sample_names, rownames(trait_data))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
36 trait_data = trait_data[trait_rows, ]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
37
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
38 # Determine the column types within the trait annotation data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
39 trait_types = sapply(trait_data, class)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
40
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
41 # If the type is character we should convert it to a factor manually.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
42 character_fields = colnames(trait_data)[which(trait_types == "character")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
43 if (length(character_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
44 for (field in character_fields) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
45 trait_data[[field]] = as.factor(trait_data[[field]])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
46 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
47 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
48
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
49 # Remove ignored columns.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
50 ignore_cols = strsplit(opt$ignore_cols, ',')[[1]]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
51 if (length(ignore_cols) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
52 print('You chose to ignore the following fields:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
53 print(ignore_cols)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
54 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore_cols)]]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
55 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
56
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
57 # Make sure we don't one-hot-encoude any columns that were also ignored.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
58 one_hot_cols = strsplit(opt$one_hot_cols, ',')[[1]]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
59 one_hot_cols = one_hot_cols[which(!(one_hot_cols %in% ignore_cols))]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
60
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
61 # Change any categorical fields to 1 hot encoding as requested by the caller.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
62 if (length(one_hot_cols) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
63 print('You chose to treat the following fields as categorical:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
64 print(one_hot_cols)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
65
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
66 # Make sure we have enough levels for 1-hot encoding. We must have at least two.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
67 hkeep = c()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
68 hignore = c()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
69 for (field in one_hot_cols[[i]]) {
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
70
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
71 # Make sure the field is categorical. If it came in as integer it must be switched.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
72 if (trait_types[[field]] == "integer") {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
73 trait_data[[field]] = as.factor(trait_data[[field]])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
74 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
75 if (trait_types[[field]] == "numeric") {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
76 print('The following quantitative field will be treated as numeric instead.')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
77 print(field)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
78 next
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
79 }
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
80
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
81 # Now make sure we have enough factors.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
82 if (nlevels(trait_data[[field]]) > 1) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
83 hkeep[length(hkeep)+1] = field
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
84 } else {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
85 hignore[length(hignore)+1] = field
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
86 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
87 }
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
88
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
89 if (length(hignore) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
90 print('These fields were ignored due to too few factors:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
91 print(hignore)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
92 }
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
93
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
94 # Perform the 1-hot encoding for specified and valid fields.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
95 if (length(hkeep) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
96 print('These fields were be 1-hot encoded:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
97 print(hkeep)
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
98
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
99 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% hkeep)]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
100 temp = as.data.frame(trait_data[, swap_cols])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
101 colnames(temp) = swap_cols
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
102 temp = apply(temp, 2, make.names)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
103 dmy <- dummyVars(" ~ .", data = temp)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
104 encoded <- data.frame(predict(dmy, newdata = temp))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
105 encoded = sapply(encoded, as.integer)
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
106
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
107 # Make a new trait_data table with these new 1-hot fields.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
108 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% one_hot_cols[[1]])]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
109 keep = as.data.frame(trait_data[, keep_cols])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
110 colnames(keep) = keep_cols
11
f2d2ec70b7d8 Uploaded
spficklin
parents: 3
diff changeset
111
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
112 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
113 trait_data = cbind(keep, encoded)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
114 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
115 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
116
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
117 # Write the new trait data file.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
118 write.csv(trait_data, file=opt$updated_trait_matrix, quote=FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
119
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
120 #datatable(trait_data)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
121 trait_data[1:10,1:6]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
122 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
123 # Module-Condition Association
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
124
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
125 Now that we have trait/phenotype data, we can explore if any of the network modules are asociated with these features. First, is an empirical exploration by viewing again the sample dendrogram but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
126
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
127 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
128 # Determine the column types within the trait annotation data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
129 trait_types = sapply(trait_data, class)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
130
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
131 # So that we can merge colors together with a cbind, create a
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
132 # data frame with an empty column
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
133 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
134
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
135 # Set the colors for the quantitative data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
136 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
137 if (length(quantitative_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
138 qdata = as.data.frame(trait_data[,quantitative_fields])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
139 quantitative_colors = numbers2colors(qdata, signed = FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
140 colnames(quantitative_colors) = quantitative_fields
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
141 trait_colors = cbind(trait_colors, quantitative_colors)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
142 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
143
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
144 # Set the colors for the categorical data but only if the column
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
145 # has more than one factor. For columns with more than one factor
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
146 # we should dump that column.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
147 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
148 if (length(categorical_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
149 cdata = as.data.frame(trait_data[,categorical_fields])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
150 categorical_colors = labels2colors(cdata)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
151 colnames(categorical_colors) = categorical_fields
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
152 trait_colors = cbind(trait_colors, categorical_colors)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
153 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
154
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
155 # Set the colors for the ordinal data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
156 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
157 if (length(ordinal_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
158 odata = as.data.frame(trait_data[,ordinal_fields])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
159 ordinal_colors = numbers2colors(odata, signed = FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
160 colnames(ordinal_colors) = ordinal_fields
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
161 trait_colors = cbind(trait_colors, ordinal_colors)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
162 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
163
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
164 # Reorder the colors to match the same order of columns in the trait_data df.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
165 trait_order = colnames(trait_data)[colnames(trait_data) %in% colnames(trait_colors)]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
166 trait_colors = trait_colors[,trait_order]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
167 trait_data = trait_data[,trait_order]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
168 plotSampleDendroTraits <- function() {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
169 plotDendroAndColors(sampleTree, trait_colors,
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
170 groupLabels = names(trait_data),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
171 main = "Sample Dendrogram and Annotation Heatmap",
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
172 cex.dendroLabels = 0.5)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
173 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
174
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
175 png('figures/07-sample_trait_dendrogram.png', width=6 ,height=10, units="in", res=300)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
176 plotSampleDendroTraits()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
177 invisible(dev.off())
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
178 plotSampleDendroTraits()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
179 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
180
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
181 To statistically identify the associations, correlation tests are performed of the eigengenes of each module with the annotation data. The following heatmap shows the results between each annotation feature and each module. Modules with a signficant positive assocation have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
182
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
183 ```{r fig.align='center', fig.width=15, fig.height=15}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
184 MEs = orderMEs(MEs)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
185 moduleTraitCor = cor(MEs, trait_data, use = "p");
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
186 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
187
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
188 plotModuleTraitHeatmap <- function() {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
189 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
190 plotData = melt(moduleTraitCor)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
191 # We want to makes sure the order is the same as in the
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
192 # labeledHeatmap function (example above)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
193 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
194 # Now use ggplot2 to make a nicer image.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
195 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
196 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
197 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
198 mid = "white", midpoint = 0,
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
199 limit = c(-1,1), name="PCC") +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
200 theme_bw() +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
201 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
202 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
203 legend.text=element_text(size=15),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
204 panel.border = element_blank(),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
205 panel.grid.major = element_blank(),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
206 panel.grid.minor = element_blank(),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
207 axis.line = element_blank())
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
208 print(p)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
209 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
210 png('figures/08-module_trait_dendrogram.png', width=12 ,height=12, units="in", res=300)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
211 plotModuleTraitHeatmap()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
212 invisible(dev.off())
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
213 plotModuleTraitHeatmap()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
214 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
215
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
216 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
217 output = cbind(moduleTraitCor, moduleTraitPvalue)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
218 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
219 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
220 A file has been generated named `module_association.csv` which conatins the list of modules, and their correlation values as well as p-values indicating the strength of the associations.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
221 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
222 # names (colors) of the modules
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
223 modNames = substring(names(MEs), 3)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
224 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
225 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
226 names(geneModuleMembership) = paste("MM", modNames, sep="");
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
227 names(MMPvalue) = paste("p.MM", modNames, sep="");
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
228
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
229 # Calculate the gene trait significance as a Pearson's R and p-value.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
230 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
231 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
232 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
233 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
234
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
235 # Write out the gene information.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
236 output = cbind(Module = module_labels, gts, gtsp)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
237 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
238
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
239 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
240 Genes themselves can also have assocation with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the assocaition of each gene to the trait features.