annotate aurora_wgcna_trait.Rmd @ 6:58b01fa2cc81 draft

Uploaded
author spficklin
date Thu, 05 Dec 2019 15:34:52 -0500
parents d1a0b7ded7e3
children f2d2ec70b7d8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
1 ---
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
2 title: 'Aurora Galaxy WGCNA Tool: Gene Co-Expression Network Construction & Analysis. Part 2'
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
3 output:
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
4 pdf_document:
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
5 number_sections: false
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
6 ---
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
7
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
8 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
9 knitr::opts_chunk$set(error = FALSE, echo = FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
10 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
11 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
12 # Load the data from the previous step.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
13 load(file=opt$r_data)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
14 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
15 # Introduction
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
16 This report is part two of step-by-step results from use of the [Aurora Galaxy](https://github.com/statonlab/aurora-galaxy-tools) Weighted Gene Co-expression Network Analysis [WGCNA](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559) tool. It is generated when trait or phenotype data is provided.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
17
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
18 This report was generated on:
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
19 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
20 format(Sys.time(), "%a %b %d %X %Y")
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
21 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
22
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
23 # Trait/Phenotype Data
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
24
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
25 The contents below show the first 10 rows and 6 columns of the trait/phenotype data provided. However, any columns that were marked for removal have been removed, and any categorical columns specified were converted to a one-hot encoding (e.g. 1 when present, 0 when not present). The updated trait/phenotype data matrix has been saved into a comma-separated file named `updated_trait_matrix.csv`.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
26
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
27 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
28 # Load the trait data file.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
29 trait_data = data.frame()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
30 trait_data = read.csv(opt$trait_data, header = TRUE, row.names = opt$sname_col, na.strings = opt$missing_value2)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
31 sample_names = rownames(gemt)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
32 trait_rows = match(sample_names, rownames(trait_data))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
33 trait_data = trait_data[trait_rows, ]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
34
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
35 # Determine the column types within the trait annotation data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
36 trait_types = sapply(trait_data, class)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
37
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
38 # If the type is character we should convert it to a factor manually.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
39 character_fields = colnames(trait_data)[which(trait_types == "character")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
40 if (length(character_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
41 for (field in character_fields) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
42 trait_data[[field]] = as.factor(trait_data[[field]])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
43 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
44 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
45
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
46 # Remove ignored columns.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
47 ignore_cols = strsplit(opt$ignore_cols, ',')[[1]]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
48 if (length(ignore_cols) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
49 print('You chose to ignore the following fields:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
50 print(ignore_cols)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
51 trait_data = trait_data[, colnames(trait_data)[!(colnames(trait_data) %in% ignore_cols)]]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
52 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
53
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
54 # Make sure we don't one-hot-encode any columns that were also ignored.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
55 one_hot_cols = strsplit(opt$one_hot_cols, ',')[[1]]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
56 one_hot_cols = one_hot_cols[which(!(one_hot_cols %in% ignore_cols))]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
57
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
58 # Change any categorical fields to 1 hot encoding as requested by the caller.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
59 if (length(one_hot_cols) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
60 print('You chose to treat the following fields as categorical:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
61 print(one_hot_cols)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
62
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
63 # Make sure we have enough levels for 1-hot encoding. We must have at least two.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
64 hkeep = c()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
65 hignore = c()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
66 for (field in one_hot_cols) { # one_hot_cols is a character vector of field names; visit every one
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
67
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
68 # Make sure the field is categorical. If it came in as integer it must be switched.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
69 if (trait_types[[field]] == "integer") {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
70 trait_data[[field]] = as.factor(trait_data[[field]])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
71 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
72 if (trait_types[[field]] == "numeric") {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
73 print('The following quantitative field will be treated as numeric instead.')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
74 print(field)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
75 next
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
76 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
77
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
78 # Now make sure we have enough factors.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
79 if (nlevels(trait_data[[field]]) > 1) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
80 hkeep[length(hkeep)+1] = field
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
81 } else {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
82 hignore[length(hignore)+1] = field
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
83 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
84 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
85
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
86 if (length(hignore) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
87 print('These fields were ignored due to too few factors:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
88 print(hignore)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
89 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
90
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
91 # Perform the 1-hot encoding for specified and valid fields.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
92 if (length(hkeep) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
93 print('These fields were 1-hot encoded:')
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
94 print(hkeep)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
95
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
96 swap_cols = colnames(trait_data)[(colnames(trait_data) %in% hkeep)]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
97 temp = as.data.frame(trait_data[, swap_cols])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
98 colnames(temp) = swap_cols
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
99 temp = apply(temp, 2, make.names)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
100 dmy <- dummyVars(" ~ .", data = temp)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
101 encoded <- data.frame(predict(dmy, newdata = temp))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
102 encoded = sapply(encoded, as.integer)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
103
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
104 # Make a new trait_data table with these new 1-hot fields.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
105 keep_cols = colnames(trait_data)[!(colnames(trait_data) %in% hkeep)] # drop every column that was replaced by its 1-hot encoding
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
106 keep = as.data.frame(trait_data[, keep_cols])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
107 colnames(keep) = keep_cols
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
108
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
109 # Make a new trait_data object that has the columns to keep and the new 1-hot columns.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
110 trait_data = cbind(keep, encoded)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
111 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
112 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
113
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
114 # Write the new trait data file.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
115 write.csv(trait_data, file=opt$updated_trait_matrix, quote=FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
116
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
117 #datatable(trait_data)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
118 trait_data[seq_len(min(10, nrow(trait_data))), seq_len(min(6, ncol(trait_data))), drop = FALSE] # preview at most 10 rows x 6 columns without indexing past the table
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
119 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
120 # Module-Condition Association
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
121
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
122 Now that we have trait/phenotype data, we can explore whether any of the network modules are associated with these features. First is an empirical exploration: viewing the sample dendrogram again, but with traits added and colored by category or numerical intensity, as appropriate. If groups of samples with similar expression also share similar annotations then the same colors will appear "in blocks" under the clustered samples. This view does not indicate associations but can help visualize when some modules might be associated.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
123
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
124 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
125 # Determine the column types within the trait annotation data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
126 trait_types = sapply(trait_data, class)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
127
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
128 # So that we can merge colors together with a cbind, create a
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
129 # data frame with an empty column
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
130 trait_colors = data.frame(empty = rep(1:dim(trait_data)[1]))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
131
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
132 # Set the colors for the quantitative data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
133 quantitative_fields = colnames(trait_data)[which(trait_types == "numeric")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
134 if (length(quantitative_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
135 qdata = as.data.frame(trait_data[,quantitative_fields])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
136 quantitative_colors = numbers2colors(qdata, signed = FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
137 colnames(quantitative_colors) = quantitative_fields
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
138 trait_colors = cbind(trait_colors, quantitative_colors)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
139 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
140
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
141 # Set the colors for the categorical data but only if the column
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
142 # has more than one factor. For columns with more than one factor
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
143 # we should dump that column.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
144 categorical_fields = colnames(trait_data)[which(trait_types == "factor")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
145 if (length(categorical_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
146 cdata = as.data.frame(trait_data[,categorical_fields])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
147 categorical_colors = labels2colors(cdata)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
148 colnames(categorical_colors) = categorical_fields
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
149 trait_colors = cbind(trait_colors, categorical_colors)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
150 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
151
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
152 # Set the colors for the ordinal data.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
153 ordinal_fields = colnames(trait_data)[which(trait_types == "integer")]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
154 if (length(ordinal_fields) > 0) {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
155 odata = as.data.frame(trait_data[,ordinal_fields])
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
156 ordinal_colors = numbers2colors(odata, signed = FALSE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
157 colnames(ordinal_colors) = ordinal_fields
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
158 trait_colors = cbind(trait_colors, ordinal_colors)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
159 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
160
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
161 # Reorder the colors to match the same order of columns in the trait_data df.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
162 trait_order = colnames(trait_data)[colnames(trait_data) %in% colnames(trait_colors)]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
163 trait_colors = trait_colors[,trait_order]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
164 trait_data = trait_data[,trait_order]
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
165 plotSampleDendroTraits <- function() {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
166 plotDendroAndColors(sampleTree, trait_colors,
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
167 groupLabels = names(trait_data),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
168 main = "Sample Dendrogram and Annotation Heatmap",
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
169 cex.dendroLabels = 0.5)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
170 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
171
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
172 png('figures/07-sample_trait_dendrogram.png', width=6 ,height=10, units="in", res=300)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
173 plotSampleDendroTraits()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
174 invisible(dev.off())
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
175 plotSampleDendroTraits()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
176 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
177
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
178 To statistically identify the associations, correlation tests are performed between the eigengenes of each module and the annotation data. The following heatmap shows the results for each annotation feature and each module. Modules with a significant positive association have a correlation value near 1. Modules with a significant negative association have a correlation value near -1. Modules with no correlation have a value near 0.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
179
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
180 ```{r fig.align='center', fig.width=15, fig.height=15}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
181 MEs = orderMEs(MEs)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
182 moduleTraitCor = cor(MEs, trait_data, use = "p");
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
183 moduleTraitPvalue = corPvalueStudent(moduleTraitCor, n_samples);
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
184
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
185 plotModuleTraitHeatmap <- function() {
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
186 # The WGCNA labeledHeatmap function is too overloaded with detail, we'll create a simpler plot.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
187 plotData = melt(moduleTraitCor)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
188 # We want to makes sure the order is the same as in the
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
189 # labeledHeatmap function (example above)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
190 plotData$Var1 = factor(plotData$Var1, levels = rev(colnames(MEs)), ordered=TRUE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
191 # Now use ggplot2 to make a nicer image.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
192 p <- ggplot(plotData, aes(Var2, Var1, fill=value)) +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
193 geom_tile() + xlab('Experimental Conditions') + ylab('WGCNA Modules') +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
194 scale_fill_gradient2(low = "#0072B2", high = "#D55E00",
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
195 mid = "white", midpoint = 0,
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
196 limit = c(-1,1), name="PCC") +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
197 theme_bw() +
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
198 theme(axis.text.x = element_text(angle = 45, hjust=1, vjust=1, size=15),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
199 axis.text.y = element_text(angle = 0, hjust=1, vjust=0.5, size=15),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
200 legend.text=element_text(size=15),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
201 panel.border = element_blank(),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
202 panel.grid.major = element_blank(),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
203 panel.grid.minor = element_blank(),
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
204 axis.line = element_blank())
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
205 print(p)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
206 }
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
207 png('figures/08-module_trait_dendrogram.png', width=12 ,height=12, units="in", res=300)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
208 plotModuleTraitHeatmap()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
209 invisible(dev.off())
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
210 plotModuleTraitHeatmap()
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
211 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
212
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
213 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
214 output = cbind(moduleTraitCor, moduleTraitPvalue)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
215 write.csv(output, file = opt$module_association_file, quote=FALSE, row.names=TRUE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
216 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
217 A file has been generated named `module_association.csv` which contains the list of modules, their correlation values, and p-values indicating the significance of the associations.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
218 ```{r}
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
219 # names (colors) of the modules
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
220 modNames = substring(names(MEs), 3)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
221 geneModuleMembership = as.data.frame(cor(gemt, MEs, use = "p"));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
222 MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), n_samples));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
223 names(geneModuleMembership) = paste("MM", modNames, sep="");
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
224 names(MMPvalue) = paste("p.MM", modNames, sep="");
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
225
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
226 # Calculate the gene trait significance as a Pearson's R and p-value.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
227 gts = as.data.frame(cor(gemt, trait_data, use = "p"));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
228 gtsp = as.data.frame(corPvalueStudent(as.matrix(gts), n_samples));
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
229 colnames(gtsp) = c(paste("p", names(trait_data), sep="."))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
230 colnames(gts) = c(paste("GS", names(trait_data), sep="."))
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
231
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
232 # Write out the gene information.
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
233 output = cbind(Module = module_labels, gts, gtsp)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
234 write.csv(output, file = opt$gene_association_file, quote=FALSE, row.names=TRUE)
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
235
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
236 ```
d1a0b7ded7e3 Uploaded
spficklin
parents:
diff changeset
237 Genes themselves can also have associations with traits. This is calculated via a traditional correlation test as well. Another file has been generated named `gene_association.csv` which provides the list of genes, the modules they belong to and the association of each gene to the trait features.