comparison aurora_wgcna.Rmd @ 10:96ba1a8fff06 draft

Uploaded
author spficklin
date Fri, 06 Dec 2019 13:15:35 -0500
parents b14e4bf568b0
children 53fc3fdf5e9a
comparison
equal deleted inserted replaced
9:4f3352aab9e2 10:96ba1a8fff06
6 --- 6 ---
7 7
8 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} 8 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
9 knitr::opts_chunk$set(error = FALSE, echo = FALSE) 9 knitr::opts_chunk$set(error = FALSE, echo = FALSE)
10 ``` 10 ```
11 ```{r, include=FALSE}
12 options(tinytex.verbose = TRUE)
13 ```
11 14
12 ```{r} 15 ```{r}
13 # Make a directory for saving the figures. 16 # Make a directory for saving the figures.
14 dir.create('figures', showWarnings = FALSE) 17 dir.create('figures', showWarnings = FALSE)
15 ``` 18 ```
16 19
17 # Introduction 20 # Introduction
18 This report contains step-by-step results from use of the [Aurora Galaxy](https://github.com/statonlab/aurora-galaxy-tools) Weighted Gene Co-expression Network Analysis [WGCNA](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559) tool. This tool wraps the WGCNA R package into a ready-to-use Rmarkdown file. It performs module discovery and network construction using a dataset and optional trait data matrix provided. 21 This report contains step-by-step results from use of the [Aurora Galaxy](https://github.com/statonlab/aurora-galaxy-tools) Weighted Gene Co-expression Network Analysis [WGCNA](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559) tool. This tool wraps the WGCNA R package into a ready-to-use Rmarkdown file. It performs module discovery and network construction using a dataset and optional trait data matrix provided.
19 22
20 If you provided trait data, a second report will be available with results comparing the trait values to the identified modules. 23 If you provided trait data, a second report will be available with results comparing the trait values to the identified modules.
21 24
22 This report was generated on: 25 This report was generated on:
23 ```{r} 26 ```{r}
62 print(opt$missing_value1) 65 print(opt$missing_value1)
63 66
64 if (!is.null(opt$trait_data)) { 67 if (!is.null(opt$trait_data)) {
65 print('The column in the trait data that contains the sample name:') 68 print('The column in the trait data that contains the sample name:')
66 print(opt$sname_col) 69 print(opt$sname_col)
67 70
68 print('The character string used to identify missing values in the trait data:') 71 print('The character string used to identify missing values in the trait data:')
69 print(opt$missing_value2) 72 print(opt$missing_value2)
70 73
71 print('Columns in the trait data that should be treated as categorical:') 74 print('Columns in the trait data that should be treated as categorical:')
72 print(opt$one_hot_cols) 75 print(opt$one_hot_cols)
73 76
74 print('Columns in the trait data that should be ignored:') 77 print('Columns in the trait data that should be ignored:')
75 print(opt$ignore_cols) 78 print(opt$ignore_cols)
76 } 79 }
77 ``` 80 ```
78 81
81 84
82 - Do the formats for the input datasets match the requirements listed above. 85 - Do the formats for the input datasets match the requirements listed above.
83 - Do the values set for missing values match the values in the input files, and is the missing value used consistently within the input files (i.e you don't have more than one such as 0.0 and 0, or NA and 0.0) 86 - Do the values set for missing values match the values in the input files, and is the missing value used consistently within the input files (i.e you don't have more than one such as 0.0 and 0, or NA and 0.0)
84 - If trait data was provided, check that the column specified for the sample name is correct. 87 - If trait data was provided, check that the column specified for the sample name is correct.
85 - The block size should not exceed 10,000 and should not be lower than 1,000. 88 - The block size should not exceed 10,000 and should not be lower than 1,000.
86 - Ensure that the sample names and all headers in the trait/phenotype data only contain alpha-numeric and underscore characters. 89 - Ensure that the sample names and all headers in the trait/phenotype data only contain alpha-numeric and underscore characters.
87 90
88 91
89 # Expression Data 92 # Expression Data
90 93
91 The content below shows the first 10 rows and 6 columns of the Gene Expression Matrix (GEM) file that was provided. 94 The content below shows the first 10 rows and 6 columns of the Gene Expression Matrix (GEM) file that was provided.
199 ylab="Scale Free Topology Model Fit,signed R^2", type="n", 202 ylab="Scale Free Topology Model Fit,signed R^2", type="n",
200 main = paste("Scale Independence"), cex.lab = 0.5); 203 main = paste("Scale Independence"), cex.lab = 0.5);
201 text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], 204 text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
202 labels=powers,cex=0.5,col="red"); 205 labels=powers,cex=0.5,col="red");
203 #abline(h=th, col="blue") 206 #abline(h=th, col="blue")
204 207
205 # Mean connectivity as a function of the soft-thresholding power. 208 # Mean connectivity as a function of the soft-thresholding power.
206 plot(sft$fitIndices[,1], sft$fitIndices[,5], 209 plot(sft$fitIndices[,1], sft$fitIndices[,5],
207 xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", 210 xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
208 main = paste("Mean Connectivity"), cex.lab = 0.5) 211 main = paste("Mean Connectivity"), cex.lab = 0.5)
209 text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=0.5,col="red") 212 text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=0.5,col="red")
290 colors = module_labels[net$blockGenes[[i]]] 293 colors = module_labels[net$blockGenes[[i]]]
291 colors = sub('ME','', colors) 294 colors = sub('ME','', colors)
292 plotClusterDendro <- function() { 295 plotClusterDendro <- function() {
293 plotDendroAndColors(net$dendrograms[[i]], colors, 296 plotDendroAndColors(net$dendrograms[[i]], colors,
294 "Module colors", dendroLabels = FALSE, hang = 0.03, 297 "Module colors", dendroLabels = FALSE, hang = 0.03,
295 addGuide = TRUE, guideHang = 0.05, 298 addGuide = TRUE, guideHang = 0.05,
296 main=paste('Cluster Dendgrogram, Block', i)) 299 main=paste('Cluster Dendgrogram, Block', i))
297 } 300 }
298 png(paste0('figures/06-cluster_dendrogram_block_', i, '.png'), width=6 ,height=4, units="in", res=300) 301 png(paste0('figures/06-cluster_dendrogram_block_', i, '.png'), width=6 ,height=4, units="in", res=300)
299 plotClusterDendro(); 302 plotClusterDendro();
300 invisible(dev.off()) 303 invisible(dev.off())
325 # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing 328 # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
326 # the color palette; setting the diagonal to NA also improves the clarity of the plot 329 # the color palette; setting the diagonal to NA also improves the clarity of the plot
327 plotDiss = selectTOM^7; 330 plotDiss = selectTOM^7;
328 diag(plotDiss) = NA; 331 diag(plotDiss) = NA;
329 colors = sub('ME','', selectColors) 332 colors = sub('ME','', selectColors)
330 333
331 png(paste0('figures/06-TOM_heatmap_block_', i, '.png'), width=6 ,height=6, units="in", res=300) 334 png(paste0('figures/06-TOM_heatmap_block_', i, '.png'), width=6 ,height=6, units="in", res=300)
332 TOMplot(plotDiss, selectTree, colors, main = paste('TOM Heatmap, Block', i)) 335 TOMplot(plotDiss, selectTree, colors, main = paste('TOM Heatmap, Block', i))
333 dev.off() 336 dev.off()
334 TOMplot(plotDiss, selectTree, colors, main = paste('TOM Heatmap, Block', i)) 337 TOMplot(plotDiss, selectTree, colors, main = paste('TOM Heatmap, Block', i))
335 } 338 }