Mercurial > repos > mingchen0919 > rmarkdown_wgcna
changeset 0:4275479ada3a draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 12:35:50 -0400 |
parents | |
children | 337fedd38522 |
files | wgcna_construct_network.Rmd wgcna_construct_network.xml wgcna_construct_network_render.R wgcna_eigengene_visualization.Rmd wgcna_eigengene_visualization.xml wgcna_eigengene_visualization_render.R wgcna_preprocessing.Rmd wgcna_preprocessing.xml wgcna_preprocessing_render.R |
diffstat | 9 files changed, 999 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_construct_network.Rmd Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,178 @@ +--- +title: 'WGCNA: construct network' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO +) +``` + +# Import workspace + +This step imports workspace from the **WGCNA: preprocessing** step. + +```{r} +fcp = file.copy("PREPROCESSING_WORKSPACE", "deseq.RData") +load("deseq.RData") +``` + + +# Processing outliers {.tabset} + +## Before removing outliers + +```{r} +plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, + cex.axis = 1, cex.main = 1, cex = 0.5) +if(!is.na(HEIGHT_CUT)) { + # plot a line to show the cut + abline(h = HEIGHT_CUT, col = "red") + # determine cluster under the line + clust = cutreeStatic(sampleTree, cutHeight = HEIGHT_CUT, minSize = 10) + keepSamples = (clust==1) + expression_data = expression_data[keepSamples, ] +} +``` + +## After removing outliers + +```{r} +sampleTree = hclust(dist(expression_data), method = "average"); +plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", + cex.axis = 1, cex.main = 1, cex = 0.5) +``` + + +# Trait data {.tabeset} + +If trait data is provided, the first 100 rows from the data will be displayed here. A plot consisting of sample cluster dendrogram and trait heatmap will also be gerenated. + +## Trait data table + +```{r} +trait_data = data.frame() +if ("TRAIT_DATA" != 'None') { + trait_data = read.csv("TRAIT_DATA", header = TRUE, row.names = 1) + # form a data frame analogous to expression data that will hold the traits. + sample_names = rownames(expression_data) + trait_rows = match(sample_names, rownames(trait_data)) + trait_data = trait_data[trait_rows, ] + datatable(head(trait_data, 100), style="bootstrap", filter = 'top', + class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) +} +``` + +## Dendrogram and heatmap + +```{r fig.align='center', fig.width=8, fig.height=9} +if (nrow(trait_data) != 0) { + traitColors = numbers2colors(trait_data, signed = FALSE) + plotDendroAndColors(sampleTree, traitColors, + groupLabels = names(trait_data), + main = "Sample dendrogram and trait heatmap", + cex.dendroLabels = 0.5) +} +``` + + +# The thresholding power + +```{r} +powers = c(1:10, seq(12, 20, 2)) +soft_threshold = pickSoftThreshold(expression_data, powerVector = powers, verbose = 5) +``` + +```{r fig.align='center'} +par(mfrow=c(1,2)) +plot(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2], + xlab="Soft Threshold (power)", + ylab="Scale Free Topology Model Fit,signed R^2",type="n", + main = paste("Scale independence"), + cex.lab = 0.5); +text(soft_threshold$fitIndices[,1], -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2], + labels=powers,cex=0.5,col="red"); + +# calculate soft threshold power +y = -sign(soft_threshold$fitIndices[,3])*soft_threshold$fitIndices[,2] +r2_cutoff = 0.9 +for(i in 1:length(powers)) { + if(y[i] > r2_cutoff) { + soft_threshold_power = soft_threshold$fitIndices[,1][i] + r2_cutoff_new = y[i] + break + } + soft_threshold_power = soft_threshold$fitIndices[,1][length(powers)] +} +abline(h=r2_cutoff, col="red") +abline(v=soft_threshold_power, col="blue") +text(soft_threshold_power+1, r2_cutoff-0.1, + paste0('R^2 cutoff = ', round(r2_cutoff_new,2)), + cex = 0.5, col = "red") + +plot(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], + xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", + main = paste("Mean connectivity"), + cex.lab = 0.5) +text(soft_threshold$fitIndices[,1], soft_threshold$fitIndices[,5], labels=powers, cex=0.5,col="red") +par(mfrow=c(1,1)) +``` + + +# Construct network + +The gene network is constructed based on **soft threshold power = `r soft_threshold_power`** + +```{r} +gene_network = blockwiseModules(expression_data, power = soft_threshold_power, + TOMType = "unsigned", minModuleSize = 30, + reassignThreshold = 0, mergeCutHeight = 0.25, + numericLabels = TRUE, pamRespectsDendro = FALSE, + verbose = 3) +``` + + +# Gene modules {.tabset} + +## Idenfity gene modules + +```{r} +modules = table(gene_network$colors) +n_modules = length(modules) - 1 +module_size_upper = modules[2] +module_size_lower = modules[length(modules)] + +module_table = data.frame(model_label = c(0, 1:n_modules), + gene_size = as.vector(modules)) +datatable(t(module_table)) +``` + +The results above indicates that there are **`r n_modules` gene modules**, labeled 1 through `r length(n_modules)` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules. + + +## Dendrogram and module plot + +```{r} +# Convert labels to colors for plotting +module_colors = labels2colors(gene_network$colors) +# Plot the dendrogram and the module colors underneath +plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]], + "Module colors", + dendroLabels = FALSE, hang = 0.03, + addGuide = TRUE, guideHang = 0.05) +``` + + +```{r echo=FALSE} +# save workspace +rm("opt") +save(list=ls(all.names = TRUE), file='CONSTRUCT_NETWORK_WORKSPACE') +``` + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_construct_network.xml Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,105 @@ +<tool id="wgcna_construct_network" name="WGCNA: construct network" version="1.0.0"> + <requirements> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.2">r-rmarkdown</requirement> + <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="0.4.0">r-highcharter</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + <requirement type="package" version="0.3.5">r-htmltools</requirement> + <requirement type="package" version="1.51">r-wgcna</requirement> + </requirements> + <description> + Construct gene network. + </description> + <stdio> + <regex match="Execution halted" + source="both" + level="fatal" + description="Execution halted." /> + <regex match="Error in" + source="both" + level="fatal" + description="An undefined error occured, please check your intput carefully and contact your administrator." /> + <regex match="Fatal error" + source="both" + level="fatal" + description="An undefined error occured, please check your intput carefully and contact your administrator." /> + </stdio> + <command> + <![CDATA[ + + Rscript '${__tool_directory__}/wgcna_construct_network_render.R' + + ## 1. input data + -e $echo + -w $preprocessing_workspace + -h '$height_cut' + -t $trait_data + + + + ## 2. output report and report site directory + -o $wgcna_construct_network + -d $wgcna_construct_network.files_path + -W $construct_network_workspace + + + ## 3. Rmd templates in the tool directory + + ## _site.yml and index.Rmd template files + -M '${__tool_directory__}/wgcna_construct_network.Rmd' + + + + ]]> + </command> + <inputs> + <param type="data" name="preprocessing_workspace" format="rdata" optional="false" + label="R workspace from WGCNA: preprocessing" /> + <param type="float" name="height_cut" optional="true" label="Height" + help="Refer to the sample clustering plot from WGCNA: preprocessing and choose a height cut that will + remove outliers. If there is not outlier, leave this field blank." /> + <param type="data" name="trait_data" format="csv" optional="true" + label="Trait data" + help="If trait data is provided, a plot consisting of sample clustering and trait heatmap will + be generated. This field is optional. "/> + + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> + </inputs> + <outputs> + <data name="wgcna_construct_network" format="html" label="WGCNA: construct_network" /> + <data name="construct_network_workspace" format="rdata" label="R workspace: WGCNA construct_network" /> + </outputs> + <citations> + <citation type="bibtex"> + @article{langfelder2008wgcna, + title={WGCNA: an R package for weighted correlation network analysis}, + author={Langfelder, Peter and Horvath, Steve}, + journal={BMC bioinformatics}, + volume={9}, + number={1}, + pages={559}, + year={2008}, + publisher={BioMed Central} + } + </citation> + <citation type="bibtex"> + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + </citation> + <citation type="bibtex"> + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + </citation> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_construct_network_render.R Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,112 @@ +##======= Handle arguments from command line ======== +# setup R error handline to go to stderr +options(show.error.messages=FALSE, + error=function(){ + cat(geterrmessage(), file=stderr()) + quit("no", 1, F) + }) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +# suppress warning +options(warn = -1) + +options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) +args = commandArgs(trailingOnly=TRUE) + +suppressPackageStartupMessages({ + library(getopt) + library(tools) +}) + +# column 1: the long flag name +# column 2: the short flag alias. A SINGLE character string +# column 3: argument mask +# 0: no argument +# 1: argument required +# 2: argument is optional +# column 4: date type to which the flag's argument shall be cast. +# possible values: logical, integer, double, complex, character. +spec_list=list() + +##------- 1. input data --------------------- +spec_list$ECHO = c('echo', 'e', '1', 'character') +spec_list$PREPROCESSING_WORKSPACE = c('preprocessing_workspace', 'w', '1', 'character') +spec_list$HEIGHT_CUT = c('height_cut', 'h', '2', 'double') +spec_list$TRAIT_DATA = c('trait_data', 't', '2', 'character') + + +##--------2. output report and report site directory -------------- +spec_list$OUTPUT_HTML = c('wgcna_construct_network_html', 'o', '1', 'character') +spec_list$OUTPUT_DIR = c('wgcna_construct_network_dir', 'd', '1', 'character') +spec_list$CONSTRUCT_NETWORK_WORKSPACE = c('construct_network_workspace', 'W', '1', 'character') + + +##--------3. Rmd templates in the tool directory ---------- + +spec_list$WGCNA_PREPROCESSING_RMD = c('wgcna_construct_network_rmd', 'M', '1', 'character') + + + +##------------------------------------------------------------------ + +spec = t(as.data.frame(spec_list)) +opt = getopt(spec) +# arguments are accessed by long flag name (the first column in the spec matrix) +# NOT by element name in the spec_list +# example: opt$help, opt$expression_file +##====== End of arguments handling ========== + +#------ Load libraries --------- +library(rmarkdown) +library(WGCNA) +library(DT) +library(htmltools) +library(ggplot2) + + +#----- 1. create the report directory ------------------------ +system(paste0('mkdir -p ', opt$wgcna_construct_network_dir)) + + +#----- 2. generate Rmd files with Rmd templates -------------- +# a. templates without placeholder variables: +# copy templates from tool directory to the working directory. +# b. templates with placeholder variables: +# substitute variables with user input values and place them in the working directory. + + +#----- 01 wgcna_construct_network.Rmd ----------------------- +readLines(opt$wgcna_construct_network_rmd) %>% + (function(x) { + gsub('ECHO', opt$echo, x) + }) %>% + (function(x) { + gsub('PREPROCESSING_WORKSPACE', opt$preprocessing_workspace, x) + }) %>% + (function(x) { + gsub('HEIGHT_CUT', opt$height_cut, x) + }) %>% + (function(x) { + gsub('TRAIT_DATA', opt$trait_data, x) + }) %>% + (function(x) { + gsub('OUTPUT_DIR', opt$wgcna_construct_network_dir, x) + }) %>% + (function(x) { + gsub('CONSTRUCT_NETWORK_WORKSPACE', opt$construct_network_workspace, x) + }) %>% + (function(x) { + fileConn = file('wgcna_construct_network.Rmd') + writeLines(x, con=fileConn) + close(fileConn) + }) + + +#------ 3. render all Rmd files -------- +render('wgcna_construct_network.Rmd', output_file = opt$wgcna_construct_network_html) + +#-------4. manipulate outputs ----------------------------- + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_eigengene_visualization.Rmd Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,121 @@ +--- +title: 'WGCNA: eigengene visualization' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO +) +``` + +# Import workspace + +This step imports workspace from the **WGCNA: construct network** step. + +```{r} +fcp = file.copy("CONSTRUCT_NETWORK_WORKSPACE", "deseq.RData") +load("deseq.RData") +``` + + +# Gene modules {.tabset} + +```{r} +if(!is.na(SOFT_THRESHOLD_POWER)) soft_threshold_power = SOFT_THRESHOLD_POWER +``` + +## Identify gene modules + +The gene network is constructed based on **soft threshold power = `r soft_threshold_power`** + +```{r} +gene_network = blockwiseModules(expression_data, power = soft_threshold_power, + TOMType = "unsigned", minModuleSize = 30, + reassignThreshold = 0, mergeCutHeight = 0.25, + numericLabels = TRUE, pamRespectsDendro = FALSE, + verbose = 3) +``` + + +```{r} +modules = table(gene_network$colors) +n_modules = length(modules) - 1 +module_size_upper = modules[2] +module_size_lower = modules[length(modules)] + +module_table = data.frame(model_label = c(0, 1:n_modules), + gene_size = as.vector(modules)) +datatable(t(module_table)) +``` + +The results above indicates that there are **`r n_modules` gene modules**, labeled 1 through `r length(n_modules)` in order of descending size. The largest module has **`r module_size_upper` genes**, and the smallest module has **`r module_size_lower` genes**. The label 0 is reserved for genes outside of all modules. + + +## Dendrogram and module plot + +```{r} +# Convert labels to colors for plotting +module_colors = labels2colors(gene_network$colors) +# Plot the dendrogram and the module colors underneath +plotDendroAndColors(gene_network$dendrograms[[1]], module_colors[gene_network$blockGenes[[1]]], + "Module colors", + dendroLabels = FALSE, hang = 0.03, + addGuide = TRUE, guideHang = 0.05) +``` + + +# Gene module correlation + +We can calculate eigengenes and use them as representative profiles to quantify similarity of found gene modules. + +```{r} +n_genes = ncol(expression_data) +n_samples = nrow(expression_data) +``` + +```{r} +diss_tom = 1-TOMsimilarityFromExpr(expression_data, power = soft_threshold_power) +set.seed(123) +select_genes = sample(n_genes, size = PLOT_GENES) +select_diss_tom = diss_tom[select_genes, select_genes] + +# calculate gene tree on selected genes +select_gene_tree = hclust(as.dist(select_diss_tom), method = 'average') +select_module_colors = module_colors[select_genes] + +# transform diss_tom with a power to make moderately strong connections more visiable in the heatmap. +plot_diss_tom = select_diss_tom^7 +# set diagonal to NA for a nicer plot +diag(plot_diss_tom) = NA +``` + + +```{r fig.align='center'} +TOMplot(plot_diss_tom, select_gene_tree, select_module_colors, main = "Network heatmap") +``` + + +# Eigengene visualization {.tabset} + +## Eigengene dendrogram + +```{r fig.align='center'} +module_eigengenes = moduleEigengenes(expression_data, module_colors)$eigengenes +plotEigengeneNetworks(module_eigengenes, "Eigengene dendrogram", + plotHeatmaps = FALSE) +``` + +## Eigengene adjacency heatmap + +```{r fig.align='center'} +plotEigengeneNetworks(module_eigengenes, "Eigengene adjacency heatmap", + marHeatmap = c(2, 3, 2, 2), + plotDendrograms = FALSE, xLabelsAngle = 90) +``` +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_eigengene_visualization.xml Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,100 @@ +<tool id="wgcna_eigengene_visualization" name="WGCNA: eigengene visualization" version="1.0.0"> + <requirements> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.2">r-rmarkdown</requirement> + <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="0.4.0">r-highcharter</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + <requirement type="package" version="0.3.5">r-htmltools</requirement> + <requirement type="package" version="1.51">r-wgcna</requirement> + </requirements> + <description> + Eigengene visualization. + </description> + <stdio> + <regex match="Execution halted" + source="both" + level="fatal" + description="Execution halted." /> + <regex match="Error in" + source="both" + level="fatal" + description="An undefined error occured, please check your intput carefully and contact your administrator." /> + <regex match="Fatal error" + source="both" + level="fatal" + description="An undefined error occured, please check your intput carefully and contact your administrator." /> + </stdio> + <command> + <![CDATA[ + ## Add tools to PATH + export PATH=/opt/R-3.2.5/bin:\$PATH && + + Rscript '${__tool_directory__}/wgcna_eigengene_visualization_render.R' + + ## 1. input data + -e $echo + -w $construct_network_workspace + -p '$soft_threshold_power' + -n $plot_genes + + + ## 2. output report and report site directory + -o $wgcna_eigengene_visualization + -d $wgcna_eigengene_visualization.files_path + + ## 3. Rmd templates in the tool directory + + -M '${__tool_directory__}/wgcna_eigengene_visualization.Rmd' + + + + ]]> + </command> + <inputs> + <param type="data" name="construct_network_workspace" format="rdata" optional="false" + label="R workspace from WGCNA: construct network" /> + <param type="integer" name="soft_threshold_power" optional="true" label="Soft threshold power" + help="Refer to the scale independence plot from 'WGCNA: construct network' and choose an optimal soft threshold power. + An optimal power will be calculated automatically if no value is provided." /> + <param type="integer" name="plot_genes" value="400" min="1" label="Number of genes" optional="false" + help="The number of genes that will be used. It is possible to speed up the plotting by providing a subset of + genes. However, the gene dendrogram may ofter look different from dendrogram of all genes." /> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> + </inputs> + <outputs> + <data name="wgcna_eigengene_visualization" format="html" label="WGCNA: eigengene visualization" /> + </outputs> + <citations> + <citation type="bibtex"> + @article{langfelder2008wgcna, + title={WGCNA: an R package for weighted correlation network analysis}, + author={Langfelder, Peter and Horvath, Steve}, + journal={BMC bioinformatics}, + volume={9}, + number={1}, + pages={559}, + year={2008}, + publisher={BioMed Central} + } + </citation> + <citation type="bibtex"> + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + </citation> + <citation type="bibtex"> + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + </citation> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_eigengene_visualization_render.R Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,109 @@ +##======= Handle arguments from command line ======== +# setup R error handline to go to stderr +options(show.error.messages=FALSE, + error=function(){ + cat(geterrmessage(), file=stderr()) + quit("no", 1, F) + }) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +# suppress warning +options(warn = -1) + +options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) +args = commandArgs(trailingOnly=TRUE) + +suppressPackageStartupMessages({ + library(getopt) + library(tools) +}) + +# column 1: the long flag name +# column 2: the short flag alias. A SINGLE character string +# column 3: argument mask +# 0: no argument +# 1: argument required +# 2: argument is optional +# column 4: date type to which the flag's argument shall be cast. +# possible values: logical, integer, double, complex, character. +spec_list=list() + +##------- 1. input data --------------------- +spec_list$ECHO = c('echo', 'e', '1', 'character') +spec_list$CONSTRUCT_NETWORK_WORKSPACE = c('construct_network_workspace', 'w', '1', 'character') +spec_list$SOFT_THRESHOLD_POWER = c('soft_threshold_power', 'p', '2', 'double') +spec_list$PLOT_GENES = c('plot_genes', 'n', '1', 'integer') + + +##--------2. output report and report site directory -------------- +spec_list$OUTPUT_HTML = c('wgcna_eigengene_visualization_html', 'o', '1', 'character') +spec_list$OUTPUT_DIR = c('wgcna_eigengene_visualization_dir', 'd', '1', 'character') + + + +##--------3. Rmd templates in the tool directory ---------- + +spec_list$WGCNA_EIGENGENE_VISUALIZATION_RMD = c('wgcna_eigengene_visualization_rmd', 'M', '1', 'character') + + + +##------------------------------------------------------------------ + +spec = t(as.data.frame(spec_list)) +opt = getopt(spec) +# arguments are accessed by long flag name (the first column in the spec matrix) +# NOT by element name in the spec_list +# example: opt$help, opt$expression_file +##====== End of arguments handling ========== + +#------ Load libraries --------- +library(rmarkdown) +library(WGCNA) +library(DT) +library(htmltools) +library(ggplot2) + + +#----- 1. create the report directory ------------------------ +system(paste0('mkdir -p ', opt$wgcna_eigengene_visualization_dir)) + + +#----- 2. generate Rmd files with Rmd templates -------------- +# a. templates without placeholder variables: +# copy templates from tool directory to the working directory. +# b. templates with placeholder variables: +# substitute variables with user input values and place them in the working directory. + + +#----- 01 wgcna_eigengene_visualization.Rmd ----------------------- +readLines(opt$wgcna_eigengene_visualization_rmd) %>% + (function(x) { + gsub('ECHO', opt$echo, x) + }) %>% + (function(x) { + gsub('CONSTRUCT_NETWORK_WORKSPACE', opt$construct_network_workspace, x) + }) %>% + (function(x) { + gsub('SOFT_THRESHOLD_POWER', opt$soft_threshold_power, x) + }) %>% + (function(x) { + gsub('PLOT_GENES', opt$plot_genes, x) + }) %>% + (function(x) { + gsub('OUTPUT_DIR', opt$wgcna_eigengene_visualization_dir, x) + }) %>% + (function(x) { + fileConn = file('wgcna_eigengene_visualization.Rmd') + writeLines(x, con=fileConn) + close(fileConn) + }) + + +#------ 3. render all Rmd files -------- +render('wgcna_eigengene_visualization.Rmd', output_file = opt$wgcna_eigengene_visualization_html) + +#-------4. manipulate outputs ----------------------------- + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_preprocessing.Rmd Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,76 @@ +--- +title: 'WGCNA: data preprocessing' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO +) +``` + +```{r} +str(opt) +``` + +# Import data + +Each row represents a gene and each column represents a sample. + +```{r} +expression_data = read.csv('EXPRESSION_DATA', header = TRUE, row.names = 1) +``` + +Display the first 100 genes. + +```{r} +datatable(head(expression_data, 100), style="bootstrap", filter = 'top', + class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) +``` + +Transpose expression data matrix so that each row represents a sample and each column represents a gene. + +```{r} +expression_data = as.data.frame(t(expression_data)) +``` + +# Checking data + +Checking data for excessive missing values and identification of outlier microarray samples. + +```{r} +gsg = goodSamplesGenes(expression_data, verbose = 3) +if (!gsg$allOK) { + # Optionally, print the gene and sample names that were removed: + if (sum(!gsg$goodGenes)>0) + printFlush(paste("Removing genes:", paste(names(expression_data)[!gsg$goodGenes], collapse = ", "))); + if (sum(!gsg$goodSamples)>0) + printFlush(paste("Removing samples:", paste(rownames(expression_data)[!gsg$goodSamples], collapse = ", "))); + # Remove the offending genes and samples from the data: + expression_data = expression_data[gsg$goodSamples, gsg$goodGenes] +} else { + print('all genes are OK!') +} +``` + +# Clustering samples + +If there are any outliers, choose a height cut that will remove the offending sample. Remember this number since you will need this number in further analysis. + +```{r fig.align='center'} +sampleTree = hclust(dist(expression_data), method = "average"); +plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", + cex.axis = 1, cex.main = 1, cex = 0.5) +``` + + +```{r echo=FALSE} +rm("opt") +save(list=ls(all.names = TRUE), file='PREPROCESSING_WORKSPACE') +``` +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_preprocessing.xml Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,96 @@ +<tool id="wgcna_preprocessing" name="WGCNA: preprocessing" version="1.0.0"> + <requirements> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.2">r-rmarkdown</requirement> + <requirement type="package" version="1.8.4">r-plyr</requirement> + <requirement type="package" version="0.4.0">r-highcharter</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + <requirement type="package" version="0.3.5">r-htmltools</requirement> + <requirement type="package" version="1.51">r-wgcna</requirement> + </requirements> + <description> + Data clearning and preprocessing. + </description> + <stdio> + <regex match="Execution halted" + source="both" + level="fatal" + description="Execution halted." /> + <regex match="Error in" + source="both" + level="fatal" + description="An undefined error occured, please check your intput carefully and contact your administrator." /> + <regex match="Fatal error" + source="both" + level="fatal" + description="An undefined error occured, please check your intput carefully and contact your administrator." /> + </stdio> + <command> + <![CDATA[ + ## Add tools to PATH + export PATH=/opt/R-3.2.5/bin:\$PATH && + + Rscript '${__tool_directory__}/wgcna_preprocessing_render.R' + + ## 1. input data + -e $echo + -E $expression_data + + + ## 2. output report and report site directory + -o $wgcna_preprocessing + -d $wgcna_preprocessing.files_path + -w $preprocessing_workspace + + ## 3. Rmd templates sitting in the tool directory + + ## _site.yml and index.Rmd template files + -D '${__tool_directory__}/wgcna_preprocessing.Rmd' + + + + ]]> + </command> + <inputs> + <param type="data" name="expression_data" format="csv" optional="false" label="Gene expression data" + help="Each row represents a gene and each column represents a sample."/> + + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> + </inputs> + <outputs> + <data name="wgcna_preprocessing" format="html" label="WGCNA: preprocessing" /> + <data name="preprocessing_workspace" format="rdata" label="R workspace: WGCNA preprocessing" /> + </outputs> + <citations> + <citation type="bibtex"> + @article{langfelder2008wgcna, + title={WGCNA: an R package for weighted correlation network analysis}, + author={Langfelder, Peter and Horvath, Steve}, + journal={BMC bioinformatics}, + volume={9}, + number={1}, + pages={559}, + year={2008}, + publisher={BioMed Central} + } + </citation> + <citation type="bibtex"> + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + </citation> + <citation type="bibtex"> + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + </citation> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_preprocessing_render.R Tue Aug 08 12:35:50 2017 -0400 @@ -0,0 +1,102 @@ +##======= Handle arguments from command line ======== +# setup R error handline to go to stderr +options(show.error.messages=FALSE, + error=function(){ + cat(geterrmessage(), file=stderr()) + quit("no", 1, F) + }) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc = Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +# suppress warning +options(warn = -1) + +options(stringsAsFactors=FALSE, useFancyQuotes=FALSE) +args = commandArgs(trailingOnly=TRUE) + +suppressPackageStartupMessages({ + library(getopt) + library(tools) +}) + +# column 1: the long flag name +# column 2: the short flag alias. A SINGLE character string +# column 3: argument mask +# 0: no argument +# 1: argument required +# 2: argument is optional +# column 4: date type to which the flag's argument shall be cast. +# possible values: logical, integer, double, complex, character. +spec_list=list() + +##------- 1. input data --------------------- +spec_list$ECHO = c('echo', 'e', '1', 'character') +spec_list$EXPRESSION_DATA = c('expression_data', 'E', '1', 'character') + + +##--------2. output report and report site directory -------------- +spec_list$OUTPUT_HTML = c('wgcna_preprocessing_html', 'o', '1', 'character') +spec_list$OUTPUT_DIR = c('wgcna_preprocessing_dir', 'd', '1', 'character') +spec_list$PREPROCESSING_WORKSPACE = c('preprocessing_workspace', 'w', '1', 'character') + +##--------3. Rmd templates sitting in the tool directory ---------- + +spec_list$WGCNA_PREPROCESSING_RMD = c('wgcna_preprocessing_rmd', 'D', '1', 'character') + + + +##------------------------------------------------------------------ + +spec = t(as.data.frame(spec_list)) +opt = getopt(spec) +# arguments are accessed by long flag name (the first column in the spec matrix) +# NOT by element name in the spec_list +# example: opt$help, opt$expression_file +##====== End of arguments handling ========== + +#------ Load libraries --------- +library(rmarkdown) +library(WGCNA) +library(DT) +library(htmltools) + + +#----- 1. create the report directory ------------------------ +system(paste0('mkdir -p ', opt$wgcna_preprocessing_dir)) + + +#----- 2. generate Rmd files with Rmd templates -------------- +# a. templates without placeholder variables: +# copy templates from tool directory to the working directory. +# b. templates with placeholder variables: +# substitute variables with user input values and place them in the working directory. + + +#----- 01 wgcna_preprocessing.Rmd ----------------------- +readLines(opt$wgcna_preprocessing_rmd) %>% + (function(x) { + gsub('ECHO', opt$echo, x) + }) %>% + (function(x) { + gsub('EXPRESSION_DATA', opt$expression_data, x) + }) %>% + (function(x) { + gsub('OUTPUT_DIR', opt$wgcna_preprocessing_dir, x) + }) %>% + (function(x) { + gsub('PREPROCESSING_WORKSPACE', opt$preprocessing_workspace, x) + }) %>% + (function(x) { + fileConn = file('wgcna_preprocessing.Rmd') + writeLines(x, con=fileConn) + close(fileConn) + }) + + +#------ 3. render all Rmd files -------- +render('wgcna_preprocessing.Rmd', output_file = opt$wgcna_preprocessing_html) + +#-------4. manipulate outputs ----------------------------- + +