Mercurial > repos > mingchen0919 > rmarkdown_fastqc_site
diff wgcna_preprocessing.Rmd @ 6:2f4df2be0572 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 12:35:11 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wgcna_preprocessing.Rmd Tue Aug 08 12:35:11 2017 -0400 @@ -0,0 +1,76 @@ +--- +title: 'WGCNA: data preprocessing' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO +) +``` + +```{r} +str(opt) +``` + +# Import data + +Each row represents a gene and each column represents a sample. + +```{r} +expression_data = read.csv('EXPRESSION_DATA', header = TRUE, row.names = 1) +``` + +Display the first 100 genes. + +```{r} +datatable(head(expression_data, 100), style="bootstrap", filter = 'top', + class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) +``` + +Transpose expression data matrix so that each row represents a sample and each column represents a gene. + +```{r} +expression_data = as.data.frame(t(expression_data)) +``` + +# Checking data + +Checking data for excessive missing values and identification of outlier microarray samples. + +```{r} +gsg = goodSamplesGenes(expression_data, verbose = 3) +if (!gsg$allOK) { + # Optionally, print the gene and sample names that were removed: + if (sum(!gsg$goodGenes)>0) + printFlush(paste("Removing genes:", paste(names(expression_data)[!gsg$goodGenes], collapse = ", "))); + if (sum(!gsg$goodSamples)>0) + printFlush(paste("Removing samples:", paste(rownames(expression_data)[!gsg$goodSamples], collapse = ", "))); + # Remove the offending genes and samples from the data: + expression_data = expression_data[gsg$goodSamples, gsg$goodGenes] +} else { + print('all genes are OK!') +} +``` + +# Clustering samples + +If there are any outliers, choose a height cut that will remove the offending sample. Remember this number since you will need this number in further analysis. + +```{r fig.align='center'} +sampleTree = hclust(dist(expression_data), method = "average"); +plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", + cex.axis = 1, cex.main = 1, cex = 0.5) +``` + + +```{r echo=FALSE} +rm("opt") +save(list=ls(all.names = TRUE), file='PREPROCESSING_WORKSPACE') +``` +