Mercurial > repos > mingchen0919 > rmarkdown_wgcna
comparison wgcna_preprocessing.Rmd @ 0:4275479ada3a draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 12:35:50 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4275479ada3a |
---|---|
1 --- | |
2 title: 'WGCNA: data preprocessing' | |
3 output: | |
4 html_document: | |
5 number_sections: true | |
6 toc: true | |
7 theme: cosmo | |
8 highlight: tango | |
9 --- | |
10 | |
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | |
12 # Set the default `echo` option for every later chunk. ECHO is presumably a | |
13 # placeholder substituted before the report is rendered -- TODO confirm. | |
14 knitr::opts_chunk$set(echo = ECHO) | |
15 ``` | |
16 | |
17 ```{r} | |
18 utils::str(opt)  # show the structure of the `opt` object in the report | |
19 ``` | |
20 | |
21 # Import data | |
22 | |
23 Each row represents a gene and each column represents a sample. | |
24 | |
25 ```{r} | |
26 expression_data <- read.csv(file = 'EXPRESSION_DATA', row.names = 1, header = TRUE)  # first column becomes row names | |
27 ``` | |
28 | |
29 Display the first 100 genes. | |
30 | |
31 ```{r} | |
32 datatable(head(expression_data, n = 100), class = "table-condensed", style = "bootstrap", | |
33 filter = "top", options = list(dom = "tp", scrollX = TRUE))  # preview of the first 100 rows | |
34 ``` | |
35 | |
36 Transpose the expression data matrix so that each row represents a sample and each column represents a gene. | |
37 | |
38 ```{r} | |
39 expression_data <- as.data.frame(t(expression_data))  # rows are now samples, columns are genes | |
40 ``` | |
41 | |
42 # Checking data | |
43 | |
44 Check the data for excessive missing values and identify outlier microarray samples. | |
45 | |
46 ```{r} | |
47 gsg <- goodSamplesGenes(expression_data, verbose = 3) | |
48 if (!gsg$allOK) { | |
49 # Report the genes and samples that failed the check before removing them. | |
50 if (any(!gsg$goodGenes)) | |
51 printFlush(paste("Removing genes:", paste(names(expression_data)[!gsg$goodGenes], collapse = ", "))) | |
52 if (any(!gsg$goodSamples)) | |
53 printFlush(paste("Removing samples:", paste(rownames(expression_data)[!gsg$goodSamples], collapse = ", "))) | |
54 # drop = FALSE keeps a data frame even if only one gene or one sample remains. | |
55 expression_data <- expression_data[gsg$goodSamples, gsg$goodGenes, drop = FALSE] | |
56 } else { | |
57 print('All samples and genes are OK!') | |
58 } | |
59 ``` | |
60 | |
61 # Clustering samples | |
62 | |
63 If there are any outliers, choose a height cut that will remove the offending samples. Note this value, since you will need it in later analysis steps. | |
64 | |
65 ```{r fig.align='center'} | |
66 # Average-linkage hierarchical clustering of the samples (rows) on their pairwise distances. | |
67 sampleTree <- hclust(d = dist(expression_data), method = "average") | |
68 plot(sampleTree, xlab = "", sub = "", main = "Sample clustering to detect outliers", cex = 0.5, cex.main = 1, cex.axis = 1) | |
69 ``` | |
70 | |
71 | |
72 ```{r echo=FALSE} | |
73 rm(list = "opt")  # remove `opt` first so it is not written to the saved workspace | |
74 save(file = 'PREPROCESSING_WORKSPACE', list = ls(all.names = TRUE))  # save every remaining object, hidden ones included | |
75 ``` | |
76 |