Mercurial > repos > mingchen0919 > rmarkdown_wgcna
annotate wgcna_preprocessing.Rmd @ 2:237210176a2b draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit a23e23222252167ef7c3338a4872e84706df8f83-dirty
author | mingchen0919 |
---|---|
date | Tue, 08 Aug 2017 14:33:59 -0400 |
parents | 4275479ada3a |
children |
rev | line source |
---|---|
0
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
1 --- |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
2 title: 'WGCNA: data preprocessing' |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
3 output: |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
4 html_document: |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
5 number_sections: true |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
6 toc: true |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
7 theme: cosmo |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
8 highlight: tango |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
9 --- |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
10 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
12 knitr::opts_chunk$set( |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
13 echo = ECHO |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
14 ) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
15 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
16 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
17 ```{r} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
18 str(opt) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
19 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
20 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
21 # Import data |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
22 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
23 Each row represents a gene and each column represents a sample. |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
24 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
25 ```{r} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
26 expression_data = read.csv('EXPRESSION_DATA', header = TRUE, row.names = 1) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
27 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
28 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
29 Display the first 100 genes. |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
30 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
31 ```{r} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
32 datatable(head(expression_data, 100), style="bootstrap", filter = 'top', |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
33 class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
34 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
35 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
36 Transpose the expression data matrix so that each row represents a sample and each column represents a gene. |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
37 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
38 ```{r} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
39 expression_data = as.data.frame(t(expression_data)) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
40 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
41 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
42 # Checking data |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
43 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
44 Check the data for excessive missing values and identify outlier microarray samples. |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
45 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
46 ```{r} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
47 gsg = goodSamplesGenes(expression_data, verbose = 3) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
48 if (!gsg$allOK) { |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
49 # Optionally, print the gene and sample names that were removed: |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
50 if (sum(!gsg$goodGenes)>0) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
51 printFlush(paste("Removing genes:", paste(names(expression_data)[!gsg$goodGenes], collapse = ", "))); |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
52 if (sum(!gsg$goodSamples)>0) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
53 printFlush(paste("Removing samples:", paste(rownames(expression_data)[!gsg$goodSamples], collapse = ", "))); |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
54 # Remove the offending genes and samples from the data: |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
55 expression_data = expression_data[gsg$goodSamples, gsg$goodGenes] |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
56 } else { |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
57 print('all genes are OK!') |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
58 } |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
59 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
60 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
61 # Clustering samples |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
62 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
63 If there are any outliers, choose a height cut that will remove the offending samples. Make a note of this value, since you will need it in further analysis. |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
64 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
65 ```{r fig.align='center'} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
66 sampleTree = hclust(dist(expression_data), method = "average"); |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
67 plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
68 cex.axis = 1, cex.main = 1, cex = 0.5) |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
69 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
70 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
71 |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
72 ```{r echo=FALSE} |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
73 rm("opt") |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
74 save(list=ls(all.names = TRUE), file='PREPROCESSING_WORKSPACE') |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
75 ``` |
4275479ada3a
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_wgcna commit d91f269e8bc09a488ed2e005122bbb4a521f44a0-dirty
mingchen0919
parents:
diff
changeset
|
76 |