# HG changeset patch
# User mingchen0919
# Date 1502143910 14400
# Node ID a5fdd120b2c7ba86c8c81b96e07297014e5b7a7d
# Parent f1e4bfc589752ffd7c40c37c19ec611d0520d42d
Uploaded
diff -r f1e4bfc58975 -r a5fdd120b2c7 DESeq.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq.Rmd Mon Aug 07 18:11:50 2017 -0400
@@ -0,0 +1,125 @@
+---
+title: 'DESeq2: Perform DESeq analysis'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# ECHO is a template placeholder, not an R object defined in this document;
+# presumably it is substituted with TRUE/FALSE before rendering.
+knitr::opts_chunk$set(
+  echo = ECHO
+)
+
+library(stringi)
+library(DESeq2)
+library(pheatmap)
+library(PoiClaClu)
+library(RColorBrewer)
+# datatable() is used by the table chunks of this report; attach DT here so
+# the document does not rely on the rendering session having loaded it.
+library(DT)
+```
+
+# `DESeqDataSet` object
+
+```{r}
+# `opt` is not created in this document; it has to exist in the rendering
+# environment (presumably the parsed command-line options).
+count_files <- strsplit(opt$count_files, ',')[[1]]
+sample_table <- read.table(opt$sample_table, header = TRUE)
+
+## copy count files into working directory
+# file.copy() silently recycles `from` when it is shorter than `to`, so
+# require exactly one count file per sample-table row.
+stopifnot(length(count_files) == nrow(sample_table))
+# read.table() can return fileName as a factor; file.copy() needs character.
+file_copy <- file.copy(count_files, as.character(sample_table$fileName),
+                       overwrite = TRUE)
+if (!all(file_copy)) {
+  stop('Failed to copy count file(s): ',
+       paste(count_files[!file_copy], collapse = ', '), call. = FALSE)
+}
+
+## DESeqDataSet object
+# DESIGN_FORMULA is a template placeholder (presumably replaced with the
+# design formula before rendering).
+dds <- DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
+                                  directory = './',
+                                  design = DESIGN_FORMULA)
+dds
+```
+
+# Pre-filtering the dataset.
+
+We remove rows whose total count across all samples is 0 or 1 to reduce the object size and speed up the calculations.
+
+* Number of rows before pre-filtering
+```{r}
+nrow(dds)
+```
+
+* Number of rows after pre-filtering
+```{r}
+# keep only rows whose counts sum to more than 1 across all columns
+dds <- dds[rowSums(counts(dds)) > 1, ]
+nrow(dds)
+```
+
+# Peek at data {.tabset}
+
+## Count Data
+
+```{r}
+# show at most the first 100 rows of the count matrix
+datatable(head(counts(dds), 100), style = "bootstrap",
+          class = "table-condensed",
+          options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample Table
+
+```{r}
+datatable(sample_table, style = "bootstrap",
+          class = "table-condensed",
+          options = list(dom = 'tp', scrollX = TRUE))
+```
+
+# Sample distance on variance stabilized data {.tabset}
+
+## `rlog` Stabilizing transformation
+
+```{r}
+# rlog-transform the counts and show at most the first 100 transformed rows
+rld <- rlog(dds, blind = FALSE)
+datatable(head(assay(rld), 100), style = "bootstrap",
+          class = "table-condensed",
+          options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample distance
+
+```{r}
+# dist() compares rows, so transpose to get distances between columns (samples)
+sampleDists <- dist(t(assay(rld)))
+sampleDists
+```
+
+# Differential expression analysis
+
+```{r}
+dds <- DESeq(dds)
+```
+
+```{r}
+# remove `opt` first so it is not part of the saved workspace, then save every
+# remaining object (including hidden ones) to the file DESEQ_WORKSPACE
+rm("opt")
+save(list = ls(all.names = TRUE), file = 'DESEQ_WORKSPACE')
+```
+
+