annotate deseq-hts_2.0/src/difftest_deseq2.R @ 10:2fe512c7bfdf draft

DESeq2 version 1.0.19 added to the repo
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 08:15:34 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
1 ### load DESeq package
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
2 suppressMessages(require("DESeq2"))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
3
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
4 ### get arguments 1: INFILE, 2: OUTFILE 3:SIZE
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
5 args <- commandArgs()
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
6 FITTYP<-args[4]
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
7 INFILE<-args[5]
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
8 OUTFILE<-args[6]
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
9
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
10 INFILE_COUNTS=c(paste(INFILE, "_COUNTS.tab", sep=""))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
11 INFILE_CONDS=c(paste(INFILE, "_CONDITIONS.tab", sep=""))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
12
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
13 ### read count data from file
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
14 countsTable <- read.delim( INFILE_COUNTS, header=TRUE, stringsAsFactors=TRUE)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
15 condsTable <- read.delim( INFILE_CONDS, header=TRUE, stringsAsFactors=TRUE)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
16
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
17 tagnames <- countsTable[-(1:2), 1]
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
18 #print(tagnames)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
19
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
20 ## use gene IDs as row names
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
21 rownames( countsTable ) <- countsTable$gene
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
22 countsTable <- countsTable[ , -1 ]
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
23 #print(countsTable)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
24
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
25 conditions<-factor( condsTable[ , 2] )
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
26 #print(conditions)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
27
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
28 ## unique condition to define the pair of tests
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
29 uniq_conds <- unique(conditions)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
30 #print(uniq_conds)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
31
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
32 ## all possible pairs of conditions
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
33 pw_tests <- list()
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
34 for(i in 1:(length(uniq_conds)-1)) {
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
35 for(j in (i+1):length(uniq_conds)) {
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
36 pw_tests[[length(pw_tests)+1]] <- c(uniq_conds[i], uniq_conds[j])
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
37 }
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
38 }
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
39 #print(pw_tests)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
40
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
41 tab <- NULL
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
42 ## testing all possible pairs of conditions
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
43 for(i in 1:length(pw_tests)) {
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
44 ## header name
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
45 test_pair_name <- c(paste(pw_tests[[i]][1], "__vs__", pw_tests[[i]][2], sep=""))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
46 #print(test_pair_name)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
47 ## colnames respective to the test pair
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
48 sub.data <- subset(condsTable, (conditions %in% c(pw_tests[[i]][1],pw_tests[[i]][2])))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
49 #print(sub.data)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
50 #print(sub.data[[1]]) # sample file name
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
51 #print(sub.data[[2]]) # condition
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
52 #print(sub.data[[3]]) # replicates
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
53 colData <- data.frame(row.names=sub.data[[1]], condition=sub.data[[2]], libType=sub.data[[3]])
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
54 #print(colData)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
55 #print(countsTable[(sub.data[[1]])])
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
56 dds <- DESeqDataSetFromMatrix(countData=countsTable[(sub.data[[1]])], colData=colData, design=~condition)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
57 colData(dds)$condition <- factor(colData(dds)$condition, levels=unique(sub.data[[2]]))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
58 dds <- DESeq(dds, fitType=FITTYP)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
59 ## concatenate the results
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
60 tested_pairs <- results(dds)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
61 #print(typeof(tested_pairs))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
62
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
63 colnames(tested_pairs) <- paste(test_pair_name, colnames(tested_pairs), sep=":")
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
64 #print(colnames(tested_pairs))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
65 #print(tested_pairs)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
66
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
67 tab_tmp <- tested_pairs[tagnames,]
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
68 if(is.null(tab)) {
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
69 tab<- as.data.frame(tab_tmp)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
70 }
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
71 else tab<- cbind(tab, as.data.frame(tab_tmp))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
72 }
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
73 ## TODO cbind creates a X character to the string start place.
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
74 colnames(tab) <- gsub("^X", "", colnames(tab))
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
75 ## adding gene names to the row
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
76 tab <- cbind(Feature=row.names(tab_tmp), tab)
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
77 ## priting the result
2fe512c7bfdf DESeq2 version 1.0.19 added to the repo
vipints <vipin@cbio.mskcc.org>
parents:
diff changeset
78 write.table(tab, OUTFILE, quote=F, sep="\t", row.names=F)