Mercurial > repos > md-anderson-bioinformatics > heat_map_creation
comparison CHM.R @ 21:4cf6970e735e draft
Uploaded
author | insilico-bob |
---|---|
date | Fri, 27 Jan 2017 11:33:29 -0500 |
parents | 603d5c39e8dc |
children | 16593e40c2cd |
comparison
equal
deleted
inserted
replaced
20:37153f19c56e | 21:4cf6970e735e |
---|---|
### Reads a tab-delimited data matrix from a file, computes a row ordering and a
### column ordering for it, and writes the results to the given output files.
###
### dataFile - tab-delimited input file; the first line is the header and the first
###            column supplies the row names. "NA", "N/A", "-" and "?" are read as missing.
### rowOrderMethod - Hierarchical, Original, Random
### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation.
### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage,
###                          'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'.
### colOrderMethod, colDistanceMeasure, colAgglomerationMethod - same as the row settings, applied to columns
### rowOrderFile - output file of order of rows
### colOrderFile - output file of order of cols
### rowDendroFile - output file of row dendrogram (written only for Hierarchical)
### colDendroFile - output file of col dendrogram (written only for Hierarchical)
### rowCut - For rows the number of classifications to automatically generate based on dendrogram
###          into a classification file named <rowOrderFile>.cut. 0 for turned off.
### colCut - Same as rowCut for columns; the classification file is named <colOrderFile>.cut.
performDataOrdering <- function(dataFile,
                                rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod,
                                colOrderMethod, colDistanceMeasure, colAgglomerationMethod,
                                rowOrderFile, colOrderFile, rowDendroFile, colDendroFile,
                                rowCut, colCut) {
  dataMatrix <- read.table(
    dataFile,
    header = TRUE,
    sep = "\t",
    row.names = 1,
    as.is = TRUE,
    na.strings = c("NA", "N/A", "-", "?")
  )

  # Rows first: only a hierarchical ordering has a dendrogram and a cut file.
  rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod)
  if (rowOrderMethod != "Hierarchical") {
    writeOrderTSV(rowOrder, rownames(dataMatrix), rowOrderFile)
  } else {
    writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile)
    writeHCCut(rowOrder, rowCut, paste0(rowOrderFile, ".cut"))
  }

  # Then columns, handled the same way.
  colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod)
  if (colOrderMethod != "Hierarchical") {
    writeOrderTSV(colOrder, colnames(dataMatrix), colOrderFile)
  } else {
    writeHCDataTSVs(colOrder, colDendroFile, colOrderFile)
    writeHCCut(colOrder, colCut, paste0(colOrderFile, ".cut"))
  }
}
37 | |
### Creates the two output files for an hclust ordering.
###
### uDend - hclust result; its merge, height, labels and order components are used
### outputHCDataFileName - dendrogram file: tab-delimited with header A, B, Height;
###                        one line per merge step (the two merge columns and the height)
### outputHCOrderFileName - order file: tab-delimited with header Id, Order; one line per
###                         label giving the 1-based position of that label in uDend$order
writeHCDataTSVs <- function(uDend, outputHCDataFileName, outputHCOrderFileName) {
  dendroData <- cbind(uDend$merge, uDend$height, deparse.level = 0)
  colnames(dendroData) <- c("A", "B", "Height")
  write.table(dendroData, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

  # uDend$order[k] is the index of the item placed at position k, so the position of
  # item i is the inverse permutation. match() computes it for every item at once,
  # replacing a per-item which() scan, and also copes with zero-length labels.
  itemLabels <- uDend$labels
  positions <- match(seq_along(itemLabels), uDend$order)
  orderData <- matrix(
    c(as.character(itemLabels), as.character(positions)),
    ncol = 2,
    dimnames = list(NULL, c("Id", "Order"))
  )
  write.table(orderData, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)
}
53 | |
### Creates the order file for non-clustering methods.
###
### newOrder - labels in their new order
### originalOrder - the same labels in their original order
### outputHCOrderFileName - output file: tab-delimited with header Id, Order; one line per
###                         entry of originalOrder giving its 1-based position in newOrder
writeOrderTSV <- function(newOrder, originalOrder, outputHCOrderFileName) {
  # match() looks up every label in one pass instead of scanning newOrder once per
  # label, and yields an empty table (header only) when there are no labels.
  positions <- match(originalOrder, newOrder)
  orderData <- matrix(
    c(as.character(originalOrder), as.character(positions)),
    ncol = 2,
    dimnames = list(NULL, c("Id", "Order"))
  )
  write.table(orderData, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)
}
65 | |
### Creates a classification file based on a user specified cut of the dendrogram.
###
### uDend - hclust result to cut
### cutNum - number of clusters to cut the dendrogram into; may be a number or a numeric
###          string (command-line values arrive as strings). Values below 2, NA or an
###          empty value turn the cut off and nothing is written.
### outputCutFileName - output file: tab-delimited, no header; one line per label with the
###                     label and "Cluster <n>"
###
### Stops with an error when cutNum is supplied but is not a number, or when the
### dendrogram carries no labels.
writeHCCut <- function(uDend, cutNum, outputCutFileName) {
  # Option not supplied at all: treat as turned off.
  if (length(cutNum) == 0L || is.na(cutNum[1])) {
    return(invisible(NULL))
  }

  # Compare numerically. Comparing the raw string would be a text comparison, under
  # which "10" sorts before "2" and a request for 10 clusters is silently dropped.
  cutCount <- suppressWarnings(as.integer(cutNum[1]))
  if (is.na(cutCount)) {
    stop("writeHCCut -- cut count is not a number: ", cutNum[1])
  }
  if (cutCount < 2L) {
    return(invisible(NULL))
  }

  print(paste("Writing cut file ", outputCutFileName))
  membership <- cutree(uDend, cutCount)
  ids <- names(membership)
  if (is.null(ids)) {
    stop("writeHCCut -- dendrogram has no labels")
  }

  cutData <- matrix(c(ids, sprintf("Cluster %d", membership)), ncol = 2)
  write.table(cutData, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE)
}
83 | |
84 | |
### Computes an ordering of the rows or columns of a data matrix.
###
### matrixData - data matrix (or data frame) with row and column names
### orderMethod - "Hierarchical", "Random" or "Original"; anything else is an error
### direction - "row" to order rows; any other value orders columns
### distanceMeasure - Hierarchical only: "correlation", or a method name passed to dist()
### agglomerationMethod - Hierarchical only: passed unchanged to hclust() (no special
###                       handling of "ward")
###
### Returns the hclust result for "Hierarchical"; otherwise a vector of row/column
### names, shuffled for "Random" and as-is for "Original".
createOrdering <- function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod) {
  if (orderMethod == "Hierarchical") {
    byRow <- direction == "row"
    if (distanceMeasure == "correlation") {
      # cor() correlates columns, so the items being ordered must be the columns.
      corInput <- if (byRow) t(matrixData) else matrixData
      pairCor <- cor(corInput, use = "pairwise")
      # Map correlation 1 -> distance 0 and correlation -1 -> distance 1.
      distances <- as.dist((1 - pairCor) / 2)
    } else {
      # dist() measures between rows, so the items being ordered must be the rows.
      distInput <- if (byRow) matrixData else t(matrixData)
      distances <- dist(distInput, method = distanceMeasure)
    }
    return(hclust(distances, method = agglomerationMethod))
  }

  if (orderMethod == "Random") {
    itemNames <- if (direction == "row") rownames(matrixData) else colnames(matrixData)
    return(sample(itemNames, length(itemNames)))
  }

  if (orderMethod == "Original") {
    itemNames <- if (direction == "row") rownames(matrixData) else colnames(matrixData)
    return(itemNames)
  }

  stop("createOrdering -- failed to find ordering method")
}
### Initialize command line arguments and call performDataOrdering
###
### Positional arguments: 1 data file; 2-4 row order method, distance measure and
### agglomeration method; 5-7 the same for columns; 8 row order file; 9 col order file;
### 10 row dendrogram file; 11 col dendrogram file; 12 row cut count; 13 col cut count.

# Warnings are switched off for the whole run.
options(warn = -1)

args <- commandArgs(TRUE)

# Command-line values are strings; the cut counts are compared against numbers
# downstream, so convert them here. Invocations that pass only the first 11
# arguments (or a non-numeric value) get 0, which turns the cut off.
cutCounts <- as.integer(args[12:13])
cutCounts[is.na(cutCounts)] <- 0L

performDataOrdering(
  dataFile = args[1],
  rowOrderMethod = args[2], rowDistanceMeasure = args[3], rowAgglomerationMethod = args[4],
  colOrderMethod = args[5], colDistanceMeasure = args[6], colAgglomerationMethod = args[7],
  rowOrderFile = args[8], colOrderFile = args[9],
  rowDendroFile = args[10], colDendroFile = args[11],
  rowCut = cutCounts[1], colCut = cutCounts[2]
)