Mercurial > repos > md-anderson-bioinformatics > heat_map_creation
comparison CHM.R @ 40:8f8ab332a050 draft
Uploaded
author | insilico-bob |
---|---|
date | Thu, 20 Jun 2019 11:39:46 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
39:436f03b71cf6 | 40:8f8ab332a050 |
---|---|
1 ### This method generates a row and column ordering given an input matrix and ordering methods. | |
2 ### | |
3 ### matrixData - numeric matrix | |
4 ### rowOrderMethod - Hierarchical, Original, Random | |
5 ### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation. | |
6 ### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage, | |
7 ### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'. | |
8 ### colOrderMethod | |
9 ### colDistanceMeasure | |
10 ### colAgglomerationMethod | |
11 ### rowOrderFile - output file of order of rows | |
12 ### rowDendroFile - output file of row dendrogram | |
13 ### colOrderFile - output file of order of cols | |
14 ### colDendroFile - output file of col dendrogram | |
15 ### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. | |
16 ### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. | |
17 | |
18 performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut) | |
19 { | |
20 dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?")) | |
21 rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod) | |
22 if (rowOrderMethod == "Hierarchical") { | |
23 writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile) | |
24 } | |
25 | |
26 colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod) | |
27 if (colOrderMethod == "Hierarchical") { | |
28 writeHCDataTSVs(colOrder, colDendroFile, colOrderFile) | |
29 writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep="")) | |
30 } | |
31 } | |
32 | |
33 #creates output files for hclust ordering | |
34 writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName) | |
35 { | |
36 data<-cbind(uDend$merge, uDend$height, deparse.level=0) | |
37 colnames(data)<-c("A", "B", "Height") | |
38 write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) | |
39 | |
40 data=matrix(,length(uDend$labels),2); | |
41 for (i in 1:length(uDend$labels)) { | |
42 print(uDend$labels[i]) | |
43 data[i,1] = uDend$labels[i]; | |
44 data[i,2] = which(uDend$order==i); | |
45 } | |
46 colnames(data)<-c("Id", "Order") | |
47 write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) | |
48 } | |
49 | |
50 #creates a classification file based on user specified cut of dendrogram | |
51 writeHCCut<-function(uDend, cutNum, outputCutFileName) | |
52 { | |
53 if (cutNum < 2) { | |
54 return() | |
55 } | |
56 print (paste("Writing cut file ", outputCutFileName)) | |
57 cut <- cutree(uDend, cutNum); | |
58 id <- names(cut); | |
59 data=matrix(,length(cut),2); | |
60 for (i in 1:length(cut)) { | |
61 data[i,1] = id[i]; | |
62 data[i,2] = sprintf("Cluster %d", cut[i]); | |
63 } | |
64 | |
65 write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE); | |
66 } | |
67 | |
68 | |
69 createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod) | |
70 { | |
71 ordering <- NULL | |
72 | |
73 if (orderMethod == "Hierarchical") | |
74 { | |
75 | |
76 # Compute dendrogram for "Distance Metric" | |
77 distVals <- NULL | |
78 if(direction=="row") { | |
79 if (distanceMeasure == "correlation") { | |
80 geneGeneCor <- cor(t(matrixData), use="pairwise") | |
81 distVals <- as.dist((1-geneGeneCor)/2) | |
82 } else { | |
83 distVals <- dist(matrixData, method=distanceMeasure) | |
84 } | |
85 } else { #column | |
86 if (distanceMeasure == "correlation") { | |
87 geneGeneCor <- cor(matrixData, use="pairwise") | |
88 distVals <- as.dist((1-geneGeneCor)/2) | |
89 } else { | |
90 distVals <- dist(t(matrixData), method=distanceMeasure) | |
91 } | |
92 } | |
93 | |
94 # if (agglomerationMethod == "ward") { | |
95 # ordering <- hclust(distVals * distVals, method="ward.D2") | |
96 # } else { | |
97 ordering <- hclust(distVals, method=agglomerationMethod) | |
98 # } | |
99 } | |
100 else if (orderMethod == "Random") | |
101 { | |
102 if(direction=="row") { | |
103 headerList <- rownames(matrixData) | |
104 ordering <- sample(headerList, length(headerList)) | |
105 } else { | |
106 headerList <- colnames(matrixData) | |
107 ordering <- sample(headerList, length(headerList)) | |
108 } | |
109 } | |
110 else if (orderMethod == "Original") | |
111 { | |
112 if(direction=="row") { | |
113 ordering <- rownames(matrixData) | |
114 } else { | |
115 ordering <- colnames(matrixData) | |
116 } | |
117 } else { | |
118 stop("createOrdering -- failed to find ordering method") | |
119 } | |
120 return(ordering) | |
121 } | |
122 ### Initialize command line arguments and call performDataOrdering | |
123 | |
124 options(warn=-1) | |
125 | |
126 args = commandArgs(TRUE) | |
127 | |
128 performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13]) | |
129 | |
130 #suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11])) |