Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
comparison Matrix_Multiply.py @ 1:f1bcd79cd923 draft default tip
Uploaded
author | insilico-bob |
---|---|
date | Tue, 27 Nov 2018 14:20:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:7f12c81e2083 | 1:f1bcd79cd923 |
---|---|
1 ''' | |
2 Created on March 6, 2018 | |
3 | |
4 @author: Bob Brown based on John Weinstein's algorithm | |
5 ''' | |
6 | |
7 import os | |
8 import re | |
9 import shutil | |
10 import traceback | |
11 import sys, traceback, argparse | |
12 import numpy as np | |
13 import warnings | |
14 #import scipy.stats as ss | |
15 from Matrix_Validate_import import reader, Labeler, MatchLabels | |
16 import math | |
17 warnings.filterwarnings('error') | |
18 | |
19 # John Weinsteins algorithm by bob brown https://discover.nci.nih.gov/CorrelateMatrices/help.do | |
20 #http://www.blog.pythonlibrary.org/2014/04/30/reading-excel-spreadsheets-with-python-and-xlrd/ | |
21 | |
22 def get_args(): | |
23 parser = argparse.ArgumentParser() | |
24 parser.add_argument('input_file1', help='text file input matrix(include .txt in name)') | |
25 parser.add_argument('transpose', type=str, help='transpose matrix 1?') | |
26 parser.add_argument('input_file2', help='text file input matrix(include .txt in name)') | |
27 parser.add_argument('choice', type=str, help='Choose Normalization Method: 1 = Z-score, 2 = Mean Centered, 3 = log2, 4= rank') | |
28 # parser.add_argument('scaleValue', help='optional scaling factor for matrix)') | |
29 parser.add_argument('out_fileName', help='text file output matrix(include .txt in name)') | |
30 args = parser.parse_args() | |
31 if args.transpose == "": args.transpose = 'n' | |
32 return args | |
33 | |
34 | |
35 def Matrix_Multiply(matrix1, matrix2): | |
36 | |
37 try: | |
38 #TODO handle NANs | |
39 | |
40 matrixOut= np.dot(matrix1, matrix2) | |
41 | |
42 | |
43 except Exception as err: | |
44 traceback.print_exc() | |
45 sys.exit(-5) | |
46 | |
47 return(matrixOut) | |
48 | |
49 | |
50 #CorrelateMatrices correlation acorss 2 martices https://discover.nci.nih.gov/CorrelateMatrices/home.do | |
51 def Correlate_Matrices(matrix1, matrix2): | |
52 | |
53 #try: | |
54 # Leave both matrices as size axn and bxn and treat a is column and b as row | |
55 #matrix1T = Transpose(matrix1) | |
56 | |
57 #TODO handle NANs | |
58 numRows1,numColumns1= np.shape(matrix1) | |
59 | |
60 numRows2,numColumns2= np.shape(matrix2) | |
61 matrixOut= [] | |
62 | |
63 if numColumns1 != numRows2: | |
64 print("ERROR number columns Matrix 1 ", str(numColumns1), " not equal number rows for Matrix 2 ",str(numRows2)) | |
65 sys.exit(-1) | |
66 #TODO need to look for NANs?? | |
67 | |
68 for i in range(numRows1): | |
69 vectorM1 = matrix1[i][:] | |
70 meanVec1 = np.nanmean(vectorM1) | |
71 varStdDev1 = np.nanstd(vectorM1, ddof=1) | |
72 lowStdDev1 = False | |
73 #if equals zero | |
74 if abs(varStdDev1) < .000001: | |
75 print("ERROR Variance value almost zero", str(varStdDev1), " for Matrix 1 Row ",str(i+1)) | |
76 lowStdDev1= True | |
77 correlationRow= [] | |
78 | |
79 for j in range(numColumns2): | |
80 vectorM2 = [] | |
81 for t in range(numRows2): | |
82 vectorM2.append(matrix2[t][j]) | |
83 meanVec2 = np.nanmean(vectorM2) | |
84 varStdDev2 = np.nanstd(vectorM2, ddof=1) | |
85 lowStdDev2= False | |
86 #if equals zero | |
87 if abs(varStdDev2) < .000001: | |
88 print("ERROR Variance value almost zero", str(varStdDev2), " for Matrix 2 Column ",str(j+1)) | |
89 lowStdDev2= True | |
90 | |
91 covarStdDev12= 0 | |
92 | |
93 if not lowStdDev1 and not lowStdDev2: | |
94 #try: | |
95 for pos in range(len(vectorM1)): | |
96 covarStdDev12 += ((vectorM1[pos]-meanVec1)/varStdDev1)*((vectorM2[pos]-meanVec2)/varStdDev2) | |
97 # bottom= (numColumns1 -1)*(varStdDev1*varStdDev2) | |
98 # correlationRow.append( covarStdDev12/bottom) | |
99 correlationRow.append( covarStdDev12/(numColumns1 -1)) | |
100 #except: bad value because of NAN or other | |
101 else: | |
102 correlationRow.append("divide by 0") # cannot calculate correlation var too small | |
103 | |
104 matrixOut.append(correlationRow) | |
105 | |
106 # except Exception as err: | |
107 # traceback.print_exc() | |
108 # sys.exit(-6) | |
109 | |
110 return(matrixOut) | |
111 | |
112 #---------------------------------------------------------------------- | |
113 def Transpose(in_mat): | |
114 out_mat = [] | |
115 numRows,numColumns= np.shape(in_mat) | |
116 | |
117 for i in range(numColumns): | |
118 temp= [] | |
119 for j in range(numRows): | |
120 temp.append(in_mat[j][i]) | |
121 out_mat.append(temp) | |
122 #print( str(out_mat)) | |
123 return out_mat | |
124 | |
125 | |
126 #---------------------------------------------------------------------- | |
127 if __name__ == "__main__": | |
128 | |
129 # input_file1 = "/Users/bobbrown/Desktop/Gene-by-var.txt" | |
130 # input_file2 = "/Users/bobbrown/Desktop/var-by-sample.txt" | |
131 # out_fileName = "/Users/bobbrown/Desktop/MatixMult-1-2-Out.txt" | |
132 # selection = "MatrixMultiply" | |
133 #TODO address NANs ??? | |
134 | |
135 try: | |
136 args = get_args() | |
137 selection= args.choice | |
138 | |
139 matrix1,column_labels1,row_labels1 = reader(args.input_file1) # to be transposed later | |
140 matrix2,column_labels2,row_labels2 = reader(args.input_file2) | |
141 | |
142 | |
143 if args.transpose == 'y' or args.input_file1 == args.input_file2: | |
144 matrix1 = Transpose(matrix1) | |
145 print("\n>>>NOTICE Transposed first matrix so matrix 1 columns = Matrix 2 number rows ") | |
146 temp = row_labels1 #swap labels for output matrix | |
147 row_labels1 = column_labels1 #swap labels for output matrix | |
148 column_labels1= temp #swap labels for output matrix | |
149 | |
150 MatchLabels(column_labels1,row_labels2) # verfiy labels and their order match | |
151 | |
152 if len(column_labels1) != len(row_labels2): | |
153 print("\n>>> ERROR attempting to multiple Matrices of incompatible dimensions ") | |
154 print("First Matrix is "+str(len(row_labels1))+" by "+str(len(column_labels1))+" where second Matrix is "+str(len(og_row2))+" by "+str(len(column_labels2))+"\n") | |
155 print("Matrices must have dimensions AxB and BxC. A can equal C (square matrices)") | |
156 sys.exit(-1) | |
157 | |
158 if selection == "MatrixMultiply": | |
159 matrixOut= Matrix_Multiply(matrix1, matrix2 ) | |
160 | |
161 elif selection == "Corr2Matrices" or selection == "Corr1Matrix": | |
162 matrixOut = Correlate_Matrices(matrix1, matrix2) | |
163 | |
164 Labeler(matrixOut,column_labels2,row_labels1,args.out_fileName) | |
165 | |
166 print("Matrix Multiply "+str(len(row_labels1))+" by "+str(len(column_labels1))+" Matrix 1 by "+str(len(row_labels2))+" by "+str(len(column_labels2))+" matrix 2") | |
167 print("Output Matrix dimensions are "+str(len(row_labels1))+" by "+str(len(column_labels2))+"\n") | |
168 | |
169 except Exception as err: | |
170 traceback.print_exc() | |
171 sys.exit(-3) | |
172 | |
173 sys.exit(0) |