comparison Matrix_Multiply.py @ 1:f1bcd79cd923 draft default tip

Uploaded
author insilico-bob
date Tue, 27 Nov 2018 14:20:40 -0500
parents
children
comparison
equal deleted inserted replaced
0:7f12c81e2083 1:f1bcd79cd923
1 '''
2 Created on March 6, 2018
3
4 @author: Bob Brown based on John Weinstein's algorithm
5 '''
6
7 import os
8 import re
9 import shutil
10 import traceback
11 import sys, traceback, argparse
12 import numpy as np
13 import warnings
14 #import scipy.stats as ss
15 from Matrix_Validate_import import reader, Labeler, MatchLabels
16 import math
17 warnings.filterwarnings('error')
18
19 # John Weinsteins algorithm by bob brown https://discover.nci.nih.gov/CorrelateMatrices/help.do
20 #http://www.blog.pythonlibrary.org/2014/04/30/reading-excel-spreadsheets-with-python-and-xlrd/
21
22 def get_args():
23 parser = argparse.ArgumentParser()
24 parser.add_argument('input_file1', help='text file input matrix(include .txt in name)')
25 parser.add_argument('transpose', type=str, help='transpose matrix 1?')
26 parser.add_argument('input_file2', help='text file input matrix(include .txt in name)')
27 parser.add_argument('choice', type=str, help='Choose Normalization Method: 1 = Z-score, 2 = Mean Centered, 3 = log2, 4= rank')
28 # parser.add_argument('scaleValue', help='optional scaling factor for matrix)')
29 parser.add_argument('out_fileName', help='text file output matrix(include .txt in name)')
30 args = parser.parse_args()
31 if args.transpose == "": args.transpose = 'n'
32 return args
33
34
35 def Matrix_Multiply(matrix1, matrix2):
36
37 try:
38 #TODO handle NANs
39
40 matrixOut= np.dot(matrix1, matrix2)
41
42
43 except Exception as err:
44 traceback.print_exc()
45 sys.exit(-5)
46
47 return(matrixOut)
48
49
50 #CorrelateMatrices correlation acorss 2 martices https://discover.nci.nih.gov/CorrelateMatrices/home.do
51 def Correlate_Matrices(matrix1, matrix2):
52
53 #try:
54 # Leave both matrices as size axn and bxn and treat a is column and b as row
55 #matrix1T = Transpose(matrix1)
56
57 #TODO handle NANs
58 numRows1,numColumns1= np.shape(matrix1)
59
60 numRows2,numColumns2= np.shape(matrix2)
61 matrixOut= []
62
63 if numColumns1 != numRows2:
64 print("ERROR number columns Matrix 1 ", str(numColumns1), " not equal number rows for Matrix 2 ",str(numRows2))
65 sys.exit(-1)
66 #TODO need to look for NANs??
67
68 for i in range(numRows1):
69 vectorM1 = matrix1[i][:]
70 meanVec1 = np.nanmean(vectorM1)
71 varStdDev1 = np.nanstd(vectorM1, ddof=1)
72 lowStdDev1 = False
73 #if equals zero
74 if abs(varStdDev1) < .000001:
75 print("ERROR Variance value almost zero", str(varStdDev1), " for Matrix 1 Row ",str(i+1))
76 lowStdDev1= True
77 correlationRow= []
78
79 for j in range(numColumns2):
80 vectorM2 = []
81 for t in range(numRows2):
82 vectorM2.append(matrix2[t][j])
83 meanVec2 = np.nanmean(vectorM2)
84 varStdDev2 = np.nanstd(vectorM2, ddof=1)
85 lowStdDev2= False
86 #if equals zero
87 if abs(varStdDev2) < .000001:
88 print("ERROR Variance value almost zero", str(varStdDev2), " for Matrix 2 Column ",str(j+1))
89 lowStdDev2= True
90
91 covarStdDev12= 0
92
93 if not lowStdDev1 and not lowStdDev2:
94 #try:
95 for pos in range(len(vectorM1)):
96 covarStdDev12 += ((vectorM1[pos]-meanVec1)/varStdDev1)*((vectorM2[pos]-meanVec2)/varStdDev2)
97 # bottom= (numColumns1 -1)*(varStdDev1*varStdDev2)
98 # correlationRow.append( covarStdDev12/bottom)
99 correlationRow.append( covarStdDev12/(numColumns1 -1))
100 #except: bad value because of NAN or other
101 else:
102 correlationRow.append("divide by 0") # cannot calculate correlation var too small
103
104 matrixOut.append(correlationRow)
105
106 # except Exception as err:
107 # traceback.print_exc()
108 # sys.exit(-6)
109
110 return(matrixOut)
111
112 #----------------------------------------------------------------------
113 def Transpose(in_mat):
114 out_mat = []
115 numRows,numColumns= np.shape(in_mat)
116
117 for i in range(numColumns):
118 temp= []
119 for j in range(numRows):
120 temp.append(in_mat[j][i])
121 out_mat.append(temp)
122 #print( str(out_mat))
123 return out_mat
124
125
126 #----------------------------------------------------------------------
127 if __name__ == "__main__":
128
129 # input_file1 = "/Users/bobbrown/Desktop/Gene-by-var.txt"
130 # input_file2 = "/Users/bobbrown/Desktop/var-by-sample.txt"
131 # out_fileName = "/Users/bobbrown/Desktop/MatixMult-1-2-Out.txt"
132 # selection = "MatrixMultiply"
133 #TODO address NANs ???
134
135 try:
136 args = get_args()
137 selection= args.choice
138
139 matrix1,column_labels1,row_labels1 = reader(args.input_file1) # to be transposed later
140 matrix2,column_labels2,row_labels2 = reader(args.input_file2)
141
142
143 if args.transpose == 'y' or args.input_file1 == args.input_file2:
144 matrix1 = Transpose(matrix1)
145 print("\n>>>NOTICE Transposed first matrix so matrix 1 columns = Matrix 2 number rows ")
146 temp = row_labels1 #swap labels for output matrix
147 row_labels1 = column_labels1 #swap labels for output matrix
148 column_labels1= temp #swap labels for output matrix
149
150 MatchLabels(column_labels1,row_labels2) # verfiy labels and their order match
151
152 if len(column_labels1) != len(row_labels2):
153 print("\n>>> ERROR attempting to multiple Matrices of incompatible dimensions ")
154 print("First Matrix is "+str(len(row_labels1))+" by "+str(len(column_labels1))+" where second Matrix is "+str(len(og_row2))+" by "+str(len(column_labels2))+"\n")
155 print("Matrices must have dimensions AxB and BxC. A can equal C (square matrices)")
156 sys.exit(-1)
157
158 if selection == "MatrixMultiply":
159 matrixOut= Matrix_Multiply(matrix1, matrix2 )
160
161 elif selection == "Corr2Matrices" or selection == "Corr1Matrix":
162 matrixOut = Correlate_Matrices(matrix1, matrix2)
163
164 Labeler(matrixOut,column_labels2,row_labels1,args.out_fileName)
165
166 print("Matrix Multiply "+str(len(row_labels1))+" by "+str(len(column_labels1))+" Matrix 1 by "+str(len(row_labels2))+" by "+str(len(column_labels2))+" matrix 2")
167 print("Output Matrix dimensions are "+str(len(row_labels1))+" by "+str(len(column_labels2))+"\n")
168
169 except Exception as err:
170 traceback.print_exc()
171 sys.exit(-3)
172
173 sys.exit(0)