Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
diff Matrix_Multiply.py @ 1:f1bcd79cd923 draft default tip
Uploaded
author | insilico-bob |
---|---|
date | Tue, 27 Nov 2018 14:20:40 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Matrix_Multiply.py Tue Nov 27 14:20:40 2018 -0500 @@ -0,0 +1,173 @@ +''' +Created on March 6, 2018 + +@author: Bob Brown based on John Weinstein's algorithm +''' + +import os +import re +import shutil +import traceback +import sys, traceback, argparse +import numpy as np +import warnings +#import scipy.stats as ss +from Matrix_Validate_import import reader, Labeler, MatchLabels +import math +warnings.filterwarnings('error') + +# John Weinsteins algorithm by bob brown https://discover.nci.nih.gov/CorrelateMatrices/help.do +#http://www.blog.pythonlibrary.org/2014/04/30/reading-excel-spreadsheets-with-python-and-xlrd/ + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('input_file1', help='text file input matrix(include .txt in name)') + parser.add_argument('transpose', type=str, help='transpose matrix 1?') + parser.add_argument('input_file2', help='text file input matrix(include .txt in name)') + parser.add_argument('choice', type=str, help='Choose Normalization Method: 1 = Z-score, 2 = Mean Centered, 3 = log2, 4= rank') +# parser.add_argument('scaleValue', help='optional scaling factor for matrix)') + parser.add_argument('out_fileName', help='text file output matrix(include .txt in name)') + args = parser.parse_args() + if args.transpose == "": args.transpose = 'n' + return args + + +def Matrix_Multiply(matrix1, matrix2): + + try: +#TODO handle NANs + + matrixOut= np.dot(matrix1, matrix2) + + + except Exception as err: + traceback.print_exc() + sys.exit(-5) + + return(matrixOut) + + +#CorrelateMatrices correlation acorss 2 martices https://discover.nci.nih.gov/CorrelateMatrices/home.do +def Correlate_Matrices(matrix1, matrix2): + + #try: + # Leave both matrices as size axn and bxn and treat a is column and b as row + #matrix1T = Transpose(matrix1) + +#TODO handle NANs + numRows1,numColumns1= np.shape(matrix1) + + numRows2,numColumns2= np.shape(matrix2) + matrixOut= [] + + if numColumns1 != numRows2: + print("ERROR number columns Matrix 1 ", str(numColumns1), " not equal number rows for Matrix 2 ",str(numRows2)) + sys.exit(-1) +#TODO need to look for NANs?? + + for i in range(numRows1): + vectorM1 = matrix1[i][:] + meanVec1 = np.nanmean(vectorM1) + varStdDev1 = np.nanstd(vectorM1, ddof=1) + lowStdDev1 = False + #if equals zero + if abs(varStdDev1) < .000001: + print("ERROR Variance value almost zero", str(varStdDev1), " for Matrix 1 Row ",str(i+1)) + lowStdDev1= True + correlationRow= [] + + for j in range(numColumns2): + vectorM2 = [] + for t in range(numRows2): + vectorM2.append(matrix2[t][j]) + meanVec2 = np.nanmean(vectorM2) + varStdDev2 = np.nanstd(vectorM2, ddof=1) + lowStdDev2= False + #if equals zero + if abs(varStdDev2) < .000001: + print("ERROR Variance value almost zero", str(varStdDev2), " for Matrix 2 Column ",str(j+1)) + lowStdDev2= True + + covarStdDev12= 0 + + if not lowStdDev1 and not lowStdDev2: + #try: + for pos in range(len(vectorM1)): + covarStdDev12 += ((vectorM1[pos]-meanVec1)/varStdDev1)*((vectorM2[pos]-meanVec2)/varStdDev2) +# bottom= (numColumns1 -1)*(varStdDev1*varStdDev2) +# correlationRow.append( covarStdDev12/bottom) + correlationRow.append( covarStdDev12/(numColumns1 -1)) + #except: bad value because of NAN or other + else: + correlationRow.append("divide by 0") # cannot calculate correlation var too small + + matrixOut.append(correlationRow) + +# except Exception as err: +# traceback.print_exc() +# sys.exit(-6) + + return(matrixOut) + +#---------------------------------------------------------------------- +def Transpose(in_mat): + out_mat = [] + numRows,numColumns= np.shape(in_mat) + + for i in range(numColumns): + temp= [] + for j in range(numRows): + temp.append(in_mat[j][i]) + out_mat.append(temp) + #print( str(out_mat)) + return out_mat + + +#---------------------------------------------------------------------- +if __name__ == "__main__": + +# input_file1 = "/Users/bobbrown/Desktop/Gene-by-var.txt" +# input_file2 = "/Users/bobbrown/Desktop/var-by-sample.txt" +# out_fileName = "/Users/bobbrown/Desktop/MatixMult-1-2-Out.txt" +# selection = "MatrixMultiply" +#TODO address NANs ??? + + try: + args = get_args() + selection= args.choice + + matrix1,column_labels1,row_labels1 = reader(args.input_file1) # to be transposed later + matrix2,column_labels2,row_labels2 = reader(args.input_file2) + + + if args.transpose == 'y' or args.input_file1 == args.input_file2: + matrix1 = Transpose(matrix1) + print("\n>>>NOTICE Transposed first matrix so matrix 1 columns = Matrix 2 number rows ") + temp = row_labels1 #swap labels for output matrix + row_labels1 = column_labels1 #swap labels for output matrix + column_labels1= temp #swap labels for output matrix + + MatchLabels(column_labels1,row_labels2) # verfiy labels and their order match + + if len(column_labels1) != len(row_labels2): + print("\n>>> ERROR attempting to multiple Matrices of incompatible dimensions ") + print("First Matrix is "+str(len(row_labels1))+" by "+str(len(column_labels1))+" where second Matrix is "+str(len(og_row2))+" by "+str(len(column_labels2))+"\n") + print("Matrices must have dimensions AxB and BxC. A can equal C (square matrices)") + sys.exit(-1) + + if selection == "MatrixMultiply": + matrixOut= Matrix_Multiply(matrix1, matrix2 ) + + elif selection == "Corr2Matrices" or selection == "Corr1Matrix": + matrixOut = Correlate_Matrices(matrix1, matrix2) + + Labeler(matrixOut,column_labels2,row_labels1,args.out_fileName) + + print("Matrix Multiply "+str(len(row_labels1))+" by "+str(len(column_labels1))+" Matrix 1 by "+str(len(row_labels2))+" by "+str(len(column_labels2))+" matrix 2") + print("Output Matrix dimensions are "+str(len(row_labels1))+" by "+str(len(column_labels2))+"\n") + + except Exception as err: + traceback.print_exc() + sys.exit(-3) + + sys.exit(0) \ No newline at end of file