Mercurial > repos > md-anderson-bioinformatics > matrix_manipulation
comparison Matrix_Statistics.py @ 1:f1bcd79cd923 draft default tip
Uploaded
| author | insilico-bob |
|---|---|
| date | Tue, 27 Nov 2018 14:20:40 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:7f12c81e2083 | 1:f1bcd79cd923 |
|---|---|
| 1 ''' | |
| 2 Created on Feb2018 | |
| 3 | |
| 4 @author: bob brown | |
| 5 ''' | |
| 6 | |
| 7 import sys, traceback, argparse | |
| 8 import numpy as np | |
| 9 from Matrix_Validate_import import reader | |
| 10 #import matplotlib.pyplot as plt | |
| 11 from Matrix_Filters import Variance_Percent_Filter_row, Variance_Percent_Filter_col | |
| 12 | |
| 13 #Define argparse Function | |
def get_args(argv=None):
    """Parse the command-line arguments of the matrix statistics tool.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is the
            behaviour existing callers rely on.

    Returns:
        argparse.Namespace with the attributes ``input_file_txt``,
        ``choice``, ``thresh``, ``axes`` and ``output_file_txt``. No numeric
        conversion is requested, so every value is a string.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file_txt', help='tab delimited text file input matrix(include .txt in name)')
    parser.add_argument('choice', type=str, help='Variance Filter Method (Variance or Range)')
    parser.add_argument('thresh', help='Threshold for Variance Filtering')
    parser.add_argument('axes', help='Axes to Filter on (Either Row or Column)')
    parser.add_argument('output_file_txt', help='tab delimited text file output name (include .txt in name)')
    return parser.parse_args(argv)
| 23 | |
| 24 | |
| 25 #Define Function Which Labels Rows/Columns on Output | |
def labeler(matrix, filter_rows, filter_cols, output_file_txt):
    """Write a labelled matrix to a tab-delimited text file.

    The first output line holds the column labels, each preceded by a tab
    (so the top-left corner cell is empty). Every following line holds one
    row label followed by that row's values, each preceded by a tab.

    Args:
        matrix: Indexable as ``matrix[row][col]``; the number of values
            written per row is taken from ``len(matrix[0])``.
        filter_rows: Row labels (strings); one output line per label.
        filter_cols: Column labels (strings) for the header line.
        output_file_txt: Path of the file to create or overwrite.
    """
    with open(output_file_txt, 'w') as out:
        # Header line: a leading tab before each label leaves the corner blank.
        out.write(''.join('\t' + col_label for col_label in filter_cols) + '\n')

        for row_idx, row_label in enumerate(filter_rows):
            width = len(matrix[0])
            cells = ''.join(
                '\t' + format(matrix[row_idx][col_idx])
                for col_idx in range(width)
            )
            out.write(row_label + cells + '\n')
| 39 | |
| 40 | |
def Histo(matrix):
    """Compute column means and histogram bin limits for a matrix.

    The NaN-ignoring mean of every column is calculated, and integer lower
    and upper bin limits plus a bin width for 20 bins are derived from those
    means. Nothing is plotted or returned from these values: the matplotlib
    code that drew a "Distribution of Column Means" histogram is disabled
    in this version.

    Args:
        matrix: Indexable as ``matrix[row][col]``; the column count is taken
            from ``len(matrix[0])``.

    Returns:
        An empty tuple.
    """
    bin_count = 20

    # One NaN-ignoring mean per column.
    column_means = [
        np.nanmean([row[col] for row in matrix])
        for col in range(len(matrix[0]))
    ]

    # int() truncates toward zero after the half-unit shift.
    lowest_edge = int(min(column_means) - 0.5)
    highest_edge = int(max(column_means) + 0.5)

    # Kept for the disabled plotting code; currently unused.
    bin_width = float(highest_edge - lowest_edge) / bin_count
    bin_edges = []

    return ()
| 80 | |
| 81 #========== test create variable number output files in Galaxy | |
def CreateFiles(output_file_info):
    """Write a test marker line to the given output file three times.

    The same path is opened in write mode on each pass, so every pass
    truncates the previous content and the file ends up holding only
    ``File number = 2``.

    Args:
        output_file_info: Path of the file to create or overwrite.

    Returns:
        An empty tuple.
    """
    for file_number in range(3):
        # The context manager closes the handle even if the write fails.
        with open(output_file_info, 'w') as fd:
            fd.write('File number = ' + str(file_number) + "\n")

    return ()
| 90 | |
| 91 #================== | |
| 92 | |
| 93 #Define Main Function | |
def main():
    """Run the operation selected on the command line.

    Reads the input matrix, then dispatches on ``args.choice``:

    * ``"Histogram"``   - calls ``Histo`` on the matrix.
    * ``"CreateFiles"`` - calls ``CreateFiles`` on the output path.
    * ``"Variance"``    - applies the row or column variance filter chosen
      by ``args.axes`` (``"Row"`` or ``"Column"``) and writes the labelled
      result with ``labeler``.

    Exit codes: -1 for an invalid axes value, -2 for an invalid choice, and
    -3 when any other exception is raised (its traceback is printed first).
    """
    try:
        args = get_args()

        # reader is unpacked as (matrix, column labels, row labels).
        matrix, og_cols, og_rows = reader(args.input_file_txt)

        if args.choice == "Histogram":
            Histo(matrix)
        elif args.choice == "CreateFiles":
            # The parser defines only output_file_txt; the previously used
            # args.output_file_info does not exist and raised AttributeError.
            CreateFiles(args.output_file_txt)
        elif args.choice == "Variance":
            variance_filters = {
                "Row": Variance_Percent_Filter_row,
                "Column": Variance_Percent_Filter_col,
            }
            if args.axes not in variance_filters:
                print('Invalid Axes = '+str(args.axes))
                sys.exit(-1)

            # NOTE(review): args.thresh is parsed but not forwarded; the
            # filters are called with a fixed second argument of 1 - confirm
            # that this is intended.
            matrix, filter_rows, filter_cols, _del_cnt, _min_val, _max_val = (
                variance_filters[args.axes](matrix, 1, og_rows, og_cols, True)
            )
            labeler(matrix, filter_rows, filter_cols, args.output_file_txt)
        else:
            print("Invalid Filter Choice = "+str(args.choice))
            sys.exit(-2)

    except Exception:
        # sys.exit raises SystemExit, which is not an Exception subclass, so
        # the explicit exit codes above pass through this handler untouched.
        traceback.print_exc()
        sys.exit(-3)
| 141 | |
# Script entry point: run the tool, print a completion marker, then exit
# with status 0.
if __name__ == "__main__":
    main()
    print("\nFini")
    raise SystemExit(0)
