diff Matrix_Statistics.py @ 1:f1bcd79cd923 draft default tip

Uploaded
author insilico-bob
date Tue, 27 Nov 2018 14:20:40 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Matrix_Statistics.py	Tue Nov 27 14:20:40 2018 -0500
@@ -0,0 +1,145 @@
+'''
+Created on Feb2018
+
+@author: bob brown 
+'''
+
+import sys, traceback, argparse
+import numpy as np
+from Matrix_Validate_import import reader
+#import matplotlib.pyplot as plt
+from Matrix_Filters import Variance_Percent_Filter_row, Variance_Percent_Filter_col
+
+#Define argparse Function
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_file_txt', help='tab delimited text file input matrix(include .txt in name)')
+    parser.add_argument('choice',type=str, help='Variance Filter Method (Variance or Range)')
+    parser.add_argument('thresh', help='Thershold for Variance Filtering')
+    parser.add_argument('axes', help='Axes to Filter on (Either Row or Column')
+    parser.add_argument('output_file_txt', help='tab delimited text file output name (include .txt in name)')
+    args = parser.parse_args()
+    return args
+
+
+#Define Function Which Labels Rows/Columns on Output
+def labeler(matrix,filter_rows,filter_cols,output_file_txt):
+
+    #Write Data to Specified Text File Output
+    with open(output_file_txt,'w') as f:
+        f.write("")
+        for k in range(0,len(filter_cols)):
+                f.write('\t' + filter_cols[k])
+        f.write('\n')
+        for i in range(0,len(filter_rows)):
+                f.write(filter_rows[i])
+                for j in range(0,len(matrix[0])):
+                        f.write('\t' + format(matrix[i][j]))
+                f.write('\n')
+
+
+def Histo(matrix):
+    numBins= 20
+    data = []
+#    numRow,numCol= np.shape(matrix)
+    for i in range(len(matrix[0])):
+        data.append(np.nanmean([row[i] for row in matrix]))
+         
+#        print(str(np.nanmean([row[i] for row in matrix])))
+
+#https://stackoverflow.com/questions/5328556/histogram-matplotlib
+    #bins = [0, 40, 60, 75, 90, 110, 125, 140, 160, 200]
+    minBin = int(min(data)-0.5)
+    maxBin = int(max(data)+0.5)
+    binWidth = float(maxBin-minBin)/numBins
+    bins= []
+    """
+    for j in range(numBins):
+        bins.append(minBin+ j*binWidth)
+    #bins= 20
+    n, bins, patches = plt.hist(data,bins, normed=False)
+    #n, bins, patches = plt.hist(data,bins, normed=1, color='green')
+    #hist, bins = np.histogram(data, bins=bins)
+    width = np.diff(bins)
+    center = (minBin + bins[1:]) / 2
+    
+    cm = plt.cm.get_cmap('RdYlBu_r')
+    #col = (n-n.min())/(n.max()-n.min())
+    for c, p in zip(bins, patches):
+        plt.setp( p, 'facecolor', cm(c/numBins))
+    fig, ax = plt.subplots(num=1, figsize=(8,3))
+    ax.set_title("Distribution of Column Means")
+    #ax.bar(center,bins, align='center', width=width)
+    #ax.bar(center, hist, align='center', width=width)
+    #ax.set_xticks(bins)
+#    fig.savefig("/Users/bobbrown/Desktop/Matrix-tools-Test-output/Column_Mean_Histogram.png")
+    
+    plt.show()
+    """
+    return()
+
+#========== test create variable number output files in Galaxy
+def CreateFiles(output_file_info):    
+    
+        for i in range(3):
+            fd= open( output_file_info, 'w')
+            fd.write('File number = '+ str(i)+"\n")
+            fd.close()
+            
+        return()
+    
+#==================
+    
+    #Define Main Function
+def main():
+    try:
+        args = get_args()
+        #sys.stdout.write(str(args)+"\n")
+        nanList= ["NAN", "NA", "N/A", "-","?","nan", "na", "n/a"]
+
+        matrix, og_cols,og_rows = reader(args.input_file_txt)
+        #old_reader  matrix, og_rows, og_cols = reader(args.input_file_txt)
+#         if float(args.thresh) < 0.000001:
+#             print('Invalid negative threshold chosen = '+str(args.thresh)+" choose positive value")
+#             sys.exit(-4)
+            
+        if args.choice == "Histogram":
+            Histo(matrix)
+        elif args.choice == "CreateFiles":
+            CreateFiles(args.output_file_info)
+            
+        elif args.choice == "Variance":
+            if args.axes == "Row":
+                matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = Variance_Percent_Filter_row(matrix,1,og_rows,og_cols,True)
+                labeler(matrix,filter_rows,filter_cols,args.output_file_txt)
+#                 if delCnt < 1:
+#                     print('\nNO Filtering occurred for rows using variance < '+str(args.thresh)+ ' by row. Matrix row minimum variance=  %.2f' % minVal+' and maximum variance=  %.2f' % maxVal)
+#                     sys.stderr.write('\nFiltering out rows using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows')
+#                     sys.exit(-1)
+#                 else:   
+#                     print('\nFiltering out rows using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows')
+            elif args.axes == "Column":
+                matrix, filter_rows, filter_cols,delCnt,minVal,maxVal = Variance_Percent_Filter_col(matrix,1,og_rows,og_cols,True)
+                labeler(matrix,filter_rows,filter_cols,args.output_file_txt)
+#                 if delCnt < 1:
+#                     print('\nNO Filtering occurred for columns using variance < '+str(args.thresh)+ ' by columns. Matrix columns minimum variance=  %.2f' % minVal+' and maximum variance=  %.2f' % maxVal)
+#                     sys.stderr.write('\nFiltering out rows using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' rows')
+#                     sys.exit(-1)
+#                 else:   
+#                     print('\nFiltering out columns using variance < '+str(args.thresh)+ ' removed '+str(delCnt)+' columns')
+            else:
+                print('Invalid Axes = '+str(args.axes))
+                sys.exit(-1)
+        else:
+            print("Invalid Filter Choice = "+str(args.choice))
+            sys.exit(-2)    
+        
+             
+    except Exception as err:
+        traceback.print_exc()
+        sys.exit(-3)
+
+if __name__ == '__main__':
+    main()
+    print("\nFini")
+    sys.exit(0)
\ No newline at end of file