Mercurial > repos > george-weingart > micropita
comparison src/breadcrumbs/scripts/scriptPlotFeature.py @ 0:2f4f6f08c8c4 draft
Uploaded
| author | george-weingart |
|---|---|
| date | Tue, 13 May 2014 21:58:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:2f4f6f08c8c4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Author: Timothy Tickle | |
| 4 Description: Plots features | |
| 5 """ | |
| 6 | |
| 7 __author__ = "Timothy Tickle" | |
| 8 __copyright__ = "Copyright 2012" | |
| 9 __credits__ = ["Timothy Tickle"] | |
| 10 __license__ = "" | |
| 11 __version__ = "" | |
| 12 __maintainer__ = "Timothy Tickle" | |
| 13 __email__ = "ttickle@sph.harvard.edu" | |
| 14 __status__ = "Development" | |
| 15 | |
| 16 import sys | |
| 17 import argparse | |
| 18 import csv | |
| 19 import os | |
| 20 from src.BoxPlot import BoxPlot | |
| 21 from src.Histogram import Histogram | |
| 22 from src.ScatterPlot import ScatterPlot | |
| 23 | |
def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
    """
    Draw a box plot of one feature's values grouped by the values of the other feature.

    Plot settings (output path, title, axis titles, color and inversion flags) are
    read from the module-level ``args`` namespace produced by the argument parser.

    :param lxVariable1: Values of the first feature, one entry per sample.
    :param lxVariable2: Values of the second feature, in the same sample order.
    :param fOneIsNumeric: True if lxVariable1 holds the values to plot and
                          lxVariable2 the group labels; False for the reverse.
    :return: None. The plot is produced by BoxPlot.funcPlot.
    """
    # The numeric feature supplies the plotted values, the other one the group labels
    if fOneIsNumeric:
        lyValues, lsSampleLabels = lxVariable1, lxVariable2
    else:
        lyValues, lsSampleLabels = lxVariable2, lxVariable1

    # Group data: label -> list of the values carrying that label
    dictGroups = {}
    for iIndex, yValue in enumerate(lyValues):
        dictGroups.setdefault(lsSampleLabels[iIndex], []).append(yValue)

    # Materialize the labels once so values and labels are guaranteed to line up
    # and a real list (not a dict view) is handed to the plotting code
    lsLabels = list(dictGroups)
    ly = [dictGroups[sKey] for sKey in lsLabels]

    BoxPlot.funcPlot(ly=ly, lsLabels=lsLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
| 38 | |
| 39 | |
# Set up the arguments reader.
# The program name matches this script and the usage example lives in the epilog,
# so the generated usage line is not broken by an embedded newline.
argp = argparse.ArgumentParser(
    prog="scriptPlotFeature.py",
    description="Plot one or two features from an abundance table: a histogram for one numeric feature, a box plot for one numeric and one non-numeric feature, or a scatter plot for two numeric features.",
    epilog="Example: python scriptPlotFeature.py Input.pcl valuesID groupID")

# Specify output if needed
argp.add_argument("-o", "--output", dest="strOutputFile", action="store", default=None, help="Output file name.")

# Text annotation
argp.add_argument("-t", "--title", dest="strTitle", action="store", default=None, help="Text for the title.")
argp.add_argument("-x", "--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
argp.add_argument("-y", "--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")

# Color options
argp.add_argument("-c", "--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
argp.add_argument("-r", "--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")

# Axis adjustments
argp.add_argument("-s", "--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")

# Required
argp.add_argument("strFileAbund", help="Input data file")
argp.add_argument("strFeatures", nargs="+", help="Features to plot (from one to two metadata).")

args = argp.parse_args()
| 64 | |
# Placeholders for the feature values and their detected types;
# they are filled in once the input file has been read
lxVariable1, lxVariable2 = None, None
fOneIsNumeric = fTwoIsNumeric = False

# The first feature is always given, the second one is optional
strFeatureOneID = args.strFeatures[0]
strFeatureTwoID = args.strFeatures[1] if len(args.strFeatures) >= 2 else None

# IDs used to build the default output name and title
lsRequestedIDs = [strFeatureOneID]
if strFeatureTwoID:
    lsRequestedIDs.append(strFeatureTwoID)

# Without an explicit output file, derive one from the input file name and the feature IDs
if not args.strOutputFile:
    strInputBase = os.path.splitext(args.strFileAbund)[0]
    args.strOutputFile = "-".join([strInputBase] + lsRequestedIDs + ["plotfeature.pdf"])

# Without an explicit title, name the plotted feature(s)
if not args.strTitle:
    args.strTitle = " vs ".join(lsRequestedIDs)
| 87 | |
def _funcCastToNumeric(lxValues):
    """
    Try to convert every entry of a feature to float.

    :param lxValues: Values of one feature as read from the file, or None.
    :return: Tuple (values, fIsNumeric). When every entry converts, the list of
             floats and True; otherwise the input unchanged and False. None and
             empty lists are returned unchanged with False.
    """
    if not lxValues:
        return lxValues, False
    try:
        return [float(xItem) for xItem in lxValues], True
    except ValueError:
        return lxValues, False


# Axis titles default to the IDs of the plotted features
if args.strX is None:
    args.strX = strFeatureOneID

if args.strY is None:
    args.strY = strFeatureTwoID

# Get values and groupings.
# A path is opened here and closed again once read; anything else is iterated as given.
fOpenedHere = isinstance(args.strFileAbund, str)
if fOpenedHere:
    # Python 3 text mode already translates newlines; the "U" flag is only needed on Python 2
    strReadMode = "r" if sys.version_info[0] >= 3 else "rU"
    hndlAbund = open(args.strFileAbund, strReadMode)
else:
    hndlAbund = args.strFileAbund

try:
    csvReader = csv.reader(hndlAbund, delimiter="\t")
    for lsLine in csvReader:
        # Blank lines are parsed as empty rows and have no ID to compare
        if not lsLine:
            continue
        if lsLine[0] == strFeatureOneID:
            lxVariable1 = lsLine[1:]
        if strFeatureTwoID is not None and lsLine[0] == strFeatureTwoID:
            lxVariable2 = lsLine[1:]
finally:
    if fOpenedHere:
        hndlAbund.close()

# Remove NAs: a sample is dropped from both features if either one is NA for it.
# Features that were not found stay None so the missing feature can be reported later.
liNAs = set()
for lxValues in (lxVariable1, lxVariable2):
    if lxValues is not None:
        liNAs.update(i for i, x in enumerate(lxValues) if x.lower() == "na")

if lxVariable1 is not None:
    lxVariable1 = [x for i, x in enumerate(lxVariable1) if i not in liNAs]

if lxVariable2 is not None:
    lxVariable2 = [x for i, x in enumerate(lxVariable2) if i not in liNAs]

# Type variables: a feature is numeric only if all of its remaining values convert to float
lxVariable1, fOneIsNumeric = _funcCastToNumeric(lxVariable1)
lxVariable2, fTwoIsNumeric = _funcCastToNumeric(lxVariable2)
| 128 | |
# Report a requested feature that was not found; otherwise choose the plot
# from the number of features and how many of them are numeric.
if lxVariable1 is None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is None and strFeatureTwoID is not None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is not None:
    # Two features: box plot when exactly one is numeric, scatter plot when both are
    iNumericCount = sum([fOneIsNumeric, fTwoIsNumeric])
    if iNumericCount == 0:
        print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")
    elif iNumericCount == 1:
        funcPlotBoxPlot(lxVariable1, lxVariable2, fOneIsNumeric=fOneIsNumeric)
    else:
        ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
elif fOneIsNumeric:
    # One numeric feature: histogram of its values
    Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
else:
    print("Sorry currently histograms are support for only numeric data.")
