3
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Author: Timothy Tickle
|
|
4 Description: Plots features
|
|
5 """
|
|
6
|
|
7 __author__ = "Timothy Tickle"
|
|
8 __copyright__ = "Copyright 2012"
|
|
9 __credits__ = ["Timothy Tickle"]
|
|
10 __license__ = ""
|
|
11 __version__ = ""
|
|
12 __maintainer__ = "Timothy Tickle"
|
|
13 __email__ = "ttickle@sph.harvard.edu"
|
|
14 __status__ = "Development"
|
|
15
|
|
16 import sys
|
|
17 import argparse
|
|
18 import csv
|
|
19 import os
|
|
20 from src.BoxPlot import BoxPlot
|
|
21 from src.Histogram import Histogram
|
|
22 from src.ScatterPlot import ScatterPlot
|
|
23
|
|
def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
    """
    Draw a box plot of one variable's values grouped by the other variable.

    The two lists are paired position by position. Whichever list is flagged
    as numeric supplies the plotted values; the other list supplies the
    group label for each value.

    Output path, title, axis titles, fill color and the two invert flags are
    read from the module-level ``args`` namespace; jitter is always enabled.

    :param lxVariable1: Values of the first feature.
    :param lxVariable2: Values of the second feature.
    :param fOneIsNumeric: True if lxVariable1 holds the values to plot
                          (lxVariable2 then holds the group labels);
                          False for the opposite assignment.
    """

    ly, lsLabels = (lxVariable1, lxVariable2) if fOneIsNumeric else (lxVariable2, lxVariable1)

    # Group data: label -> list of the values observed with that label.
    # zip pairs the two lists directly (no index arithmetic) and stops at the
    # shorter list if their lengths ever differ.
    dictGroups = {}
    for xLabel, xValue in zip(lsLabels, ly):
        dictGroups.setdefault(xLabel, []).append(xValue)

    # Materialize labels and value lists in the same order so that box i is
    # always drawn under label i.
    lsGroupLabels = list(dictGroups)
    llGroupValues = [dictGroups[xLabel] for xLabel in lsGroupLabels]

    BoxPlot.funcPlot(ly=llGroupValues, lsLabels=lsGroupLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
|
|
38
|
|
39
|
|
# Set up the command line argument reader.
# prog carries only the program name; the usage example lives in the epilog so
# it is printed after the option list instead of being spliced into the
# generated "usage:" line.
argp = argparse.ArgumentParser(
    prog="scriptBoxPlot.py",
    description="Make a box plot from an abundance table.",
    epilog="Example: python scriptBoxPlot.py Input.pcl valuesID groupID")

# Specify output if needed (a name is derived later when this is omitted)
argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")

# Text annotation
argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Text for the title.")
argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")

# Color options
argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")

# Axis adjustments
argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")

# Required positionals: the input file, then one or more feature IDs
argp.add_argument("strFileAbund", help ="Input data file")
argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")
|
|
62
|
|
args = argp.parse_args()

# The first requested feature is mandatory; the second is optional.
strFeatureOneID = args.strFeatures[0]
strFeatureTwoID = args.strFeatures[1] if len(args.strFeatures) >= 2 else None

# Holders for the data read from the input file and the flags recording
# whether each feature turned out to be numeric.
lxVariable1 = None
lxVariable2 = None
fOneIsNumeric = False
fTwoIsNumeric = False

# Feature IDs that take part in the generated output name and title.
lsRequestedIDs = [strFeatureOneID]
if strFeatureTwoID:
    lsRequestedIDs.append(strFeatureTwoID)

# No output file given: derive one from the input path (extension removed),
# the feature IDs and a fixed suffix, joined with dashes.
if not args.strOutputFile:
    strInputStem = os.path.splitext(args.strFileAbund)[0]
    args.strOutputFile = "-".join([strInputStem] + lsRequestedIDs + ["plotfeature.pdf"])

# No title given: use the feature IDs, joined with " vs " when there are two.
if not args.strTitle:
    args.strTitle = " vs ".join(lsRequestedIDs)
|
|
87
|
|
# Axis titles default to the feature IDs (the y title stays None when only
# one feature was requested).
if args.strX is None:
    args.strX = strFeatureOneID

if args.strY is None:
    args.strY = strFeatureTwoID

# Get values and groupings
# A path is opened here (and therefore closed here); anything else is treated
# as an already-open handle that belongs to the caller.
fOpenedHere = isinstance(args.strFileAbund, str)
if fOpenedHere:
    # 'U' requests universal newlines on Python 2. Python 3 text mode already
    # behaves that way and Python 3.11+ rejects the 'U' flag outright.
    hndlAbund = open(args.strFileAbund, 'rU' if sys.version_info[0] < 3 else 'r')
else:
    hndlAbund = args.strFileAbund

try:
    csvReader = csv.reader(hndlAbund, delimiter="\t")
    for lsLine in csvReader:
        # Blank lines parse to an empty row; there is no ID to compare.
        if not lsLine:
            continue
        # The first column is the feature ID, the rest are its values.
        # If an ID occurs on several rows, the last row wins.
        if lsLine[0] == strFeatureOneID:
            lxVariable1 = lsLine[1:]
        if strFeatureTwoID is not None and lsLine[0] == strFeatureTwoID:
            lxVariable2 = lsLine[1:]
finally:
    if fOpenedHere:
        hndlAbund.close()
|
|
103
|
|
# Remove NAs
# Collect every position that is "NA" (any letter case) in EITHER feature,
# then drop those positions from both lists so the remaining values stay
# paired. A feature that was not found in the file is still None here and is
# skipped, so the "could not find the feature" message further down can be
# reached instead of failing on enumerate(None).
liNAs = set()
for lxValues in (lxVariable1, lxVariable2):
    if lxValues is not None:
        liNAs.update(i for i, x in enumerate(lxValues) if x.lower() == "na")

if lxVariable1 is not None:
    lxVariable1 = [x for i, x in enumerate(lxVariable1) if i not in liNAs]

if lxVariable2 is not None:
    lxVariable2 = [x for i, x in enumerate(lxVariable2) if i not in liNAs]
|
|
111
|
|
# Type variables
# A feature counts as numeric only if every one of its values converts to
# float. The list is rebound only after the whole conversion succeeded, so a
# feature with any non-numeric value keeps its original strings and its flag
# stays False. A missing (None) or empty list is left untouched and
# non-numeric; there is no first element to probe.
if lxVariable1:
    try:
        lxVariable1 = [float(xItem) for xItem in lxVariable1]
        fOneIsNumeric = True
    except ValueError:
        pass

if lxVariable2:
    try:
        lxVariable2 = [float(xItem) for xItem in lxVariable2]
        fTwoIsNumeric = True
    except ValueError:
        pass
|
|
128
|
|
# Report missing features, otherwise pick the plot from the variable types:
#   two features, both numeric      -> scatter plot
#   two features, exactly one numeric -> box plot grouped by the other
#   two features, none numeric      -> not supported
#   one numeric feature             -> histogram
# Messages use the print(...) call form with a single string so they read the
# same on Python 2 and Python 3.
if lxVariable1 is None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is None and strFeatureTwoID is not None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is not None:
    # Two features were requested and found.
    iNumericCount = sum([fOneIsNumeric, fTwoIsNumeric])
    if iNumericCount == 0:
        print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")
    elif iNumericCount == 1:
        funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
    else:
        ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
else:
    # Only one feature was requested.
    if fOneIsNumeric:
        Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
    else:
        print("Sorry currently histograms are support for only numeric data.")
|