annotate src/breadcrumbs/scripts/scriptPlotFeature.py @ 0:2f4f6f08c8c4 draft

Uploaded
author george-weingart
date Tue, 13 May 2014 21:58:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
2 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
3 Author: Timothy Tickle
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
4 Description: Plots features
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
5 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
6
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
7 __author__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
8 __copyright__ = "Copyright 2012"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
9 __credits__ = ["Timothy Tickle"]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
10 __license__ = ""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
11 __version__ = ""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
12 __maintainer__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
13 __email__ = "ttickle@sph.harvard.edu"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
14 __status__ = "Development"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
15
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
16 import sys
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
17 import argparse
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
18 import csv
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
19 import os
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
20 from src.BoxPlot import BoxPlot
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
21 from src.Histogram import Histogram
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
22 from src.ScatterPlot import ScatterPlot
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
23
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
    """
    Group one variable's values by the matching entries of the other variable
    and draw the groups as a box plot.

    The two lists are parallel (entry i of each belongs to the same sample).
    One list supplies the values to plot and the other supplies the group
    label of each value.

    Output path, title, axis titles, color and the invert flags are read from
    the module-level ``args`` namespace; the drawing itself is delegated to
    ``BoxPlot.funcPlot`` (jitter is always requested).

    :param lxVariable1: Values of the first feature.
    :param lxVariable2: Values of the second feature.
    :param fOneIsNumeric: True if lxVariable1 holds the values to plot (and
                          lxVariable2 the group labels); False for the reverse.
    """

    lxValues, lxGroupings = (lxVariable1, lxVariable2) if fOneIsNumeric else (lxVariable2, lxVariable1)

    # Group data: label -> list of the values carrying that label
    dictGroups = {}
    for xValue, xLabel in zip(lxValues, lxGroupings):
        dictGroups.setdefault(xLabel, []).append(xValue)

    # Materialise the labels once so that the label list and the value lists
    # are guaranteed to be in the same order (and are real lists on Python 3,
    # where dict.keys() is only a view).
    lsLabels = list(dictGroups.keys())
    ly = [dictGroups[xLabel] for xLabel in lsLabels]

    BoxPlot.funcPlot(ly=ly, lsLabels=lsLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
38
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
39
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
def funcReadFeatures(xInputFile, strFeatureOneID, strFeatureTwoID=None):
    """
    Read the rows of a tab-delimited table whose first column matches the
    requested feature ids.

    :param xInputFile: Path to the table (str), or an already opened file /
                       iterable of lines. Only a path opened here is closed here.
    :param strFeatureOneID: Id (first column) of the first feature.
    :param strFeatureTwoID: Id of the optional second feature, or None.
    :return: Tuple (values of feature one, values of feature two); an entry is
             None when its feature was not requested or not found. If an id
             occurs in several rows, the last row wins.
    """
    fOpenedHere = isinstance(xInputFile, str)
    if fOpenedHere:
        # Python 2 needs "U" for universal newlines; Python 3 does this by
        # default and rejects the "U" flag from version 3.11 on.
        hndlInput = open(xInputFile, "rU" if sys.version_info[0] < 3 else "r")
    else:
        hndlInput = xInputFile

    lxFeatureOne = None
    lxFeatureTwo = None
    try:
        for lsLine in csv.reader(hndlInput, delimiter="\t"):
            # Blank lines come back as empty rows; they have no id to compare
            if not lsLine:
                continue
            if lsLine[0] == strFeatureOneID:
                lxFeatureOne = lsLine[1:]
            if (strFeatureTwoID is not None) and (lsLine[0] == strFeatureTwoID):
                lxFeatureTwo = lsLine[1:]
    finally:
        if fOpenedHere:
            hndlInput.close()
    return lxFeatureOne, lxFeatureTwo


def funcFindNAIndices(lxValues):
    """
    Return the set of positions in lxValues that hold "NA" (any letter case).
    None (feature not present) yields an empty set.
    """
    if lxValues is None:
        return set()
    return set(iIndex for iIndex, xValue in enumerate(lxValues) if xValue.lower() == "na")


def funcRemoveIndices(lxValues, setiIndices):
    """ Return a copy of lxValues without the entries at the given positions. """
    return [xValue for iIndex, xValue in enumerate(lxValues) if iIndex not in setiIndices]


def funcTryMakeNumeric(lxValues):
    """
    Try to convert every entry of lxValues to float.

    :return: Tuple (values, fIsNumeric). When every entry converts, the floats
             and True are returned; otherwise (or for an empty list) the
             original list and False are returned.
    """
    if not lxValues:
        return lxValues, False
    try:
        return [float(xItem) for xItem in lxValues], True
    except ValueError:
        return lxValues, False


# Only run when executed as a script. The names assigned below stay
# module-level globals, so funcPlotBoxPlot can still read args.
if __name__ == "__main__":

    #Set up arguments reader
    argp = argparse.ArgumentParser( prog = "scriptPlotFeature.py\nExample: python scriptPlotFeature.py Input.pcl valuesID groupID",
        description = "Make a box plot from an abundance table.")

    #Specify output if needed
    argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")

    # Text annotation
    argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Text for the title.")
    argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
    argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")

    # Color options
    argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
    argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")

    # Axis adjustments
    argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")

    # Required
    argp.add_argument("strFileAbund", help ="Input data file")
    argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")

    args = argp.parse_args( )

    # Only the first two features given are used
    strFeatureOneID = args.strFeatures[0]
    strFeatureTwoID = None if len(args.strFeatures)<2 else args.strFeatures[1]
    lsFeatureIDs = [strFeatureOneID] + ([strFeatureTwoID] if strFeatureTwoID else [])

    # If the output file is not specified, make it up from the input file
    # name (without extension) and the feature ids
    if not args.strOutputFile:
        strInputRoot = os.path.splitext(args.strFileAbund)[0]
        args.strOutputFile = "-".join([strInputRoot] + lsFeatureIDs + ["plotfeature.pdf"])

    # Default title and axis labels are the feature ids
    if not args.strTitle:
        args.strTitle = " vs ".join(lsFeatureIDs)
    if args.strX is None:
        args.strX = strFeatureOneID
    if args.strY is None:
        args.strY = strFeatureTwoID

    # Get values and groupings
    lxVariable1, lxVariable2 = funcReadFeatures(args.strFileAbund, strFeatureOneID, strFeatureTwoID)
    fOneIsNumeric = False
    fTwoIsNumeric = False

    # Check that the features were found before touching their values
    if lxVariable1 is None:
        print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
    elif( (lxVariable2 is None) and (not strFeatureTwoID is None) ):
        print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
    else:
        # Remove NAs: a sample is dropped from both variables when either
        # variable is NA for it, which keeps the two lists parallel
        setiNAs = funcFindNAIndices(lxVariable1) | funcFindNAIndices(lxVariable2)
        lxVariable1 = funcRemoveIndices(lxVariable1, setiNAs)
        if lxVariable2 is not None:
            lxVariable2 = funcRemoveIndices(lxVariable2, setiNAs)

        # Type variables
        lxVariable1, fOneIsNumeric = funcTryMakeNumeric(lxVariable1)
        if lxVariable2 is not None:
            lxVariable2, fTwoIsNumeric = funcTryMakeNumeric(lxVariable2)

        # Plot as needed
        if not lxVariable1:
            print("scriptPlotFeature:: Sorry, no data left to plot after removing NAs.")
        elif lxVariable2 is not None:
            if fOneIsNumeric and fTwoIsNumeric:
                # Two numeric variables
                ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
            elif fOneIsNumeric or fTwoIsNumeric:
                # One numeric variable grouped by the other
                funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
            else:
                print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")
        elif fOneIsNumeric:
            # A single numeric variable
            Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
        else:
            print("Sorry currently histograms are support for only numeric data.")