Mercurial > repos > george-weingart > micropita
comparison src/breadcrumbs/scripts/scriptPlotFeature.py @ 0:2f4f6f08c8c4 draft
Uploaded
author | george-weingart |
---|---|
date | Tue, 13 May 2014 21:58:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2f4f6f08c8c4 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 Author: Timothy Tickle | |
4 Description: Plots features | |
5 """ | |
6 | |
7 __author__ = "Timothy Tickle" | |
8 __copyright__ = "Copyright 2012" | |
9 __credits__ = ["Timothy Tickle"] | |
10 __license__ = "" | |
11 __version__ = "" | |
12 __maintainer__ = "Timothy Tickle" | |
13 __email__ = "ttickle@sph.harvard.edu" | |
14 __status__ = "Development" | |
15 | |
16 import sys | |
17 import argparse | |
18 import csv | |
19 import os | |
20 from src.BoxPlot import BoxPlot | |
21 from src.Histogram import Histogram | |
22 from src.ScatterPlot import ScatterPlot | |
23 | |
def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
    """
    Groups the values of one feature by the labels of the other and hands
    the groups to BoxPlot.funcPlot.

    Output path, title, axis titles, color and inversion flags are read from
    the module-level args namespace.

    :param lxVariable1: Entries of the first feature.
    :param lxVariable2: Entries of the second feature, indexed in step with lxVariable1.
    :param fOneIsNumeric: True if lxVariable1 supplies the plotted values and
                          lxVariable2 the group labels; False for the reverse.
    """
    # Decide which feature supplies the values and which the group labels
    if fOneIsNumeric:
        lxValues, lxGroupNames = lxVariable1, lxVariable2
    else:
        lxValues, lxGroupNames = lxVariable2, lxVariable1

    # Group data: one list of values per distinct label
    dictGroups = {}
    for iPosition, xValue in enumerate(lxValues):
        dictGroups.setdefault(lxGroupNames[iPosition], []).append(xValue)

    # Labels and value lists are both taken in the dictionary's key order so they stay paired
    lsLabels = dictGroups.keys()
    ly = [dictGroups[sLabel] for sLabel in lsLabels]

    BoxPlot.funcPlot(ly=ly, lsLabels=lsLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
38 | |
39 | |
#Set up arguments reader
# prog holds only the program name; the usage example lives in the epilog so
# the generated "usage:" line is not broken up by an embedded newline.
argp = argparse.ArgumentParser( prog = "scriptPlotFeature.py",
    description = "Plot one or two features from an abundance table as a histogram, box plot, or scatter plot.",
    epilog = "Example: python scriptPlotFeature.py Input.pcl valuesID groupID" )

#Specify output if needed
argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")

# Text annotation
argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Text for the title.")
argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")

# Color options
argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")

# Axis adjustments
argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")

# Required
argp.add_argument("strFileAbund", help ="Input data file")
# nargs="+" accepts any number of names; only the first two are used by the script
argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")

args = argp.parse_args( )
64 | |
#Holds the data
lxVariable1 = None
lxVariable2 = None
fOneIsNumeric = False
fTwoIsNumeric = False

strFeatureOneID = args.strFeatures[0]
strFeatureTwoID = None if len(args.strFeatures)<2 else args.strFeatures[1]

# Requested feature names in order; shared by the default file name and title
lsFeatureIDs = [strFeatureOneID] + ([strFeatureTwoID] if strFeatureTwoID else [])

# If the output file is not specified, make it up:
# <input path without extension>-<feature one>[-<feature two>]-plotfeature.pdf
if not args.strOutputFile:
    strInputBase = os.path.splitext(args.strFileAbund)[0]
    args.strOutputFile = "-".join([strInputBase]+lsFeatureIDs+["plotfeature.pdf"])

# Default title: "<feature one>" or "<feature one> vs <feature two>"
if not args.strTitle:
    args.strTitle = " vs ".join(lsFeatureIDs)

# Default axis titles are the feature names (the y title stays None for a single feature)
if args.strX is None:
    args.strX = strFeatureOneID

if args.strY is None:
    args.strY = strFeatureTwoID

# Get values and groupings
# A path is opened here and therefore closed here; anything else is treated as
# an already-open handle and left for its owner to close.
fOpenedHere = isinstance(args.strFileAbund,str)
hndlInput = open(args.strFileAbund, 'rU') if fOpenedHere else args.strFileAbund
try:
    csvReader = csv.reader(hndlInput, delimiter="\t")
    for lsLine in csvReader:
        # Blank lines come back as empty lists; skip them instead of failing on lsLine[0]
        if not lsLine:
            continue
        # The first column is the feature ID, the remaining columns are its values.
        # If an ID occurs more than once the last occurrence wins.
        if lsLine[0] == strFeatureOneID:
            lxVariable1 = lsLine[1:]
        if strFeatureTwoID is not None and lsLine[0] == strFeatureTwoID:
            lxVariable2 = lsLine[1:]
finally:
    if fOpenedHere:
        hndlInput.close()
103 | |
# Remove NAs
# Collect every position that is "NA" (any letter case) in EITHER feature and
# drop those positions from both, so the two lists stay aligned entry for entry.
# A feature that was not found (None) is skipped here so the "could not find
# the feature" report further down is still reached.
liNAs = set()
for lxCandidate in (lxVariable1, lxVariable2):
    if lxCandidate is not None:
        liNAs.update(iIndex for iIndex, xItem in enumerate(lxCandidate) if xItem.lower() == "na")

if lxVariable1 is not None:
    lxVariable1 = [xItem for iIndex, xItem in enumerate(lxVariable1) if iIndex not in liNAs]

if lxVariable2 is not None:
    lxVariable2 = [xItem for iIndex, xItem in enumerate(lxVariable2) if iIndex not in liNAs]
111 | |
# Type variables
def _funcToNumeric(lxValues):
    """
    Tries to convert every entry of a feature to float.

    :param lxValues: List of entries, or None if the feature was not found.
    :return: Tuple (values, fIsNumeric). The values are floats and the flag is
             True only if the list is non-empty and every entry converts;
             otherwise the input is returned unchanged with False.
    """
    # None (feature missing) and empty lists (e.g. every entry was NA) are not numeric
    if not lxValues:
        return lxValues, False
    try:
        return [float(xItem) for xItem in lxValues], True
    except ValueError:
        # At least one entry is not a number; keep the original strings
        return lxValues, False

lxVariable1, fOneIsNumeric = _funcToNumeric(lxVariable1)
lxVariable2, fTwoIsNumeric = _funcToNumeric(lxVariable2)
128 | |
# Report missing features, otherwise pick the plot from how many features
# were given and which of them are numeric.
if lxVariable1 is None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is None and strFeatureTwoID is not None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is None:
    # One feature: histogram, numeric data only
    if not fOneIsNumeric:
        print("Sorry currently histograms are support for only numeric data.")
    else:
        Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
else:
    # Two features: both numeric -> scatter plot, exactly one numeric -> box plot
    if fOneIsNumeric and fTwoIsNumeric:
        ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
    elif fOneIsNumeric or fTwoIsNumeric:
        funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
    else:
        print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")