annotate galaxy_micropita/src/breadcrumbs/scripts/scriptPlotFeature.py @ 3:8fb4630ab314 draft default tip

Uploaded
author sagun98
date Thu, 03 Jun 2021 17:07:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1 #!/usr/bin/env python
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
2 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
3 Author: Timothy Tickle
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
4 Description: Plots features
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
5 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
6
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
7 __author__ = "Timothy Tickle"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
8 __copyright__ = "Copyright 2012"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
9 __credits__ = ["Timothy Tickle"]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
10 __license__ = ""
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
11 __version__ = ""
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
12 __maintainer__ = "Timothy Tickle"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
13 __email__ = "ttickle@sph.harvard.edu"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
14 __status__ = "Development"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
15
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
16 import sys
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
17 import argparse
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
18 import csv
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
19 import os
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
20 from src.BoxPlot import BoxPlot
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
21 from src.Histogram import Histogram
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
22 from src.ScatterPlot import ScatterPlot
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
23
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcPlotBoxPlot(lxVariable1,lxVariable2,fOneIsNumeric):
    """
    Group the numeric feature by the other feature's values and hand the
    groups to BoxPlot.funcPlot.

    Output path, title, axis titles, color and the invert flags are read
    from the module-level ``args`` namespace produced by the argument parser.

    lxVariable1 : Values of the first feature, one entry per sample.
    lxVariable2 : Values of the second feature, aligned with lxVariable1.
    fOneIsNumeric : True when lxVariable1 holds the numeric values (and
                    lxVariable2 the group labels); False for the opposite.
    """

    ly, lsLabels = (lxVariable1, lxVariable2) if fOneIsNumeric else (lxVariable2, lxVariable1)

    # Group data: label -> list of the numeric values carrying that label
    dictGroups = {}
    for iIndex, yValue in enumerate(ly):
        dictGroups.setdefault(lsLabels[iIndex], []).append(yValue)

    # Take the keys once so labels and value lists are guaranteed to be in
    # the same order (and are real lists on both Python 2 and 3).
    lsLabels = list(dictGroups)
    ly = [dictGroups[sKey] for sKey in lsLabels]

    BoxPlot.funcPlot(ly=ly, lsLabels=lsLabels, strOutputFigurePath=args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fJitter=True, fInvert=args.fColor, fInvertY=args.fAxis)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
38
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
39
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
# Set up the argument reader.
# prog must be the bare script name: argparse prints it in the usage line and
# as the prefix of every error message, so the example lives in the epilog.
argp = argparse.ArgumentParser( prog = "scriptPlotFeature.py",
    description = "Plot one or two features of an abundance table as a histogram, box plot or scatter plot.",
    epilog = "Example: python scriptPlotFeature.py Input.pcl valuesID groupID" )

# Specify output if needed
argp.add_argument("-o","--output", dest="strOutputFile", action="store", default=None, help="Output file name.")

# Text annotation
argp.add_argument("-t","--title", dest="strTitle", action="store", default=None, help="Text for the title.")
argp.add_argument("-x","--xaxis", dest="strX", action="store", default=None, help="Text for the x-axis.")
argp.add_argument("-y","--yaxis", dest="strY", action="store", default=None, help="Text for the y-axis.")

# Color options
argp.add_argument("-c","--color", dest="strColor", action="store", default="#83C8F9", help="Fill color as a Hex number (including the #).")
argp.add_argument("-r","--invertcolor", dest="fColor", action="store_true", default=False, help="Flag to invert the background to black.")

# Axis adjustments
argp.add_argument("-s","--invertyaxis", dest="fAxis", action="store_true", default=False, help="Flag to invert the y axis.")

# Required
argp.add_argument("strFileAbund", help ="Input data file")
# Only the first two features given are used by the rest of the script.
argp.add_argument("strFeatures", nargs = "+", help="Features to plot (from one to two metadata).")

args = argp.parse_args( )
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
64
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
# Rows read from the input file for each feature, and whether each row
# turned out to be numeric (filled in further down)
lxVariable1, lxVariable2 = None, None
fOneIsNumeric, fTwoIsNumeric = False, False

# The first feature is always given; the second one is optional
strFeatureOneID = args.strFeatures[0]
strFeatureTwoID = args.strFeatures[1] if len(args.strFeatures) >= 2 else None

# Feature names used to build the default output file name and title
lsFeatureNames = [strFeatureOneID]
if strFeatureTwoID:
    lsFeatureNames.append(strFeatureTwoID)

# No output file given: <input without extension>-<features>-plotfeature.pdf
if not args.strOutputFile:
    strInputBase = os.path.splitext(args.strFileAbund)[0]
    args.strOutputFile = "-".join([strInputBase] + lsFeatureNames + ["plotfeature.pdf"])

# No title given: "<feature one>" or "<feature one> vs <feature two>"
if not args.strTitle:
    args.strTitle = " vs ".join(lsFeatureNames)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
87
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
# Axis titles default to the feature names
if args.strX is None:
    args.strX = strFeatureOneID

if args.strY is None:
    args.strY = strFeatureTwoID

# Open the input ourselves only when a path was given, and remember that so
# the handle is closed again once the rows have been read.
fOpenedHere = isinstance(args.strFileAbund,str)
if fOpenedHere:
    # 'U' (universal newlines) is needed on Python 2 but was removed in
    # Python 3.11; Python 3 text mode already translates newlines.
    strMode = 'rU' if sys.version_info[0] < 3 else 'r'
    fileAbund = open(args.strFileAbund, strMode)
else:
    fileAbund = args.strFileAbund

try:
    csvReader = csv.reader(fileAbund, delimiter="\t")

    # Get values and groupings
    for lsLine in csvReader:
        # csv.reader yields an empty list for a blank line; skip it rather
        # than fail on lsLine[0]
        if not lsLine:
            continue
        if lsLine[0] == strFeatureOneID:
            lxVariable1 = lsLine[1:]
        if strFeatureTwoID is not None and lsLine[0] == strFeatureTwoID:
            lxVariable2 = lsLine[1:]
finally:
    if fOpenedHere:
        fileAbund.close()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
103
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
# Remove NAs
# Collect the positions that are "NA" (any case) in EITHER feature; a feature
# that was not found in the file (None) is skipped so the "could not find"
# message further down can still be reached.
liNAs = set()
for lxValues in (lxVariable1, lxVariable2):
    if lxValues is not None:
        liNAs.update(i for i,x in enumerate(lxValues) if x.lower() == "na")

# Drop the same positions from both features so they stay aligned
if lxVariable1 is not None:
    lxVariable1 = [x for i,x in enumerate(lxVariable1) if i not in liNAs]

if lxVariable2 is not None:
    lxVariable2 = [x for i,x in enumerate(lxVariable2) if i not in liNAs]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
111
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
# Type variables
# A feature counts as numeric only when every remaining value parses as a
# float; otherwise it is left as the original strings and its flag stays
# False. A feature that is missing (None) or has no values left (empty list)
# is left untouched instead of failing on an index lookup.
if lxVariable1:
    try:
        lxVariable1 = [float(xItem) for xItem in lxVariable1]
        fOneIsNumeric = True
    except ValueError:
        pass

if lxVariable2:
    try:
        lxVariable2 = [float(xItem) for xItem in lxVariable2]
        fTwoIsNumeric = True
    except ValueError:
        pass
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
128
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
# Report missing features, otherwise pick the plot from how many features
# were given and how many of them are numeric. print() is called with a
# single string everywhere so the output is the same on Python 2 and 3.
if lxVariable1 is None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureOneID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is None and strFeatureTwoID is not None:
    print("scriptPlotFeature:: Sorry, could not find the feature "+ strFeatureTwoID +" in the file "+args.strFileAbund+" .")
elif lxVariable2 is not None:
    # Two features
    iNumericCount = sum([fOneIsNumeric, fTwoIsNumeric])
    if iNumericCount == 0:
        print("scriptPlotFeature:: Error, If plotting 2 variables, atleast 1 should be numeric.")
    elif iNumericCount == 1:
        # One numeric, one not: box plot of the numeric one grouped by the other
        funcPlotBoxPlot(lxVariable1,lxVariable2, fOneIsNumeric=fOneIsNumeric)
    else:
        # Both numeric: scatter plot
        ScatterPlot.funcPlot(lxVariable1, lxVariable2, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle=args.strY, strColor=args.strColor, fInvert=args.fColor)
elif fOneIsNumeric:
    # Single numeric feature: histogram
    Histogram.funcPlot(lxVariable1, args.strOutputFile, strTitle=args.strTitle, strXTitle=args.strX, strYTitle="Frequency", strColor=args.strColor, fInvert=args.fColor)
else:
    print("Sorry currently histograms are support for only numeric data.")