0
|
1 """
|
|
2 Author: Timothy Tickle
|
|
3 Description: Performs and plots Principal Coordinates Analysis.
|
|
4 """
|
|
5
|
|
6 #####################################################################################
|
|
7 #Copyright (C) <2012>
|
|
8 #
|
|
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
10 #this software and associated documentation files (the "Software"), to deal in the
|
|
11 #Software without restriction, including without limitation the rights to use, copy,
|
|
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
13 #and to permit persons to whom the Software is furnished to do so, subject to
|
|
14 #the following conditions:
|
|
15 #
|
|
16 #The above copyright notice and this permission notice shall be included in all copies
|
|
17 #or substantial portions of the Software.
|
|
18 #
|
|
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
|
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
25 #####################################################################################
|
|
26
|
|
27 __author__ = "Timothy Tickle"
|
|
28 __copyright__ = "Copyright 2012"
|
|
29 __credits__ = ["Timothy Tickle"]
|
|
30 __license__ = "MIT"
|
|
31 __maintainer__ = "Timothy Tickle"
|
|
32 __email__ = "ttickle@sph.harvard.edu"
|
|
33 __status__ = "Development"
|
|
34
|
|
35 #External libraries
|
|
36 from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs
|
|
37 from cogent.cluster.nmds import NMDS
|
|
38 import csv
|
|
39 import math
|
|
40 import matplotlib.cm as cm
|
|
41 from Metric import Metric
|
|
42 import numpy as np
|
|
43 from scipy.spatial.distance import squareform
|
|
44 from scipy.stats.stats import spearmanr
|
|
45 from Utility import Utility
|
|
46 from UtilityMath import UtilityMath
|
|
47 from ValidateData import ValidateData
|
|
48 from matplotlib import pyplot as plt
|
|
49
|
|
50 class PCoA:
|
|
51 """
|
|
52 Class to Run Principal Coordinates Analysis.
|
|
53
|
|
54 To run PCoA first load the AbundanceTable or distance matrix using the "load" method,
|
|
55 then use the "run" method to derive points, and then use "plot" to plot the graph.
|
|
56 The process is structured in this way so that data is read once but can be transformed to different
|
|
57 distance matrices and after analysis can be plotted with multiple sample highlighting.
|
|
58 One can always reload or rerun data by calling the appropriate function.
|
|
59
|
|
60 Supported beta diversity metrics include "braycurtis","canberra","chebyshev","cityblock","correlation",
|
|
61 "cosine","euclidean","hamming","sqeuclidean","unifrac_unweighted","unifrac_weighted"
|
|
62 """
|
|
63
|
|
#Supported distance metrics
#c_SPEARMAN is the value "run" compares its metric name against to select the Spearman path.
#c_BRAY_CURTIS is not referenced by the methods visible in this part of the file.
c_BRAY_CURTIS="B_Curtis"
c_SPEARMAN="spearman"

#Holds the data Matrix (set by "loadData": the transposed abundance array or the distance matrix as passed)
dataMatrix=None
#Indicates if the data matrix is raw data (True) or a distance matrix (False); None until "loadData" is called
isRawData=None
# Holds current matrix ids (set by "loadData" for raw data, or by "run" for the unifrac metrics)
lsIDs = None

#Current pcoa object (the NMDS object created by "run"; None until "run" succeeds)
pcoa = None

#Holds the most recently successful distance metric (set at the end of "run" when a metric name was given)
strRecentMetric = None

#Current dimensions (updated by "run" when iDims > 1; "plot" clamps the requested dimensions against it)
_iDimensions = 2

#Get plot colors
#NOTE: this is created once at class level, so the same object is shared by every PCoA instance;
#"plot" calls invertColors on it.
objFigureControl = ConstantsFiguresBreadCrumbs()

#Forced X Axis (when not None, "plot" uses these values as the x coordinates instead of the analysis points)
ldForcedXAxis = None

#Indices for the plot group dictionary
#Each plot group built in "plot" is a list; these are the positions of its elements.
c_iXPointIndex = 0
c_iYPointIndex = 1
c_iColorIndex = 2
c_iMarkerIndex = 3
c_iAlphaIndex = 4
c_iLabelIndex = 5
#Despite the name, this slot holds the marker sizes ("plot" passes it to scatter as s=)
c_iShapeIndex = 6
c_iEdgeColorIndex = 7
#Not referenced by the methods visible in this part of the file
c_strTiesKey = "Ties"
|
|
100
|
|
101 #Happy path tested
|
|
102 def loadData(self, xData, fIsRawData):
|
|
103 """
|
|
104 Loads data into PCoA (given the matrix or an abundance table)
|
|
105 Data can be the Abundance Table to be converted to a distance matrix or a distance matrix
|
|
106 If it is the AbundanceTable, indicate that it is rawData (tempIsRawData=True)
|
|
107 If it is the distance matrix already generated indicate (tempIsRawData=False)
|
|
108 and no conversion will occur in subsequent methods.
|
|
109
|
|
110 :params xData: AbundanceTable or Distance matrix . Taxa (columns) by samples (rows)(lists)
|
|
111 :type: AbundanceTable or DistanceMatrix
|
|
112 :param fIsRawData: Indicates if the xData is an AbudanceTable (True) or distance matrix (False; numpy array)
|
|
113 :type: boolean
|
|
114 :return boolean: indicator of success (True=Was able to load data)
|
|
115 """
|
|
116
|
|
117 if fIsRawData:
|
|
118 #Read in the file data to a numpy array.
|
|
119 #Samples (column) by Taxa (rows)(lists) without the column
|
|
120 data = xData.funcToArray()
|
|
121 if data==None:
|
|
122 print("PCoA:loadData::Error when converting AbundanceTable to Array, did not perform PCoA.")
|
|
123 return False
|
|
124
|
|
125 #Transpose data to be Taxa (columns) by samples (rows)(lists)
|
|
126 data = UtilityMath.funcTransposeDataMatrix(data,fRemoveAdornments=False)
|
|
127 if(ValidateData.funcIsFalse(data)):
|
|
128 print("PCoA:loadData::Error when transposing data file, did not perform PCoA.")
|
|
129 return False
|
|
130 else:
|
|
131 self.dataMatrix=data
|
|
132 self.isRawData=fIsRawData
|
|
133 self.lsIDs=xData.funcGetMetadata(xData.funcGetIDMetadataName())
|
|
134
|
|
135 #Otherwise load the data directly as passed.
|
|
136 else:
|
|
137 self.dataMatrix=xData
|
|
138 self.isRawData=fIsRawData
|
|
139 return True
|
|
140
|
|
def run(self, tempDistanceMetric=None, iDims=2, strDistanceMatrixFile=None, istrmTree=None, istrmEnvr=None):
    """
    Runs analysis on loaded data.

    :param tempDistanceMetric: The name of the distance metric to use when performing PCoA.
                               None indicates a distance matrix was already given when loading and will be used.
                               Supports "braycurtis","canberra","chebyshev","cityblock","correlation",
                               "cosine","euclidean","hamming","sqeuclidean","unifrac_unweighted","unifrac_weighted"
                               and the value of PCoA.c_SPEARMAN.
    :type: String Distance matrix name
    :param iDims: How many dimension to plot the PCoA graphs.
                  (This can be minimally 2; all combinations of dimensions are plotted).
                  iDims start with 1 (not index-based). Values below 2 leave the current setting unchanged.
    :type: Integer Positive integer 2 or greater.
    :param strDistanceMatrixFile: If the underlying distance matrix should be output, this is the file to output to.
                                  Only written when a distance metric is given.
    :type: String Output file for distances or None for indicating it should not be done.
    :param istrmTree: One of two files needed for unifrac calculations, this is the phylogeny of the features.
    :type: String Path to file
    :param istrmEnvr: One of two files needed for unifrac calculations, this is the environment file for the features.
    :type: String Path to file
    :return boolean: Indicator of success (True)
    """

    if iDims > 1:
        self._iDimensions = iDims
    #"plot" selects columns of the points using _iDimensions, so every NMDS below is built with it
    iNMDSDimensions = max(self._iDimensions,2)

    #If distance metric is none, check to see if the matrix is a distance matrix
    #If so, run NMDS on the distance matrix
    #Otherwise return a false and do not run
    if tempDistanceMetric is None:
        if ValidateData.funcIsTrue(self.isRawData):
            print("PCoA:run::Error, no distance metric was specified but the previous load was not of a distance matrix.")
            return False
        elif ValidateData.funcIsFalse(self.isRawData):
            #Use the matrix stored by loadData (this previously referenced an undefined local name)
            self.pcoa = NMDS(self.dataMatrix, dimension=iNMDSDimensions, verbosity=0)
            return True
        #isRawData is neither True nor False (nothing loaded): falls through to the string check which fails

    #Make sure the distance metric was a valid string type
    if not ValidateData.funcIsValidString(tempDistanceMetric):
        print("PCoA:run::Error, distance metric was not a valid string type.")
        return False

    #Supported distances
    #Exactly one branch may produce the matrix; as separate ifs the generic branch overwrote the spearman result.
    distanceMatrix = None
    if tempDistanceMetric == self.c_SPEARMAN:
        #NOTE(review): spearmanr(u,v)[0] is the correlation coefficient, not a distance;
        #presumably funcGetDissimilarity expects exactly this function - confirm against Metric.
        distanceMatrix = Metric().funcGetDissimilarity(ldSampleTaxaAbundancies=self.dataMatrix, funcDistanceFunction=lambda u,v: spearmanr(u,v)[0])
    elif tempDistanceMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
        #Unifrac is computed from the tree and environment files and supplies its own labels
        distanceMatrix,lsLabels = Metric().funcGetBetaMetric(sMetric=tempDistanceMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr)
        self.lsIDs = lsLabels
    else:
        distanceMatrix = Metric().funcGetBetaMetric(npadAbundancies=self.dataMatrix, sMetric=tempDistanceMetric)
    if ValidateData.funcIsFalse(distanceMatrix):
        print("PCoA:run::Error, when generating distance matrix.")
        return False

    # Make squareform
    distanceMatrix = squareform(distanceMatrix)

    # Writes distance measures if needed.
    if strDistanceMatrixFile:
        #The with block guarantees the file is flushed and closed even if a row fails to write
        with open(strDistanceMatrixFile, 'w') as hndlDistance:
            csvrDistance = csv.writer(hndlDistance)
            if self.lsIDs:
                csvrDistance.writerow(["ID"]+self.lsIDs)

            for iRow in range(distanceMatrix.shape[0]):
                strId = [self.lsIDs[iRow]] if self.lsIDs else []
                csvrDistance.writerow(strId+distanceMatrix[iRow].tolist())

    self.pcoa = NMDS(distanceMatrix, dimension=iNMDSDimensions, verbosity=0)
    self.strRecentMetric = tempDistanceMetric
    return True
|
|
212
|
|
213 #TODO Test
|
|
214 def funcGetCoordinates(self):
|
|
215 return(self.pcoa.getPoints())
|
|
216
|
|
217 #TODO Test
|
|
218 def funcGetIDs(self):
|
|
219 return(self.lsIDs)
|
|
220
|
|
221 #Happy path tested
|
|
def _funcNormalizePlotArgument(self, xValue, xDefault, iPointCount, funcIsSingleValue, strArgumentName, strListNoun, strLengthNoun, strExpected):
    """
    Private helper of "plot": turns one per-point plotting argument into a list with one entry per point.

    :param xValue: The argument as given to plot (None, a list, or a single value).
    :param xDefault: Value used for every point when xValue is None.
    :param iPointCount: Number of points being plotted.
    :type: Integer
    :param funcIsSingleValue: Predicate deciding if xValue is an acceptable single value.
    :type: Function taking one argument and returning a boolean
    :param strArgumentName: Name of the plot argument, used in the error messages.
    :type: String
    :param strListNoun: Plural noun for the "list of ..." error message (for example "labels").
    :type: String
    :param strLengthNoun: Noun for the "... list length" error message (for example "Label").
    :type: String
    :param strExpected: Description of the accepted types for the unexpected type message.
    :type: String
    :return list: One entry per point, or None if the argument was invalid (the reason is printed).
    """

    if xValue is None:
        return [xDefault] * iPointCount

    if ValidateData.funcIsValidList(xValue):
        if len(xValue) == iPointCount:
            return xValue
        print("PCoA::plot:Error, the list of "+strListNoun+" was given but was not the same length as the points so nothing was plotted.")
        print("PCoA::plot:"+strArgumentName+"= "+str(xValue))
        print("PCoA::plot:"+strLengthNoun+" list length= "+str(len(xValue)))
        print("PCoA::plot:iPointCount= "+str(iPointCount))
        return None

    if funcIsSingleValue(xValue):
        return [xValue] * iPointCount

    print("PCoA::plot:"+strArgumentName+" was of an unexpected type. Expecting "+strExpected+".")
    print(xValue)
    return None

def plot(self, tempPlotName="PCOA.png", tempColorGrouping=None, tempShape=None, tempLabels=None, tempShapeSize=None, tempAlpha = 1.0, tempLegendLocation="upper right", tempInvert=False, iDim1 = 1, iDim2 = 2):
    """
    Plots the points of the current analysis (see "run") and saves the figure to a file.
    All lists should be in order in relation to each other.

    :param tempPlotName: Path of file to save figure.
    :type: String File path.
    :param tempColorGrouping: Colors for markers.
                              If you want a marker with multiple colors (piewedges) for that marker give a list in the list of colors.
                              For example ['r','r','r',['r','g','b']] This would make 3 red markers and 1 split into 3 wedges (red, green, and blue).
                              This is only possible if you are using circle shapes ('o') or square shapes ('s').
    :type: Character or list of characters: Characters should be useable by matplotlib as a color.
    :param tempShape: Marker shapes. If you want to specify one shape for all markers then just pass a char/str for the marker not a list.
    :type: Character or list of characters. Characters should be useable by matplotlib as shapes.
    :param tempLabels: Labels associated with the coloring. Should be consistent with tempColorGrouping (both should be strings or lists of equal length).
    :type: String or list of Strings.
    :param tempShapeSize: Sizes of markers (points). If no list is given, all markers are given the same size.
    :type: Integer or list of integers: 1 or greater.
    :param tempAlpha: Value between 0.0 and 1.0 (0.0 being completely transparent, 1.0 being opaque).
    :type: Float 0.0-1.0.
    :param tempLegendLocation: Indicates where to put the legend.
    :type: String Either "upper right", "lower right", "upper left", "lower left".
    :param tempInvert: Allows the inverting of the figure.
    :type: boolean True inverts.
    :param iDim1: First dimension to plot (starting at 1). Clamped so it is at most the second to last dimension of the analysis.
    :type: Integer 1 or greater.
    :param iDim2: Second dimension to plot (starting at 1). Clamped to be between 2 and the number of dimensions of the analysis.
    :type: Integer 2 or greater.
    :return boolean: Indicator of success (True). False if there is no analysis to plot or an argument was invalid.
    """

    #Nothing can be plotted until "run" has produced an analysis
    if self.pcoa is None:
        return False

    #Get point count
    #Requested dimensions are 1-based, the columns of the points are 0-based
    iDimensionOne = max(0,min(self._iDimensions-2, iDim1-1))
    iDimensionTwo = max(1,min(self._iDimensions-1, iDim2-1))
    adPoints = self.pcoa.getPoints()

    #This is 1-stress which is the amount of variance not explained by all dimensions
    #There is no percent variance, so I am trying this as a substitute
    dPercentVariance = int((1.0-self.pcoa.getStress())*100)
    ldXPoints = list(adPoints[:,iDimensionOne])
    #Identity check so that an array given as the forced axis does not break the test
    if self.ldForcedXAxis is not None:
        ldXPoints = self.ldForcedXAxis
    ldYPoints = list(adPoints[:,iDimensionTwo])
    iPointCount = len(ldXPoints)

    #Set the color scheme before any default is read from the figure control
    self.objFigureControl.invertColors(fInvert=tempInvert)

    #Manage Labels
    tempLabels = self._funcNormalizePlotArgument(tempLabels, self.objFigureControl.c_strPCoALabelDefault, iPointCount,
        ValidateData.funcIsValidString, "tempLabels", "labels", "Label", "None, List, string, or char")
    if tempLabels is None:
        return False

    #Manage Colors
    tempColorGrouping = self._funcNormalizePlotArgument(tempColorGrouping, self.objFigureControl.c_cPCoAColorDefault, iPointCount,
        ValidateData.funcIsValidString, "tempColorGrouping", "colors", "Color", "None, List, string, or char")
    if tempColorGrouping is None:
        return False

    #Manage tempShape
    tempShape = self._funcNormalizePlotArgument(tempShape, self.objFigureControl.c_cPCoAShapeDefault, iPointCount,
        ValidateData.funcIsValidString, "tempShape", "shapes", "Shape", "None, List, string, or char")
    if tempShape is None:
        return False

    #Manage tempShapeSize (a single size must be an integer, which the message now states)
    tempShapeSize = self._funcNormalizePlotArgument(tempShapeSize, self.objFigureControl.c_cPCoASizeDefault, iPointCount,
        ValidateData.funcIsValidInteger, "tempShapeSize", "sizes", "Size", "None, List, or integer")
    if tempShapeSize is None:
        return False

    #Get plot object
    #Created only after the arguments are known to be good so a failed call leaves no figure behind
    imgFigure = plt.figure()

    #Color/Invert figure
    imgFigure.set_facecolor(self.objFigureControl.c_strBackgroundColorWord)
    #NOTE(review): axisbg was replaced by facecolor in later matplotlib releases - confirm against the supported version
    imgSubplot = imgFigure.add_subplot(111,axisbg=self.objFigureControl.c_strBackgroundColorLetter)
    imgSubplot.set_xlabel("Dimension "+str(iDimensionOne+1)+" (1-Stress = "+str(dPercentVariance)+"% )")
    imgSubplot.set_ylabel("Dimension "+str(iDimensionTwo+1))
    charDetailsColor = self.objFigureControl.c_strDetailsColorLetter
    for strSpine in ['top','bottom','left','right']:
        imgSubplot.spines[strSpine].set_color(charDetailsColor)
    imgSubplot.xaxis.label.set_color(charDetailsColor)
    imgSubplot.yaxis.label.set_color(charDetailsColor)
    imgSubplot.tick_params(axis='x', colors=charDetailsColor)
    imgSubplot.tick_params(axis='y', colors=charDetailsColor)
    charMarkerEdgeColor = charDetailsColor

    #If given a list of colors, each color will be plotted individually stratified by shape
    #Plot colors separately so the legend will pick up on the labels and make a legend
    if ValidateData.funcIsValidList(tempColorGrouping):
        if len(tempColorGrouping) == iPointCount:

            #Dictionary to hold plotting groups
            #Logistical to plot points as layers in an intelligent fashion
            #{CountofPoints: [[plot info list]]} The list happens so ties can occur in the key
            dictPlotGroups = dict()

            #Check for lists in the list which indicate the need to plot pie charts
            lfAreLists = [ValidateData.funcIsValidList(objColor) for objColor in tempColorGrouping]

            #Pie chart data separated out
            lsColorsPieCharts = None
            lcShapesPieCharts = None
            lsLabelsPieCharts = None
            lsSizesPieCharts = None
            ldXPointsPieCharts = None
            ldYPointsPieCharts = None

            #Split out piechart data
            if any(lfAreLists):
                #Get lists of index that are and are not lists
                liAreLists = [iIndex for iIndex, fIsList in enumerate(lfAreLists) if fIsList]
                liAreNotLists = [iIndex for iIndex, fIsList in enumerate(lfAreLists) if not fIsList]

                lsColorsPieCharts = Utility.reduceList(tempColorGrouping, liAreLists)
                tempColorGrouping = Utility.reduceList(tempColorGrouping, liAreNotLists)

                #Split out shapes
                lcShapesPieCharts = Utility.reduceList(tempShape, liAreLists)
                tempShape = Utility.reduceList(tempShape, liAreNotLists)

                #Split out labels
                lsLabelsPieCharts = Utility.reduceList(tempLabels, liAreLists)
                tempLabels = Utility.reduceList(tempLabels, liAreNotLists)

                #Split out sizes
                lsSizesPieCharts = Utility.reduceList(tempShapeSize, liAreLists)
                tempShapeSize = Utility.reduceList(tempShapeSize, liAreNotLists)

                #Split out xpoints
                ldXPointsPieCharts = Utility.reduceList(ldXPoints, liAreLists)
                ldXPoints = Utility.reduceList(ldXPoints, liAreNotLists)

                #Split out ypoints
                ldYPointsPieCharts = Utility.reduceList(ldYPoints, liAreLists)
                ldYPoints = Utility.reduceList(ldYPoints, liAreNotLists)

            #Get unique colors and plot each individually
            for charColor in set(tempColorGrouping):

                #Get indices of colors
                aiColorPointPositions = Utility.getIndices(tempColorGrouping,charColor)

                #Reduce the labels by color
                acharLabelsByColor = Utility.reduceList(tempLabels,aiColorPointPositions)

                #Reduce sizes if a list
                reducedSizes = tempShapeSize
                if ValidateData.funcIsValidList(reducedSizes):
                    reducedSizes = Utility.reduceList(reducedSizes,aiColorPointPositions)

                #Reduce to the current color grouping
                aiXPoints = Utility.reduceList(ldXPoints,aiColorPointPositions)
                aiYPoints = Utility.reduceList(ldYPoints,aiColorPointPositions)

                #If the shapes are not a list plot the whole color group with the one shape
                #Otherwise plot per shape per color (can not plot list of shapes in matplotlib)
                #Pie chart markers were split out above and are plotted after this block
                reducedShapes = tempShape
                if not ValidateData.funcIsValidList(reducedShapes):
                    reducedShapes = reducedShapes[0]
                    dictPlotGroups.setdefault(len(aiXPoints), []).append([aiXPoints,aiYPoints,[charColor],reducedShapes,tempAlpha,tempLabels[tempColorGrouping.index(charColor)],reducedSizes,charMarkerEdgeColor])
                #Shapes are supplied as a list so plot each shape
                else:
                    #Reduce to shapes of the current colors
                    reducedShapes = Utility.reduceList(reducedShapes,aiColorPointPositions)
                    #If there are multiple shapes, plot separately because one is not allowed to plot them as a list
                    for aCharShapeElement in set(reducedShapes):
                        #Get indices
                        aiShapeIndices = Utility.getIndices(reducedShapes,aCharShapeElement)
                        #Reduce label by shapes
                        strShapeLabel = Utility.reduceList(acharLabelsByColor,aiShapeIndices)
                        #Reduce sizes by shapes
                        strShapeSizes = reducedSizes
                        if ValidateData.funcIsValidList(reducedSizes):
                            strShapeSizes = Utility.reduceList(reducedSizes,aiShapeIndices)
                        #Get points per shape
                        aiXPointsPerShape = Utility.reduceList(aiXPoints,aiShapeIndices)
                        aiYPointsPerShape = Utility.reduceList(aiYPoints,aiShapeIndices)
                        #Put plot data in dict of lists for later plotting
                        #The first label of the group is the one shown in the legend
                        dictPlotGroups.setdefault(len(aiXPointsPerShape), []).append([aiXPointsPerShape,aiYPointsPerShape,[charColor],aCharShapeElement,tempAlpha,strShapeLabel[0],strShapeSizes,charMarkerEdgeColor])

            #Plot the groups from the largest point count to the smallest so small groups will not be covered up by larger groups
            for iGroupSize in sorted(dictPlotGroups, reverse=True):
                for lsGroup in dictPlotGroups[iGroupSize]:
                    imgSubplot.scatter(lsGroup[self.c_iXPointIndex],
                        lsGroup[self.c_iYPointIndex],
                        c = lsGroup[self.c_iColorIndex],
                        marker = lsGroup[self.c_iMarkerIndex],
                        alpha = lsGroup[self.c_iAlphaIndex],
                        label = lsGroup[self.c_iLabelIndex],
                        s = lsGroup[self.c_iShapeIndex],
                        edgecolor = lsGroup[self.c_iEdgeColorIndex])

            #Plot pie charts
            if lsColorsPieCharts is not None:
                self.plotWithPieMarkers(imgSubplot=imgSubplot, aiXPoints=ldXPointsPieCharts, aiYPoints=ldYPointsPieCharts, dSize=lsSizesPieCharts, llColors=lsColorsPieCharts, lsLabels=lsLabelsPieCharts, lcShapes=lcShapesPieCharts, edgeColor=charMarkerEdgeColor, dAlpha=tempAlpha)

    objLegend = imgSubplot.legend(loc=tempLegendLocation, scatterpoints=1, prop={'size':10})

    if objLegend:
        #Invert legend
        if tempInvert:
            objLegend.legendPatch.set_fc(self.objFigureControl.c_strBackgroundColorWord)
            objLegend.legendPatch.set_ec(self.objFigureControl.c_strDetailsColorLetter)
            plt.setp(objLegend.get_texts(),color=self.objFigureControl.c_strDetailsColorLetter)

        #Make legend background transparent
        objLegendFrame = objLegend.get_frame()
        objLegendFrame.set_alpha(self.objFigureControl.c_dAlpha)

    imgFigure.savefig(tempPlotName, facecolor=imgFigure.get_facecolor())
    return True
|
|
506
|
|
507 #Indirectly tested
|
|
def plotWithPieMarkers(self, imgSubplot, aiXPoints, aiYPoints, dSize, llColors, lsLabels, lcShapes, edgeColor, dAlpha):
    """
    Plots each point as a marker cut into wedges, one wedge per color of the point.
    All lists should be in the same order.

    :param imgSubplot: Image to plot to
    :type: Image
    :param aiXPoints: List of X axis points (one element per color list)
    :type: List of Floats
    :param aiYPoints: List of Y axis points (one element per color list)
    :type: List of Floats
    :param dSize: Marker size. Either one value used for every marker or a list (one element per color list)
    :type: Float or List of Floats
    :param llColors: List of Lists of colors, one list of colors is for 1 piechart/multiply highlighted feature
                     Example ["red","blue","green"] for a marker with 3 sections.
    :type: List of lists of strings
    :param lsLabels: List of labels (one element per color list).
    :type: List of Strings
    :param lcShapes: Indicates which shape of a pie chart to use, currently supported 'o' and 's' (one element per color list).
                     A point with any other shape is not plotted.
    :type: List of characters
    :param edgeColor: One color entry for the edge of the piechart.
    :type: Character
    :param dAlpha: Value between 0.0 and 1.0 (0.0 being completely transparent, 1.0 being opaque).
    :type: Float 0.0-1.0.
    """

    #Zip up points to pairs
    lxyPoints = list(zip(aiXPoints,aiYPoints))

    #A single size is documented as allowed; repeat it so it can be looked up per point like a list
    if np.isscalar(dSize):
        dSize = [dSize] * len(lxyPoints)

    #The marker type constants do not change between points so get them once
    objConstants = ConstantsFiguresBreadCrumbs()
    cCirclePie = objConstants.c_charPCOAPieChart
    cSquarePie = objConstants.c_charPCOASquarePieChart

    #For each pair of points
    for iIndex,(dX,dY) in enumerate(lxyPoints):
        #Get colors
        lcurColors = llColors[iIndex]
        #Get pie cut shape
        cPieChartType = lcShapes[iIndex]
        if cPieChartType == cCirclePie:
            ldWedges = self.makePieWedges(len(lcurColors),20)
        elif cPieChartType == cSquarePie:
            ldWedges = self.makeSquarePieWedges(len(lcurColors))
        else:
            ldWedges = []

        #Each wedge is drawn as its own marker at the same position, colored by the matching entry of the color list
        #NOTE(review): every wedge carries the point's label, so a legend may list one entry per wedge - confirm this is intended
        for iWedgeIndex,dWedge in enumerate(ldWedges):
            imgSubplot.scatter(x=dX, y=dY, marker=(dWedge,0), s=dSize[iIndex], label=lsLabels[iIndex], facecolor=lcurColors[iWedgeIndex], edgecolor=edgeColor, alpha=dAlpha)
|
|
548
|
|
549 #Indirectly tested
|
|
def makePieWedges(self, iWedgeCount, iSplineResolution = 10):
    """
    Generate a list of wedge outlines which together draw a circle broken up into equal pie cuts.

    :param iWedgeCount: The number of piecuts in the circle. Less than 1 gives no wedges.
    :type: Integer Number greater than 1.
    :param iSplineResolution: The amount of smoothing to the circle's outer edge, the higher the number the more smooth.
                              This is the number of points placed on the arc of each wedge.
    :type: integer Greater than 1.
    :return list: One entry per wedge. Each entry is a list of (x,y) tuples starting at the center (0,0)
                  followed by the points on the wedge's arc of the unit circle, formatted for direct plotting of the marker.
    """

    #No wedges requested; this also keeps the percentage below from dividing by zero
    if iWedgeCount < 1:
        return []

    ldWedge = []
    dFullCircle = 2*math.pi
    #All wedges take an equal share of the circle
    dPercentage = 1.0/iWedgeCount
    dLastValue = 0.0

    for iWedge in range(iWedgeCount):
        dStart = dFullCircle*dLastValue
        #The last wedge ends exactly at the full circle so rounding in the running total can not leave a gap
        if iWedge == iWedgeCount-1:
            dStop = dFullCircle
        else:
            dStop = dFullCircle*(dLastValue+dPercentage)

        adAngles = np.linspace(dStart,dStop,iSplineResolution)
        ldX = [0] + np.cos(adAngles).tolist()
        ldY = [0] + np.sin(adAngles).tolist()
        #A real list is stored so the wedge can be read more than once
        ldWedge.append(list(zip(ldX,ldY)))
        dLastValue = dLastValue+dPercentage
    return ldWedge
|
|
578
|
|
579 #Indirectly tested
|
|
def makeSquarePieWedges(self, iWedgeCount):
    """
    Generate a list of tuple points which will draw a square broken up into pie cuts.

    The circle is walked in iWedgeCount equal steps starting at angle 0. For each step the end of the
    wedge is projected onto the edge of a square, and the corner of the square is added to the
    outline when the step passes a corner.

    :param iWedgeCount: The number of piecuts in the square.
    :type: Integer Number greater than 1.
    :return list: One entry per wedge. Each entry is the result of zipping the x and y values of the
                  wedge's outline, which starts and ends at the center (0,0), formatted for direct plotting of the marker.
    """

    ldWedge = []
    #Fraction of the circle at which the current wedge starts
    dLastPercentageValue = 0.0
    #Where the previous wedge ended on the edge of the square
    dLastSquareValue = 0.0
    #Running total of the square edge values (updated each pass but not read in this method)
    dCumulativePercentageValue = 0.0
    #Set on the first pass from the cosine of the first start angle (angle 0)
    dRadius = None
    #True while the x and y circle values are swapped
    fXYSwitched = False
    #Set after a corner wedge is added (not read in this method)
    fAfterCorner = False
    #How many times the x/y switch happened; used below to select the corner and the signs
    iSwitchCounts = 0
    #A quarter of the circle
    iMagicNumber =(1.0/4)

    #Create a list of equal percentages for all wedges
    #Every wedge is included here, the last one as well
    ldPercentages = [1.0/iWedgeCount]*(iWedgeCount)

    for dPercentage in ldPercentages:
        #Circle coordinates of the start [0] and end [1] of the wedge
        ldCircleXs = np.cos([2*math.pi*dLastPercentageValue,2*math.pi*(dLastPercentageValue+dPercentage)])
        ldCircleYs = np.sin([2*math.pi*dLastPercentageValue,2*math.pi*(dLastPercentageValue+dPercentage)])

        if dRadius == None:
            dRadius = ldCircleXs[0]

        #Check to see if at corner
        #Offsetting by half a quarter makes the integer part change at 1/8, 3/8, 5/8 and 7/8 of the circle;
        #iDistance is how many of those boundaries lie between the start and the end of this wedge
        fAtCorner = False
        iDistance = int((dLastPercentageValue+dPercentage+(iMagicNumber/2))/iMagicNumber
            ) - int((dLastPercentageValue+(iMagicNumber/2))/iMagicNumber)
        if(iDistance > 0):
            fAtCorner = True
            #More than one boundary in a single wedge gets an extra switch
            if iDistance > 1:
                fXYSwitched = not fXYSwitched
                iSwitchCounts = iSwitchCounts + 1

        #Check to see if at a side center
        #True when the wedge passes a multiple of a quarter of the circle (not read in this method)
        fAtSide = False
        if (int((dLastPercentageValue+dPercentage)/iMagicNumber) > int(dLastPercentageValue/iMagicNumber)):
            fAtSide = True

        #Handle corner xy switching
        if fAtCorner:
            fXYSwitched = not fXYSwitched
            iSwitchCounts = iSwitchCounts + 1
        #Make sure the xy switching occurs to vary the slope at the corner.
        if fXYSwitched:
            ldCircleXs,ldCircleYs = ldCircleYs,ldCircleXs

        #End of the wedge on the edge of the square: the radius scaled by the ratio of the end point's coordinates
        dSquarePoint = dRadius * (ldCircleYs[1]/float(ldCircleXs[1]))
        #Working copies whose signs are adjusted for this wedge only
        dRadiusSq1 = dRadius
        dRadiusSq2 = dRadius
        dLastSquareValueSq = dLastSquareValue
        dSquarePointSq = dSquarePoint

        #If in quadrants 2,3 make sign changes
        if iSwitchCounts in [2,3]:
            if iSwitchCounts == 2:
                dRadiusSq1 = dRadiusSq1 *-1
            elif iSwitchCounts == 3:
                dRadiusSq1 = dRadiusSq1 * -1
                dRadiusSq2 = dRadiusSq2 * -1
            dLastSquareValueSq = dLastSquareValueSq * -1.0
            dSquarePointSq = dSquarePointSq * -1.0

        if fAtCorner:
            #The outlines with six points (iDistance > 1) include two corners of the square, the others one
            #Corner 1
            if iSwitchCounts==1:
                ldWedge.append(zip([0,dRadiusSq1,dRadiusSq1,dSquarePointSq,0],[0,dLastSquareValueSq,dRadiusSq2,dRadiusSq2,0]))
            #Corner 2
            elif iSwitchCounts==2:
                if iDistance > 1:
                    ldWedge.append(zip([0,-dRadiusSq1,-dRadiusSq1,dRadiusSq1,dRadiusSq1,0],[0,-dLastSquareValueSq,dRadiusSq2,dRadiusSq2,dSquarePointSq,0]))
                else:
                    ldWedge.append(zip([0,-dLastSquareValueSq,dRadiusSq1,dRadiusSq1,0],[0,dRadiusSq2,dRadiusSq2,dSquarePointSq,0]))
            #Corner 3
            elif iSwitchCounts==3:
                if iDistance > 1:
                    ldWedge.append(zip([0,-dLastSquareValueSq,dRadiusSq1,dRadiusSq1,dSquarePointSq,0],[0,-dRadiusSq2,-dRadiusSq2,dRadiusSq2,dRadiusSq2,0]))
                else:
                    ldWedge.append(zip([0,dRadiusSq1,dRadiusSq1,dSquarePointSq,0],[0,dLastSquareValueSq,dRadiusSq2,dRadiusSq2,0]))
            #Corner 4
            elif iSwitchCounts==4:
                if iDistance > 1:
                    ldWedge.append(zip([0,-dRadiusSq1,-dRadiusSq1,dRadiusSq1,dRadiusSq1,0],[0,-dLastSquareValueSq,-dRadiusSq2,-dRadiusSq2,dSquarePointSq,0]))
                else:
                    ldWedge.append(zip([0,(-1.0*dLastSquareValueSq),dRadiusSq1,dRadiusSq1,0],[0,(-1.0*dRadiusSq2),(-1.0*dRadiusSq2),dSquarePointSq,0]))

            fAfterCorner = True
        else:
            #No corner in this wedge: a triangle from the center along one side of the square
            #An odd switch count puts the side at a fixed y value, an even count at a fixed x value
            if iSwitchCounts%2:
                ldWedge.append(zip([0,dLastSquareValueSq,dSquarePointSq,0],[0,dRadiusSq2,dRadiusSq2,0]))
            else:
                ldWedge.append(zip([0,dRadiusSq1,dRadiusSq1,0],[0,dLastSquareValueSq,dSquarePointSq,0]))

        #The next wedge starts where this one ended
        dLastSquareValue = dSquarePoint
        dCumulativePercentageValue = dCumulativePercentageValue + dLastSquareValue
        dLastPercentageValue = dLastPercentageValue+dPercentage

    return ldWedge
|
|
685
|
|
686 #Happy Path Tested
|
|
def plotList(self, lsLabelList, strOutputFileName, iSize=20, dAlpha=1.0, charForceColor=None, charForceShape=None, fInvert=False, iDim1=1, iDim2=2):
    """
    Convenience method used to plot data in the PCoA given a label list (which is in order of the underlying data).
    This is for the scenario where you do not care what the color or shape of the data will be as long as it varies
    with the label.
    This method does allow forcing color or shape to 1 character so that they do not vary with the label but are one value.
    This is helpful when you have a large number of labels to plot given the shapes in the PCoA are limited but not the coloring.

    :param lsLabelList: List of string labels which are in order of the data in the PCoA object (as the data was loaded into the PCoA object).
    :type: List of strings
    :param strOutputFileName: File path to save figure.
    :type: String
    :param iSize: Size of marker. Default 20.
    :type: Integer
    :param dAlpha: Alpha for the markers. (0.0 transparent, 1.0 opaque)
    :type: Double between 0.0 and 1.0
    :param charForceColor: Color to force the points to. One of:
                           None - colors are generated so they vary with the label.
                           A single color (Must be understandable by matplotlib as a color [ie. 'k','m','c','r','g','b','y','w']).
                           A list accepted by ValidateData.funcIsValidList: element 0 holds one color per label and
                           element 1 holds one string per label which is prefixed (joined with "_") to that label.
    :type: Character, List or None
    :param charForceShape: Shape to force the points to. (Must be understandable by matplotlib as a shape [ie. 'o','s','^','v','<','>','8','p','h'])
    :type: Character
    :param fInvert: Allows one to invert the background and plot details from white to black (True == background is black).
    :type: Boolean
    :param iDim1: The first dimension to plot
    :type: Integer starting at 1
    :param iDim2: The second dimension to plot
    :type: Integer starting at 2
    :return boolean: Indicator of success (True). False is returned when no shapes could be obtained for the labels,
                     when the generated colors are not unique, when the forced color list and the label list differ
                     in length, or when the underlying plot call explicitly returns False.
    """

    #Get unique values for labels and remember the position of each so
    #per-label lookups do not rescan the unique value list
    acharUniqueValues = list(set(lsLabelList))
    iCountUniqueValues = len(acharUniqueValues)
    dictUniqueValueIndex = dict((sUniqueValue, iIndex) for iIndex, sUniqueValue in enumerate(acharUniqueValues))

    #Set colors
    atupldLabelColors = None

    #Set shapes
    alLabelShapes = None
    if charForceShape is None:
        #Get shapes
        acharShapes = PCoA.getShapes(iCountUniqueValues)
        if not acharShapes:
            return False
        #Make label shapes
        alLabelShapes = [acharShapes[dictUniqueValueIndex[sMetadata]] for sMetadata in lsLabelList]
    else:
        alLabelShapes = charForceShape

    #If the coloring is not forced, color so it is based on the labels
    if charForceColor is None:
        #Get colors based on labels
        atupldColors = [Utility.RGBToHex(cm.jet(float(iUniqueValueIndex)/float(iCountUniqueValues))) for iUniqueValueIndex in range(iCountUniqueValues)]
        #Make sure generated colors are unique
        if not iCountUniqueValues == len(set(atupldColors)):
            print("PCoA::plotList:Error, generated colors were not unique for each unique label value.")
            print("Labels")
            print(lsLabelList)
            print(len(lsLabelList))
            print("Unique Labels")
            print(set(lsLabelList))
            print(len(set(lsLabelList)))
            print("Colors")
            print(atupldColors)
            print(len(atupldColors))
            print("Unique Colors")
            print(set(atupldColors))
            print(len(set(atupldColors)))
            return False
        #Make label coloring
        atupldLabelColors = [atupldColors[dictUniqueValueIndex[sMetadata]] for sMetadata in lsLabelList]
    #If the coloring is forced, color so it is based on the charForcedColor list
    elif ValidateData.funcIsValidList(charForceColor):
        atupldLabelColors = charForceColor[0]
        if not len(lsLabelList) == len(atupldLabelColors):
            print("PCoA::plotList:Error, label and forced color lengths were not the same.")
            print("Labels")
            print(lsLabelList)
            print(len(lsLabelList))
            print("Forced Colors")
            print(charForceColor[0])
            print(len(charForceColor[0]))
            return False
        #Prefix each label with its entry from the second forced color element.
        #Shapes were already assigned above from the unprefixed labels.
        lsLabelList = ["".join([charForceColor[1][iLabelIndex], "_", lsLabelList[iLabelIndex]]) for iLabelIndex in range(len(charForceColor[1]))]
    #If the color is forced but the color does not vary, color all markers the same.
    else:
        atupldLabelColors = charForceColor

    #Call plot
    fPlotResult = self.plot(tempPlotName=strOutputFileName, tempColorGrouping=atupldLabelColors, tempShape=alLabelShapes, tempLabels=lsLabelList, tempShapeSize = iSize, tempAlpha=dAlpha, tempInvert = fInvert, iDim1=iDim1, iDim2=iDim2)

    #Honor the documented boolean return. What plot returns is not visible from here,
    #so only an explicit False from it is treated as a failure.
    return fPlotResult is not False
|
|
776
|
|
def funcForceXAxis(self, dList):
    """
    Override the X axis with caller supplied values.

    The given list is stored on this object as ldForcedXAxis (the same list object, not a copy).
    Use funcUnforceXAxis to remove the override.

    :param dList: Values the x axis of the plot should be forced to.
    :type: List of floats
    """

    #Remember the override on the instance
    self.ldForcedXAxis = dList
|
|
786
|
|
def funcUnforceXAxis(self):
    """
    Remove any forced X axis so the axis is again derived from the loaded data.

    Sets ldForcedXAxis on this object back to None.
    """

    #None marks that no X axis override is in effect
    self.ldForcedXAxis = None
|
|
793
|
|
794 #Happy Path Tested
|
|
795 @staticmethod
|
|
def getShapes(intShapeCount):
    """
    Returns a list of characters which are valid shapes for markers.

    :param intShapeCount: The number of shapes to return.
    :type: Integer (min 1, max 9)
    :return: A list of characters to use as markers. [] is returned on error (more shapes requested
             than are available, or a negative count). [] is also returned when 0 shapes are requested.
    """

    lsPointShapes = ['o','s','^','v','<','>','8','p','h']
    iAvailableShapes = len(lsPointShapes)

    #Can not hand out more shapes than exist
    if intShapeCount > iAvailableShapes:
        print("".join(["Error, PCoA.getShapes. Do not have enough shapes to give. Received request for ",str(intShapeCount)," shapes. Max available shape count is ",str(iAvailableShapes),"."]))
        return []

    #A negative count would otherwise act as a slice offset from the end of the list
    #and silently return the wrong number of shapes (ie. -1 would give 8 shapes).
    if intShapeCount < 0:
        print("".join(["Error, PCoA.getShapes. Can not give a negative number of shapes. Received request for ",str(intShapeCount)," shapes."]))
        return []

    return lsPointShapes[:intShapeCount]
|