Mercurial > repos > george-weingart > micropita
comparison src/breadcrumbs/src/Cladogram.py @ 0:2f4f6f08c8c4 draft
Uploaded
author | george-weingart |
---|---|
date | Tue, 13 May 2014 21:58:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2f4f6f08c8c4 |
---|---|
1 """ | |
2 Author: Timothy Tickle | |
3 Description: Class to call circlader and create dendrograms. | |
4 """ | |
5 | |
6 ##################################################################################### | |
7 #Copyright (C) <2012> | |
8 # | |
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
10 #this software and associated documentation files (the "Software"), to deal in the | |
11 #Software without restriction, including without limitation the rights to use, copy, | |
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
13 #and to permit persons to whom the Software is furnished to do so, subject to | |
14 #the following conditions: | |
15 # | |
16 #The above copyright notice and this permission notice shall be included in all copies | |
17 #or substantial portions of the Software. | |
18 # | |
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
25 ##################################################################################### | |
26 | |
27 __author__ = "Timothy Tickle" | |
28 __copyright__ = "Copyright 2012" | |
29 __credits__ = ["Timothy Tickle"] | |
30 __license__ = "MIT" | |
31 __maintainer__ = "Timothy Tickle" | |
32 __email__ = "ttickle@sph.harvard.edu" | |
33 __status__ = "Development" | |
34 | |
35 #External libraries | |
36 from AbundanceTable import AbundanceTable | |
37 from CommandLine import CommandLine | |
38 from ConstantsBreadCrumbs import ConstantsBreadCrumbs | |
39 from ConstantsFiguresBreadCrumbs import ConstantsFiguresBreadCrumbs | |
40 import math | |
41 import numpy as np | |
42 import os | |
43 import re | |
44 import scipy.stats | |
45 from ValidateData import ValidateData | |
46 #import scipy.stats.stats as stats | |
47 | |
48 class Cladogram: | |
49 """ | |
50 This class manages creating files for Circlader and calling Circlader. | |
51 """ | |
52 | |
53 #Script name | |
54 circladerScript=None | |
55 | |
56 #Constants | |
57 c_sTaxa="Taxa" | |
58 c_sCircle="Circle" | |
59 c_sBorder="Border" | |
60 c_sShape="Shape" | |
61 c_sAlpha="Alpha" | |
62 c_sForced="Forced" | |
63 | |
64 #Numpy array (structured array) holding data | |
65 #Should be SampleID, Sample Abundances/Data (samples = columns)..... | |
66 npaAbundance = None | |
67 #List of sample names | |
68 lsSampleNames = None | |
69 #Name of output image | |
70 strImageName = "Cladogram.png" | |
71 #String used to call the sample id column | |
72 strSampleID = "ID" | |
73 strUnclassified = "unclassified" | |
74 | |
75 #Minimum size of clade (terminal node count for clade) | |
76 iMinCladeSize = 1 | |
77 #Level of ancestry to filter at (starts with 0 and based on the input file) | |
78 iCladeLevelToMeasure = 1 | |
79 iCladeLevelToReduce = 1 | |
80 cFeatureDelimiter = "|" | |
81 | |
82 #Flags | |
83 #Turns on (True) or off (False) abundance-based filtering | |
84 fAbundanceFilter = False | |
85 #Turns on (True) or off (False) clade size-based filtering | |
86 fCladeSizeFilter = False | |
87 #Indicate if the following files were made | |
88 fSizeFileMade=False | |
89 fCircleFileMade=False | |
90 fColorFileMade=False | |
91 fTickFileMade=False | |
92 fHighlightFileMade=False | |
93 | |
94 #Circlader files | |
95 strTreeFilePath="_Taxa.txt" | |
96 strCircleFilePath = "_Circle.txt" | |
97 strColorFilePath="_Color.txt" | |
98 strTickFilePath="_Tick.txt" | |
99 strHighLightFilePath="_HighLight.txt" | |
100 strSizeFilePath="_Size.txt" | |
101 strStyleFilePath="" | |
102 | |
103 #Thresholds | |
104 #Controls the showing of taxa | |
105 c_dPercentileCutOff = 90.0 | |
106 c_dPercentageAbovePercentile = 1.0 | |
107 | |
108 #Minimum average abundance score when using log scale | |
109 c_dMinLogSize = 0.0000000001 | |
110 #Constant used to magnify the size difference in the taxa (log only) | |
111 c_dLogScale = 1000000 | |
112 #After taking log10, an additional scaling adjustment is applied (use this) | |
113 c_dCircleScale = 3 | |
114 | |
115 #Data for circular files | |
116 #Used to change IDs to proper labels | |
117 dictConvertIDs = None | |
118 #Labels to be relabeled | |
119 dictRelabels = None | |
120 #Colors | |
121 dictColors = None | |
122 #Elements that are forced to be highlighted | |
123 dictForcedHighLights = None | |
124 #Ticks | |
125 llsTicks = None | |
126 #Forced root of the tree, discarding data as needed. | |
127 strRoot = None | |
128 #Holds circle data as a list of dictionaries | |
129 #One dictionary per circle | |
130 ldictCircleData = None | |
131 | |
def __init__(self):
    """
    Create a Cladogram that starts with no forced highlights.
    """
    #Give the instance its own dictionary; the class-level default is None
    self.dictForcedHighLights = {}
134 | |
135 #Happy Path Tested | |
def addHighLights(self, dictClades,fOverwrite):
    """
    Register elements that must be highlighted in the cladogram.
    The elements are stored in the forced highlight dictionary of this instance.
    The color name given for an element should also be supplied through the set Color Data method.
    {strName1:strColorName1,strName2:strColorName2,...}

    :param dictClades: Names of elements which should be highlighted if found in the tree
    :type: dictClades Dictionary of element name (string) and element color (string)
    :param fOverwrite: If an element is already registered, replace its color with the one provided here.
    :type: fOverwrite boolean (True == overwrite color)
    """
    #Invalid input is ignored silently
    if not ValidateData.funcIsValidDictionary(dictClades):
        return
    if not ValidateData.funcIsValidBoolean(fOverwrite):
        return

    for strElement in dictClades:
        #New elements are always stored, known elements only when overwriting is requested
        if fOverwrite or strElement not in self.dictForcedHighLights:
            self.dictForcedHighLights[strElement] = dictClades[strElement]
157 | |
158 #Not tested | |
def getHighLights(self):
    """
    Get the forced highlights.

    :return: The dictionary held by this instance (not a copy), element name (string) to color name (string)
    """
    dictHighLights = self.dictForcedHighLights
    return dictHighLights
161 | |
162 #Not tested | |
def forceRoot(self, strRoot):
    """
    Root the tree at the given value.
    Only taxa that contain this value in their ancestry will be plotted.
    The given value becomes the root, any hierarchy before it is ignored.
    This will remove highlighted data if indicated to do so.

    :param strRoot: Where to root the tree
    :type: strRoot String
    """
    #Only stored here; the value is read back from self.strRoot by other methods
    self.strRoot = strRoot
174 | |
def generate(self, strImageName, strStyleFile, sTaxaFileName, strCircladerScript = ConstantsBreadCrumbs.c_strCircladerScript, iTerminalCladeLevel = 10, sColorFileName=None, sTickFileName=None, sHighlightFileName=None, sSizeFileName=None, sCircleFileName=None):
    """
    This is the method to call to generate a cladogram using circlader.
    The abundance data must have been set before calling this method.

    :param strImageName: File name to save the output cladogram image
    :type: strImageName File name (string)
    :param strStyleFile: File path indicating the style file to use
    :type: strStyleFile File path (string)
    :param sTaxaFileName: File path indicating the taxa file to use
    :type: sTaxaFileName File path (string)
    :param strCircladerScript: File path to the Circlader script
    :type: String
    :param iTerminalCladeLevel: Clade level to use as terminal in plotting
    :type: iTerminalCladeLevel integer starting with 1
    :param sColorFileName: File path indicating the color file to use
    :type: sColorFileName File path (string)
    :param sTickFileName: File path indicating the tick file to use
    :type: sTickFileName File path (string)
    :param sHighlightFileName: File path indicating the highlight file to use
    :type: sHighlightFileName File path (string)
    :param sSizeFileName: File path indicating the size file to use
    :type: sSizeFileName File path (string)
    :param sCircleFileName: File path of circlader circle file.
    :type: String
    :return: False if the abundance data is not set or one of the input files could not be created.
             Nothing is returned explicitly after the Circlader command has been run.
    """

    #Identity check: "== None" on a numpy array is evaluated element by element
    if self.npaAbundance is None:
        print("Cladogram::generate. The data was not set so an image could not be generated")
        return False

    #Set script
    self.circladerScript = strCircladerScript

    #Set output file name
    self.strImageName = strImageName

    #Hand the file paths over to manageFilePaths
    self.manageFilePaths(sTaxaFileName, strStyleFile, sColorFileName, sTickFileName, sHighlightFileName, sSizeFileName, sCircleFileName)

    #Get IDs
    lsIDs = list(self.npaAbundance[self.strSampleID])

    #Generate a dictionary to convert the ids to correct format
    self.dictConvertIDs = self.generateLabels(lsIDs)

    #Remove taxa lower than the display clade level
    lsIDs = [sFeature for sFeature in lsIDs
             if len(sFeature.split(self.cFeatureDelimiter)) <= iTerminalCladeLevel]

    #Filter by abundance
    if self.fAbundanceFilter:
        lsIDs = self.filterByAbundance(lsIDs)

    #Update to the correct root
    lsIDs = self.updateToRoot(lsIDs)

    #Set highlights to root for consistency
    if self.strRoot is not None:
        dictRootedHighLights = dict()
        if self.dictForcedHighLights is not None:
            for sKey in self.dictForcedHighLights:
                lsUpdatedKey = self.updateToRoot([sKey])
                #Skip keys for which updateToRoot returns nothing instead of failing on [0]
                if len(lsUpdatedKey) > 0:
                    dictRootedHighLights[lsUpdatedKey[0]] = self.dictForcedHighLights[sKey]
        self.dictForcedHighLights = dictRootedHighLights

    #Set relabels to root for consistency
    if self.strRoot is not None:
        dictRootedLabels = dict()
        if self.dictRelabels is not None:
            for sKey in self.dictRelabels:
                lsUpdatedKey = self.updateToRoot([sKey])
                if len(lsUpdatedKey) > 0:
                    dictRootedLabels[lsUpdatedKey[0]] = self.dictRelabels[sKey]
        self.dictRelabels = dictRootedLabels

    #Filter by clade size Should be the last filter.
    #It is not a strong filter but cleans up images
    if self.fCladeSizeFilter:
        lsIDs = self.filterByCladeSize(lsIDs)

    #Add in forced highlighting
    if self.dictForcedHighLights:
        lsIDs.extend(self.dictForcedHighLights.keys())
    lsIDs = list(set(lsIDs))

    #Add in forced circle data (there may be no circles at all)
    if self.ldictCircleData is not None:
        for dictCircleData in self.ldictCircleData:
            if dictCircleData[self.c_sForced]:
                lsTaxa = dictCircleData[self.c_sTaxa]
                datAlpha = dictCircleData[self.c_sAlpha]
                #Alpha is either one value per taxon (list) or one value shared by all taxa
                if isinstance(datAlpha, list):
                    lsAlpha = datAlpha
                else:
                    lsAlpha = [datAlpha] * len(lsTaxa)
                #Taxa with an alpha of '0.0' are not forced into the cladogram
                lsIDs.extend([sTaxa for sTaxa, datTaxaAlpha in zip(lsTaxa, lsAlpha)
                              if not datTaxaAlpha == '0.0'])
        lsIDs = list(set(lsIDs))

    #Create circle files (needs to be after any filtering because it has a forcing option).
    #createCircleFile reports False when there is no circle data, so it is only called when circles exist.
    if self.ldictCircleData is not None:
        if not self.createCircleFile(lsIDs):
            return False

    #Generate / Write Tree file
    if not self.createTreeFile(lsIDs):
        return False

    #Generate / Write Highlight file
    if not self.createHighlightFile(lsIDs):
        return False

    #Generate / write color file
    if self.dictColors is not None:
        lsColorData = [ConstantsBreadCrumbs.c_cTab.join([sColorKey,self.dictColors[sColorKey]]) for sColorKey in self.dictColors]
        self.writeToFile(self.strColorFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsColorData), False)
        self.fColorFileMade=True

    #Generate / write tick file
    if self.llsTicks is not None:
        lsTickData = [ConstantsBreadCrumbs.c_cTab.join(lsTicks) for lsTicks in self.llsTicks]
        self.writeToFile(self.strTickFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsTickData), False)
        self.fTickFileMade=True

    #Generate / Write size data
    if not self.createSizeFile(lsIDs):
        return False

    #Call commandline, only passing the optional files that were actually made
    lsCommand = [self.circladerScript, self.strTreeFilePath, self.strImageName, "--style_file", self.strStyleFilePath, "--tree_format", "tabular"]
    if self.fSizeFileMade:
        lsCommand.extend(["--size_file", self.strSizeFilePath])
    if self.fColorFileMade:
        lsCommand.extend(["--color_file", self.strColorFilePath])
    if self.fTickFileMade:
        lsCommand.extend(["--tick_file", self.strTickFilePath])
    if self.fHighlightFileMade:
        lsCommand.extend(["--highlight_file", self.strHighLightFilePath])
    if self.fCircleFileMade:
        lsCommand.extend(["--circle_file", self.strCircleFilePath])
    CommandLine().runCommandLine(lsCommand)
315 | |
316 #Happy path tested | |
def setColorData(self, dictColors):
    """
    Specify the color information.
    Expects a dictionary of name (key)(string) to color (value)(string RGB)
    {strName1:Color,strName2:Color...}
    The name references what needs to be this color.
    Color data should be a string in the RGB format 0-255,0-255,0-255

    The dictionary is stored as given (not copied) and an entry for the
    background color is added to it if it does not have one.

    :param dictColors: Color Name and RGB specification
    :type: dictColors Dictionary of strings
    """
    #Invalid input is ignored silently
    if not ValidateData.funcIsValidDictionary(dictColors):
        return

    self.dictColors = dictColors
    #A caller-supplied background color is kept, otherwise the default one is used
    self.dictColors.setdefault(ConstantsFiguresBreadCrumbs.c_strBackgroundColorName, ConstantsFiguresBreadCrumbs.c_strBackgroundColor)
332 | |
333 #Not tested | |
def setAbundanceData(self, abtbAbundanceTable):
    """
    Take the data to plot from an abundance table.
    Stores the table's abundance copy, the name of its ID metadata and its sample names.

    :param abtbAbundanceTable: AbundanceTable to read the data from
    :type: AbundanceTable
    """
    abtbSource = abtbAbundanceTable
    #Abundance data as returned by funcGetAbundanceCopy
    self.npaAbundance = abtbSource.funcGetAbundanceCopy()
    #Name used to address the ID column of the abundance data
    self.strSampleID = abtbSource.funcGetIDMetadataName()
    self.lsSampleNames = abtbSource.funcGetSampleNames()
344 | |
345 #Not tested | |
def setFilterByAbundance(self, fAbundanceFilter, dPercentileCutOff = 90.0, dPercentageAbovePercentile = 1.0):
    """
    Turn abundance-based filtering on or off and set its thresholds.
    The values are stored as given, no range checking is performed here.

    :param fAbundanceFilter: Switch to turn on (true) and off (false) abundance-based filtering
    :type: fAbundanceFilter boolean
    :param dPercentileCutOff: Percentile used as the cut off, between 100.0 and 0.0.
    :type: double
    :param dPercentageAbovePercentile: Percentage between 100.0 and 1.0.
    :type: double
    """
    #Thresholds first, then the switch; the three assignments are independent
    self.c_dPercentileCutOff = dPercentileCutOff
    self.c_dPercentageAbovePercentile = dPercentageAbovePercentile
    self.fAbundanceFilter = fAbundanceFilter
361 | |
362 #Not Tested | |
def setCircleScale(self, iScale):
    """
    Set the scale used to increase or decrease node sizes in the cladogram to make them more visible.
    The class default is 3.

    :param iScale: Integer to increase the relative sizes of nodes
    :type: iScale integer
    """
    #Stored as given, without validation
    self.c_dCircleScale = iScale
372 | |
373 #Not tested | |
def setFeatureDelimiter(self, cDelimiter):
    """
    Set the delimiter used to parse the consensus lineages of features.
    An empty or None delimiter is ignored and the current one is kept.

    :param cDelimiter: The delimiter used to parse the consensus lineage of features.
    :type: Character
    """
    if not cDelimiter:
        return
    self.cFeatureDelimiter = cDelimiter
383 | |
384 #Not tested | |
def setFilterByCladeSize(self, fCladeSizeFilter, iCladeLevelToMeasure = 3, iCladeLevelToReduce = 1, iMinimumCladeSize = 5, cFeatureDelimiter = None, strUnclassified="unclassified"):
    """
    Turn clade size-based filtering on or off and set its parameters.
    Levels are NOT 0 based.
    A parameter that is not positive (numbers) or is empty (strings) leaves the current setting unchanged.

    :param fCladeSizeFilter: Switch to turn on (true) and off (false) clade size-based filtering
    :type: fCladeSizeFilter boolean
    :param iCladeLevelToMeasure: The level of the consensus lineage that is measured or counted. Should be greater than iCladeLevelToReduce (Root is 1)
    :type: iCladeLevelToMeasure int
    :param iCladeLevelToReduce: The level of the consensus lineage that is reduced if the measured level does not have the correct count (Root is 1)
    :type: iCladeLevelToReduce int
    :param iMinimumCladeSize: Minimum count of the measured clade for the clade to be kept
    :type: iMinimumCladeSize int
    :param cFeatureDelimiter: One may set the feature delimiter if needed.
    :type: Character
    :param strUnclassified: String indicating unclassified features
    :type: String
    """
    self.fCladeSizeFilter = fCladeSizeFilter

    #Numeric settings are only taken over when they are positive
    for sAttribute, iValue in (("iCladeLevelToMeasure", iCladeLevelToMeasure),
                               ("iCladeLevelToReduce", iCladeLevelToReduce),
                               ("iMinCladeSize", iMinimumCladeSize)):
        if iValue > 0:
            setattr(self, sAttribute, iValue)

    #String settings are only taken over when they are not empty
    for sAttribute, sValue in (("cFeatureDelimiter", cFeatureDelimiter),
                               ("strUnclassified", strUnclassified)):
        if sValue:
            setattr(self, sAttribute, sValue)
415 | |
416 #Not tested | |
def setTicks(self, llsTicks):
    """
    Specify the tick information.
    Expects a list of lists, each having a tick level (number starting at 0 as a string) and a tick name.
    Lowest numbers are closest to the center of the tree.
    [[#,Name1],[#,Name2]...]

    :param llsTicks: Level # and Name of level
    :type: llsTicks List of lists of strings
    """
    #The list is stored as given (not copied)
    self.llsTicks = llsTicks
428 | |
429 #Happy Path tested with createCircleFile | |
def addCircle(self, lsTaxa, strCircle, dBorder=0.0, strShape="R", dAlpha=1.0, fForced=False):
    """
    Add a circle to the outside of the cladogram.
    The settings are stored as one dictionary per circle and are not validated here.

    :param lsTaxa: Taxa to highlight with this circle
    :type: lsTaxa List of strings (taxa names)
    :param strCircle: Circle the elements will be in, indicates color and circle level.
    :type: strCircle String circle
    :param dBorder: Border size for the circle element border (between 0.0 and 1.0)
                    can also be a list of dBorders. If list, position must match lsTaxa.
    :type: dBorder Float of border size (or list of floats).
    :param strShape: String Indicator of shape or method to determine shape.
                     Can also be a list of shapes. If list, position must match lsTaxa.
    :type: strShape String to indicate the shape (may also be a list of strings).
                    Default value is square.
                    Valid shapes are R(Square), v(inward pointing triangle), ^(outward pointing triangle)
    :param dAlpha: The transparency of the circle element (between 0.0[clear] and 1.0[solid]).
                   Can also be a list of floats. If list, position must match lsTaxa.
    :type: dAlpha Float to indicate the transparency of the shape (may also be a list of strings).
    :param fForced: Forces the features in the circle to be displayed in the cladogram whether or not they pass the filters.
    :type: Boolean
    :return: Always True
    """
    #The list of circles is created the first time a circle is added
    if self.ldictCircleData is None:
        self.ldictCircleData = []

    self.ldictCircleData.append({
        self.c_sTaxa: lsTaxa,
        self.c_sCircle: strCircle,
        self.c_sBorder: dBorder,
        self.c_sShape: strShape,
        self.c_sAlpha: dAlpha,
        self.c_sForced: fForced,
    })
    return True
464 | |
465 #Happy Path tested with AddCircle | |
def createCircleFile(self, lsIDs):
    """
    Write circle data to file.

    For every circle added with addCircle the taxa are updated to the current root,
    the circle is added to the ticks if ticks are in use and it is not yet one of them,
    and one entry per taxon is built holding alpha, shape and border for each circle.

    :param lsIDs: Ids shown in the cladogram. Taxa of forced circles are appended to this list (in place).
    :type: lsIDs List of strings
    :return: True if circle data exists and was handled. False if there is no circle data,
             no circle file path is set, or a list of shapes, borders or alphas does not match the taxa list in length.
    """
    #Without circle data there is nothing to write
    if self.ldictCircleData is None:
        self.fCircleFileMade=False
        return False

    if self.strCircleFilePath is None:
        print("Error, there is no circle file specified to write to.")
        return False

    #Holds circle data {Taxaname:line for the output file}
    dictCircleDataMethods = dict()
    #IDs already known, so forced taxa are appended to lsIDs only once
    setKnownIDs = set(lsIDs)

    for dictCircleData in self.ldictCircleData:
        lsTaxa = dictCircleData[self.c_sTaxa]
        #Shape/s, border/s and alpha/s are either one value for all taxa or a list with one value per taxon
        datShape = dictCircleData[self.c_sShape]
        fShapeIsList = isinstance(datShape, list)
        datBorder = dictCircleData[self.c_sBorder]
        fBorderIsList = isinstance(datBorder, list)
        datAlpha = dictCircleData[self.c_sAlpha]
        fAlphaIsList = isinstance(datAlpha, list)
        #Circle name
        sCircleMethod = dictCircleData[self.c_sCircle]

        #Check to make sure the lengths of the lists match up with the taxa
        for sPlural, sSingular, fIsList, datValues in (("Shapes", "Shape", fShapeIsList, datShape),
                                                       ("Border sizes", "Border", fBorderIsList, datBorder),
                                                       ("Alpha sizes", "Alpha", fAlphaIsList, datAlpha)):
            if fIsList and not len(datValues) == len(lsTaxa):
                print("".join(["Error, ",sPlural," were given as an list not of the size of the taxa list. ",sSingular," list length: ",str(len(datValues)),". Taxa list length: ",str(len(lsTaxa)),"."]))
                return False

        #Update taxa to root if needed
        if not (fShapeIsList or fBorderIsList or fAlphaIsList):
            lsTaxa = self.updateToRoot(dictCircleData[self.c_sTaxa])
        else:
            #Lists have to be kept aligned with the taxa that are kept by the rooting,
            #values that are not lists are used for all taxa and are left alone
            lsUpdatedTaxa = list()
            datUpdatedShapes = list() if fShapeIsList else datShape
            datUpdatedBorders = list() if fBorderIsList else datBorder
            datUpdatedAlphas = list() if fAlphaIsList else datAlpha

            for iTaxaIndex, sTaxa in enumerate(lsTaxa):
                lsRootedTaxa = self.updateToRoot([sTaxa])
                if len(lsRootedTaxa)==1:
                    lsUpdatedTaxa.append(lsRootedTaxa[0])
                    if fShapeIsList:
                        datUpdatedShapes.append(datShape[iTaxaIndex])
                    if fBorderIsList:
                        datUpdatedBorders.append(datBorder[iTaxaIndex])
                    if fAlphaIsList:
                        datUpdatedAlphas.append(datAlpha[iTaxaIndex])

            #Reset data to rooted data
            lsTaxa=lsUpdatedTaxa
            datShape=datUpdatedShapes
            datBorder=datUpdatedBorders
            datAlpha=datUpdatedAlphas

        #QC passes so we will add the circle to the ticks.
        #If there are ticks and if the circle is not already in the ticks.
        if self.llsTicks is not None:
            fFound = False
            iHighestNumber = -1
            for tick in self.llsTicks:
                #Look for name
                if tick[1] == sCircleMethod:
                    fFound = True
                #Find highest count
                if int(tick[0]) > iHighestNumber:
                    iHighestNumber = int(tick[0])
            if not fFound:
                self.llsTicks.append([str(iHighestNumber+1),sCircleMethod])

        #If the circle is forced, add its taxa to lsIDs.
        #lsIDs is changed in place (never rebound) so the caller sees the taxa of every forced circle.
        if dictCircleData[self.c_sForced]:
            #A single alpha value applies to all taxa of the circle
            lsAlphas = datAlpha if fAlphaIsList else [datAlpha] * len(lsTaxa)
            for sTaxa, datTaxaAlpha in zip(lsTaxa, lsAlphas):
                if (not datTaxaAlpha == "0.0") and (sTaxa not in setKnownIDs):
                    setKnownIDs.add(sTaxa)
                    lsIDs.append(sTaxa)

        #A repeated taxon uses the settings of its first occurrence
        dictFirstIndex = dict()
        for iTaxaIndex, sTaxa in enumerate(lsTaxa):
            dictFirstIndex.setdefault(sTaxa, iTaxaIndex)

        #NOTE(review): the entries are built for the taxa of the circle, lsIDs is not consulted here.
        #Taxa of the cladogram that are not in the circle get no entry for it - confirm this is intended.
        for sTaxa in lsTaxa:
            if sTaxa not in dictCircleDataMethods:
                #Reset name to . delimited (empty lineage elements are dropped)
                dictCircleDataMethods[sTaxa] = ".".join([sElement for sElement in sTaxa.split("|") if sElement])

            iTaxaIndex = dictFirstIndex[sTaxa]
            sBorder = str(datBorder[iTaxaIndex]) if fBorderIsList else str(datBorder)
            sShape = datShape[iTaxaIndex] if fShapeIsList else datShape
            sAlpha = str(datAlpha[iTaxaIndex]) if fAlphaIsList else str(datAlpha)
            dictCircleDataMethods[sTaxa] = dictCircleDataMethods[sTaxa]+"".join([ConstantsBreadCrumbs.c_cTab,sCircleMethod,":",sAlpha,"!",sShape,"#",sBorder])

    if len(dictCircleDataMethods)>0:
        sCircleContent = ConstantsBreadCrumbs.c_strEndline.join(dictCircleDataMethods.values())
        self.writeToFile(self.strCircleFilePath, sCircleContent, False)
        self.fCircleFileMade=True

    return True
627 | |
628 #Happy Path tested | |
def createHighlightFile(self, lsIDs):
    """
    Write highlight data to file.
    One line is written per id holding the . delimited taxon, its display name, its label and its color.
    The file is only written if there is at least one line.

    :param lsIDs: Ids to include in the highlight file
    :type: lsIDs List of strings
    :return: Always True
    """
    lsHighLightData = list()
    #Each taxa name
    for sID in lsIDs:
        #Rename taxa to be consistent with the . delimited format
        asNameElements = [sElement for sElement in sID.split("|") if sElement]
        #An id without any name element can not be written
        if not asNameElements:
            continue
        sCurTaxaName = asNameElements[-1]
        #"unclassified" alone is not informative so it is prefixed with its parent clade
        if len(asNameElements) > 1 and sCurTaxaName == "unclassified":
            sCurTaxaName = ".".join([asNameElements[-2],sCurTaxaName])
        sCurTaxa = ".".join(asNameElements)

        #Get color, the color stays empty if the id is not highlighted or the color name is unknown.
        #Colors are optional (dictColors is None until setColorData is called).
        sCurColor = ""
        sColorKey = ""
        if self.dictForcedHighLights is not None and sID in self.dictForcedHighLights:
            sColorKey = self.dictForcedHighLights[sID]
        if self.dictColors is not None and sColorKey in self.dictColors:
            sCurColor = self.formatRGB(self.dictColors[sColorKey])

        #Get label
        sCurLabel = ""
        if self.dictRelabels is not None and sID in self.dictRelabels:
            sCurLabel = self.dictRelabels[sID]

        #Without a label the taxon name is displayed
        sDisplayName = sCurTaxaName if sCurLabel == "" else sCurLabel
        lsHighLightData.append(ConstantsBreadCrumbs.c_cTab.join([sCurTaxa,sDisplayName,sCurLabel,sCurColor]))

    if len(lsHighLightData)>0:
        self.writeToFile(self.strHighLightFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsHighLightData), False)
        self.fHighlightFileMade=True
    return True
668 | |
669 #Happy path tested | |
def createSizeFile(self, lsIDs):
    """
    Write size data to file.
    The size of a taxon is the log10 of its scaled average abundance (the values after the
    first field of its row) multiplied by the circle scale. Nothing is written if no abundance
    data is set or no row matches the given ids.

    :param lsIDs: Ids to include in the size file
    :type: lsIDs List of strings
    :return: Always True
    """
    if self.npaAbundance is not None:
        #Smallest size that is written, keeps the log10 away from very small averages
        dMinimumValue = (self.c_dMinLogSize*self.c_dLogScale)+1
        #Set for constant time membership tests in the loop
        setIDs = set(lsIDs)
        lsWriteData = list()
        for rowData in self.npaAbundance:
            strCurrentId = rowData[0]
            #Reset to root if needed to match current data
            if self.strRoot is not None:
                lsRootedId = self.updateToRoot([strCurrentId])
                #Nothing returned by the rooting, so the id can not be one of lsIDs
                if len(lsRootedId) == 0:
                    continue
                strCurrentId = lsRootedId[0]
            if strCurrentId in setIDs:
                dAverage = np.average(list(rowData)[1:])
                dSize = max([dMinimumValue,(dAverage*self.c_dLogScale)+1])
                lsWriteData.append(strCurrentId.replace("|",".")+ConstantsBreadCrumbs.c_cTab+str(math.log10(dSize)*self.c_dCircleScale))
        if len(lsWriteData)>0:
            self.writeToFile(self.strSizeFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsWriteData), False)
            self.fSizeFileMade=True
    return True
695 | |
696 #Happy path tested 1 | |
def createTreeFile(self, lsIDs):
    """
    Write tree data to file. The tree file defines the internal cladogram and all its points.
    Every id contributes itself and all of its ancestors as . delimited paths, each path is
    written once in the order it is first seen. The file is only written if there is at least one path.

    :param lsIDs: Ids to include in the tree file as well as their ancestors
    :type: lsIDs List of strings
    :return: Always True
    """
    lsFullTree = list()
    #Paths already in the tree, for constant time membership tests
    setSeenPaths = set()
    for sID in lsIDs:
        #Empty lineage elements are dropped; an id without elements contributes nothing
        lsIDElements = [sElement for sElement in sID.split("|") if sElement]
        #Walk from the first element down to the full lineage
        for iEndLevel in range(1,len(lsIDElements)+1):
            sNodePath = ".".join(lsIDElements[0:iEndLevel])
            if sNodePath not in setSeenPaths:
                setSeenPaths.add(sNodePath)
                lsFullTree.append(sNodePath)

    if len(lsFullTree)>0:
        self.writeToFile(self.strTreeFilePath, ConstantsBreadCrumbs.c_strEndline.join(lsFullTree), False)
    return True
721 | |
722 #Happy Path tested | |
def filterByAbundance(self, lsIDs):
    """
    Filter by abundance. An id is kept if, in at least the required percentage of samples,
    its abundance is at or above the percentile cut off score of that sample.

    The first field of the abundance data is taken to be the id, all other fields are samples.

    :param lsIDs: Ids to filter
    :type: lsIDs List of strings
    :return: The ids that survived the filtering, in the order of the abundance data.
             Empty if no abundance data is set or the data has no sample columns.
    """
    #list of ids to return that survived the filtering
    retls = list()
    if self.npaAbundance is None:
        return retls

    #Sample columns (Ignore sample id [position 0] which is not a sample)
    lsSampleColumns = self.npaAbundance.dtype.names[1:]
    #Without samples no id can pass (and the percentage below could not be calculated)
    if len(lsSampleColumns) == 0:
        return retls

    #Cut off score (threshold) of each sample, in the order of the values in a row
    ldThresholds = [scipy.stats.scoreatpercentile(self.npaAbundance[sSample],self.c_dPercentileCutOff)
                    for sSample in lsSampleColumns]
    dSampleCount = float(len(lsSampleColumns))
    dRequiredFraction = self.c_dPercentageAbovePercentile/100.0

    #Set for constant time membership tests in the loop
    setIDs = set(lsIDs)

    #Check each taxa
    for rowTaxaData in self.npaAbundance:
        sCurTaxaName = rowTaxaData[0]
        #Only look at the IDs given
        if sCurTaxaName not in setIDs:
            continue
        ldAbundanceMeasures = list(rowTaxaData)[1:]
        #Count the samples in which the abundance meets the threshold of the sample
        iCountAbovePercentile = sum(1 for dMeasure, dThreshold in zip(ldAbundanceMeasures, ldThresholds)
                                    if dMeasure >= dThreshold)
        if (iCountAbovePercentile / dSampleCount) >= dRequiredFraction:
            retls.append(sCurTaxaName)
    return retls
758 | |
759 #Happy Path Tested | |
def filterByCladeSize(self, lsIDs):
    """
    Filter by the count of individuals in the clade.

    Terminal nodes are obtained from AbundanceTable.funcGetTerminalNodesFromList
    (presumably the leaves of the given lineages; confirm against that helper).
    Each terminal node whose lineage has at least self.iCladeLevelToReduce levels,
    and which either reaches self.iCladeLevelToMeasure levels or ends in
    self.strUnclassified, adds one to the count of its ancestor clade at the
    reduce level.

    An id is then kept when
    - it has fewer levels than self.iCladeLevelToReduce (too short to filter), or
    - it qualifies by the same rule as above and its reduce-level clade counted
      at least self.iMinCladeSize terminal nodes.
    All other ids are dropped.

    :param lsIDs: Ids to filter
    :type: lsIDs List of strings
    :return: Ids that survived the filtering, in their original order
    """
    #First get terminal nodes
    lsTerminalNodes = AbundanceTable.funcGetTerminalNodesFromList(lsIDs,self.cFeatureDelimiter)

    #Count up clades {clade lineage at the reduce level (string): terminal node count (int)}
    cladeCounts = dict()

    #For each terminal node count the
    #Clades at clade levels
    for sTerminalNode in lsTerminalNodes:
        lsLineage = sTerminalNode.split(self.cFeatureDelimiter)
        iLineageCount = len(lsLineage)
        #If the lineage is shorter than the reduced clade level then no need to filter it
        if iLineageCount < self.iCladeLevelToReduce:
            continue
        #If the lineage is longer than the reduced clade level and measuring clade level then count
        #or If the lineage is longer than the reduced clade level but shorter than the measuring clade,
        #only count if the last element is unclassified
        if (iLineageCount >= self.iCladeLevelToMeasure) or (lsLineage[-1] == self.strUnclassified):
            sLineage = self.cFeatureDelimiter.join(lsLineage[0:self.iCladeLevelToReduce])
            cladeCounts[sLineage] = cladeCounts.get(sLineage,0) + 1

    #Go through the IDs and reduce as needed using the clade counts
    retls = list()
    for sID in lsIDs:
        lsID = sID.split(self.cFeatureDelimiter)
        iIDCount = len(lsID)

        #Too short to filter. This must test the length of the current id, not the
        #length left over from the last terminal node of the loop above.
        if iIDCount < self.iCladeLevelToReduce:
            retls.append(sID)
            continue

        #Check to see if the clade which is being reduced made the cut
        if (iIDCount >= self.iCladeLevelToMeasure) or (lsID[-1] == self.strUnclassified):
            sClade = self.cFeatureDelimiter.join(lsID[0:self.iCladeLevelToReduce])
            #A clade that was never counted has no qualifying terminal nodes
            if cladeCounts.get(sClade,0) >= self.iMinCladeSize:
                retls.append(sID)

    return retls
803 | |
804 #Happy path tested | |
def formatRGB(self, sColor):
    """
    Takes a string that is of the format 0-255,0-255,0-255 and converts it to the
    color format of circlader _c_[0-1,0-1,0-1]

    The string is split on "," and empty elements are ignored. Unless exactly
    three elements remain (or when sColor is None) the default "_c_[1,1,1]" is
    returned. Each element is converted with int() and divided by 255.0; the
    values are not range checked.

    :param sColor: String RGB format
    :type: sColor String
    :return: Circlader color string
    :raises ValueError: If one of the three elements is not an integer literal
    """
    sCircladerColor = "_c_[1,1,1]"
    if sColor is None:
        return sCircladerColor

    #A list comprehension (rather than filter) so len() works on both Python 2 and 3
    lsChannels = [sChannel for sChannel in sColor.split(",") if sChannel]
    if len(lsChannels) != 3:
        return sCircladerColor

    #Scale each 0-255 channel into the 0-1 range
    lsScaled = [str(int(sChannel)/255.0) for sChannel in lsChannels]
    return "".join(["_c_[", ",".join(lsScaled), "]"])
822 | |
823 #Happy path tested | |
def generateLabels(self, lsIDs):
    """
    Labels for visualization.
    Changes unclassified to one_level_higher.unclassified and enables numeric labeling / relabeling.
    Will only rename, will not add the label. The key must exist for the value to be used in replacing.

    The label of an id is its last non-empty "|"-separated element. The relabel
    lookup in self.dictRelabels happens after the unclassified fix, so keys for
    unclassified nodes must use the one_level_higher.unclassified form.

    :param lsIDs: Ids to include in the labels file
    :type: lsIDs List of strings
    :return: Dictionary of labels {id (string): label (string)}
    :raises IndexError: If an id has no non-empty elements
    """
    dictRet = dict()
    for sID in lsIDs:
        #A list comprehension (rather than filter) so indexing works on both Python 2 and 3
        lsIDElements = [sElement for sElement in sID.split("|") if sElement]
        sLabel = lsIDElements[-1]
        #Fix unclassified
        if (sLabel == "unclassified") and (len(lsIDElements) > 1):
            sLabel = ".".join([lsIDElements[-2],sLabel])
        #Change to relabels if given
        if (self.dictRelabels is not None) and (sLabel in self.dictRelabels):
            sLabel = self.dictRelabels[sLabel]
        #Store label
        dictRet[sID] = sLabel
    return dictRet
848 | |
849 #Happy path tested | |
def manageFilePaths(self, sTaxaFileName, strStyleFile, sColorFileName=None, sTickFileName=None, sHighlightFileName=None, sSizeFileName=None, sCircleFileName=None):
    """
    This method sets the naming to the files generated that Circlader acts on.
    These files include the tree, color, highlight, tick, circle, and size files.
    Checks to make sure the file path to the style file provided exists.
    Deletes any existing files with these generated names (except for the style file).

    No path is stored and no file is removed when the style file check fails.

    :param sTaxaFileName: File path indicating the taxa (tree) file to use
    :type: String
    :param strStyleFile: File path indicating the style file to use
    :type: String
    :param sColorFileName: File path indicating the color file to use
    :type: String
    :param sTickFileName: File path indicating the tick file to use
    :type: String
    :param sHighlightFileName: File path indicating the highlight file to use
    :type: String
    :param sSizeFileName: File path indicating the size file to use
    :type: String
    :param sCircleFileName: File path for circle files
    :type: String
    :return boolean: True indicates success, false indicates error
    :raises OSError: If an existing output path can not be removed
    """
    import errno

    #Do not remove the style file, it is static
    if strStyleFile is None:
        print("Error, style file is None")
        return False
    if not os.path.exists(strStyleFile):
        print("Error, no style file found.")
        return False
    self.strStyleFilePath = strStyleFile

    #Set output files and remove if needed
    self.strTreeFilePath = sTaxaFileName
    self.strColorFilePath = sColorFileName
    self.strTickFilePath = sTickFileName
    self.strHighLightFilePath = sHighlightFileName
    self.strSizeFilePath = sSizeFileName
    self.strCircleFilePath = sCircleFileName
    for sFile in [self.strTreeFilePath,self.strColorFilePath,self.strTickFilePath,
                  self.strHighLightFilePath,self.strSizeFilePath,self.strCircleFilePath]:
        if sFile is None:
            continue
        #Remove directly instead of checking for existence first, so a file that
        #disappears between the check and the removal can not cause a failure
        try:
            os.remove(sFile)
        except OSError as errRemove:
            #A path that does not exist needs no clean up; anything else is a real error
            if errRemove.errno not in (errno.ENOENT, errno.ENOTDIR):
                raise
    return True
896 | |
897 #Not tested | |
def relabelIDs(self, dictLabels):
    """
    Registers the relabeling dictionary that is consulted when labels are generated.
    Can be used to give ids numeric labels or to rename them.

    The dictionary is stored as given (not copied) in self.dictRelabels.

    :param dictLabels: Maps a label (key), in its form after unclassified has been modified, to the new label (value)
    :type: dictLabels Dictionary of string (key:label to replace) string (value:new label to use in replacing)
    """
    #Keep a reference to the caller's dictionary; it replaces any earlier one
    self.dictRelabels = dictLabels
906 | |
907 #Happy path tested | |
def updateToRoot(self, lsIDs):
    """
    Updates the clade to the root given. The clade must contain the root and the level of the
    root in the clade will be reset to its first level, ignoring the previous levels of the clade.

    When self.strRoot is None the given list is returned unchanged. Otherwise each
    id is split on "|" (empty elements are dropped) and only the levels after the
    first occurrence of the root are kept, joined again with "|". Ids that do not
    contain the root, or that end at the root, are dropped.

    :param lsIDs: List of Clades that will be reset to the root specified by setRoot
    :type: lsIDs List of strings. Each string representing a clade.
    :return: List of re-rooted clades
    """
    if self.strRoot is None:
        return lsIDs

    #Force root tree if indicated to do so
    lsRootedIDs = list()
    for sID in lsIDs:
        #A list comprehension (rather than filter) so index() works on both Python 2 and 3
        lsIDElements = [sElement for sElement in sID.split("|") if sElement]
        if self.strRoot not in lsIDElements:
            continue
        #Everything below the new root; joining a single level gives that level itself
        lsBelowRoot = lsIDElements[lsIDElements.index(self.strRoot)+1:]
        if lsBelowRoot:
            lsRootedIDs.append("|".join(lsBelowRoot))
    return lsRootedIDs
932 | |
933 #Testing: Used extensively in other tests | |
def writeToFile(self, strFileName, strDataToWrite, fAppend):
    """
    Helper function that writes a string to a file.
    The file is closed again before returning.

    :param strFileName: File to write to
    :type: strFileName File path (string)
    :param strDataToWrite: Data to write to file
    :type: strDataToWrite String
    :param fAppend: Indicates if the data is appended (True) or replaces the file content (False)
    :type: fAppend boolean
    """
    #Append keeps existing content, write truncates it
    cMode = 'a' if fAppend else 'w'
    with open(strFileName,cMode) as hndlOut:
        hndlOut.write(strDataToWrite)