Mercurial > repos > sagun98 > micropita
comparison galaxy_micropita/src/breadcrumbs/src/Metric.py @ 3:8fb4630ab314 draft default tip
Uploaded
author | sagun98 |
---|---|
date | Thu, 03 Jun 2021 17:07:36 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:1c5736dc85ab | 3:8fb4630ab314 |
---|---|
1 """ | |
2 Author: Timothy Tickle | |
3 Description: Calculates Metrics. | |
4 """ | |
5 | |
6 ##################################################################################### | |
7 #Copyright (C) <2012> | |
8 # | |
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
10 #this software and associated documentation files (the "Software"), to deal in the | |
11 #Software without restriction, including without limitation the rights to use, copy, | |
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
13 #and to permit persons to whom the Software is furnished to do so, subject to | |
14 #the following conditions: | |
15 # | |
16 #The above copyright notice and this permission notice shall be included in all copies | |
17 #or substantial portions of the Software. | |
18 # | |
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
25 ##################################################################################### | |
26 | |
27 __author__ = "Timothy Tickle" | |
28 __copyright__ = "Copyright 2012" | |
29 __credits__ = ["Timothy Tickle"] | |
30 __license__ = "MIT" | |
31 __maintainer__ = "Timothy Tickle" | |
32 __email__ = "ttickle@sph.harvard.edu" | |
33 __status__ = "Development" | |
34 | |
35 #Update path | |
36 from ConstantsBreadCrumbs import ConstantsBreadCrumbs | |
37 import csv | |
38 import numpy as np | |
39 from types import * | |
40 from ValidateData import ValidateData | |
41 | |
42 #External libraries | |
43 from cogent.maths.unifrac.fast_unifrac import fast_unifrac_file | |
44 import cogent.maths.stats.alpha_diversity | |
45 import scipy.spatial.distance | |
46 | |
47 class Metric: | |
48 """ | |
49 Performs ecological measurements. | |
50 """ | |
51 | |
52 #Diversity metrics Alpha | |
53 c_strSimpsonDiversity = "SimpsonD" | |
54 c_strInvSimpsonDiversity = "InSimpsonD" | |
55 c_strChao1Diversity = "Chao1" | |
56 | |
57 #Diversity metrics Beta | |
58 c_strBrayCurtisDissimilarity = "B_Curtis" | |
59 c_strUnifracUnweighted = "unifrac_unweighted" | |
60 c_strUnifracWeighted = "unifrac_weighted" | |
61 | |
62 #Additive inverses of beta metrics | |
63 c_strInvBrayCurtisDissimilarity = "InB_Curtis" | |
64 | |
65 #Richness | |
66 c_strShannonRichness = "ShannonR" | |
67 c_strObservedCount = "Observed_Count" | |
68 | |
69 #Different alpha diversity metrics | |
70 setAlphaDiversities = set(["observed_species","margalef","menhinick", | |
71 "dominance","reciprocal_simpson","shannon","equitability","berger_parker_d", | |
72 "mcintosh_d","brillouin_d","strong","fisher_alpha","simpson", | |
73 "mcintosh_e","heip_e","simpson_e","robbins","michaelis_menten_fit","chao1","ACE"]) | |
74 | |
75 #Different beta diversity metrics | |
76 setBetaDiversities = set(["braycurtis","canberra","chebyshev","cityblock", | |
77 "correlation","cosine","euclidean","hamming","sqeuclidean"]) | |
78 | |
79 #Tested 4 | |
80 @staticmethod | |
81 def funcGetSimpsonsDiversityIndex(ldSampleTaxaAbundancies=None): | |
82 """ | |
83 Calculates the Simpsons diversity index as defined as sum(Pi*Pi). | |
84 Note***: Assumes that the abundance measurements are already normalized by the total population N. | |
85 | |
86 :param ldSampleTaxaAbundancies: List of measurements to calculate metric on (a sample). | |
87 :type: List of doubles | |
88 :return Double: Diversity metric | |
89 """ | |
90 | |
91 #Calculate metric | |
92 return sum((ldSampleTaxaAbundancies)*(ldSampleTaxaAbundancies)) | |
93 | |
94 #Tested 4 | |
95 @staticmethod | |
96 def funcGetInverseSimpsonsDiversityIndex(ldSampleTaxaAbundancies=None): | |
97 """ | |
98 Calculates Inverse Simpsons diversity index 1/sum(Pi*Pi). | |
99 This is multiplicative inverse which reverses the order of the simpsons diversity index. | |
100 Note***: Assumes that the abundance measurements are already normalized by the total population N. | |
101 | |
102 :param ldSampleTaxaAbundancies: List of measurements to calculate metric on (a sample). | |
103 :type: List of doubles | |
104 :return Double: Diversity metric | |
105 """ | |
106 | |
107 simpsons = Metric.funcGetSimpsonsDiversityIndex(ldSampleTaxaAbundancies) | |
108 #If simpsons is false return false, else return inverse | |
109 if not simpsons: | |
110 return False | |
111 return 1.0/simpsons | |
112 | |
113 #Tested 4 | |
114 @staticmethod | |
115 def funcGetShannonRichnessIndex(ldSampleTaxaAbundancies=None): | |
116 """ | |
117 Calculates the Shannon richness index. | |
118 Note***: Assumes that the abundance measurements are already normalized by the total population N. | |
119 If not normalized, include N in the parameter tempTotalN and it will be. | |
120 This is in base exp(1) like the default R Vegan package. Cogent is by defaul in bits (base=2) | |
121 Both options are here for your use. See Metric.funcGetAlphaDiversity() to access cogent | |
122 | |
123 :param ldSampleTaxaAbundancies: List of measurements to calculate metric on (a sample). | |
124 :type: List of doubles | |
125 :return Double: Richness metric | |
126 """ | |
127 | |
128 #Calculate metric | |
129 ldSampleTaxaAbundancies = ldSampleTaxaAbundancies[np.where(ldSampleTaxaAbundancies != 0)] | |
130 tempIntermediateNumber = sum(ldSampleTaxaAbundancies*(np.log(ldSampleTaxaAbundancies))) | |
131 if(tempIntermediateNumber == 0.0): | |
132 return 0.0 | |
133 return -1 * tempIntermediateNumber | |
134 | |
135 #Test 3 | |
136 @staticmethod | |
137 def funcGetChao1DiversityIndex(ldSampleTaxaAbundancies=None, fCorrectForBias=False): | |
138 """ | |
139 Calculates the Chao1 diversity index. | |
140 Note***: Not normalized by abundance. | |
141 | |
142 :param ldSampleTaxaAbundancies: List of measurements to calculate metric on (a sample). | |
143 :type: List of doubles | |
144 :param fCorrectForBias: Indicator to use bias correction. | |
145 :type: Boolean False indicates uncorrected for bias (uncorrected = Chao 1984, corrected = Chao 1987, Eq. 2) | |
146 :return Double: Diversity metric | |
147 """ | |
148 #If not counts return false | |
149 if [num for num in ldSampleTaxaAbundancies if((num<1) and (not num==0))]: return False | |
150 | |
151 #Observed = total number of species observed in all samples pooled | |
152 totalObservedSpecies = len(ldSampleTaxaAbundancies)-len(ldSampleTaxaAbundancies[ldSampleTaxaAbundancies == 0]) | |
153 | |
154 #Singles = number of species that occur in exactly 1 sample | |
155 singlesObserved = len(ldSampleTaxaAbundancies[ldSampleTaxaAbundancies == 1.0]) | |
156 | |
157 #Doubles = number of species that occue in exactly 2 samples | |
158 doublesObserved = len(ldSampleTaxaAbundancies[ldSampleTaxaAbundancies == 2.0]) | |
159 | |
160 #If singles or doubles = 0, return observations so that a divided by zero error does not occur | |
161 if((singlesObserved == 0) or (doublesObserved == 0)): | |
162 return totalObservedSpecies | |
163 | |
164 #Calculate metric | |
165 if fCorrectForBias: | |
166 return cogent.maths.stats.alpha_diversity.chao1_bias_corrected(observed = totalObservedSpecies, singles = singlesObserved, doubles = doublesObserved) | |
167 else: | |
168 return cogent.maths.stats.alpha_diversity.chao1_uncorrected(observed = totalObservedSpecies, singles = singlesObserved, doubles = doublesObserved) | |
169 | |
170 #Test 3 | |
171 @staticmethod | |
172 def funcGetObservedCount(ldSampleAbundances, dThreshold = 0.0): | |
173 """ | |
174 Count how many bugs / features have a value of greater than 0 or the threshold given. | |
175 Expects a vector of abundances. | |
176 ****Do not normalize data if using the threshold. | |
177 | |
178 :param ldSampleAbundances: List of measurements to calculate metric on (a sample). | |
179 :type: List of doubles | |
180 :param dThreshold: The lowest number the measurement can be to be counted as an observation. | |
181 :type: Double | |
182 :return Count: Number of features observed in a sample. | |
183 """ | |
184 | |
185 return sum([1 for observation in ldSampleAbundances if observation > dThreshold]) | |
186 | |
187 #Test Cases 6 | |
188 @staticmethod | |
189 def funcGetAlphaDiversity(liCounts,strMetric): | |
190 """ | |
191 Passes counts to cogent for an alpha diversity metric. | |
192 setAlphaDiversities are the names supported | |
193 | |
194 :param liCount: List of counts to calculate metric on (a sample). | |
195 :type: List of ints | |
196 :return Diversity: Double diversity metric. | |
197 """ | |
198 | |
199 return getattr(cogent.maths.stats.alpha_diversity,strMetric)(liCounts) | |
200 | |
201 #Happy path tested 1 | |
202 @staticmethod | |
203 def funcGetDissimilarity(ldSampleTaxaAbundancies, funcDistanceFunction): | |
204 """ | |
205 Calculates the distance between samples given a function. | |
206 | |
207 If you have 5 rows (labeled r1,r2,r3,r4,r5) the vector are the distances in this order. | |
208 condensed form = [d(r1,r2), d(r1,r3), d(r1,r4), d(r1,r5), d(r2,r3), d(r2,r4), d(r2,r5), d(r3,r4), d(r3,r5), d(r4,r5)]. | |
209 Note***: Assumes that the abundance measurements are already normalized by the total population N. | |
210 | |
211 :param ldSampleTaxaAbundancies: | |
212 :type: List of doubles | |
213 :param funcDistanceFunction: Distance function used to calculate distances | |
214 :type: Function | |
215 :return Double: Dissimilarity metric | |
216 """ | |
217 | |
218 #Calculate metric | |
219 try: | |
220 return scipy.spatial.distance.pdist(ldSampleTaxaAbundancies, funcDistanceFunction) | |
221 except ValueError as error: | |
222 print "".join(["Metric.funcGetDissimilarity. Error=",str(error)]) | |
223 return False | |
224 | |
225 #Test case 1 | |
226 @staticmethod | |
227 def funcGetDissimilarityByName(ldSampleTaxaAbundancies, strMetric): | |
228 """ | |
229 Calculates beta-diversity metrics between lists of abundances | |
230 setBetaDiversities are the names supported | |
231 | |
232 :param ldSampleTaxaAbundancies: | |
233 :type: List of doubles | |
234 :param strMetric: Name of the distance function used to calculate distances | |
235 :type: String | |
236 :return list double: Dissimilarity metrics between each sample | |
237 """ | |
238 | |
239 return scipy.spatial.distance.pdist(ldSampleTaxaAbundancies,strMetric) | |
240 | |
241 #Test 3 | |
242 @staticmethod | |
243 def funcGetBrayCurtisDissimilarity(ldSampleTaxaAbundancies): | |
244 """ | |
245 Calculates the BrayCurtis Beta dissimilarity index. | |
246 d(u,v)=sum(abs(row1-row2))/sum(row1+row2). | |
247 This is scale invariant. | |
248 If you have 5 rows (labeled r1,r2,r3,r4,r5) the vector are the distances in this order. | |
249 condensed form = [d(r1,r2), d(r1,r3), d(r1,r4), d(r1,r5), d(r2,r3), d(r2,r4), d(r2,r5), d(r3,r4), d(r3,r5), d(r4,r5)]. | |
250 Note***: Assumes that the abundance measurements are already normalized by the total population N. | |
251 | |
252 :param ldSampleTaxaAbundancies: | |
253 :type: List of doubles | |
254 :return Double Matrix: Dissimilarity metric | |
255 """ | |
256 | |
257 #Calculate metric | |
258 try: | |
259 return scipy.spatial.distance.pdist(X=ldSampleTaxaAbundancies, metric='braycurtis') | |
260 except ValueError as error: | |
261 print "".join(["Metric.getBrayCurtisDissimilarity. Error=",str(error)]) | |
262 return False | |
263 | |
264 #Test 3 | |
265 @staticmethod | |
266 def funcGetInverseBrayCurtisDissimilarity(ldSampleTaxaAbundancies): | |
267 """ | |
268 Calculates 1 - the BrayCurtis Beta dissimilarity index. | |
269 d(u,v)=1-(sum(abs(row1-row2))/sum(row1+row2)). | |
270 This is scale invariant and ranges between 0 and 1. | |
271 If you have 5 rows (labeled r1,r2,r3,r4,r5) the vector are the distances in this order. | |
272 condensed form = [d(r1,r2), d(r1,r3), d(r1,r4), d(r1,r5), d(r2,r3), d(r2,r4), d(r2,r5), d(r3,r4), d(r3,r5), d(r4,r5)]. | |
273 Note***: Assumes that the abundance measurements are already normalized by the total population N. | |
274 | |
275 :param ldSampleTaxaAbundancies: An np.array of samples (rows) x measurements (columns) in which distance is measured between rows | |
276 :type: List List of doubles | |
277 :return Double Matrix: 1 - Bray-Curtis dissimilarity. | |
278 """ | |
279 | |
280 bcValue = Metric.funcGetBrayCurtisDissimilarity(ldSampleTaxaAbundancies = ldSampleTaxaAbundancies) | |
281 if not type(bcValue) is BooleanType: | |
282 return 1.0-bcValue | |
283 return False | |
284 | |
285 #Test cases 8 | |
286 @staticmethod | |
287 def funcGetUnifracDistance(istrmTree,istrmEnvr,lsSampleOrder=None,fWeighted=True): | |
288 """ | |
289 Gets a unifrac distance from files/filestreams. | |
290 | |
291 :param istrmTree: File path or stream which is a Newick format file | |
292 :type: String of file stream | |
293 :param istrmEnvr: File path or stream which is a Newick format file | |
294 :type: String of file stream | |
295 """ | |
296 npaDist, lsSampleNames = fast_unifrac_file(open(istrmTree,"r") if isinstance(istrmTree, str) else istrmTree, | |
297 open(istrmEnvr,"r") if isinstance(istrmEnvr, str) else istrmEnvr, weighted=fWeighted).get("distance_matrix",False) | |
298 | |
299 #Was trying to avoid preallocating a matrix but if you only need a subset of the samples then it | |
300 #is simpler to preallocate so this is what I am doing but making a condensed matrix and not a full matrix | |
301 | |
302 #Dictionary to translate the current order of the samples to what is expected if given an input order | |
303 if lsSampleOrder: | |
304 #{NewOrder:OriginalOrder} way to convert from old to new sample location | |
305 dictTranslate = dict([[lsSampleOrder.index(sSampleName),lsSampleNames.index(sSampleName)] for sSampleName in lsSampleNames if sSampleName in lsSampleOrder]) | |
306 | |
307 #Check to make sure all samples requested were found | |
308 if not len(dictTranslate.keys()) == len(lsSampleOrder): | |
309 print "Metric.funcGetUnifracDistance. Error= The some or all sample names given (lsSampleOrder) were not contained in the matrix." | |
310 return False | |
311 | |
312 #Length of data | |
313 iLengthOfData = len(lsSampleOrder) | |
314 | |
315 #Preallocate matrix and shuffle | |
316 mtrxData = np.zeros(shape=(iLengthOfData,iLengthOfData)) | |
317 for x in xrange(iLengthOfData): | |
318 for y in xrange(iLengthOfData): | |
319 mtrxData[x,y] = npaDist[dictTranslate[x],dictTranslate[y]] | |
320 npaDist = mtrxData | |
321 | |
322 lsSampleNames = lsSampleOrder | |
323 | |
324 #If no sample order is given, condense the matrix and return | |
325 return (scipy.spatial.distance.squareform(npaDist),lsSampleNames) | |
326 | |
327 | |
328 #Test 7 | |
329 @staticmethod | |
330 def funcGetAlphaMetric(ldAbundancies, strMetric): | |
331 """ | |
332 Get alpha abundance of the metric for the vector. | |
333 Note: Shannon is measured with base 2 ("shannon") or base exp(1) (Metric.c_strShannonRichness) depending which method is called. | |
334 | |
335 :param ldAbundancies: List of values to compute metric (a sample). | |
336 :type: List List of doubles. | |
337 :param strMetric: The metric to measure. | |
338 :type: String Metric name (Use from constants above). | |
339 :return Double: Metric specified by strMetric derived from ldAbundancies. | |
340 """ | |
341 | |
342 if(strMetric == Metric.c_strShannonRichness): | |
343 return Metric.funcGetShannonRichnessIndex(ldSampleTaxaAbundancies=ldAbundancies) | |
344 elif(strMetric == Metric.c_strSimpsonDiversity): | |
345 return Metric.funcGetSimpsonsDiversityIndex(ldSampleTaxaAbundancies=ldAbundancies) | |
346 elif(strMetric == Metric.c_strInvSimpsonDiversity): | |
347 return Metric.funcGetInverseSimpsonsDiversityIndex(ldSampleTaxaAbundancies=ldAbundancies) | |
348 elif(strMetric == Metric.c_strObservedCount): | |
349 return Metric.funcGetObservedCount(ldSampleAbundances=ldAbundancies) | |
350 #Chao1 Needs NOT Normalized Abundance (Counts) | |
351 elif(strMetric == Metric.c_strChao1Diversity): | |
352 return Metric.funcGetChao1DiversityIndex(ldSampleTaxaAbundancies=ldAbundancies) | |
353 elif(strMetric in Metric.setAlphaDiversities): | |
354 return Metric.funcGetAlphaDiversity(liCounts=ldAbundancies, strMetric=strMetric) | |
355 else: | |
356 return False | |
357 | |
358 #Test 5 | |
359 @staticmethod | |
360 def funcBuildAlphaMetricsMatrix(npaSampleAbundance = None, lsSampleNames = None, lsDiversityMetricAlpha = None): | |
361 """ | |
362 Build a matrix of alpha diversity metrics for each sample | |
363 Row = metric, column = sample | |
364 | |
365 :param npaSampleAbundance: Observations (Taxa (row) x sample (column)) | |
366 :type: Numpy Array | |
367 :param lsSampleNames: List of sample names of samples to measure (do not include the taxa id column name or other column names which should not be read). | |
368 :type: List of strings Strings being samples to measure from the npaSampleAbundance. | |
369 :param lsDiversityMetricAlpha: List of diversity metrics to use in measuring. | |
370 :type: List of strings Strings being metrics to derived from the indicated samples. | |
371 :return List of List of doubles: Each internal list is a list of (floats) indicating a specific metric measurement method measuring multiple samples | |
372 [[metric1-sample1, metric1-sample2, metric1-sample3],[metric1-sample1, metric1-sample2, metric1-sample3]] | |
373 """ | |
374 | |
375 if not ValidateData.funcIsValidList(lsDiversityMetricAlpha): | |
376 lsDiversityMetricAlpha = [lsDiversityMetricAlpha] | |
377 | |
378 #Get amount of metrics | |
379 metricsCount = len(lsDiversityMetricAlpha) | |
380 | |
381 #Create return | |
382 returnMetricsMatrixRet = [[] for index in lsDiversityMetricAlpha] | |
383 | |
384 #For each sample get all metrics | |
385 #Place in list of lists | |
386 #[[metric1-sample1, metric1-sample2, metric1-sample3],[metric1-sample1, metric1-sample2, metric1-sample3]] | |
387 for sample in lsSampleNames: | |
388 sampleAbundance = npaSampleAbundance[sample] | |
389 for metricIndex in xrange(0,metricsCount): | |
390 returnMetricsMatrixRet[metricIndex].append(Metric.funcGetAlphaMetric(ldAbundancies = sampleAbundance, strMetric = lsDiversityMetricAlpha[metricIndex])) | |
391 return returnMetricsMatrixRet | |
392 | |
393 #Testing 6 cases | |
394 @staticmethod | |
395 def funcGetBetaMetric(npadAbundancies=None, sMetric=None, istrmTree=None, istrmEnvr=None, lsSampleOrder=None, fAdditiveInverse = False): | |
396 """ | |
397 Takes a matrix of values and returns a beta metric matrix. The metric returned is indicated by name (sMetric). | |
398 | |
399 :param npadAbundancies: Numpy array of sample abundances to measure against. | |
400 :type: Numpy Array Numpy array where row=samples and columns = features. | |
401 :param sMetric: String name of beta metric. Possibilities are listed in microPITA. | |
402 :type: String String name of beta metric. Possibilities are listed in microPITA. | |
403 :return Double: Measurement indicated by metric for given abundance list | |
404 """ | |
405 | |
406 if sMetric == Metric.c_strBrayCurtisDissimilarity: | |
407 mtrxDistance = Metric.funcGetBrayCurtisDissimilarity(ldSampleTaxaAbundancies=npadAbundancies) | |
408 elif sMetric == Metric.c_strInvBrayCurtisDissimilarity: | |
409 mtrxDistance = Metric.funcGetInverseBrayCurtisDissimilarity(ldSampleTaxaAbundancies=npadAbundancies) | |
410 elif sMetric in Metric.setBetaDiversities: | |
411 mtrxDistance = Metric.funcGetDissimilarityByName(ldSampleTaxaAbundancies=npadAbundancies, strMetric=sMetric) | |
412 elif sMetric == Metric.c_strUnifracUnweighted: | |
413 mtrxDistance = Metric.funcGetUnifracDistance(istrmTree=istrmTree,istrmEnvr=istrmEnvr,lsSampleOrder=lsSampleOrder,fWeighted=False) | |
414 # mtrxDistance = xReturn[0] if not type(xReturn) is BooleanType else xReturn | |
415 elif sMetric == Metric.c_strUnifracWeighted: | |
416 mtrxDistance = Metric.funcGetUnifracDistance(istrmTree=istrmTree,istrmEnvr=istrmEnvr,lsSampleOrder=lsSampleOrder,fWeighted=True) | |
417 # mtrxDistance = xReturn[0] if not type(xReturn) is BooleanType else xReturn | |
418 else: | |
419 mtrxDistance = False | |
420 if fAdditiveInverse and not type(mtrxDistance) is BooleanType: | |
421 if sMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]: | |
422 mtrxDistance = (1.0 - mtrxDistance[0],mtrxDistance[1]) | |
423 else: | |
424 mtrxDistance = 1.0 - mtrxDistance | |
425 return mtrxDistance | |
426 | |
427 #Test Cases 11 | |
428 @staticmethod | |
429 def funcReadMatrixFile(istmMatrixFile, lsSampleOrder=None): | |
430 """ | |
431 Reads in a file with a precalculated beta-diversty matrix. | |
432 | |
433 :param istmMatrixFile: File with beta-diversity matrix | |
434 :type: FileStream of String file path | |
435 """ | |
436 | |
437 #Read in data | |
438 f = csv.reader(open(istmMatrixFile,"r") if isinstance(istmMatrixFile, str) else istmMatrixFile, delimiter=ConstantsBreadCrumbs.c_matrixFileDelim ) | |
439 | |
440 #Get header | |
441 try: | |
442 lsHeader = f.next() | |
443 except StopIteration: | |
444 return (False,False) | |
445 lsHeaderReducedToSamples = [sHeader for sHeader in lsHeader if sHeader in lsSampleOrder] if lsSampleOrder else lsHeader[1:] | |
446 | |
447 #If no sample ordering is given, set the ordering to what is in the file | |
448 if not lsSampleOrder: | |
449 lsSampleOrder = lsHeaderReducedToSamples | |
450 | |
451 #Preallocate matrix | |
452 mtrxData = np.zeros(shape=(len(lsSampleOrder),len(lsSampleOrder))) | |
453 | |
454 #Make sure all samples requested are in the file | |
455 if(not len(lsSampleOrder) == len(lsHeaderReducedToSamples)): return False | |
456 | |
457 for lsLine in f: | |
458 if lsLine[0] in lsSampleOrder: | |
459 iRowIndex = lsSampleOrder.index(lsLine[0]) | |
460 | |
461 for i in xrange(1,len(lsSampleOrder)): | |
462 iColumnIndexComing = lsHeader.index(lsSampleOrder[i]) | |
463 iColumnIndexGoing = lsSampleOrder.index(lsSampleOrder[i]) | |
464 mtrxData[iRowIndex,iColumnIndexGoing] = lsLine[iColumnIndexComing] | |
465 mtrxData[iColumnIndexGoing,iRowIndex] = lsLine[iColumnIndexComing] | |
466 tpleMData = mtrxData.shape | |
467 mtrxData = mtrxData if any(sum(ld)>0 for ld in mtrxData) or ((tpleMData[0]==1) and (tpleMData[1]==1)) else [] | |
468 return (mtrxData,lsSampleOrder) | |
469 | |
470 #Test cases 2 | |
471 @staticmethod | |
472 def funcWriteMatrixFile(mtrxMatrix, ostmMatrixFile, lsSampleNames=None): | |
473 """ | |
474 Writes a square matrix to file. | |
475 | |
476 :param mtrxMatrix: Matrix to write to file | |
477 :type: Numpy array | |
478 :lsSampleNames: The names of the samples in the order of the matrix | |
479 :type: List of strings | |
480 :ostmBetaMatrixFile: File to write to | |
481 :type: String or file stream | |
482 """ | |
483 | |
484 if not sum(mtrxMatrix.shape)>0 or not ostmMatrixFile: | |
485 return False | |
486 | |
487 #Check to make sure the sample names are the correct length | |
488 tpleiShape = mtrxMatrix.shape | |
489 if not lsSampleNames: | |
490 lsSampleNames = range(tpleiShape[0]) | |
491 if not(len(lsSampleNames) == tpleiShape[0]): | |
492 print "".join(["Metric.funcWriteMatrixFile. Error= Length of sample names ("+str(len(lsSampleNames))+") and matrix ("+str(mtrxMatrix.shape)+") not equal."]) | |
493 return False | |
494 | |
495 #Write to file | |
496 ostmOut = csv.writer(open(ostmMatrixFile,"w") if isinstance(ostmMatrixFile,str) else ostmMatrixFile, delimiter=ConstantsBreadCrumbs.c_matrixFileDelim ) | |
497 | |
498 #Add the additional space at the beginning of the sample names to represent the id row/column | |
499 lsSampleNames = [""]+list(lsSampleNames) | |
500 | |
501 #Write header and each row to file | |
502 ostmOut.writerow(lsSampleNames) | |
503 [ostmOut.writerow([lsSampleNames[iIndex+1]]+mtrxMatrix[iIndex,].tolist()) for iIndex in xrange(tpleiShape[0])] | |
504 return True |