annotate galaxy_micropita/MicroPITA.py @ 3:8fb4630ab314 draft default tip

Uploaded
author sagun98
date Thu, 03 Jun 2021 17:07:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1 #!/usr/bin/env python
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
2 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
3 Author: Timothy Tickle
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
4 Description: Class to Run analysis for the microPITA paper
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
5 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
6
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
7 #####################################################################################
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
8 #Copyright (C) <2012>
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
9 #
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
10 #Permission is hereby granted, free of charge, to any person obtaining a copy of
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
11 #this software and associated documentation files (the "Software"), to deal in the
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
12 #Software without restriction, including without limitation the rights to use, copy,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
13 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
14 #and to permit persons to whom the Software is furnished to do so, subject to
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
15 #the following conditions:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
16 #
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
17 #The above copyright notice and this permission notice shall be included in all copies
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
18 #or substantial portions of the Software.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
19 #
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
20 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
21 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
22 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
23 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
24 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
25 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
26 #####################################################################################
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
27
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
28 __author__ = "Timothy Tickle"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
29 __copyright__ = "Copyright 2012"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
30 __credits__ = ["Timothy Tickle"]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
31 __license__ = "MIT"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
32 __maintainer__ = "Timothy Tickle"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
33 __email__ = "ttickle@sph.harvard.edu"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
34 __status__ = "Development"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
35
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
36 import sys
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
37 import argparse
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
38 from src.breadcrumbs.src.AbundanceTable import AbundanceTable
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
39 from src.breadcrumbs.src.ConstantsBreadCrumbs import ConstantsBreadCrumbs
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
40 from src.breadcrumbs.src.Metric import Metric
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
41 from src.breadcrumbs.src.KMedoids import Kmedoids
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
42 from src.breadcrumbs.src.MLPYDistanceAdaptor import MLPYDistanceAdaptor
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
43 from src.breadcrumbs.src.SVM import SVM
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
44 from src.breadcrumbs.src.UtilityMath import UtilityMath
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
45
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
46 from src.ConstantsMicropita import ConstantsMicropita
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
47 import csv
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
48 import logging
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
49 import math
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
50 import mlpy
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
51 import numpy as np
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
52 import operator
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
53 import os
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
54 import random
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
55 import scipy.cluster.hierarchy as hcluster
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
56 import scipy.spatial.distance
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
57 from types import *
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
58
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
59 class MicroPITA:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
60 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
61 Selects samples from a first tier of a multi-tiered study to be used in a second tier.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
62 Different methods can be used for selection.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
63 The expected input is an abundance table (and potentially a text file of targeted features,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
64 if using the targeted features option). Output is a list of samples exhibiting the
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
65 characteristics of interest.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
66 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
67
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
68 #Constants
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
69 #Diversity metrics Alpha
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
70 c_strInverseSimpsonDiversity = Metric.c_strInvSimpsonDiversity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
71 c_strChao1Diversity = Metric.c_strChao1Diversity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
72
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
73 #Diversity metrics Beta
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
74 c_strBrayCurtisDissimilarity = Metric.c_strBrayCurtisDissimilarity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
75
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
76 #Additive inverses of diversity metrics beta
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
77 c_strInvBrayCurtisDissimilarity = Metric.c_strInvBrayCurtisDissimilarity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
78
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
79 #Technique Names
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
80 ConstantsMicropita.c_strDiversity2 = ConstantsMicropita.c_strDiversity+"_C"
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
81
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
82 #Targeted feature settings
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
83 c_strTargetedRanked = ConstantsMicropita.c_strTargetedRanked
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
84 c_strTargetedAbundance = ConstantsMicropita.c_strTargetedAbundance
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
85
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
86 #Technique groupings
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
87 # c_lsDiversityMethods = [ConstantsMicropita.c_strDiversity,ConstantsMicropita.c_strDiversity2]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
88
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
89 #Converts ecology metrics into standardized method selection names
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
90 dictConvertAMetricDiversity = {c_strInverseSimpsonDiversity:ConstantsMicropita.c_strDiversity, c_strChao1Diversity:ConstantsMicropita.c_strDiversity2}
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
91 # dictConvertMicroPITAToAMetric = {ConstantsMicropita.c_strDiversity:c_strInverseSimpsonDiversity, ConstantsMicropita.c_strDiversity2:c_strChao1Diversity}
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
92 dictConvertBMetricToMethod = {c_strBrayCurtisDissimilarity:ConstantsMicropita.c_strRepresentative}
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
93 dictConvertInvBMetricToMethod = {c_strBrayCurtisDissimilarity:ConstantsMicropita.c_strExtreme}
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
94
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
95 #Linkage used in the Hierarchical clustering
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
96 c_strHierarchicalClusterMethod = 'average'
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
97
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
98 ####Group 1## Diversity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
99 #Testing: Happy path Testing (8)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcGetTopRankedSamples(self, lldMatrix = None, lsSampleNames = None, iTopAmount = None):
    """
    Rank the samples of every measurement row and keep the highest scoring ones.

    Each inner list of lldMatrix is ranked independently from the highest to the lowest value
    (ties keep their original positional order) and the first iTopAmount entries are kept.
    Either the positional indices are returned or, when lsSampleNames is given, the sample
    names found at those indices.

    :param lldMatrix: List of lists [[value,value,value,value],[value,value,value,value]].
    :type: List of lists List of measurements. Each list is a different measurement. Each measurement is positionally related to a sample.
    :param lsSampleNames: List of sample names positionally related (the same) to each list (Optional).
    :type: List of strings List of strings.
    :param iTopAmount: The amount of top measured samples (assumes the higher measurements are better). None keeps every entry, as it is used as a slice bound.
    :type: integer Integer amount of sample names/ indices to return.
    :return List: One list per row of lldMatrix holding the selected sample names (or indices when no names are given). Empty list when lldMatrix is None.
    """
    #Nothing to rank, the None default can not be iterated
    if lldMatrix is None:
        return []

    topRankListRet = []
    for rowMetrics in lldMatrix:
        #Rank the positions by their measurement, highest first.
        #sorted() is used because a range object can not be sorted in place in Python 3.
        liRankedIndices = sorted(range(len(rowMetrics)), key = rowMetrics.__getitem__, reverse = True)[:iTopAmount]

        if lsSampleNames:
            topRankListRet.append([lsSampleNames[iIndex] for iIndex in liRankedIndices])
        else:
            topRankListRet.append(liRankedIndices)

    return topRankListRet
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
126
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
127 ####Group 2## Representative Dissimilarity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
128 #Testing: Happy path tested 1
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcGetCentralSamplesByKMedoids(self, npaMatrix=None, sMetric=None, lsSampleNames=None, iNumberSamplesReturned=0, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Gets centroid samples by k-medoids clustering of a given matrix.

    :param npaMatrix: Numpy array whose rows are counted as samples (the first dimension is used as the sample count).
    :type: Numpy array Abundance Data.
    :param sMetric: String name of beta metric used as the distance metric.
    :type: String String name of beta metric.
    :param lsSampleNames: The names of the samples, positionally related to the rows of npaMatrix.
    :type: List List of strings
    :param iNumberSamplesReturned: Number of samples to return, each will be the medoid of a cluster.
    :type: Integer Number of samples to return
    :param istmBetaMatrix: File with beta-diversity matrix. When given it is read instead of calculating the metric.
    :type: File stream or file path string
    :param istrmTree: Passed unchanged to Metric.funcGetBetaMetric.
    :param istrmEnvr: Passed unchanged to Metric.funcGetBetaMetric.
    :return List: List of selected sample names, or False on error.
    """

    #Count of how many rows
    sampleCount = npaMatrix.shape[0]
    if iNumberSamplesReturned > sampleCount:
        logging.error("MicroPITA.funcGetCentralSamplesByKMedoids:: There are not enough samples to return the amount of samples specified. Return sample count = "+str(iNumberSamplesReturned)+". Sample number = "+str(sampleCount)+".")
        return False

    #If the cluster count is equal to the sample count return all samples
    if sampleCount == iNumberSamplesReturned:
        return list(lsSampleNames)

    #Get distance matrix, read from file when supplied otherwise calculated from the abundances
    if istmBetaMatrix:
        distanceMatrix = scipy.spatial.distance.squareform(Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix,lsSampleOrder=lsSampleNames)[0])
    else:
        distanceMatrix = Metric.funcGetBetaMetric(npadAbundancies=npaMatrix, sMetric=sMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames)

    #A boolean result signals failure (isinstance is used as types.BooleanType does not exist in Python 3)
    if isinstance(distanceMatrix, bool):
        logging.error("MicroPITA.funcGetCentralSamplesByKMedoids:: Could not read in the supplied distance matrix, returning false.")
        return False

    # Handle unifrac output
    # NOTE(review): presumably the unifrac metrics return a container whose first element is the matrix - confirm.
    # This indexing is also applied when the matrix was read from istmBetaMatrix.
    if sMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
        distanceMatrix = distanceMatrix[0]

    #Log distance matrix
    logging.debug("MicroPITA.funcGetCentralSamplesByKMedoids:: Distance matrix for representative selection using metric="+str(sMetric))

    distance = MLPYDistanceAdaptor(npaDistanceMatrix=distanceMatrix, fIsCondensedMatrix=True)

    #Create object to determine clusters/medoids
    medoidsMaker = Kmedoids(k=iNumberSamplesReturned, dist=distance)
    #medoidsData includes(1d numpy array, medoids indexes;
    #              1d numpy array, non-medoids indexes;
    #              1d numpy array, cluster membership for non-medoids;
    #              double, cost of configuration)
    #The clustering is given one single element list per sample holding the sample index,
    #the distance adaptor is constructed from the precomputed distance matrix.
    lliIndicesMatrix = [[iIndexPosition] for iIndexPosition in range(sampleCount)]
    medoidsData = medoidsMaker.compute(np.array(lliIndicesMatrix))
    logging.debug("MicroPITA.funcGetCentralSamplesByKMedoids:: Results from the kmedoid method in representative selection:")
    logging.debug(str(medoidsData))

    #Return the names of the medoids
    selectedIndexes = medoidsData[0]
    return [lsSampleNames[selectedIndexes[iPosition]] for iPosition in range(iNumberSamplesReturned)]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
188
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
189 ####Group 3## Highest Dissimilarity
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
190 #Testing: Happy path tested
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcSelectExtremeSamplesFromHClust(self, strBetaMetric, npaAbundanceMatrix, lsSampleNames, iSelectSampleCount, istmBetaMatrix=None, istrmTree=None, istrmEnvr=None):
    """
    Select extreme samples from HClustering.

    The additive inverse of the beta metric is hierarchically clustered and the samples
    paired in the first rows of the linkage matrix are returned until enough are selected.

    :param strBetaMetric: The beta metric to use for distance matrix generation.
    :type: String The name of the beta metric to use.
    :param npaAbundanceMatrix: Numpy array where row=samples and columns=features.
    :type: Numpy Array Abundance data.
    :param lsSampleNames: The names of the sample.
    :type: List List of strings.
    :param iSelectSampleCount: Number of samples to select (return).
    :type: Integer Integer number of samples returned.
    :param istmBetaMatrix: File with beta-diversity matrix. When given it is read (and inverted as 1-value) instead of calculating the metric.
    :type: File stream or file path string
    :param istrmTree: Passed unchanged to Metric.funcGetBetaMetric.
    :param istrmEnvr: Passed unchanged to Metric.funcGetBetaMetric.
    :return Samples: List of samples, or False on error. Empty list when less than one sample is requested.
    """

    #Nothing requested, nothing selected
    #(without this guard the selection count is never matched and every sample is returned)
    if iSelectSampleCount < 1:
        return []

    #If they want all the sample count, return all sample names (as a copy, the caller's list is not shared)
    iSampleCount = npaAbundanceMatrix.shape[0]
    if iSelectSampleCount == iSampleCount:
        return list(lsSampleNames)

    #Generate beta matrix
    #Returns condensed matrix
    if istmBetaMatrix:
        tempDistanceMatrix = scipy.spatial.distance.squareform(Metric.funcReadMatrixFile(istmMatrixFile=istmBetaMatrix,lsSampleOrder=lsSampleNames)[0])
    else:
        tempDistanceMatrix = Metric.funcGetBetaMetric(npadAbundancies=npaAbundanceMatrix, sMetric=strBetaMetric, istrmTree=istrmTree, istrmEnvr=istrmEnvr, lsSampleOrder=lsSampleNames, fAdditiveInverse = True)

    #A boolean result signals failure. Checked before any indexing so a failure is reported
    #instead of raising (isinstance is used as types.BooleanType does not exist in Python 3)
    if isinstance(tempDistanceMatrix, bool):
        logging.error("MicroPITA.funcSelectExtremeSamplesFromHClust:: Could not read in the supplied distance matrix, returning false.")
        return False

    # Handle unifrac output
    # NOTE(review): presumably the unifrac metrics return a container whose first element is the matrix - confirm.
    if strBetaMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
        tempDistanceMatrix = tempDistanceMatrix[0]

    #A matrix read from file is not inverted yet
    if istmBetaMatrix:
        tempDistanceMatrix = 1-tempDistanceMatrix

    #Feed beta matrix to linkage to cluster
    #Send condensed matrix
    linkageMatrix = hcluster.linkage(tempDistanceMatrix, method=self.c_strHierarchicalClusterMethod)

    #Extract cluster information from dendrogram
    #The linkage matrix is of the form
    #[[int1 int2 double int3],...]
    #int1 and int2 are the paired nodes indexed at 0 and up.
    #Each row is an entry for a branch, the branches are numbered as they appear
    #starting at the sample count.
    #This means that if a number is in the row and is = sample count or greater it is not
    #terminal and is instead a branch.
    #This method just takes the lowest metric measurement (highest distance pairs/clusters)
    lsReturnSamplesRet = []
    for row in linkageMatrix:
        #Add each node of the pair one at a time, stopping when enough samples are selected.
        for iNode in (int(row[0]), int(row[1])):
            #Only terminal nodes (samples) are selected, not branches of the dendrogram
            if iNode < iSampleCount:
                lsReturnSamplesRet.append(lsSampleNames[iNode])
                if len(lsReturnSamplesRet) == iSelectSampleCount:
                    return lsReturnSamplesRet

    #Return selected samples
    return lsReturnSamplesRet
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
268
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
269 ####Group 4## Rank Average of user Defined Taxa
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
270 #Testing: Happy Path Tested
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcGetAverageAbundanceSamples(self, abndTable, lsTargetedFeature, fRank=False):
    """
    Averages feature abundance or ranked abundance per sample and sorts the samples by it.

    :param abndTable: Abundance Table to analyse
    :type: AbundanceTable Abundance Table
    :param lsTargetedFeature: String names
    :type: list list of string names of features (bugs) which are measured after ranking against the full sample
    :param fRank: Indicates to rank the abundance before getting the average abundance of the features (default false)
    :type: boolean Flag indicating ranking abundance before calculating average feature measurement (false= no ranking)
    :return List of lists or boolean: List of lists or False on error. One internal list per sample indicating the sample,
                                      the average rank and the average abundance. Lists will already be sorted.
                                      For not Ranked [[sample,-1,average abundance of selected feature]] sorted by descending abundance
                                      For Ranked [[sample,average ranked abundance, average abundance of selected feature]] sorted by ascending rank
                                      Error Returns false
    """

    llAbundance = abndTable.funcGetAverageAbundancePerSample(lsTargetedFeature)
    if not llAbundance:
        logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not get average abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
        return False

    #Add a space for ranking if needed
    #Not ranked will be [[sSample,-1,average abundance]]
    #(where the constant -1 will not discriminate ties if used in later functions, so this generalizes)
    #Ranked will be [[sSample, average rank, average abundance]]
    llRetAbundance = [[llist[0],-1,llist[1]] for llist in llAbundance]

    #Rank if needed
    if fRank:
        abndRanked = abndTable.funcRankAbundance()
        if abndRanked is None:
            logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not rank the abundance table, returned false.")
            return False
        llRetRank = abndRanked.funcGetAverageAbundancePerSample(lsTargetedFeature)
        if not llRetRank:
            logging.error("MicroPITA.funcGetAverageAbundanceSamples:: Could not get average ranked abundance, returned false. Make sure the features (bugs) are spelled correctly and in the abundance table.")
            return False
        #Replace the placeholder with the average rank of each sample
        dictRanks = dict(llRetRank)
        llRetAbundance = [[a[0],dictRanks[a[0]],a[2]] for a in llRetAbundance]

    #Sort first for ties and then for the main feature.
    #Sorting is stable so the abundance order is kept among samples with equal rank.
    #Not ranked: descending abundance. Ranked: ascending abundance as the tie break.
    if not fRank or ConstantsMicropita.c_fBreakRankTiesByDiversity:
        llRetAbundance = sorted(llRetAbundance, key = operator.itemgetter(2), reverse = not fRank)
    if fRank:
        #Ascending average rank
        llRetAbundance = sorted(llRetAbundance, key = operator.itemgetter(1))
    return llRetAbundance
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
316
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
317 #Testing: Happy Path Tested
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcSelectTargetedTaxaSamples(self, abndMatrix, lsTargetedTaxa, iSampleSelectionCount, sMethod = ConstantsMicropita.lsTargetedFeatureMethodValues[0]):
    """
    Selects samples with the highest ranks or abundance of targeted features.
    If ranked, select the highest abundance for tie breaking

    :param abndMatrix: Abundance table to analyse
    :type: AbundanceTable Abundance table
    :param lsTargetedTaxa: List of features
    :type: list list of strings
    :param iSampleSelectionCount: Number of samples to select
    :type: integer integer
    :param sMethod: Method to select targeted features
    :type: string String (Can be values found in ConstantsMicropita.lsTargetedFeatureMethodValues)
    :return List of strings: List of sample names which were selected
    List of strings Empty list is returned on an error.
    """

    #Check data (None and an empty list both mean that no features were given)
    if lsTargetedTaxa is None or len(lsTargetedTaxa) < 1:
        logging.error("MicroPITA.funcSelectTargetedTaxaSamples. Taxa defined selection was requested but no features were given.")
        return []

    #Ranking is used only when the method keyword matches the ranked keyword (case insensitive)
    fRank = sMethod.lower() == self.c_strTargetedRanked.lower()
    lsTargetedSamples = self.funcGetAverageAbundanceSamples(abndTable=abndMatrix, lsTargetedFeature=lsTargetedTaxa,
                                                            fRank=fRank)

    #The helper signals an error by returning the literal False
    if lsTargetedSamples is False:
        logging.error("MicroPITA.funcSelectTargetedTaxaSamples:: Was not able to select for the features given. So targeted feature selection was not performed. Check to make sure the features are spelled correctly and exist in the abundance file.")
        return []

    #Select from results (each entry starts with the sample name)
    return [sSample[0] for sSample in lsTargetedSamples[:iSampleSelectionCount]]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
349
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
350 ####Group 5## Random
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
351 #Testing: Happy path Tested
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcGetRandomSamples(self, lsSamples=None, iNumberOfSamplesToReturn=0):
    """
    Returns random sample names of the number given. No replacement.

    :param lsSamples: List of sample names
    :type: list list of strings (None is treated as no samples)
    :param iNumberOfSamplesToReturn: Number of samples to select
    :type: integer integer.
    :return List: List of selected samples (strings), in the order they appear in lsSamples.
    The given list itself is returned when as many or more samples are requested than exist.
    """

    #The default of None means there is nothing to select from
    if lsSamples is None:
        return []

    #Input sample count
    sampleCount = len(lsSamples)

    #Return the full list if they ask for as many or more samples than exist
    if iNumberOfSamplesToReturn >= sampleCount:
        return lsSamples

    #Get the random indices for the sample (without replacement)
    #Held in a set so the membership test below does not rescan a list per sample
    setRandomIndices = set(random.sample(range(sampleCount), iNumberOfSamplesToReturn))

    #Keep only the samples whose index was drawn, preserving the original order
    return [sSample for iIndex, sSample in enumerate(lsSamples) if iIndex in setRandomIndices]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
375
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
376 #Happy path tested (case 3)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcGetAveragePopulation(self, abndTable, lfCompress):
    """
    Averages each row of the abundance table over the samples flagged for inclusion.

    :param abndTable: AbundanceTable of data to be averaged
    :type: AbundanceTable
    :param lfCompress: One flag per sample; a false flag removes that sample before averaging
    :type: List of booleans
    :return List of doubles: One average per row of the table; an empty list when no sample is flagged.
    """
    #Nothing is flagged so there is nothing to average
    if sum(lfCompress) == 0:
        return []

    #The first element of every row is skipped, the remaining values are filtered by the flags
    lldKeptMeasurements = (np.compress(lfCompress, list(tplRow)[1:], axis=0)
                           for tplRow in abndTable.funcGetAbundanceCopy())

    #Average what is left of each row
    return [sum(ldKept)/float(len(ldKept)) for ldKept in lldKeptMeasurements]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
398
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
399 #Happy path tested (2 cases)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcGetDistanceFromAverage(self, abndTable,ldAverage,lsSamples,lfSelected):
    """
    Measures how far each included sample is from an average sample
    (using Metric.funcGetBrayCurtisDissimilarity).
    A sample is included when the flag at its position in lfSelected is true;
    the flags are matched to lsSamples by position.

    :param abndTable: Abundance table holding the data to be analyzed.
    :type: AbundanceTable
    :param ldAverage: Average population (Average features of the abundance table of samples)
    :type: List of doubles which represent the average population
    :param lsSamples: These are the only samples used in the analysis
    :type: List of strings (sample ids)
    :param lfSelected: Samples to be included in the analysis
    :type: List of boolean (true means include)
    :return: List of distances (doubles), one per included sample in the order of lsSamples
    """
    #Work out which samples are flagged before measuring anything
    lsIncludedSamples = []
    for iPosition, sCandidate in enumerate(lsSamples):
        if lfSelected[iPosition]:
            lsIncludedSamples.append(sCandidate)

    #Pair each included sample with the average and keep the first value of the dissimilarity result
    return [Metric.funcGetBrayCurtisDissimilarity(np.array([abndTable.funcGetSample(sIncluded),ldAverage]))[0]
            for sIncluded in lsIncludedSamples]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
425
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
426 #Happy path tested (1 case)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcMeasureDistanceFromLabelToAverageOtherLabel(self, abndTable, lfGroupOfInterest, lfGroupOther):
    """
    Get the distance of samples from one label from the average sample of not the label.
    Note: This assumes 2 classes.

    :param abndTable: Table of data to work out of.
    :type: Abundance Table
    :param lfGroupOfInterest: Boolean indicator of the sample being in the first group.
    :type: List of booleans, true indicating an individual in the group of interest.
    :param lfGroupOther: Boolean indicator of the sample being in the other group.
    :type: List of booleans, true indicating an individual in the other group.
    :return List of tuples: [(string sample name, double distance),...] for the samples in the group of interest,
    in the order of the sample names of the abundance table.
    """
    #Get all sample names
    lsAllSamples = abndTable.funcGetSampleNames()

    #Get average population of the other group
    lAverageOther = self.funcGetAveragePopulation(abndTable=abndTable, lfCompress=lfGroupOther)

    #Get the distance of the group of interest from the average of the other label
    ldSelectedDistances = self.funcGetDistanceFromAverage(abndTable=abndTable, ldAverage=lAverageOther,
                                                          lsSamples=lsAllSamples, lfSelected=lfGroupOfInterest)

    #Names of the samples in the group of interest, matching the order of the distances
    lsSamplesOfInterest = [lsAllSamples[iindex] for iindex, fGroup in enumerate(lfGroupOfInterest) if fGroup]

    #Materialize the pairs so a real list is returned (a bare zip is a one-shot iterator in Python 3)
    return list(zip(lsSamplesOfInterest, ldSelectedDistances))
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
451
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
452 #Happy path tested (1 test case)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcPerformDistanceSelection(self, abndTable, iSelectionCount, sLabel, sValueOfInterest):
    """
    Given metadata, metadata of one value (sValueOfInterest) is measured from the average (centroid) value of another label group.
    An iSelectionCount of samples is selected from the group of interest closest to and furthest from the centroid of the other group.
    When there are several other groups the distances to each of their centroids are averaged.

    :params abndTable: Abundance of measurements
    :type: AbundanceTable
    :params iSelectionCount: The number of samples selected per selection (closest / farthest).
    :type: Integer Integer greater than 0 (0 selects nothing)
    :params sLabel: ID of the metadata which is the supervised label
    :type: String
    :params sValueOfInterest: Metadata value in the sLabel metadata row of the abundance table which defines the group of interest.
    :type: String found in the abundance table metadata row indicated by sLabel.
    :return list list of tuples (samplename, distance) [[iSelectionCount of tuples closest to the other centroid], [iSelectionCount of tuples farthest from the other centroid], [all tuples of samples not selected]]
    """

    lsMetadata = abndTable.funcGetMetadata(sLabel)

    #Other metadata values
    #The value of interest is wrapped in a list so the whole value is removed;
    #set(sValueOfInterest) on a string would instead remove its individual characters
    lsUniqueOtherValues = list(set(lsMetadata)-set([sValueOfInterest]))

    #Get boolean indicator of values of interest
    lfLabelsInterested = [sValueOfInterest == sValue for sValue in lsMetadata]

    #Sum the distances of the items of interest from each of the other metadata values
    dictDistanceAverages = {}
    for sOtherLabel in lsUniqueOtherValues:
        #Get boolean indicator of labels not of interest
        lfLabelsOther = [sOtherLabel == sValue for sValue in lsMetadata]

        #Get the distances of the group of interest to the average of this other group
        ldValueDistances = dict(self.funcMeasureDistanceFromLabelToAverageOtherLabel(abndTable, lfLabelsInterested, lfLabelsOther))

        for sKey, dDistance in ldValueDistances.items():
            dictDistanceAverages[sKey] = dictDistanceAverages.get(sKey, 0) + dDistance

    #Finish average by dividing by length of lsUniqueOtherValues
    #(no division happens when there are no other values because no distances were collected)
    dOtherValueCount = float(len(lsUniqueOtherValues))
    ltpleAverageDistances = [(sKey, dTotal/dOtherValueCount) for sKey, dTotal in dictDistanceAverages.items()]

    #Sort to extract extremes
    ltpleAverageDistances = sorted(ltpleAverageDistances,key=operator.itemgetter(1))

    #Get the closest and farthest distances
    ltupleDiscriminantSamples = ltpleAverageDistances[:iSelectionCount]
    #A count of 0 has to select nothing; the slice [-0:] would return every sample
    ltupleDistinctSamples = ltpleAverageDistances[iSelectionCount*-1:] if iSelectionCount else []

    #Remove the selected samples from the larger population of distances (better visualization)
    setSelected = set(tpleSelected[0] for tpleSelected in ltupleDiscriminantSamples+ltupleDistinctSamples)

    #Return discriminant tuples, distinct tuples, other tuples
    return [ltupleDiscriminantSamples, ltupleDistinctSamples,
            [tplData for tplData in ltpleAverageDistances if tplData[0] not in setSelected]]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
504
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
505 #Run the supervised method surrounding distance from centroids
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
506 #Happy path tested (3 test cases)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def funcRunSupervisedDistancesFromCentroids(self, abundanceTable, fRunDistinct, fRunDiscriminant,
                                            xOutputSupFile, xPredictSupFile, strSupervisedMetadata,
                                            iSampleSupSelectionCount, lsOriginalSampleNames, lsOriginalLabels, fAppendFiles = False):
    """
    Supervised selection driven by how far the samples of one label sit from the centroids of the other labels.
    Every distinct metadata value is treated as a group of its own.

    :param abundanceTable: AbundanceTable
    :type: AbundanceTable Data to analyze
    :param fRunDistinct: Run distinct selection method
    :type: Boolean boolean (true runs method)
    :param fRunDiscriminant: Run discriminant method
    :type: Boolean boolean (true runs method)
    :param xOutputSupFile: File output from supervised methods detailing data going into the method.
    :type: String or FileStream
    :param xPredictSupFile: File output from supervised methods distance results from supervised methods.
    :type: String or FileStream
    :param strSupervisedMetadata: The metadata that will be used to group samples.
    :type: String
    :param iSampleSupSelectionCount: Number of samples to select
    :type: Integer int sample selection count
    :param lsOriginalSampleNames: List of the sample names, order is important and should be preserved from the abundanceTable.
    :type: List of samples
    :param lsOriginalLabels: Labels handed on to the SVM file helpers when the input file is written.
    :type: List
    :param fAppendFiles: Indicates that output files already exist and appending is occuring.
    :type: Boolean
    :return Selected Samples: A dictionary of selected samples by selection ID
    Dictionary {"Selection Method":["SampleID","SampleID"...]}
    """
    #Run each label against all of the others
    #Each entry holds [closest tuples, farthest tuples, remaining tuples] for that label
    dictlltpleDistanceMeasurements = {}
    for sMetadataValue in set(abundanceTable.funcGetMetadata(strSupervisedMetadata)):
        dictlltpleDistanceMeasurements[sMetadataValue] = list(self.funcPerformDistanceSelection(
            abndTable=abundanceTable, iSelectionCount=iSampleSupSelectionCount,
            sLabel=strSupervisedMetadata, sValueOfInterest=sMetadataValue))

    #Write the file that mirrors the input file of an SVM (MicropitaVis needs some of these files)
    if xOutputSupFile and fAppendFiles:
        SVM.funcUpdateSVMFileWithAbundanceTable(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
                                                lsOriginalLabels=lsOriginalLabels, lsSampleOrdering=lsOriginalSampleNames)
    elif xOutputSupFile:
        SVM.funcConvertAbundanceTableToSVMFile(abndAbundanceTable=abundanceTable, xOutputSVMFile=xOutputSupFile,
                                               sMetadataLabel=strSupervisedMetadata, lsOriginalLabels=lsOriginalLabels,
                                               lsSampleOrdering=lsOriginalSampleNames)

    #Collect the sample names per active method
    #Both methods may be active, so names are accumulated across all labels
    dictSelectedSamplesRet = {}
    for lltpleGroups in dictlltpleDistanceMeasurements.values():
        if fRunDistinct:
            lsDistinct = dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDistinct,[])
            lsDistinct.extend([tpleFar[0] for tpleFar in lltpleGroups[1]])
        if fRunDiscriminant:
            lsDiscriminant = dictSelectedSamplesRet.setdefault(ConstantsMicropita.c_strDiscriminant,[])
            lsDiscriminant.extend([tpleNear[0] for tpleNear in lltpleGroups[0]])

    #Write the file that mirrors the probabilistic output of an SVM (LibSVM)
    if xPredictSupFile:
        #Merge the three lists of each label into one list; labels without any tuple are left out
        dictFlattenedDistances = {}
        for sGroup, lltpleGroups in dictlltpleDistanceMeasurements.items():
            ltpleFlat = [tple for ltple in lltpleGroups for tple in ltple]
            if ltpleFlat:
                dictFlattenedDistances[sGroup] = ltpleFlat

        #Existing files are updated, otherwise a new file is written
        funcWritePredict = self._updatePredictFile if fAppendFiles else self._writeToPredictFile
        funcWritePredict(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xOutputSupFile,
                         dictltpleDistanceMeasurements=dictFlattenedDistances, abundanceTable=abundanceTable,
                         lsOriginalSampleNames=lsOriginalSampleNames)
    return dictSelectedSamplesRet
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
577
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
578 #Two happy path test cases
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def _updatePredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames):
    """
    Manages updating the predict file.
    The existing predict file is read, merged with the given measurements (the given
    measurements win when a sample occurs in both) and handed to _writeToPredictFile.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
    :type: FileStream or String file path
    :param xInputLabelsFile: File that as input to the supervised methods.
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements:
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: Passed on unchanged to _writeToPredictFile.
    :param lsOriginalSampleNames: Sample names; the name at position i labels data row i of the predict file.
    :type: List of strings
    """

    #A stream is closed and replaced by its path so the file can be read here and rewritten later
    if not isinstance(xPredictSupFile, str):
        xPredictSupFile.close()
        xPredictSupFile = xPredictSupFile.name

    #Read data in
    #The with block closes the file before the writer is called on the same path
    with open(xPredictSupFile,'r') as csvr:
        f = csv.reader(csvr,delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

        #The first column of every row is skipped, the remaining header entries name the label groups
        #next(f) works on Python 2 and 3 (f.next() exists only on Python 2)
        lsHeader = next(f)[1:]
        dictlltpleRead = dict([(sHeader,[]) for sHeader in lsHeader])

        for iSampleIndex, sRow in enumerate(f):
            for iDistanceIndex, dDistance in enumerate(sRow[1:]):
                #Entries holding the empty marker carry no measurement
                if dDistance != ConstantsMicropita.c_sEmptyPredictFileValue:
                    dictlltpleRead[lsHeader[iDistanceIndex]].append((lsOriginalSampleNames[iSampleIndex],dDistance))

    #Combine dictltpleDistanceMeasurements with the data read from the file
    #If they share a key then merge keeping parameter data
    #If they do not share the key, keep the full data
    dictNew = {}
    for sKey, ltpleGiven in dictltpleDistanceMeasurements.items():
        if sKey in dictlltpleRead:
            setGivenSamples = set(tple[0] for tple in ltpleGiven)
            dictNew[sKey] = ltpleGiven+[tple for tple in dictlltpleRead[sKey] if tple[0] not in setGivenSamples]
        else:
            dictNew[sKey] = ltpleGiven
    for sKey, ltpleRead in dictlltpleRead.items():
        if sKey not in dictltpleDistanceMeasurements:
            dictNew[sKey] = ltpleRead

    #Call writer
    self._writeToPredictFile(xPredictSupFile=xPredictSupFile, xInputLabelsFile=xInputLabelsFile,
                             dictltpleDistanceMeasurements=dictNew, abundanceTable=abundanceTable,
                             lsOriginalSampleNames=lsOriginalSampleNames, fFromUpdate=True)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
623
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
624 #2 happy path test cases
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def _writeToPredictFile(self, xPredictSupFile, xInputLabelsFile, dictltpleDistanceMeasurements, abundanceTable, lsOriginalSampleNames, fFromUpdate=False):
    """
    Write the predict file: a header row ("labels" followed by one column per
    label group) and then one row per sample in lsOriginalSampleNames holding
    the sample's label and its distance for each label group. Missing labels or
    distances are written as ConstantsMicropita.c_sEmptyPredictFileValue.

    :param xPredictSupFile: File that has predictions (distances) from the supervised method.
                            A path is opened for writing and closed here; a stream is written to and left open for the caller.
    :type: FileStream or String file path
    :param xInputLabelsFile: File that was input to the supervised methods (labels are read from it).
    :type: FileStream or String file path
    :param dictltpleDistanceMeasurements: Distances per label group. Not modified by this method.
    :type: Dictionary of lists of tuples {"labelgroup":[("SampleName",dDistance)...], "labelgroup":[("SampleName",dDistance)...]}
    :param abundanceTable: An abundance table of the sample data.
    :type: AbundanceTable
    :param lsOriginalSampleNames: Sample names, in the order the rows are written. When fFromUpdate is True
                                  they are also the sample names used to read the labels file;
                                  otherwise the sample names from the abundance table are used for that.
    :type: List of strings
    :param fFromUpdate: Indicates if this is part of an update to the file or not.
    :type: Boolean
    """

    #The label reader is given a file name, so unwrap a stream if one was given
    xInputLabelsFileName = xInputLabelsFile
    if not isinstance(xInputLabelsFile, str):
        xInputLabelsFileName = xInputLabelsFile.name

    #Read the labels {label:[sample,...]} and flip them to {sample:label}
    lsAllSampleNames = abundanceTable.funcGetSampleNames()
    lsLabels = SVM.funcReadLabelsFromFile(xSVMFile=xInputLabelsFileName,
                                          lsAllSampleNames=lsOriginalSampleNames if fFromUpdate else lsAllSampleNames,
                                          isPredictFile=False)
    dictLabels = {}
    for sLabel in lsLabels.keys():
        for sSample in lsLabels[sLabel]:
            dictLabels[sSample] = sLabel

    #Dictionary keys will be used to order the predict file
    #Materialized as a list so the header and every row share one fixed column order
    lsMeasurementKeys = list(dictltpleDistanceMeasurements)

    #Reformat into {labelgroup:{sample:distance}} for lookup by sample
    #Built as a separate dictionary so the caller's data is left untouched
    dictDistancesBySample = {}
    for sKey in lsMeasurementKeys:
        dictDistancesBySample[sKey] = dict(dictltpleDistanceMeasurements[sKey])

    #Only a file opened here is closed here; a stream handed in stays owned by the caller
    fOpenedHere = isinstance(xPredictSupFile, str)
    ostmPredict = open(xPredictSupFile, "w") if fOpenedHere else xPredictSupFile
    try:
        f = csv.writer(ostmPredict, delimiter=ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

        #Make header
        f.writerow(["labels"] + lsMeasurementKeys)

        #Make body of file
        for sSample in lsOriginalSampleNames:
            lsRow = [dictLabels.get(sSample, ConstantsMicropita.c_sEmptyPredictFileValue)]
            for sKey in lsMeasurementKeys:
                lsRow.append(str(dictDistancesBySample[sKey].get(sSample, ConstantsMicropita.c_sEmptyPredictFileValue)))
            f.writerow(lsRow)
    finally:
        if fOpenedHere:
            ostmPredict.close()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
668
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def _funcRunNormalizeSensitiveMethods(self, abndData, iSampleSelectionCount, dictSelectedSamples, lsAlphaMetrics, lsBetaMetrics, lsInverseBetaMetrics,
                                      fRunDiversity, fRunRepresentative, fRunExtreme, strAlphaMetadata=None,
                                      istmBetaMatrix=None, istrmTree=None, istrmEnvr=None, fInvertDiversity=False):
    """
    Manages running methods that are sensitive to normalization. This is called twice, once for the set of methods which should not be normalized and the other
    for the set that should be normalized.

    :param abndData: Abundance table object holding the samples to be measured.
    :type: AbundanceTable
    :param iSampleSelectionCount: The number of samples to select per method.
    :type: Integer
    :param dictSelectedSamples: Will be added to as samples are selected {"Method":["strSelectedSampleID","strSelectedSampleID"...]}.
    :type: Dictionary
    :param lsAlphaMetrics: List of alpha metrics to use on alpha metric dependent assays (like highest diversity).
    :type: List of strings
    :param lsBetaMetrics: List of beta metrics to use on beta metric dependent assays (like most representative).
    :type: List of strings
    :param lsInverseBetaMetrics: List of inverse beta metrics to use on inverse beta metric dependent assays (like most dissimilar).
    :type: List of strings
    :param fRunDiversity: Run Diversity based methods (true indicates run).
    :type: Boolean
    :param fRunRepresentative: Run Representative based methods (true indicates run).
    :type: Boolean
    :param fRunExtreme: Run Extreme based methods (true indicates run).
    :type: Boolean
    :param strAlphaMetadata: Metadata id whose values (converted to float) are used as the diversity measurements
                             instead of calculating lsAlphaMetrics.
    :type: String
    :param istmBetaMatrix: File that has a precalculated beta matrix. When given, the custom metric is used
                           in place of lsBetaMetrics / lsInverseBetaMetrics.
    :type: File stream or File path string
    :param istrmTree: File containing tree for phylogenetic beta-diversity analysis (passed through to the selection methods).
    :type: File stream or File path string
    :param istrmEnvr: File containing environment for phylogenetic beta-diversity analysis (passed through to the selection methods).
    :type: File stream or File path string
    :param fInvertDiversity: When true the diversity measurements are inverted (1/value) before ranking.
    :type: Boolean
    :return Selected Samples: Samples selected by methods (the dictSelectedSamples object that was passed in).
            Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
    """

    #Sample ids/names
    lsSampleNames = abndData.funcGetSampleNames()

    #Generate alpha metrics and get most diverse
    if fRunDiversity:

        #Names of the techniques, one per row of the alpha matrix
        lsMethodNames = [strAlphaMetadata] if strAlphaMetadata else lsAlphaMetrics

        if strAlphaMetadata:
            #If given an alpha-diversity metadata, use it as the single row of measurements
            internalAlphaMatrix = [[float(strNum) for strNum in abndData.funcGetMetadata(strAlphaMetadata)]]
        else:
            #Summed tables are reduced to their terminal nodes before measuring
            if abndData.funcIsSummed():
                npaAbundance = abndData.funcGetFeatureAbundanceTable(abndData.funcGetTerminalNodes()).funcGetAbundanceCopy()
            else:
                npaAbundance = abndData.funcGetAbundanceCopy()

            #Expects Observations (Taxa (row) x sample (column))
            #Returns [[metric1-sample1, metric1-sample2, metric1-sample3],[metric2-sample1, metric2-sample2, metric2-sample3]]
            internalAlphaMatrix = Metric.funcBuildAlphaMetricsMatrix(npaSampleAbundance=npaAbundance,
                                                                     lsSampleNames=lsSampleNames,
                                                                     lsDiversityMetricAlpha=lsAlphaMetrics)

        if internalAlphaMatrix:
            #Invert measurements
            #1.0 forces true division; a plain 1/value truncates to 0 for integer values on Python 2
            if fInvertDiversity:
                internalAlphaMatrix = [[1.0/max(dValue, ConstantsMicropita.c_smallNumber) for dValue in lsLine]
                                       for lsLine in internalAlphaMatrix]

            #Get top ranked alpha diversity by most diverse
            #Expects [[sample1,sample2,sample3...],[sample1,sample2,sample3..],...]
            #Returns [[sampleName1, sampleName2, sampleNameN],[sampleName1, sampleName2, sampleNameN]]
            mostDiverseAlphaSamplesIndexes = self.funcGetTopRankedSamples(lldMatrix=internalAlphaMatrix, lsSampleNames=lsSampleNames, iTopAmount=iSampleSelectionCount)

            #Add to results
            for index, strMetricName in enumerate(lsMethodNames):
                strSelectionMethod = self.dictConvertAMetricDiversity.get(strMetricName, ConstantsMicropita.c_strDiversity+"="+strMetricName)
                dictSelectedSamples.setdefault(strSelectionMethod, []).extend(mostDiverseAlphaSamplesIndexes[index])

    logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 1b")
    logging.info(dictSelectedSamples)

    #Generate beta metrics and select representative / extreme samples
    if fRunRepresentative or fRunExtreme:

        #Abundance matrix transposed
        npaTransposedAbundance = UtilityMath.funcTransposeDataMatrix(abndData.funcGetAbundanceCopy(), fRemoveAdornments=True)

        #Get center selection using clusters/tiling
        #This will be for beta metrics in normalized space
        if fRunRepresentative:

            if istmBetaMatrix:
                #Get representative dissimilarity samples using the precalculated (custom) matrix
                medoidSamples = self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=ConstantsMicropita.c_custom,
                                                                     lsSampleNames=lsSampleNames, iNumberSamplesReturned=iSampleSelectionCount,
                                                                     istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                if medoidSamples:
                    dictSelectedSamples.setdefault(ConstantsMicropita.c_strRepresentative+"="+ConstantsMicropita.c_custom, []).extend(medoidSamples)
            else:
                logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Performing representative selection on normalized data.")
                for bMetric in lsBetaMetrics:

                    #Get representative dissimilarity samples
                    medoidSamples = self.funcGetCentralSamplesByKMedoids(npaMatrix=npaTransposedAbundance, sMetric=bMetric,
                                                                         lsSampleNames=lsSampleNames, iNumberSamplesReturned=iSampleSelectionCount,
                                                                         istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                    if medoidSamples:
                        strSelectionMethod = self.dictConvertBMetricToMethod.get(bMetric, ConstantsMicropita.c_strRepresentative+"="+bMetric)
                        dictSelectedSamples.setdefault(strSelectionMethod, []).extend(medoidSamples)

        #Get extreme selection using clusters, tiling
        if fRunExtreme:
            logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Performing extreme selection on normalized data.")
            if istmBetaMatrix:

                #Samples for representative dissimilarity
                #This involves inverting the distance metric,
                #Taking the dendrogram level of where the number cluster == the number of samples to select
                #Returning a representative sample from each cluster
                extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=ConstantsMicropita.c_custom, npaAbundanceMatrix=npaTransposedAbundance,
                                                                         lsSampleNames=lsSampleNames, iSelectSampleCount=iSampleSelectionCount,
                                                                         istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                #Add selected samples
                if extremeSamples:
                    dictSelectedSamples.setdefault(ConstantsMicropita.c_strExtreme+"="+ConstantsMicropita.c_custom, []).extend(extremeSamples)

            else:
                #Run the same selection once per inverse beta metric in normalized space
                for bMetric in lsInverseBetaMetrics:

                    #Samples for representative dissimilarity
                    #This involves inverting the distance metric,
                    #Taking the dendrogram level of where the number cluster == the number of samples to select
                    #Returning a representative sample from each cluster
                    extremeSamples = self.funcSelectExtremeSamplesFromHClust(strBetaMetric=bMetric, npaAbundanceMatrix=npaTransposedAbundance,
                                                                             lsSampleNames=lsSampleNames, iSelectSampleCount=iSampleSelectionCount,
                                                                             istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr)

                    #Add selected samples
                    if extremeSamples:
                        strSelectionMethod = self.dictConvertInvBMetricToMethod.get(bMetric, ConstantsMicropita.c_strExtreme+"="+bMetric)
                        dictSelectedSamples.setdefault(strSelectionMethod, []).extend(extremeSamples)

    logging.info("MicroPITA.funcRunNormalizeSensitiveMethods:: Selected Samples 2,3b")
    logging.info(dictSelectedSamples)
    return dictSelectedSamples
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
800
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
801 def funcRun(self, strIDName, strLastMetadataName, istmInput,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
802 ostmInputPredictFile, ostmPredictFile, ostmCheckedFile, ostmOutput,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
803 cDelimiter, cFeatureNameDelimiter, strFeatureSelection,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
804 istmFeatures, iCount, lstrMethods, strLastRowMetadata = None, strLabel = None, strStratify = None,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
805 strCustomAlpha = None, strCustomBeta = None, strAlphaMetadata = None, istmBetaMatrix = None, istrmTree = None, istrmEnvr = None,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
806 iMinSeqs = ConstantsMicropita.c_liOccurenceFilter[0], iMinSamples = ConstantsMicropita.c_liOccurenceFilter[1], fInvertDiversity = False):
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
807 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
808 Manages the selection of samples given different metrics.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
809
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
810 :param strIDName: Sample Id metadata row
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
811 :type: String
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
812 :param strLastMetadataName: The id of the metadata positioned last in the abundance table.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
813 :type: String String metadata id.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
814 :param istmInput: File to store input data to supervised methods.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
815 :type: FileStream of String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
816 :param ostmInputPredictFile: File to store distances from supervised methods.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
817 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
818 :param ostmCheckedFile: File to store the AbundanceTable data after it is being checked.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
819 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
820 :param ostmOutPut: File to store sample selection by methods of interest.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
821 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
822 :param cDelimiter: Delimiter of abundance table.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
823 :type: Character Char (default TAB).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
824 :param cFeatureNameDelimiter: Delimiter of the name of features (for instance if they contain consensus lineages indicating clades).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
825 :type: Character (default |).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
826 :param stFeatureSelectionMethod: Which method to use to select features in a targeted manner (Using average ranked abundance or average abundance).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
827 :type: String (specific values indicated in ConstantsMicropita.lsTargetedFeatureMethodValues).
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
828 :param istmFeatures: File which holds the features of interest if using targeted feature methodology.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
829 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
830 :param iCount: Number of samples to select in each methods, supervised methods select this amount per label if possible.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
831 :type: Integer integer.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
832 :param lstrMethods: List of strings indicating selection techniques.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
833 :type: List of string method names
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
834 :param strLabel: The metadata used for supervised labels.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
835 :type: String
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
836 :param strStratify: The metadata used to stratify unsupervised data.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
837 :type: String
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
838 :param strCustomAlpha: Custom alpha diversity metric
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
839 :type: String
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
840 :param strCustomBeta: Custom beta diversity metric
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
841 :type: String
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
842 :param strAlphaMetadata: Metadata id which is a diveristy metric to use in highest diversity sampling
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
843 :type: String
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
844 :param istmBetaMatrix: File containing precalculated beta-diversity matrix for representative sampling
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
845 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
846 :param istrmTree: File containing tree for phylogentic beta-diversity analysis
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
847 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
848 :param istrmEnvr: File containing environment for phylogentic beta-diversity analysis
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
849 :type: FileStream or String file path
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
850 :param iMinSeqs: Minimum sequence in the occurence filter which filters all features not with a minimum number of sequences in each of a minimum number of samples.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
851 :type: Integer
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
852 :param iMinSamples: Minimum sample count for the occurence filter.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
853 :type: Integer
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
854 :param fInvertDiversity: When true will invert diversity measurements before using.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
855 :type: boolean
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
856 :return Selected Samples: Samples selected by methods.
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
857 Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
858 """
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
859
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
860 #Holds the top ranked samples from different metrics
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
861 #dict[metric name] = [samplename,samplename...]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
862 selectedSamples = dict()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
863
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
864 #If a target feature file is given make sure that targeted feature is in the selection methods, if not add
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
865 if ConstantsMicropita.c_strFeature in lstrMethods:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
866 if not istmFeatures:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
867 logging.error("MicroPITA.funcRun:: Did not receive both the Targeted feature file and the feature selection method. MicroPITA did not run.")
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
868 return False
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
869
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
870 #Diversity metrics to run
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
871 #Use custom metrics if specified
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
872 #Custom beta metrics set to normalized only, custom alpha metrics set to count only
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
873 diversityMetricsAlpha = [] if strCustomAlpha or strAlphaMetadata else [MicroPITA.c_strInverseSimpsonDiversity]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
874 diversityMetricsBeta = [] if istmBetaMatrix else [strCustomBeta] if strCustomBeta else [MicroPITA.c_strBrayCurtisDissimilarity]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
875 # inverseDiversityMetricsBeta = [MicroPITA.c_strInvBrayCurtisDissimilarity]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
876 diversityMetricsAlphaNoNormalize = [strAlphaMetadata] if strAlphaMetadata else [strCustomAlpha] if strCustomAlpha else []
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
877 diversityMetricsBetaNoNormalize = []
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
878 # inverseDiversityMetricsBetaNoNormalize = []
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
879
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
880 #Targeted taxa
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
881 userDefinedTaxa = []
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
882
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
883 #Perform different flows flags
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
884 c_RUN_MAX_DIVERSITY_1 = ConstantsMicropita.c_strDiversity in lstrMethods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
885 c_RUN_REPRESENTIVE_DISSIMILARITY_2 = ConstantsMicropita.c_strRepresentative in lstrMethods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
886 c_RUN_MAX_DISSIMILARITY_3 = ConstantsMicropita.c_strExtreme in lstrMethods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
887 c_RUN_RANK_AVERAGE_USER_4 = False
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
888 if ConstantsMicropita.c_strFeature in lstrMethods:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
889 c_RUN_RANK_AVERAGE_USER_4 = True
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
890 if not istmFeatures:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
891 logging.error("MicroPITA.funcRun:: No taxa file was given for taxa selection.")
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
892 return False
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
893 #Read in taxa list, break down to lines and filter out empty strings
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
894 userDefinedTaxa = filter(None,(s.strip( ) for s in istmFeatures.readlines()))
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
895 c_RUN_RANDOM_5 = ConstantsMicropita.c_strRandom in lstrMethods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
896 c_RUN_DISTINCT = ConstantsMicropita.c_strDistinct in lstrMethods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
897 c_RUN_DISCRIMINANT = ConstantsMicropita.c_strDiscriminant in lstrMethods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
898
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
899 #Read in abundance data
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
900 #Abundance is a structured array. Samples (column) by Taxa (rows) with the taxa id row included as the column index=0
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
901 #Abundance table object to read in and manage data
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
902 totalAbundanceTable = AbundanceTable.funcMakeFromFile(xInputFile=istmInput, lOccurenceFilter = [iMinSeqs, iMinSamples],
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
903 cDelimiter=cDelimiter, sMetadataID=strIDName, sLastMetadataRow=strLastRowMetadata,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
904 sLastMetadata=strLastMetadataName, cFeatureNameDelimiter=cFeatureNameDelimiter, xOutputFile=ostmCheckedFile)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
905 if not totalAbundanceTable:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
906 logging.error("MicroPITA.funcRun:: Could not read in the abundance table. Analysis was not performed."+
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
907 " This often occurs when the Last Metadata is not specified correctly."+
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
908 " Please check to make sure the Last Metadata selection is the row of the last metadata,"+
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
909 " all values after this selection should be microbial measurements and should be numeric.")
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
910 return False
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
911
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
912 lsOriginalLabels = SVM.funcMakeLabels(totalAbundanceTable.funcGetMetadata(strLabel)) if strLabel else strLabel
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
913
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
914 dictTotalMetadata = totalAbundanceTable.funcGetMetadataCopy()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
915 logging.debug("MicroPITA.funcRun:: Received metadata=" + str(dictTotalMetadata))
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
916 #If there is only 1 unique value for the labels, do not run the Supervised methods
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
917 if strLabel and ( len(set(dictTotalMetadata.get(strLabel,[]))) < 2 ):
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
918 logging.error("The label " + strLabel + " did not have 2 or more values. Labels found=" + str(dictTotalMetadata.get(strLabel,[])))
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
919 return False
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
920
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
921 #Run unsupervised methods###
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
922 #Stratify the data if need be and drop the old data
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
923 lStratifiedAbundanceTables = totalAbundanceTable.funcStratifyByMetadata(strStratify) if strStratify else [totalAbundanceTable]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
924
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
925 #For each stratified abundance block or for the unstratfified abundance
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
926 #Run the unsupervised blocks
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
927 fAppendSupFiles = False
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
928 for stratAbundanceTable in lStratifiedAbundanceTables:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
929 logging.info("MicroPITA.funcRun:: Running abundance block:"+stratAbundanceTable.funcGetName())
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
930
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
931 ###NOT SUMMED, NOT NORMALIZED
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
932 #Only perform if the data is not yet normalized
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
933 if not stratAbundanceTable.funcIsNormalized( ):
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
934 #Need to first work with unnormalized data
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
935 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
936
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
937 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
938 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlphaNoNormalize,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
939 lsBetaMetrics=diversityMetricsBetaNoNormalize,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
940 lsInverseBetaMetrics=diversityMetricsBetaNoNormalize,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
941 fRunDiversity=c_RUN_MAX_DIVERSITY_1,fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
942 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3, strAlphaMetadata=strAlphaMetadata,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
943 istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
944
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
945
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
946 #Generate selection by the rank average of user defined taxa
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
947 #Expects (Taxa (row) by Samples (column))
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
948 #Expects a column 0 of taxa id that is skipped
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
949 #Returns [(sample name,average,rank)]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
950 #SUMMED AND NORMALIZED
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
951 stratAbundanceTable.funcSumClades()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
952 #Normalize data at this point
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
953 stratAbundanceTable.funcNormalize()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
954 if c_RUN_RANK_AVERAGE_USER_4:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
955 selectedSamples[ConstantsMicropita.c_strFeature] = self.funcSelectTargetedTaxaSamples(abndMatrix=stratAbundanceTable,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
956 lsTargetedTaxa=userDefinedTaxa, iSampleSelectionCount=iCount, sMethod=strFeatureSelection)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
957 logging.info("MicroPITA.funcRun:: Selected Samples Rank")
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
958 logging.info(selectedSamples)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
959
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
960 ###SUMMED AND NORMALIZED analysis block
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
961 #Diversity based metric will move reduce to terminal taxa as needed
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
962 if c_RUN_MAX_DIVERSITY_1 or c_RUN_REPRESENTIVE_DISSIMILARITY_2 or c_RUN_MAX_DISSIMILARITY_3:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
963
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
964 self._funcRunNormalizeSensitiveMethods(abndData=stratAbundanceTable, iSampleSelectionCount=iCount,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
965 dictSelectedSamples=selectedSamples, lsAlphaMetrics=diversityMetricsAlpha,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
966 lsBetaMetrics=diversityMetricsBeta,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
967 lsInverseBetaMetrics=diversityMetricsBeta,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
968 fRunDiversity=c_RUN_MAX_DIVERSITY_1,fRunRepresentative=c_RUN_REPRESENTIVE_DISSIMILARITY_2,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
969 fRunExtreme=c_RUN_MAX_DISSIMILARITY_3,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
970 istmBetaMatrix=istmBetaMatrix, istrmTree=istrmTree, istrmEnvr=istrmEnvr, fInvertDiversity=fInvertDiversity)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
971
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
972 #5::Select randomly
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
973 #Expects sampleNames = List of sample names [name, name, name...]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
974 if(c_RUN_RANDOM_5):
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
975 #Select randomly from sample names
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
976 selectedSamples[ConstantsMicropita.c_strRandom] = self.funcGetRandomSamples(lsSamples=stratAbundanceTable.funcGetSampleNames(), iNumberOfSamplesToReturn=iCount)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
977 logging.info("MicroPITA.funcRun:: Selected Samples Random")
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
978 logging.info(selectedSamples)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
979
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
980 #Perform supervised selection
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
981 if c_RUN_DISTINCT or c_RUN_DISCRIMINANT:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
982 if strLabel:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
983 dictSelectionRet = self.funcRunSupervisedDistancesFromCentroids(abundanceTable=stratAbundanceTable,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
984 fRunDistinct=c_RUN_DISTINCT, fRunDiscriminant=c_RUN_DISCRIMINANT,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
985 xOutputSupFile=ostmInputPredictFile,xPredictSupFile=ostmPredictFile,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
986 strSupervisedMetadata=strLabel, iSampleSupSelectionCount=iCount,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
987 lsOriginalSampleNames = totalAbundanceTable.funcGetSampleNames(),
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
988 lsOriginalLabels = lsOriginalLabels,
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
989 fAppendFiles=fAppendSupFiles)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
990
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
991 [selectedSamples.setdefault(sKey,[]).extend(lValue) for sKey,lValue in dictSelectionRet.items()]
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
992
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
993 if not fAppendSupFiles:
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
994 fAppendSupFiles = True
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
995 logging.info("MicroPITA.funcRun:: Selected Samples Unsupervised")
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
996 logging.info(selectedSamples)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
997 return selectedSamples
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
998
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
999 #Testing: Happy path tested
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
@staticmethod
def funcWriteSelectionToFile(dictSelection,xOutputFilePath):
    """
    Writes the selection of samples by method to an output file.

    One delimited row is written per method: the method name followed by the samples selected by it.
    Nothing is written, and no file is opened, when the selection is empty.
    A file opened here from a string path is closed before returning; a stream handed in by the
    caller is left open for the caller to manage.

    :param dictSelection: The dictionary of selections by method to be written to a file.
    :type: Dictionary The dictionary of selections by method {"method":["sample selected","sample selected"...]}
    :param xOutputFilePath: FileStream or String path to file in which the dictionary is written.
    :type: String FileStream or String path to file
    """

    if not dictSelection:
        return

    #Only a stream opened by this function is closed by this function
    fOpenedHere = isinstance(xOutputFilePath, str)
    ostmSelection = open(xOutputFilePath,"w") if fOpenedHere else xOutputFilePath

    try:
        f = csv.writer(ostmSelection, delimiter=ConstantsMicropita.c_outputFileDelim )

        #Create output content from dictionary
        for sKey in dictSelection:
            f.writerow([sKey]+dictSelection[sKey])
            logging.debug("MicroPITA.funcRun:: Selected samples output to file:"+str(dictSelection[sKey]))
    finally:
        #Closing flushes the rows to disk and releases the handle even if a row could not be written
        if fOpenedHere:
            ostmSelection.close()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1021
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1022 #Testing: Happy Path tested
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
@staticmethod
def funcReadSelectionFileToDictionary(xInputFile):
    """
    Reads in an output selection file from micropita and formats it into a dictionary.

    The first column of every row is used as the method name and the remaining columns as the
    samples selected by that method. Empty rows (blank lines) are ignored.
    A file opened here from a string path is closed before returning; a stream handed in by the
    caller is left open for the caller to manage.

    :param xInputFile: String path to file or file stream to read and translate into a dictionary.
                       {"method":["sample selected","sample selected"...]}
    :type: FileStream or String Path to file
    :return Dictionary: Samples selected by methods.
                Dictionary {"Selection Method":["SampleID","SampleID","SampleID",...]}
    """

    #Only a stream opened by this function is closed by this function
    fOpenedHere = isinstance(xInputFile, str)
    istmSelection = open(xInputFile,'r') if fOpenedHere else xInputFile

    try:
        istmReader = csv.reader(istmSelection, delimiter = ConstantsMicropita.c_outputFileDelim)

        #The csv reader returns a blank line as an empty row, which has no method name to index
        #The dictionary is fully built here, before the stream is closed
        return dict((lsLine[0], lsLine[1:]) for lsLine in istmReader if lsLine)
    finally:
        if fOpenedHere:
            istmSelection.close()
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1040
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
#Set up arguments reader
#Command line interface of MicroPITA; argp is parsed in _main and its help text is prepended to the module documentation.

#Mode used to open the text input files.
#Python 2 needs the "U" flag to get universal newlines; Python 3 text mode already translates newlines
#and open() refuses the "U" flag from Python 3.11 on, so it is only requested on Python 2.
_c_strInputFileMode = "r" if sys.version_info[0] >= 3 else "rU"

argp = argparse.ArgumentParser( prog = "MicroPITA.py",
    description = """Selects samples from abundance tables based on various selection schemes.""" )

#Options most runs will set: how many samples to select and which selection methods to run
args = argp.add_argument_group( "Common", "Commonly modified options" )
args.add_argument(ConstantsMicropita.c_strCountArgument,"--num", dest="iCount", metavar = "samples", default = 10, type = int, help = ConstantsMicropita.c_strCountHelp)
#The default stays empty because the append action adds to the default; _main supplies the fallback method
args.add_argument("-m","--method", dest = "lstrMethods", metavar = "method", default = [], help = ConstantsMicropita.c_strSelectionTechniquesHelp,
    choices = ConstantsMicropita.c_lsAllMethods, action = "append")

#Custom diversity metrics and the optional inputs supporting them
args = argp.add_argument_group( "Custom", "Selecting and inputing custom metrics" )
args.add_argument("-a","--alpha", dest = "strAlphaDiversity", metavar = "AlphaDiversity", default = None, help = ConstantsMicropita.c_strCustomAlphaDiversityHelp,  choices = Metric.setAlphaDiversities)
args.add_argument("-b","--beta", dest = "strBetaDiversity", metavar = "BetaDiversity", default = None, help = ConstantsMicropita.c_strCustomBetaDiversityHelp,  choices = list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted])
args.add_argument("-q","--alphameta", dest = "strAlphaMetadata", metavar = "AlphaDiversityMetadata", default = None, help = ConstantsMicropita.c_strCustomAlphaDiversityMetadataHelp)
args.add_argument("-x","--betamatrix", dest = "istmBetaMatrix", metavar = "BetaDiversityMatrix", default = None, help = ConstantsMicropita.c_strCustomBetaDiversityMatrixHelp)
args.add_argument("-o","--tree", dest = "istrmTree", metavar = "PhylogeneticTree", default = None, help = ConstantsMicropita.c_strCustomPhylogeneticTreeHelp)
args.add_argument("-i","--envr", dest = "istrmEnvr", metavar = "EnvironmentFile", default = None, help = ConstantsMicropita.c_strCustomEnvironmentFileHelp)
args.add_argument("-f","--invertDiversity", dest = "fInvertDiversity", action="store_true", default = False, help = ConstantsMicropita.c_strInvertDiversityHelp)

args = argp.add_argument_group( "Miscellaneous", "Row/column identifiers and feature targeting options" )
args.add_argument("-d",ConstantsMicropita.c_strIDNameArgument, dest="strIDName", metavar="sample_id", help= ConstantsMicropita.c_strIDNameHelp)
args.add_argument("-l",ConstantsMicropita.c_strLastMetadataNameArgument, dest="strLastMetadataName", metavar = "metadata_id", default = None,
    help= ConstantsMicropita.c_strLastMetadataNameHelp)
args.add_argument("-r",ConstantsMicropita.c_strTargetedFeatureMethodArgument, dest="strFeatureSelection", metavar="targeting_method", default=ConstantsMicropita.lsTargetedFeatureMethodValues[0],
    choices=ConstantsMicropita.lsTargetedFeatureMethodValues, help= ConstantsMicropita.c_strTargetedFeatureMethodHelp)
args.add_argument("-t",ConstantsMicropita.c_strTargetedSelectionFileArgument, dest="istmFeatures", metavar="feature_file", type=argparse.FileType(_c_strInputFileMode), help=ConstantsMicropita.c_strTargetedSelectionFileHelp)
args.add_argument("-w",ConstantsMicropita.c_strFeatureMetadataArgument, dest="strLastFeatureMetadata", metavar="Last_Feature_Metadata", default=None, help=ConstantsMicropita.c_strFeatureMetadataHelp)

args = argp.add_argument_group( "Data labeling", "Metadata IDs for strata and supervised label values" )
args.add_argument("-e",ConstantsMicropita.c_strSupervisedLabelArgument, dest="strLabel", metavar= "supervised_id", help=ConstantsMicropita.c_strSupervisedLabelHelp)
args.add_argument("-s",ConstantsMicropita.c_strUnsupervisedStratifyMetadataArgument, dest="strUnsupervisedStratify", metavar="stratify_id",
    help= ConstantsMicropita.c_strUnsupervisedStratifyMetadataHelp)

args = argp.add_argument_group( "File formatting", "Rarely modified file formatting options" )
args.add_argument("-j",ConstantsMicropita.c_strFileDelimiterArgument, dest="cFileDelimiter", metavar="column_delimiter", default="\t", help=ConstantsMicropita.c_strFileDelimiterHelp)
args.add_argument("-k",ConstantsMicropita.c_strFeatureNameDelimiterArgument, dest="cFeatureNameDelimiter", metavar="taxonomy_delimiter", default="|", help=ConstantsMicropita.c_strFeatureNameDelimiterHelp)

#Logging level and the optional output files (all opened for writing by argparse)
args = argp.add_argument_group( "Debugging", "Debugging options - modify at your own risk!" )
args.add_argument("-v",ConstantsMicropita.c_strLoggingArgument, dest="strLogLevel", metavar = "log_level", default="WARNING",
    choices=ConstantsMicropita.c_lsLoggingChoices, help= ConstantsMicropita.c_strLoggingHelp)
args.add_argument("-c",ConstantsMicropita.c_strCheckedAbundanceFileArgument, dest="ostmCheckedFile", metavar = "output_qc", type = argparse.FileType("w"), help = ConstantsMicropita.c_strCheckedAbundanceFileHelp)
args.add_argument("-g",ConstantsMicropita.c_strLoggingFileArgument, dest="ostmLoggingFile", metavar = "output_log", type = argparse.FileType("w"), help = ConstantsMicropita.c_strLoggingFileHelp)
args.add_argument("-u",ConstantsMicropita.c_strSupervisedInputFile, dest="ostmInputPredictFile", metavar = "output_scaled", type = argparse.FileType("w"), help = ConstantsMicropita.c_strSupervisedInputFileHelp)
args.add_argument("-p",ConstantsMicropita.c_strSupervisedPredictedFile, dest="ostmPredictFile", metavar = "output_labels", type = argparse.FileType("w"), help = ConstantsMicropita.c_strSupervisedPredictedFileHelp)

#Positional arguments: the abundance table to read and the file the selection is written to
#NOTE(review): argparse only applies "default" to a positional argument declared with nargs "?" or "*",
#so the sys.stdin / sys.stdout defaults below are presumably never used - confirm the intent before relying on them.
argp.add_argument("istmInput", metavar = "input.pcl/biome", type = argparse.FileType(_c_strInputFileMode), help = ConstantsMicropita.c_strAbundanceFileHelp,
    default = sys.stdin)
argp.add_argument("ostmOutput", metavar = "output.txt", type = argparse.FileType("w"), help = ConstantsMicropita.c_strGenericOutputDataFileHelp,
    default = sys.stdout)

#Prepend the generated command line help (as an indented literal block) to the module documentation
__doc__ = "::\n\n\t" + argp.format_help( ).replace( "\n", "\n\t" ) + __doc__
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1091
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
def _main( ):
    """
    Command line entry point.

    Parses the command line with argp, configures logging, runs the microPITA analysis and
    writes the resulting selection to the output given on the command line.

    :return: -1 when the analysis did not return a selection, otherwise None.
    """
    parsedArgs = argp.parse_args( )

    #Set up logger
    #Log to the requested log file when one was given, otherwise to standard error
    ostmLog = parsedArgs.ostmLoggingFile or sys.stderr
    iLogLevel = getattr(logging, parsedArgs.strLogLevel.upper(), None)
    logging.basicConfig(stream = ostmLog, filemode = 'w', level = iLogLevel)

    #Run micropita
    logging.info("MicroPITA:: Start microPITA")
    microPITA = MicroPITA()

    #Argparse will append to the default but will not remove the default so the fallback method is set here
    if not parsedArgs.lstrMethods:
        parsedArgs.lstrMethods = [ConstantsMicropita.c_strRepresentative]

    #Map the parsed command line values onto the keyword arguments of funcRun
    dictRunArguments = {
        "strIDName": parsedArgs.strIDName,
        "strLastMetadataName": parsedArgs.strLastMetadataName,
        "istmInput": parsedArgs.istmInput,
        "ostmInputPredictFile": parsedArgs.ostmInputPredictFile,
        "ostmPredictFile": parsedArgs.ostmPredictFile,
        "ostmCheckedFile": parsedArgs.ostmCheckedFile,
        "ostmOutput": parsedArgs.ostmOutput,
        "cDelimiter": parsedArgs.cFileDelimiter,
        "cFeatureNameDelimiter": parsedArgs.cFeatureNameDelimiter,
        "istmFeatures": parsedArgs.istmFeatures,
        "strFeatureSelection": parsedArgs.strFeatureSelection,
        "iCount": parsedArgs.iCount,
        "strLastRowMetadata": parsedArgs.strLastFeatureMetadata,
        "strLabel": parsedArgs.strLabel,
        "strStratify": parsedArgs.strUnsupervisedStratify,
        "strCustomAlpha": parsedArgs.strAlphaDiversity,
        "strCustomBeta": parsedArgs.strBetaDiversity,
        "strAlphaMetadata": parsedArgs.strAlphaMetadata,
        "istmBetaMatrix": parsedArgs.istmBetaMatrix,
        "istrmTree": parsedArgs.istrmTree,
        "istrmEnvr": parsedArgs.istrmEnvr,
        "lstrMethods": parsedArgs.lstrMethods,
        "fInvertDiversity": parsedArgs.fInvertDiversity,
    }
    dictSelectedSamples = microPITA.funcRun(**dictRunArguments)

    #A falsy result means no selection was produced; report it and signal failure to the caller
    if not dictSelectedSamples:
        logging.error("MicroPITA:: Error, did not get a result from analysis.")
        return -1
    logging.info("End microPITA")

    #Log output for debugging
    logging.debug("MicroPITA:: Returned the following samples:"+str(dictSelectedSamples))

    #Write selection to file
    microPITA.funcWriteSelectionToFile(dictSelection=dictSelectedSamples, xOutputFilePath=parsedArgs.ostmOutput)
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
1143
8fb4630ab314 Uploaded
sagun98
parents:
diff changeset
if __name__ == "__main__":
    #Use the result of _main as the process exit status: None (success) exits with 0,
    #while the -1 returned for a failed analysis gives a non-zero status the calling tool can detect.
    sys.exit(_main( ))