annotate src/breadcrumbs/src/SVM.py @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
sagun98
parents:
diff changeset
1 """
sagun98
parents:
diff changeset
2 Author: Timothy Tickle
sagun98
parents:
diff changeset
3 Description: Class to Allow Support Vector Machine analysis and to contain associated scripts
sagun98
parents:
diff changeset
4 """
sagun98
parents:
diff changeset
5
sagun98
parents:
diff changeset
6 #####################################################################################
sagun98
parents:
diff changeset
7 #Copyright (C) <2012>
sagun98
parents:
diff changeset
8 #
sagun98
parents:
diff changeset
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
sagun98
parents:
diff changeset
10 #this software and associated documentation files (the "Software"), to deal in the
sagun98
parents:
diff changeset
11 #Software without restriction, including without limitation the rights to use, copy,
sagun98
parents:
diff changeset
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
sagun98
parents:
diff changeset
13 #and to permit persons to whom the Software is furnished to do so, subject to
sagun98
parents:
diff changeset
14 #the following conditions:
sagun98
parents:
diff changeset
15 #
sagun98
parents:
diff changeset
16 #The above copyright notice and this permission notice shall be included in all copies
sagun98
parents:
diff changeset
17 #or substantial portions of the Software.
sagun98
parents:
diff changeset
18 #
sagun98
parents:
diff changeset
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
sagun98
parents:
diff changeset
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
sagun98
parents:
diff changeset
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
sagun98
parents:
diff changeset
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
sagun98
parents:
diff changeset
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
sagun98
parents:
diff changeset
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
sagun98
parents:
diff changeset
25 #####################################################################################
sagun98
parents:
diff changeset
26
sagun98
parents:
diff changeset
27 __author__ = "Timothy Tickle"
sagun98
parents:
diff changeset
28 __copyright__ = "Copyright 2012"
sagun98
parents:
diff changeset
29 __credits__ = ["Timothy Tickle"]
sagun98
parents:
diff changeset
30 __license__ = "MIT"
sagun98
parents:
diff changeset
31 __maintainer__ = "Timothy Tickle"
sagun98
parents:
diff changeset
32 __email__ = "ttickle@sph.harvard.edu"
sagun98
parents:
diff changeset
33 __status__ = "Development"
sagun98
parents:
diff changeset
34
sagun98
parents:
diff changeset
35 #Libraries
sagun98
parents:
diff changeset
36 from AbundanceTable import AbundanceTable
sagun98
parents:
diff changeset
37 from ConstantsBreadCrumbs import ConstantsBreadCrumbs
sagun98
parents:
diff changeset
38 import csv
sagun98
parents:
diff changeset
39 import os
sagun98
parents:
diff changeset
40 from random import shuffle
sagun98
parents:
diff changeset
41 from ValidateData import ValidateData
sagun98
parents:
diff changeset
42
sagun98
parents:
diff changeset
class SVM:
    """
    Class which holds generic methods for SVM use.

    All helpers work on LibSVM style text files: one row per sample, the first
    entry being the label and the remaining entries being index:value pairs.
    """

    @staticmethod
    def _funcOpenStream(xFile, sMode):
        """
        Returns an open stream for a file path or an existing file object.

        :param xFile: File path, or file object (re-opened by name if it is closed).
        :type: FileStream or string file path
        :param sMode: Mode to open the file with when it has to be (re)opened.
        :type: String
        :return ostm: An open file stream.
        """
        if isinstance(xFile, str):
            return open(xFile, sMode)
        if xFile.closed:
            return open(xFile.name, sMode)
        return xFile

    @staticmethod
    def _funcFormatFeatures(lxValues):
        """
        Formats values as 1-based "index:value" entries for one row of an SVM file.

        :param lxValues: Values of one sample, in feature order.
        :type: Iterable
        :return lsEntries: List of strings, one per value.
        """
        return [ConstantsBreadCrumbs.c_strColon.join([str(iIndex + 1), str(xValue)])
                for iIndex, xValue in enumerate(lxValues)]

    #1 Happy Path tested
    @staticmethod
    def funcConvertAbundanceTableToSVMFile(abndAbundanceTable, xOutputSVMFile, sMetadataLabel, lsOriginalLabels = None, lsSampleOrdering = None):
        """
        Converts abundance files to input SVM files.

        :param abndAbundanceTable: AbundanceTable object to turn to input SVM file.
        :type: AbundanceTable
        :param xOutputSVMFile: File to save SVM data to when converted from the abundance table.
                               The stream is closed before returning.
        :type: FileStream or string file path
        :param sMetadataLabel: The name of the metadata row used to make labels when lsOriginalLabels is not given.
        :type: String
        :param lsOriginalLabels: The original labels. If none, labels are made from the metadata.
        :type: List of strings
        :param lsSampleOrdering: Order of samples to output to output file. If none, the order in the abundance table is used.
        :type: List of strings
        :return setUniqueLabels: Set of the unique labels.
        """

        #Create data matrix (materialised as a list so it can be indexed)
        dataMatrix = list(zip(*abndAbundanceTable.funcGetAbundanceCopy()))

        #Add labels
        lsLabels = lsOriginalLabels if lsOriginalLabels else SVM.funcMakeLabels(abndAbundanceTable.funcGetMetadata(sMetadataLabel))

        #This allows the creation of partially known files for stratification purposes
        lsCurrentSamples = abndAbundanceTable.funcGetSampleNames()
        lsOrderingSamples = lsSampleOrdering if lsSampleOrdering else lsCurrentSamples[:]
        setCurrentSamples = set(lsCurrentSamples)

        #Number of entries written for a sample which is not in the table
        iSize = len(dataMatrix[0]) if dataMatrix else 0
        lsBlankRow = [ConstantsBreadCrumbs.c_strSVMNoSample] + SVM._funcFormatFeatures([ConstantsBreadCrumbs.c_strSVMNoSample] * iSize)

        ostm = SVM._funcOpenStream(xOutputSVMFile, "w")
        try:
            f = csv.writer(ostm, csv.excel_tab, delimiter = ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

            iLabelIndex = 0
            #Entry 0 of the transposed matrix is skipped, presumably it holds the feature ids (confirm against AbundanceTable).
            #NOTE(review): data is consumed with a running index, so samples found in the table
            #are assumed to occur in the ordering in the same order as in the table.
            iIndexSample = 1
            for sSample in lsOrderingSamples:
                if sSample in setCurrentSamples:
                    f.writerow([lsLabels[iLabelIndex]] + SVM._funcFormatFeatures(dataMatrix[iIndexSample]))
                    iLabelIndex += 1
                    iIndexSample += 1
                #Make blank entry
                else:
                    f.writerow(lsBlankRow)
                    #Given labels cover every ordered sample, so the label of the missing sample is skipped
                    if lsOriginalLabels:
                        iLabelIndex += 1
        finally:
            ostm.close()
        return set(lsLabels)

    @staticmethod
    def funcUpdateSVMFileWithAbundanceTable(abndAbundanceTable, xOutputSVMFile, lsOriginalLabels, lsSampleOrdering):
        """
        Takes a SVM input file and updates it with an abundance table.
        lsOriginalLabels and lsSampleOrdering should be consistent to the input file.
        Samples in the abundance table will be used to update the file if the sample name in the abundance table is also in the lsSampleOrdering.
        lsOriginalLabels and lsSampleOrdering should be in the same order.

        :param abndAbundanceTable: AbundanceTable object to turn to input SVM file.
        :type: AbundanceTable
        :param xOutputSVMFile: File to read the old SVM data from and to write the updated data to.
        :type: FileStream or string file path
        :param lsOriginalLabels: The list of the original labels (as numerics 0,1,2,3,4...as should be in the file).
        :type: List of strings
        :param lsSampleOrdering: Order of samples in the output file.
        :type: List of strings
        :return fSuccess: True when the file was rewritten, False when the file length does not match the ordering.
        """

        #Read in old file
        ostm = SVM._funcOpenStream(xOutputSVMFile, "r")
        try:
            fin = csv.reader(ostm, csv.excel_tab, delimiter = ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)
            llsOldContents = [lsRow for lsRow in fin]
        finally:
            ostm.close()

        #Check to make sure this ordering covers all positions in the old file
        if not len(llsOldContents) == len(lsSampleOrdering):
            print("The length of the original file ("+str(len(llsOldContents))+") does not match the length of the ordering given ("+str(len(lsSampleOrdering))+").")
            return False

        #Create data matrix from new data (materialised as a list so it can be indexed)
        dataMatrix = list(zip(*abndAbundanceTable.funcGetAbundanceCopy()))

        #This allows to know what position to place the new lines
        setCurrentSamples = set(abndAbundanceTable.funcGetSampleNames())

        #Write to file, a stream used for reading is closed by now and is re-opened by name
        ostm = SVM._funcOpenStream(xOutputSVMFile, "w")
        try:
            f = csv.writer(ostm, csv.excel_tab, delimiter = ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)

            #Entry 0 of the transposed matrix is skipped, presumably it holds the feature ids (confirm against AbundanceTable).
            iIndexSample = 1
            for iIndexOriginalOrder, sSample in enumerate(lsSampleOrdering):
                if sSample in setCurrentSamples:
                    f.writerow([lsOriginalLabels[iIndexOriginalOrder]] + SVM._funcFormatFeatures(dataMatrix[iIndexSample]))
                    iIndexSample += 1
                #Keep the old line
                else:
                    f.writerow(llsOldContents[iIndexOriginalOrder])
        finally:
            ostm.close()
        return True

    #Tested 5
    @staticmethod
    def funcMakeLabels(lsMetadata):
        """
        Given a list of metadata, labels are assigned. This function represents a central location to make labels so all are consistent.
        Labels are numbered from 0 in order of first occurrence of a metadata value.
        Values are compared by their string form, so non-string metadata is supported.

        :param lsMetadata: List of metadata to turn into labels based on the metadata's values.
        :type: List
        :return lsLabels: List of labels (integers as strings), one per metadata entry.
        """
        #Do not use a set to make elements unique. Need to preserve order.
        #First label should be 0
        dictLabels = dict()
        lsLabels = []
        for xElement in lsMetadata:
            sKey = str(xElement)
            if sKey not in dictLabels:
                dictLabels[sKey] = str(len(dictLabels))
            lsLabels.append(dictLabels[sKey])
        return lsLabels

    #Tested
    @staticmethod
    def funcReadLabelsFromFile(xSVMFile, lsAllSampleNames, isPredictFile):
        """
        Reads in the labels from the input file or prediction output file of a LibSVM formatted file
        and associates them in order with the given sample names.
        Rows marked as having no sample and blank rows are ignored.

        Prediction file expected format: Labels declared in first line with labels keyword.
        Each following row a sample with the first entry the predicted label
        Prediction file example:
        labels 0 1
        0 0.3 0.4 0.6
        1 0.1 0.2 0.3
        1 0.2 0.2 0.2
        0 0.2 0.4 0.3

        Input file expected format:
        Each row a sample with the first entry the predicted label
        Input file example:
        0 0.3 0.4 0.6
        1 0.1 0.2 0.3
        1 0.2 0.2 0.2
        0 0.2 0.4 0.3

        :param xSVMFile: File to read the labels from. A path is opened and closed here, a given stream is left open.
        :type: FileStream or string file path
        :param lsAllSampleNames: List of sample ids in the order of the labels.
        :type: List of Strings
        :param isPredictFile: Indicates if the file is the input (False) or prediction (True) file
        :type: boolean
        :return: Dictionary {label:set(["sampleName1", "sampleName2"...]),...} or False on error
        """
        #Open the file and get the labels, only a stream opened here is closed here
        fOpenedHere = isinstance(xSVMFile, str)
        istm = open(xSVMFile, 'r') if fOpenedHere else xSVMFile
        try:
            g = csv.reader(istm, csv.excel_tab, delimiter = ConstantsBreadCrumbs.c_strBreadCrumbsSVMSpace)
            lsOriginalLabels = [lsLineElements[0] for lsLineElements in g
                                if lsLineElements and not lsLineElements[0] == ConstantsBreadCrumbs.c_strSVMNoSample]
        finally:
            if fOpenedHere:
                istm.close()

        #The first line of a prediction file declares the labels and is not a sample
        if isPredictFile:
            lsOriginalLabels = lsOriginalLabels[1:]

        #Check sample name length
        if not len(lsAllSampleNames) == len(lsOriginalLabels):
            print("SVM::funcReadLabelsFromFile. Error, the length of sample names did not match the original labels length. Samples ("+str(len(lsAllSampleNames))+"):"+str(lsAllSampleNames)+" Labels ("+str(len(lsOriginalLabels))+"):"+str(lsOriginalLabels))
            return False

        #Change to {label:set(["sampleName1", "sampleName2"...]),...}
        dictSampleLabelsRet = dict()
        for sSampleName, sLabel in zip(lsAllSampleNames, lsOriginalLabels):
            dictSampleLabelsRet.setdefault(sLabel, set()).add(sSampleName)
        return dictSampleLabelsRet

    #Tested
    @staticmethod
    def funcScaleFeature(npdData):
        """
        Scale a feature between 0 and 1. Using 0,1 and not -1,1 because it keeps the sparsity of the data and may save time.
        Empty data and data holding a single distinct value can not be scaled and are returned unchanged.

        :param npdData: Feature data to scale.
        :type: Numpy Array
        :return npaFloat: A numpy array of floats (or the unchanged input when it can not be scaled).
        """
        #A single distinct value (this includes all zeros) would give a division by zero
        if len(npdData) == 0 or len(set(npdData)) == 1:
            return npdData
        npdShifted = npdData - min(npdData)
        return npdShifted / float(max(npdShifted))

    #Tested
    @staticmethod
    def funcWeightLabels(lLabels):
        """
        Returns weights for labels based on how balanced the labels are. Weights try to balance unbalanced results.
        The weight of a label is the highest label occurrence divided by the occurrence of the label.
        Weights are keyed by the position (0,1,2...) of the label in the unique labels.

        :param lLabels: List of labels to use for measure how balanced the comparison is.
        :type: List
        :return List: [dictWeights ({labelIndex:weight}),lUniqueLabels (unique occurrences of original labels)]
        """
        #Do not use set to make elements unique. Need to preserve order.
        #First label should be 0
        lUniqueLabels = []
        dictOccurrence = dict()
        for xLabel in lLabels:
            if xLabel not in dictOccurrence:
                lUniqueLabels.append(xLabel)
                dictOccurrence[xLabel] = 0
            dictOccurrence[xLabel] += 1

        #Nothing to weight
        if not lUniqueLabels:
            return [dict(), lUniqueLabels]

        #Divide the highest occurrence by each occurrence
        iMaxOccurence = max(dictOccurrence.values())
        dictWeights = dict()
        for iLabelIndex, xLabel in enumerate(lUniqueLabels):
            dictWeights[iLabelIndex] = iMaxOccurence / float(dictOccurrence[xLabel])

        return [dictWeights, lUniqueLabels]

    #Tested 3/4 cases could add in test 12 with randomize True
    def func10FoldCrossvalidation(self, iTotalSampleCount, fRandomise = False):
        """
        Generator.
        Generates the indexes for a 10 fold cross validation given a sample count.
        If there are less than 10 samples, it uses the sample count as the K-fold cross validation
        as a leave one out method.

        Each iteration yields two lists of booleans with one entry per sample:
        the first marks the training samples, the second the validation samples.

        :param iTotalSampleCount: Total Sample Count
        :type: Integer Sample Count
        :param fRandomise: Random sample indices
        :type: Boolean True indicates randomise (Default False)
        """
        #Make indices and shuffle if needed (shuffle needs a real list)
        liindices = list(range(iTotalSampleCount))
        if fRandomise:
            shuffle(liindices)

        #For 10 times, or once per sample if there are fewer samples
        iKFold = min(10, iTotalSampleCount)
        for iiteration in range(iKFold):
            lfTraining = [iindex % iKFold != iiteration for iindex in liindices]
            lfValidation = [not fTraining for fTraining in lfTraining]
            yield lfTraining, lfValidation