diff SMART/Java/Python/clusterize.py @ 67:f4de72c80eac draft

Uploaded
author m-zytnicki
date Mon, 16 Nov 2015 11:59:35 -0500
parents 90f4b29d884f
children
line wrap: on
line diff
--- a/SMART/Java/Python/clusterize.py	Wed Nov 04 03:41:26 2015 -0500
+++ b/SMART/Java/Python/clusterize.py	Mon Nov 16 11:59:35 2015 -0500
@@ -101,21 +101,20 @@
 			fs.setOutputFileName(self.sortedFileNames[fileName])
 			fs.sort()
 			self.splittedFileNames[fileName] = fs.getOutputFileNames()
-			self.nbElementsPerChromosome     = fs.getNbElementsPerChromosome()
-			self.nbElements                  = fs.getNbElements()
 			self.chromosomes.update(self.splittedFileNames[fileName].keys())
 		
 	def _iterate(self):
 		progress = UnlimitedProgress(10000, "Reading input file", self.verbosity)
-		transcripts = []
-		heap        = []
 		parsersSets = []
+		self.nbElements = 0
 		if self.chromosomes:
+			for chromosome in self.chromosomes:
+				parsersSets.append([NCListFileUnpickle(self.splittedFileNames[fileName][chromosome]) for fileName in self.splittedFileNames if chromosome in self.splittedFileNames[fileName]])
+		else:
 			parsersSets.append(self.parsers.values())
-		else:
-			for chromosome in self.chromosomes:
-				parsersSets.append([self.splittedFileNames[fileName][chromosome] for fileName in self.splittedFileNames if chromosome in self.splittedFileNames[fileName]])
 		for parsers in parsersSets:
+			transcripts = []
+			heap        = []
 			for parser in parsers:
 				iterator = parser.getIterator()
 				for transcript in iterator:
@@ -142,6 +141,7 @@
 						newTranscripts.append(oldTranscript)
 				newTranscripts.append(newTranscript)
 				transcripts = newTranscripts
+				self.nbElements += 1
 				progress.inc()
 			for transcript in transcripts:
 				self._write(transcript)