view SMART/Java/Python/ncList/NCListMerger.py @ 11:2da30502c2f1

Updated CompareOverlappingSmallQuery.xml
author m-zytnicki
date Thu, 14 Mar 2013 05:37:08 -0400
parents 769e306b7933
children
line wrap: on
line source

#! /usr/bin/env python
#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#

import struct, os, shutil
try:
	import cPickle as pickle
except:
	import pickle

# Size in bytes of one native C long (struct format "l"); used to compute
# offsets inside the output file header.
LONG_SIZE = struct.Struct("l").size

# Number of native longs reserved for each chromosome in the output header.
INFO_PER_NCLIST = 5

# Position of each value inside a chromosome's header record.
(
	H_FILE,           # offset at which the H file data was copied
	L_FILE,           # offset at which the L file data was copied
	G_FILE,           # offset at which the transcript file data was copied
	FIRST_LIST_SIZE,  # value returned by the NC-list's getSizeFirstList()
	INDEX,            # offset of the pickled index (only set when enabled)
) = range(INFO_PER_NCLIST)

def pack(input):
	"""Encode a value as one native C long (struct format "l").

	The value is first converted with int(), so anything int() accepts
	(integers, numeric strings, floats) can be passed.  int() is used
	instead of long(): it yields the same result on Python 2 (where it
	promotes to long automatically) and also exists on Python 3, where
	long() is undefined.

	Returns the packed byte string; raises struct.error if the value
	does not fit in a native long.
	"""
	return struct.pack("l", int(input))
def unpack(input):
	"""Decode a byte string holding exactly one native C long (format "l").

	Returns the decoded integer; struct.error is raised by the struct
	module when the buffer does not have the expected size.
	"""
	(value,) = struct.unpack("l", input)
	return value


class NCListMerger(object):
	"""Merge per-chromosome NC-list files into one binary output file.

	Layout of the file produced by merge():
	  1. the sorted list of chromosome names, pickled with the highest
	     protocol;
	  2. one native long per chromosome holding the NC-list's _nbLines;
	  3. for each chromosome, INFO_PER_NCLIST native longs (initially -1)
	     that are later filled with the offsets of its H, L and transcript
	     data, the size of its first list and, when indexing is enabled,
	     the offset of its pickled index;
	  4. the H files of all chromosomes, then all L files, then (optionally)
	     the pickled indices, then all transcript files.

	Chromosomes are always processed in sorted order.
	"""

	def __init__(self, verbosity):
		"""Store the verbosity level; index writing is disabled by default."""
		self._verbosity = verbosity
		self._index = False

	def setFileName(self, fileName):
		"""Open fileName for binary writing; it becomes the merge output."""
		self._handle = open(fileName, "wb")

	def setNCLists(self, ncLists):
		"""Register the NC-lists to merge.

		ncLists maps a chromosome name to its NC-list object.  The sorted
		key list fixes the order used for every section of the output.
		"""
		self._ncLists = ncLists
		self._chromosomes = sorted(self._ncLists.keys())

	def addIndex(self, boolean):
		"""Enable or disable writing the pickled indices to the output."""
		self._index = boolean

	def merge(self):
		"""Write the merged file, close it, then delete the input files.

		The output handle is closed even when writing fails.  The input
		files are removed only after everything was written successfully,
		so a failed merge never destroys its inputs.
		"""
		try:
			self._writeHeader()
			self._addNCLists()
		finally:
			self._handle.close()
		self._removeInputFiles()

	def _writeHeader(self):
		"""Write the chromosome list, the line counts and empty header slots."""
		pickle.dump(self._chromosomes, self._handle, -1)
		for chromosome in self._chromosomes:
			self._handle.write(pack(self._ncLists[chromosome]._nbLines))
		# Remember where the per-chromosome records start so that
		# _addInHeader() can come back and fill them in.
		self._headerPos = self._handle.tell()
		for chromosome in self._chromosomes:
			for i in range(INFO_PER_NCLIST):
				# -1 marks a slot that has not been filled.
				self._handle.write(pack(-1))

	def _addInHeader(self, i, info, value = None):
		"""Store a value in header slot `info` of the i-th chromosome.

		When value is None the current write position of the output file
		is stored, i.e. the offset at which the next data will be written.
		The write position is restored before returning.
		"""
		currentPos = self._handle.tell()
		if value is None:
			value = currentPos
		self._handle.seek(self._headerPos + i * INFO_PER_NCLIST * LONG_SIZE + info * LONG_SIZE)
		self._handle.write(pack(value))
		self._handle.seek(currentPos)

	def _copyInputFile(self, fileName):
		"""Append the raw bytes of fileName to the output and record its name.

		The input is opened in binary mode: the output handle is binary,
		and text mode would alter the bytes on some platforms or fail
		outright on Python 3.
		"""
		with open(fileName, "rb") as inputHandle:
			shutil.copyfileobj(inputHandle, self._handle)
		self._inputFileNames.append(fileName)

	def _addNCLists(self):
		"""Append every data section and fill in the matching header slots."""
		self._inputFileNames = []
		ncLists = [self._ncLists[chromosome] for chromosome in self._chromosomes]
		for i, ncList in enumerate(ncLists):
			self._addInHeader(i, H_FILE)
			self._copyInputFile(ncList._hFileName)
		for i, ncList in enumerate(ncLists):
			self._addInHeader(i, L_FILE)
			self._copyInputFile(ncList._lFileName)
		for i, ncList in enumerate(ncLists):
			self._addInHeader(i, FIRST_LIST_SIZE, ncList.getSizeFirstList())
		if self._index:
			for i, ncList in enumerate(ncLists):
				self._addInHeader(i, INDEX)
				pickle.dump(ncList.getIndex()._indices, self._handle, -1)
		# The transcript data goes last; its offset is stored in G_FILE.
		for i, ncList in enumerate(ncLists):
			self._addInHeader(i, G_FILE)
			self._copyInputFile(ncList._transcriptFileName)

	def _removeInputFiles(self):
		"""Delete every input file that was copied into the output."""
		for fileName in self._inputFileNames:
			os.remove(fileName)