view SMART/Java/Python/ncList/NCListHandler.py @ 69:1473ab954708 draft

Corrected bug in "CollapsedReads" XML file.
author m-zytnicki
date Wed, 18 Nov 2015 10:59:02 -0500
parents 769e306b7933
children
line wrap: on
line source

#! /usr/bin/env python
#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#

import struct
try:
	import cPickle as pickle
except:
	import pickle
from SMART.Java.Python.ncList.NCList import NCList
from SMART.Java.Python.ncList.NCIndex import NCIndex
from SMART.Java.Python.ncList.NCListFilePickle import NCListFileUnpickle

# Size in bytes of one native C long ('l' in struct notation); every
# fixed-width value of the file header is read in units of this size.
LONG_SIZE = struct.calcsize('l')

# Positions of the fields inside one per-chromosome header record.
# A record is made of INFO_PER_NCLIST consecutive longs.
(H_FILE, L_FILE, G_FILE, FIRST_LIST_SIZE, INDEX) = range(5)
INFO_PER_NCLIST = 5

# Identifiers handed to NCList.setOffset() to select which offset is set
# (T is not referenced by the code visible in this module).
(H, L, T, G) = range(4)

def pack(input):
	"""Encode *input* as a native C long and return the packed bytes.

	*input* is anything accepted by int() (integer, numeric string, ...).
	Raises struct.error when the value does not fit in a native long.
	"""
	# int() rather than long(): long() does not exist on Python 3, while on
	# Python 2 int() transparently promotes to long when needed.
	return struct.pack("l", int(input))
def unpack(input):
	"""Decode the packed native C long held in *input* and return it.

	*input* must be exactly struct.calcsize('l') bytes long, otherwise
	struct.error is raised.
	"""
	# The 'l' format yields exactly one value, so destructure the 1-tuple.
	(value,) = struct.unpack("l", input)
	return value


class NCListHandler(object):
	"""Read access to a file that stores one NC-list per chromosome.

	File layout, as consumed by loadData():
	  1. a pickled sequence of chromosome names;
	  2. one native long per chromosome: its number of elements;
	  3. a header table of INFO_PER_NCLIST native longs per chromosome, in
	     the order H_FILE, L_FILE, G_FILE, FIRST_LIST_SIZE, INDEX.

	Typical use: setFileName(), then loadData(), then the getters.
	"""

	def __init__(self, verbosity):
		"""Store *verbosity*; no file is opened until setFileName()."""
		self._verbosity = verbosity
		self._index	    = False

	def setFileName(self, fileName):
		"""Remember *fileName* and open it for binary reading.

		This class never closes the handle: the NCList objects built by
		loadData() keep a reference to it.
		"""
		self._fileName = fileName
		self._handle   = open(fileName, "rb")

	def _readHeaderField(self, chromosomeIndex, field):
		"""Return one long of the per-chromosome header table.

		*chromosomeIndex* is the rank of the chromosome in the chromosome
		list, *field* is one of H_FILE, L_FILE, G_FILE, FIRST_LIST_SIZE or
		INDEX.  Moves the position of the shared file handle.
		"""
		self._handle.seek(self._headerPos + (chromosomeIndex * INFO_PER_NCLIST + field) * LONG_SIZE)
		return unpack(self._handle.read(LONG_SIZE))

	def loadData(self):
		"""Read the file header and build one NCList per chromosome.

		Fills the chromosome list, the element counts (total and per
		chromosome) and the chromosome -> NCList dictionary.
		setFileName() must have been called first.
		"""
		# NOTE(security): pickle.load can execute arbitrary code; only open
		# files that come from a trusted source.
		self._chromosomes = pickle.load(self._handle)
		self._nbElements = 0
		self._nbElementsPerChromosome = {}
		self._ncLists = {}
		# The element counts directly follow the pickled chromosome list.
		for chromosome in self._chromosomes:
			nbElements = unpack(self._handle.read(LONG_SIZE))
			self._nbElementsPerChromosome[chromosome] = nbElements
			self._nbElements += nbElements
		# The per-chromosome header table starts right after the counts.
		self._headerPos = self._handle.tell()
		for i, chromosome in enumerate(self._chromosomes):
			ncList = NCList(self._verbosity)
			# All the lists read from the very same file handle.
			ncList._hHandle = self._handle
			ncList._lHandle = self._handle
			ncList._parser  = NCListFileUnpickle(self._fileName)
			ncList.setOffset(H, self._readHeaderField(i, H_FILE))
			ncList.setOffset(L, self._readHeaderField(i, L_FILE))
			ncList.setOffset(G, self._readHeaderField(i, G_FILE))
			ncList._sizeFirstList = self._readHeaderField(i, FIRST_LIST_SIZE)
			indexAddress = self._readHeaderField(i, INDEX)
			# -1 means that no index was stored for this chromosome.
			if indexAddress != -1:
				self._handle.seek(indexAddress)
				data = pickle.load(self._handle)
				index = NCIndex(self._verbosity)
				index._indices = data
				ncList._index = index
			self._ncLists[chromosome] = ncList

	def getChromosomes(self):
		"""Return the chromosome names, in file order."""
		return self._chromosomes

	def getNbElements(self):
		"""Return the total number of elements, all chromosomes included."""
		return self._nbElements

	def getNbElementsPerChromosome(self):
		"""Return the dictionary chromosome -> number of elements."""
		return self._nbElementsPerChromosome

	def getNCLists(self):
		"""Return the dictionary chromosome -> NCList."""
		return self._ncLists

	def getParser(self, chromosome = None):
		"""Return a new NCListFileUnpickle positioned on the stored data.

		Without *chromosome*, the parser starts at the G address of the
		first chromosome of the file and is not bound to any chromosome.
		With *chromosome*, it starts at the G address of that chromosome
		and is restricted to it through setChromosome().

		Raises ValueError when *chromosome* is not in the file.
		Moves the position of the shared file handle.
		"""
		parser = NCListFileUnpickle(self._fileName)
		if chromosome is None:
			# The original code called unpack() with a handle and a position,
			# which unpack() does not accept; read the field explicitly.
			parser.setInitAddress(self._readHeaderField(0, G_FILE))
			return parser
		i = self._chromosomes.index(chromosome)
		parser.setInitAddress(self._readHeaderField(i, G_FILE))
		parser.setChromosome(chromosome)
		return parser