comparison SMART/Java/Python/ncList/NCListMerger.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 #! /usr/bin/env python
2 #
3 # Copyright INRA-URGI 2009-2010
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30 #
31
32 import struct, os, shutil
33 try:
34 import cPickle as pickle
35 except:
36 import pickle
37
# Size in bytes of one native C long; every header entry written through
# pack() occupies exactly this many bytes.
LONG_SIZE = struct.calcsize("l")

# Each chromosome owns a record of INFO_PER_NCLIST packed longs in the
# header of the merged file.  The names below are the slot numbers inside
# that record:
#   H_FILE, L_FILE, G_FILE -- offset in the merged file at which the copy
#                             of the H / L / transcript file starts
#   FIRST_LIST_SIZE        -- value returned by ncList.getSizeFirstList()
#   INDEX                  -- offset of the pickled index (left at -1 when
#                             no index is written)
INFO_PER_NCLIST = 5
(H_FILE, L_FILE, G_FILE, FIRST_LIST_SIZE, INDEX) = range(INFO_PER_NCLIST)
46
def pack(input):
    """Serialize a number as one native C long.

    The value is first converted with int(), so anything int() accepts
    (integers, floats, numeric strings) is allowed.  int() is used rather
    than long() because long() does not exist on Python 3, while on
    Python 2 int() transparently returns a long when it has to.

    Returns the struct-packed byte string (native byte order and size).
    Raises struct.error when the value does not fit in a native long.
    """
    return struct.pack("l", int(input))
def unpack(input):
    """Decode one native C long from a packed byte string.

    `input` must be exactly struct.calcsize("l") bytes long, otherwise
    struct.error is raised.  Returns the decoded integer.
    """
    (value,) = struct.unpack("l", input)
    return value
51
52
class NCListMerger(object):
    """Concatenate the files of several NC-lists into one binary file.

    Layout produced by merge(), in writing order:

      1. the pickled, sorted list of chromosome names;
      2. one packed long per chromosome holding the `_nbLines` attribute
         of its NC-list;
      3. one record of INFO_PER_NCLIST packed longs per chromosome,
         first filled with -1 and patched afterwards (see _addInHeader);
      4. all H files, then all L files, then -- only when addIndex(True)
         was requested -- all pickled indices, then all transcript files.

    The NC-list objects are supplied by the caller.  This class reads
    their `_nbLines`, `_hFileName`, `_lFileName` and `_transcriptFileName`
    attributes and calls `getSizeFirstList()` and `getIndex()` on them.
    """

    def __init__(self, verbosity):
        """Create a merger; `verbosity` is stored but not used in this class."""
        self._verbosity = verbosity
        self._index = False
        # Defined up front so _removeInputFiles() is harmless before merge().
        self._inputFileNames = []

    def setFileName(self, fileName):
        """Open `fileName` for binary writing as the merge destination."""
        self._handle = open(fileName, "wb")

    def setNCLists(self, ncLists):
        """Register the NC-lists to merge, as a dict keyed by chromosome.

        Chromosomes are processed in sorted key order.
        """
        self._ncLists = ncLists
        self._chromosomes = sorted(self._ncLists.keys())

    def addIndex(self, boolean):
        """Choose whether the pickled indices are written into the output."""
        self._index = boolean

    def merge(self):
        """Write the merged file, close it, then delete the input files.

        The output handle is closed even when writing fails.  In that
        case the exception propagates and the input files are kept.
        """
        try:
            self._writeHeader()
            self._addNCLists()
        finally:
            self._handle.close()
        self._removeInputFiles()

    def _writeHeader(self):
        """Write the chromosome list, the line counts and the blank table."""
        pickle.dump(self._chromosomes, self._handle, -1)
        for chromosome in self._chromosomes:
            self._handle.write(pack(self._ncLists[chromosome]._nbLines))
        # Remember where the per-chromosome table starts so that
        # _addInHeader() can come back and patch individual slots.
        self._headerPos = self._handle.tell()
        emptyRecord = pack(-1) * INFO_PER_NCLIST
        for _ in self._chromosomes:
            self._handle.write(emptyRecord)

    def _addInHeader(self, i, info, value = None):
        """Patch slot `info` of the header record of chromosome number `i`.

        When `value` is None, the current write position of the output
        file is stored.  The write position is restored afterwards.
        """
        currentPos = self._handle.tell()
        if value is None:
            value = currentPos
        self._handle.seek(self._headerPos + (i * INFO_PER_NCLIST + info) * LONG_SIZE)
        self._handle.write(pack(value))
        self._handle.seek(currentPos)

    def _addNCLists(self):
        """Append every data section and fill in the header table."""
        self._inputFileNames = []
        self._appendFiles(H_FILE, "_hFileName")
        self._appendFiles(L_FILE, "_lFileName")
        for i, chromosome in enumerate(self._chromosomes):
            self._addInHeader(i, FIRST_LIST_SIZE, self._ncLists[chromosome].getSizeFirstList())
        if self._index:
            for i, chromosome in enumerate(self._chromosomes):
                self._addInHeader(i, INDEX)
                pickle.dump(self._ncLists[chromosome].getIndex()._indices, self._handle, -1)
        self._appendFiles(G_FILE, "_transcriptFileName")

    def _appendFiles(self, info, attributeName):
        """Append, for each chromosome, the file named by `attributeName`.

        The start offset of each copy is recorded in header slot `info`.
        """
        for i, chromosome in enumerate(self._chromosomes):
            self._addInHeader(i, info)
            self._appendInputFile(getattr(self._ncLists[chromosome], attributeName))

    def _appendInputFile(self, fileName):
        """Copy `fileName` to the output and schedule it for deletion."""
        # Binary mode is required: the destination is a binary handle, and
        # text mode would translate newlines (or decode bytes on Python 3).
        with open(fileName, "rb") as inputHandle:
            shutil.copyfileobj(inputHandle, self._handle)
        self._inputFileNames.append(fileName)

    def _removeInputFiles(self):
        """Delete every input file that was copied into the merged file."""
        for fileName in self._inputFileNames:
            os.remove(fileName)