6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 import sys
|
|
33 import os
|
|
34 from commons.core.coord.Map import Map
|
|
35 from commons.core.coord.Set import Set
|
|
36 try:
|
|
37 from commons.core.checker.CheckerUtils import CheckerUtils
|
|
38 except ImportError:
|
|
39 pass
|
|
40
|
|
41
|
|
42 ## static methods manipulating Map instances
|
|
43 #
|
|
class MapUtils(object):

    ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is not modified)
    #
    @staticmethod
    def getMapListSortedByIncreasingMinThenMax(lMaps):
        # 'sorted' is stable: Maps sharing the same (min, max) keep their initial relative order
        return sorted(lMaps, key=lambda iMap: (iMap.getMin(), iMap.getMax()))


    ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the input list is not modified)
    #
    @staticmethod
    def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax(lMaps):
        return sorted(lMaps,
                      key=lambda iMap: (iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax()))


    ## Return a dictionary which keys are Map names and values the corresponding Map instances
    #
    # Lines are parsed as tab-separated records. A record strictly equal to a
    # previous one with the same name is ignored. If two different records share
    # the same name, an error is written on stderr and the program exits with status 1.
    #
    # @param mapFile string name of the input Map file
    # @return dName2Maps dictionary name -> Map instance
    #
    @staticmethod
    def getDictPerNameFromMapFile(mapFile):
        dName2Maps = {}
        # 'with' guarantees the file is closed even when sys.exit() is raised below
        with open(mapFile, "r") as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                if iMap.name in dName2Maps:
                    if iMap == dName2Maps[iMap.name]:
                        # exact duplicate of a record already stored: nothing to do
                        continue
                    msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % (mapFile, iMap.name)
                    sys.stderr.write("%s\n" % (msg))
                    sys.exit(1)
                dName2Maps[iMap.name] = iMap
        return dName2Maps


    ## Build a Set instance carrying the name, seqname, start and end of a Map instance
    #
    # @param iMap Map instance
    # @param identifier integer identifier given to the Set instance
    # @return iSet Set instance
    #
    @staticmethod
    def _getSetFromMap(iMap, identifier):
        iSet = Set()
        iSet.id = identifier
        iSet.name = iMap.getName()
        iSet.seqname = iMap.getSeqname()
        iSet.start = iMap.getStart()
        iSet.end = iMap.getEnd()
        return iSet


    ## Give a list of Set instances from a list of Map instances
    #
    # Set identifiers are assigned from 1, following the order of the input list.
    #
    # @param lMaps list of Map instances
    # @return lSets list of Set instances
    #
    @staticmethod
    def mapList2SetList(lMaps):
        return [MapUtils._getSetFromMap(iMap, identifier)
                for identifier, iMap in enumerate(lMaps, 1)]


    ## Merge the Map instances in a Map file using 'mapOp'
    #
    # If the module 'CheckerUtils' was not imported or the executable 'mapOp' is
    # not found, a warning is written on stderr and nothing else is done (no
    # output file is produced). If 'mapOp' returns a non-zero status, the
    # program exits with status 1.
    #
    # @param inFile string name of the input Map file
    # @param outFile string name of the output file ('<inFile>.merge' is renamed into it)
    #
    @staticmethod
    def mergeCoordsInFile(inFile, outFile):
        if "commons.core.checker.CheckerUtils" not in sys.modules:
            msg = "WARNING: can't find module 'CheckerUtils'"
            sys.stderr.write("%s\n" % msg)
            return
        if not CheckerUtils.isExecutableInUserPath("mapOp"):
            msg = "WARNING: can't find executable 'mapOp'"
            sys.stderr.write("%s\n" % msg)
            return
        # NOTE(review): with this redirection order only stdout is discarded,
        # stderr is sent to the caller's stdout - confirm this is intended.
        # NOTE(review): 'inFile' is inserted unquoted in a shell command, hence
        # names containing spaces or shell metacharacters are not supported.
        cmd = "mapOp"
        cmd += " -q %s" % (inFile)
        cmd += " -m"
        cmd += " 2>&1 > /dev/null"
        returnStatus = os.system(cmd)
        if returnStatus != 0:
            # written on stdout, as the former 'print' statement did
            sys.stdout.write("ERROR: mapOp returned %i\n" % (returnStatus))
            sys.exit(1)
        os.rename("%s.merge" % inFile, outFile)


    ## Return a dictionary which keys are Map seqnames and values the lists of corresponding Map instances
    #
    # Lines are parsed as tab-separated records; within each list the Map
    # instances are in the same order as in the file.
    #
    # @param mapFile string name of the input Map file
    # @return dSeqName2Maps dictionary seqname -> list of Map instances
    #
    @staticmethod
    def getDictPerSeqNameFromMapFile(mapFile):
        dSeqName2Maps = {}
        with open(mapFile, "r") as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString(line, "\t")
                dSeqName2Maps.setdefault(iMap.seqname, []).append(iMap)
        return dSeqName2Maps


    ## Convert a Map file into a Set file
    #
    # Set identifiers are assigned from 1, following the order of the lines.
    #
    # @param mapFileName string input map file name
    # @param setFileName string output set file name (default: '<mapFileName>.set')
    #
    @staticmethod
    def convertMapFileIntoSetFile(mapFileName, setFileName=""):
        if setFileName == "":
            setFileName = "%s.set" % mapFileName
        # the input file is opened first, so that a missing input does not create an empty output
        with open(mapFileName, "r") as mapFileHandler:
            with open(setFileName, "w") as setFileHandler:
                # a single Map instance is re-used for every line
                iMap = Map()
                for count, line in enumerate(mapFileHandler, 1):
                    iMap.setFromString(line)
                    iSet = MapUtils._getSetFromMap(iMap, count)
                    iSet.write(setFileHandler)


    ## Write Map instances contained in the given list
    #
    # @param lMaps list of Map instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile(lMaps, fileName, mode="w"):
        with open(fileName, mode) as fileHandler:
            for iMap in lMaps:
                iMap.write(fileHandler)


    ## Return the length of every record of a Map file, in file order
    #
    # Start and end are read from the 3rd and 4th tab-separated fields. They can
    # be given in any order (start > end), hence the absolute value. Blank lines
    # are skipped.
    #
    # @param mapFileName string name of the input Map file
    # @return lLengths list of integers
    #
    @staticmethod
    def _getLengthsFromMapFile(mapFileName):
        lLengths = []
        with open(mapFileName, "r") as fileHandler:
            for line in fileHandler:
                if line.strip() == "":
                    continue
                tokens = line.split("\t")
                start = int(tokens[2])
                end = int(tokens[3])
                lLengths.append(abs(end - start) + 1)
        return lLengths


    ## Get the length of the shortest seq in map file
    #
    # @param mapFileName string name of the input Map file
    # @return integer length of the shortest record, 0 if the file has no record
    #
    def getMinLengthOfMapFile(self, mapFileName):
        lLengths = MapUtils._getLengthsFromMapFile(mapFileName)
        if not lLengths:
            return 0
        return min(lLengths)


    ## Get the length of the longest seq in map file
    #
    # @param mapFileName string name of the input Map file
    # @return integer length of the longest record, 0 if the file has no record
    #
    def getMaxLengthOfMapFile(self, mapFileName):
        lLengths = MapUtils._getLengthsFromMapFile(mapFileName)
        if not lLengths:
            return 0
        return max(lLengths)