comparison smart_toolShed/commons/core/coord/MapUtils.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e0f8dcca02ed
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 import sys
33 import os
34 from commons.core.coord.Map import Map
35 from commons.core.coord.Set import Set
36 try:
37 from commons.core.checker.CheckerUtils import CheckerUtils
38 except ImportError:
39 pass
40
41
## Static methods manipulating Map instances
#
# The code below runs unchanged on Python 2.6+ and Python 3: it avoids
# 'dict.has_key()' and the 'print' statement, and every file is handled
# through a 'with' block so that it is closed even when an error occurs.
#
class MapUtils( object ):

    ## Return a list of Map instances sorted in increasing order according to the min, then the max, and finally their initial order
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the given list is not modified)
    #
    @staticmethod
    def getMapListSortedByIncreasingMinThenMax( lMaps ):
        # sorted() is stable: Maps sharing the same (min, max) keep their initial order
        return sorted( lMaps, key=lambda iMap: ( iMap.getMin(), iMap.getMax() ) )


    ## Return a list of Map instances sorted in increasing order according to the name, then the seqname, then the min, then the max
    #
    # @param lMaps list of Map instances
    # @return a new sorted list (the given list is not modified)
    #
    @staticmethod
    def getMapListSortedByIncreasingNameThenSeqnameThenMinThenMax( lMaps ):
        return sorted( lMaps, key=lambda iMap: ( iMap.getName(), iMap.getSeqname(), iMap.getMin(), iMap.getMax() ) )


    ## Return a dictionary which keys are Map names and values the corresponding Map instances
    #
    # Each line of the file is parsed with Map.setFromString() using a tabulation
    # as separator. A line repeating an already recorded, equal Map is ignored.
    # If two different Map instances share the same name, an error message is
    # written on stderr and the program exits with status 1.
    #
    # @param mapFile string name of the input map file
    # @return dName2Maps dictionary name -> Map instance
    #
    @staticmethod
    def getDictPerNameFromMapFile( mapFile ):
        dName2Maps = {}
        with open( mapFile, "r" ) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString( line, "\t" )
                if iMap.name in dName2Maps:
                    if iMap == dName2Maps[ iMap.name ]:
                        # exact duplicate of a Map already recorded
                        continue
                    msg = "ERROR: in file '%s' two different Map instances have the same name '%s'" % ( mapFile, iMap.name )
                    sys.stderr.write( "%s\n" % ( msg ) )
                    sys.exit(1)
                dName2Maps[ iMap.name ] = iMap
        return dName2Maps


    ## Give a list of Set instances from a list of Map instances
    #
    # The Set identifiers are the 1-based ranks of the Map instances in the list.
    #
    # @param lMaps list of Map instances
    # @return lSets list of Set instances
    #
    @staticmethod
    def mapList2SetList( lMaps ):
        lSets = []
        for rank, iMap in enumerate( lMaps, 1 ):
            iSet = Set()
            iSet.id = rank
            iSet.name = iMap.getName()
            iSet.seqname = iMap.getSeqname()
            iSet.start = iMap.getStart()
            iSet.end = iMap.getEnd()
            lSets.append( iSet )
        return lSets


    ## Merge the Map instances in a Map file using 'mapOp'
    #
    # If the module 'CheckerUtils' could not be imported, or if the executable
    # 'mapOp' is not found, a warning is written on stderr and nothing else is
    # done (the output file is not created). If 'mapOp' returns a non-zero
    # status, an error is printed and the program exits with status 1.
    #
    # @param inFile string name of the input map file
    # @param outFile string name under which the file '<inFile>.merge' is renamed
    #
    @staticmethod
    def mergeCoordsInFile( inFile, outFile ):
        if "commons.core.checker.CheckerUtils" not in sys.modules:
            msg = "WARNING: can't find module 'CheckerUtils'"
            sys.stderr.write( "%s\n" % msg )
            return
        if not CheckerUtils.isExecutableInUserPath( "mapOp" ):
            msg = "WARNING: can't find executable 'mapOp'"
            sys.stderr.write( "%s\n" % msg )
            return
        # NOTE(review): 'inFile' is inserted unquoted in a shell command, hence a
        # name containing spaces or shell metacharacters would break it.
        # NOTE(review): with '2>&1 > /dev/null' only stdout is discarded, stderr is
        # sent to the original stdout -- confirm this is the intended behaviour.
        cmd = "mapOp"
        cmd += " -q %s" % ( inFile )
        cmd += " -m"
        cmd += " 2>&1 > /dev/null"
        returnStatus = os.system( cmd )
        if returnStatus != 0:
            # parenthesised single argument: valid on both Python 2 and Python 3
            print( "ERROR: mapOp returned %i" % ( returnStatus ) )
            sys.exit(1)
        # presumably 'mapOp -m' writes its result in '<inFile>.merge'
        os.rename( "%s.merge" % inFile, outFile )


    ## Return a dictionary which keys are Map seqnames and values the lists of corresponding Map instances
    #
    # Each line of the file is parsed with Map.setFromString() using a tabulation
    # as separator.
    #
    # @param mapFile string name of the input map file
    # @return dSeqName2Maps dictionary seqname -> list of Map instances, in file order
    #
    @staticmethod
    def getDictPerSeqNameFromMapFile( mapFile ):
        dSeqName2Maps = {}
        with open( mapFile, "r" ) as mapFileHandler:
            for line in mapFileHandler:
                iMap = Map()
                iMap.setFromString( line, "\t" )
                dSeqName2Maps.setdefault( iMap.seqname, [] ).append( iMap )
        return dSeqName2Maps


    ## Convert a Map file into a Set file
    #
    # The Set identifiers are the 1-based line numbers of the map file.
    #
    # @param mapFileName string input map file name
    # @param setFileName string output set file name (default: '<mapFileName>.set')
    #
    @staticmethod
    def convertMapFileIntoSetFile( mapFileName, setFileName = "" ):
        if setFileName == "":
            setFileName = "%s.set" % mapFileName
        # the input file is opened first, thus a missing map file does not create an empty set file
        with open( mapFileName, "r" ) as mapFileHandler:
            with open( setFileName, "w" ) as setFileHandler:
                # the same Map instance is re-filled for each line
                iMap = Map()
                for count, line in enumerate( mapFileHandler, 1 ):
                    iMap.setFromString( line )
                    iSet = Set()
                    iSet.id = count
                    iSet.name = iMap.getName()
                    iSet.seqname = iMap.getSeqname()
                    iSet.start = iMap.getStart()
                    iSet.end = iMap.getEnd()
                    iSet.write( setFileHandler )


    ## Write Map instances contained in the given list
    #
    # @param lMaps list of Map instances
    # @param fileName a file name
    # @param mode the open mode of the file '"w"' or '"a"'
    #
    @staticmethod
    def writeListInFile( lMaps, fileName, mode="w" ):
        with open( fileName, mode ) as fileHandler:
            for iMap in lMaps:
                iMap.write( fileHandler )


    ## Return the length described by one line of a map file
    #
    # The line is split on tabulations; the start is read from the third field
    # and the end from the fourth one.
    #
    # @param line string one line of a map file
    # @return end - start + 1
    # @note raise IndexError if the line has less than four fields, ValueError if a coordinate is not an integer
    #
    @staticmethod
    def _getLengthFromMapLine( line ):
        lFields = line.split( '\t' )
        start = int( lFields[2] )
        end = int( lFields[3] )
        # NOTE(review): a line with start > end gives a value <= 0 -- confirm
        # whether such lines can occur in the files given to these methods
        return end - start + 1


    ## Get the length of the shortest seq in map file
    #
    # This is an instance method although 'self' is not used.
    #
    # @param mapFileName string name of the input map file
    # @return the smallest value of 'end - start + 1' over all the lines
    # @note raise IndexError if the file is empty or if a line has less than four fields
    #
    def getMinLengthOfMapFile( self, mapFileName ):
        with open( mapFileName, "r" ) as fileHandler:
            lineIterator = iter( fileHandler )
            # an empty file gives "" here, which fails in the parsing like a malformed line
            minLength = MapUtils._getLengthFromMapLine( next( lineIterator, "" ) )
            for line in lineIterator:
                currentLength = MapUtils._getLengthFromMapLine( line )
                if currentLength <= minLength:
                    minLength = currentLength
        return minLength


    ## Get the length of the longest seq in map file
    #
    # This is an instance method although 'self' is not used.
    #
    # @param mapFileName string name of the input map file
    # @return the greatest value of 'end - start + 1' over all the lines
    # @note raise IndexError if the file is empty or if a line has less than four fields
    #
    def getMaxLengthOfMapFile( self, mapFileName ):
        with open( mapFileName, "r" ) as fileHandler:
            lineIterator = iter( fileHandler )
            # an empty file gives "" here, which fails in the parsing like a malformed line
            maxLength = MapUtils._getLengthFromMapLine( next( lineIterator, "" ) )
            for line in lineIterator:
                currentLength = MapUtils._getLengthFromMapLine( line )
                if currentLength >= maxLength:
                    maxLength = currentLength
        return maxLength