annotate commons/core/coord/AlignUtils.py @ 62:8c42a6d7ffd4

Added simple test BED file.
author m-zytnicki
date Mon, 19 Oct 2015 11:25:11 +0200
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 # http://www.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 import os
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 import sys
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 import shutil
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from commons.core.coord.Align import Align
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 ## Static methods manipulating Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40 class AlignUtils( object ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
42 ## Return a list with Align instances from the given file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
43 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
44 # @param inFile name of a file in the Align format
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
45 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getAlignListFromFile( inFile ):
    """Return the Align instances described by the lines of an Align file.

    Every line read from the file is passed to Align.setFromString() on a
    fresh Align instance; the instances are returned in file order.

    @param inFile name of a file in the Align format
    @return list of Align instances, one per line of the file
    """
    lAlignInstances = []
    # The context manager closes the handle even if parsing a line raises.
    with open( inFile, "r" ) as inFileHandler:
        for line in inFileHandler:
            a = Align()
            a.setFromString( line )
            lAlignInstances.append( a )
    return lAlignInstances

getAlignListFromFile = staticmethod( getAlignListFromFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
60
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
61
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
62 ## Return a list with all the scores
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
63 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
64 # @param lAlignInstances: list of Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
65 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getListOfScores( lAlignInstances ):
    """Collect the 'score' attribute of each Align instance, in input order.

    @param lAlignInstances list of Align instances
    @return list holding the score of every instance
    """
    return [ iAlign.score for iAlign in lAlignInstances ]

getListOfScores = staticmethod( getListOfScores )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
73
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
74
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
75 ## Return a list with all the scores from the given file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
76 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
77 # @param inFile name of a file in the Align format
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
78 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getScoreListFromFile(inFile):
    """Read the scores straight from an Align file, without building Align objects.

    The score is taken from the 8th tab-separated field of each line and
    converted with int(). Lines consisting of a lone newline are skipped.

    @param inFile name of a file in the Align format
    @return list of integer scores, in file order
    """
    with open(inFile, "r") as inFileHandler:
        return [int(row.split('\t')[7]) for row in inFileHandler if row != "\n"]

getScoreListFromFile = staticmethod( getScoreListFromFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
91
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
92
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
93 ## for each line of a given Align file, write the coordinates on the query and the subject as two distinct lines in a Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
94 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
95 # @param alignFile: name of the input Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
96 # @param mapFile: name of the output Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
97 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertAlignFileIntoMapFileWithQueriesAndSubjects( alignFile, mapFile ):
    """Write, for each line of an Align file, two lines in a Map file.

    Each input line is parsed with Align.setFromString(); the two Map
    objects returned by getMapsOfQueryAndSubject() are then written to the
    output, query first and subject second.

    @param alignFile name of the input Align file
    @param mapFile name of the output Map file
    """
    # The input is opened first, so a missing input never truncates the output.
    # Both handles are closed by the context managers even if a line fails to parse.
    with open( alignFile, "r" ) as alignFileHandler:
        with open( mapFile, "w" ) as mapFileHandler:
            iAlign = Align()
            for line in alignFileHandler:
                iAlign.setFromString( line )
                iMapQ, iMapS = iAlign.getMapsOfQueryAndSubject()
                iMapQ.write( mapFileHandler )
                iMapS.write( mapFileHandler )

convertAlignFileIntoMapFileWithQueriesAndSubjects = staticmethod( convertAlignFileIntoMapFileWithQueriesAndSubjects )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
114
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
115
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
116 ## for each line of a given Align file, write the coordinates of the subject on the query as one line in a Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
117 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
118 # @param alignFile: name of the input Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
119 # @param mapFile: name of the output Map file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
120 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertAlignFileIntoMapFileWithSubjectsOnQueries( alignFile, mapFile ):
    """Write, for each line of an Align file, one line in a Map file.

    Each input line is parsed with Align.setFromString(); the Map object
    returned by getSubjectAsMapOfQuery() is then written to the output.

    @param alignFile name of the input Align file
    @param mapFile name of the output Map file
    """
    # The input is opened first, so a missing input never truncates the output.
    # Both handles are closed by the context managers even if a line fails to parse.
    with open( alignFile, "r" ) as alignFileHandler:
        with open( mapFile, "w" ) as mapFileHandler:
            iAlign = Align()
            for line in alignFileHandler:
                iAlign.setFromString( line )
                iMapQ = iAlign.getSubjectAsMapOfQuery()
                iMapQ.write( mapFileHandler )

convertAlignFileIntoMapFileWithSubjectsOnQueries = staticmethod( convertAlignFileIntoMapFileWithSubjectsOnQueries )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
136
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
137
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
138 ## return a list of Align instances sorted in decreasing order according to their score, then their length on the query and finally their initial order
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
139 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
140 # @param lAligns: list of Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
141 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getAlignListSortedByDecreasingScoreThenLength( lAligns ):
    """Return the Align instances sorted by decreasing score, then decreasing length on the query.

    Instances with equal score and length keep their initial relative order
    because sorted() is stable.

    The key negates the values instead of taking their reciprocal: a
    reciprocal raises ZeroDivisionError for a score or length of zero and
    puts negative scores ahead of positive ones.

    @param lAligns list of Align instances
    @return new sorted list; the input list is left untouched
    """
    return sorted( lAligns, key=lambda iAlign: ( -float(iAlign.getScore()), -float(iAlign.getLengthOnQuery()) ) )

getAlignListSortedByDecreasingScoreThenLength = staticmethod( getAlignListSortedByDecreasingScoreThenLength )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
146
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
147
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
148 ## Convert an Align file into a Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
149 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
150 # @param alignFile string name of the input Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
151 # @param pathFile string name of the output Path file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
152 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def convertAlignFileIntoPathFile( alignFile, pathFile ):
    """Convert an Align file into a Path file.

    Each input line is parsed with Align.setFromString() using a tab as
    separator. The matching output line is the 1-based rank of the input
    line, a tab, and the text returned by Align.toString().

    @param alignFile string name of the input Align file
    @param pathFile string name of the output Path file
    """
    # The input is opened first, so a missing input never truncates the output.
    # Both handles are closed by the context managers even if a line fails to parse.
    with open( alignFile, "r" ) as alignFileHandler:
        with open( pathFile, "w" ) as pathFileHandler:
            iAlign = Align()
            for countAlign, line in enumerate( alignFileHandler, 1 ):
                iAlign.setFromString( line, "\t" )
                pathFileHandler.write( "%i\t%s\n" % ( countAlign, iAlign.toString() ) )

convertAlignFileIntoPathFile = staticmethod( convertAlignFileIntoPathFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
169
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
170
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
171 ## Sort an Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
172 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def sortAlignFile( inFile, outFile="" ):
    """Sort an Align file with the external 'sort' program.

    Lines are ordered on field 1, then field 4, then numerically on
    fields 2, 3, 5, 6 and 8 (the '-k' keys below).

    On failure an error message is written to stderr and the process exits
    with the status of the failed command.

    @param inFile name of the Align file to sort
    @param outFile name of the sorted file (default: '<inFile>.sort')
    """
    # Imported locally so the block does not depend on the module import list.
    import subprocess

    if outFile == "":
        outFile = "%s.sort" % ( inFile )
    prg = "sort"
    # An argument list is used instead of a shell string, so file names
    # holding spaces or shell metacharacters are passed through verbatim.
    lCmd = [ prg,
             "-k", "1,1", "-k", "4,4",
             "-k", "2,2n", "-k", "3,3n",
             "-k", "5,5n", "-k", "6,6n", "-k", "8,8n",
             inFile ]
    try:
        with open( outFile, "w" ) as outFileHandler:
            # subprocess.call() returns the real exit code; os.system() gave
            # a wait status (e.g. 512) that sys.exit() truncated to 0.
            exitStatus = subprocess.call( lCmd, stdout=outFileHandler )
    except EnvironmentError as err:
        # Unwritable output file or 'sort' could not be launched: report it
        # through the same error path as a failed command.
        sys.stderr.write( "%s\n" % ( err ) )
        exitStatus = 1
    if exitStatus != 0:
        msg = "ERROR: '%s' returned '%i'" % ( prg, exitStatus )
        sys.stderr.write( "%s\n" % ( msg ) )
        sys.exit( exitStatus )

sortAlignFile = staticmethod( sortAlignFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
188
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
189
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
190 ## Write Align instances contained in the given list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
191 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
192 # @param lAlign a list of Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
193 # @param fileName name of the file to write the Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
194 # @param mode the open mode of the file: "w" or "a"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
195 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def writeListInFile( lAlign, fileName, mode="w" ):
    """Write the Align instances of a list into a file.

    Each instance writes itself through its write() method, which receives
    the open file handle.

    @param lAlign a list of Align instances
    @param fileName name of the file to write the Align instances
    @param mode the open mode of the file, "w" or "a"
    """
    # The context manager closes the handle even if one write() raises.
    with open( fileName, mode ) as fileHandler:
        for iAlign in lAlign:
            iAlign.write( fileHandler )

writeListInFile = staticmethod( writeListInFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
203
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
204
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
205 ## Split a list of Align instances according to the name of the query
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
206 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
207 # @param lInAlign list of align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
208 # @return lOutAlignList list of align instances lists
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
209 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def splitAlignListByQueryName( lInAlign ):
    """Group Align instances by the name of their query.

    The input is first sorted on 'range_query.seqname'; runs of instances
    sharing the same name then become one sub-list each.

    @param lInAlign list of align instances
    @return lOutAlignList list of align instances lists, ordered by query name
    """
    lOutAlignList = []
    for iAlign in sorted( lInAlign, key=lambda o: o.range_query.seqname ):
        queryName = iAlign.range_query.seqname
        # Open a new bucket for the first element and whenever the name changes.
        if not lOutAlignList or lOutAlignList[-1][0].range_query.seqname != queryName:
            lOutAlignList.append( [] )
        lOutAlignList[-1].append( iAlign )
    return lOutAlignList

splitAlignListByQueryName = staticmethod( splitAlignListByQueryName )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
229
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
230
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
231 ## Create an Align file from each list of Align instances in the input list
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
232 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
233 # @param lAlignList list of lists with Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
234 # @param pattern string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
235 # @param dirName string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
236 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def createAlignFiles( lAlignList, pattern, dirName="" ):
    """Create one Align file per list of Align instances.

    Files are named '<pattern>_<rank>.align', where the rank starts at 1
    and is zero-padded to the number of digits of the total file count.
    They are written through AlignUtils.writeListInFile().

    @param lAlignList list of lists with Align instances
    @param pattern string prefix of the file names
    @param dirName string directory receiving the files, created if needed
                   (default: current directory)
    """
    nbDigits = len( str( len( lAlignList ) ) )
    if dirName != "":
        try:
            os.makedirs( dirName )
        except OSError:
            # An existing directory is fine; any other failure is re-raised
            # instead of being silently swallowed.
            if not os.path.isdir( dirName ):
                raise

    # Paths are built explicitly rather than through os.chdir(), so the
    # working directory of the process is never altered, even on failure.
    for countFile, lAlign in enumerate( lAlignList, 1 ):
        fileName = "%s_%s.align" % ( pattern, str( countFile ).zfill( nbDigits ) )
        AlignUtils.writeListInFile( lAlign, os.path.join( dirName, fileName ) )

createAlignFiles = staticmethod( createAlignFiles )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
255
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
256
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
257 ## Return a list with Align instances sorted by query name, subject name, query start, query end and score
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
258 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def sortList( lAligns ):
    """Return a new list of the Align instances in ascending order.

    Instances are compared on query name, subject name, query start,
    query end and score, in that order of precedence.

    @param lAligns list of Align instances
    @return sorted copy of the list
    """
    def _sortKey( iAlign ):
        return ( iAlign.getQueryName(),
                 iAlign.getSubjectName(),
                 iAlign.getQueryStart(),
                 iAlign.getQueryEnd(),
                 iAlign.getScore() )

    lSorted = list( lAligns )
    lSorted.sort( key=_sortKey )
    return lSorted

sortList = staticmethod( sortList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
267
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
268
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
269 ## Return a list after merging all overlapping Align instances
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
270 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergeList( lAligns ):
    """Return a list after merging all overlapping Align instances.

    The instances are first ordered with AlignUtils.sortList(). Each one
    is then merged with every later instance it overlaps, and finally
    either folded into an already kept instance it overlaps or appended
    to the result.

    NOTE(review): merge() presumably updates the receiver in place; if so,
    the instances passed in are modified by this call - confirm in Align.

    @param lAligns list of Align instances
    @return list of merged Align instances
    """
    lMerged = []

    lSorted = AlignUtils.sortList( lAligns )

    # prev_count is the rank of iAlign in lSorted.
    prev_count = 0
    for iAlign in lSorted:
        # NOTE(review): prev_count is incremented once per iteration from 0,
        # so it never reaches len(lSorted) here and this guard always holds.
        if prev_count != len(lSorted):
            # Absorb every instance located after iAlign that overlaps it.
            for i in lSorted[ prev_count + 1: ]:
                if iAlign.isOverlapping( i ):
                    iAlign.merge( i )
            # Fold iAlign into each kept instance that overlaps it.
            IsAlreadyInList = False
            for newAlign in lMerged:
                if newAlign.isOverlapping( iAlign ):
                    IsAlreadyInList = True
                    newAlign.merge( iAlign )
                    # NOTE(review): stores newAlign at the position of the first
                    # element equal to it; a no-op unless Align equality matches
                    # an earlier element - verify against Align.__eq__.
                    lMerged [ lMerged.index( newAlign ) ] = newAlign
            # No kept instance overlaps iAlign: keep it as a new entry.
            if not IsAlreadyInList:
                lMerged.append( iAlign )
            prev_count += 1

    return lMerged

mergeList = staticmethod( mergeList )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
295
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
296
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
297 ## Merge all Align instances in a given Align file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
298 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergeFile( inFile, outFile="" ):
    """
    Merge the Align instances of a given Align file, pair by pair.

    The input is first passed through AlignUtils.sortAlignFile() into the
    temporary file '<inFile>.sorted'. Consecutive lines of that file sharing
    the same (query name, subject name) pair are gathered into a list, merged
    with AlignUtils.mergeList() and appended to the output file.

    @param inFile: name of the input Align file
    @type inFile: string

    @param outFile: name of the output file (default='<inFile>.merged');
                    a pre-existing file with that name is removed first
    @type outFile: string

    @note the output file is only created if at least one line is read
    @note the temporary sorted file is closed and removed even when parsing,
          merging or writing raises an exception
    """
    if outFile == "":
        outFile = "%s.merged" % ( inFile )
    # results are written in append mode, hence start from a clean slate
    if os.path.exists( outFile ):
        os.remove( outFile )

    tmpFile = "%s.sorted" % ( inFile )
    AlignUtils.sortAlignFile( inFile, tmpFile )

    try:
        tmpF = open( tmpFile, "r" )
        try:
            # A tuple key cannot be confused between e.g. ("a_b", "c") and
            # ("a", "b_c"), unlike a "query_subject" string.
            prevPairQrySbj = None
            lAligns = []
            for line in tmpF:
                iAlign = Align()
                iAlign.setFromString( line )
                pairQrySbj = ( iAlign.getQueryName(), iAlign.getSubjectName() )
                if pairQrySbj != prevPairQrySbj:
                    # a new pair starts: flush the alignments of the previous one
                    if lAligns:
                        lMerged = AlignUtils.mergeList( lAligns )
                        AlignUtils.writeListInFile( lMerged, outFile, "a" )
                    prevPairQrySbj = pairQrySbj
                    lAligns = []
                lAligns.append( iAlign )
            # flush the last pair (nothing to do if the file was empty)
            if lAligns:
                lMerged = AlignUtils.mergeList( lAligns )
                AlignUtils.writeListInFile( lMerged, outFile, "a" )
        finally:
            tmpF.close()
    finally:
        # never leave the temporary sorted file behind
        if os.path.exists( tmpFile ):
            os.remove( tmpFile )

mergeFile = staticmethod( mergeFile )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
336
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
337
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
338 ## Update the scores of each match in the input file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
339 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
340 # @note the new score is the length on the query times the percentage of identity
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
341 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def updateScoresInFile( inFile, outFile ):
    """
    Rewrite an Align file with the score of each match updated.

    Each line of the input is loaded into an Align instance (fields separated
    by tabulations), its score is recomputed by Align.updateScore() and the
    instance is written to the output file.

    @param inFile: name of the input Align file
    @type inFile: string

    @param outFile: name of the output file (overwritten if it exists)
    @type outFile: string

    @note both file handles are closed even if opening the output file or
          processing a line raises an exception
    """
    inHandler = open( inFile, "r" )
    try:
        outHandler = open( outFile, "w" )
        try:
            # a single instance is recycled for every line, hence the reset()
            iAlign = Align()
            for line in inHandler:
                iAlign.reset()
                iAlign.setFromString( line, "\t" )
                iAlign.updateScore()
                iAlign.write( outHandler )
        finally:
            outHandler.close()
    finally:
        inHandler.close()

updateScoresInFile = staticmethod( updateScoresInFile )