annotate commons/tools/ChangeSequenceHeaders.py @ 19:9bcfa7936eec

Deleted selected files
author m-zytnicki
date Mon, 29 Apr 2013 03:23:29 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 # Copyright INRA (Institut National de la Recherche Agronomique)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 # http://www.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # http://urgi.versailles.inra.fr
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # This software is governed by the CeCILL license under French law and
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # abiding by the rules of distribution of free software. You can use,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # modify and/ or redistribute the software under the terms of the CeCILL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # license as circulated by CEA, CNRS and INRIA at the following URL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # "http://www.cecill.info".
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # As a counterpart to the access to the source code and rights to copy,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # modify and redistribute granted by the license, users are provided only
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15 # with a limited warranty and the software's author, the holder of the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16 # economic rights, and the successive licensors have only limited
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 # liability.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19 # In this respect, the user's attention is drawn to the risks associated
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 # with loading, using, modifying and/or developing or reproducing the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21 # software by the user in light of its specific status of free software,
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22 # that may mean that it is complicated to manipulate, and that also
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 # therefore means that it is reserved for developers and experienced
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 # professionals having in-depth computer knowledge. Users are therefore
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
25 # encouraged to load and test the software's suitability as regards their
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
26 # requirements in conditions enabling the security of their systems and/or
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
27 # data to be ensured and, more generally, to use and operate it in the
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
28 # same conditions as regards security.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
29 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
30 # The fact that you are presently reading this means that you have had
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
31 # knowledge of the CeCILL license and that you accept its terms.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
34 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
35 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
36 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37 from commons.core.coord.Align import Align
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38 from commons.core.coord.Path import Path
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 from commons.core.coord.Match import Match
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
41
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
42
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
43 class ChangeSequenceHeaders( object ):
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
44
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def __init__(self, name="ChangeSequenceHeaders", inFile="", format="", step=0, prefix="seq", outFile="", linkFile="", whichCluster="", replace_query=True, replace_subject=True, verbosity=0):
    """Store the program settings on the instance.

    name: program name shown in the usage message.
    inFile, outFile, linkFile: paths of the input, output and link files.
    format: format of the input file.
    step: processing step (the usage message describes 1 as shortening the
        headers and 2 as retrieving the initial ones).
    prefix: prefix of the generated headers.
    whichCluster: header formatting type ("A", "B" or "" for no change).
    replace_query, replace_subject: flags read by the axt processing to
        decide which names are rewritten.
    verbosity: verbosity level.
    """
    # Identification and reporting.
    self._name, self._verbose = name, verbosity
    # Files involved.
    self._inFile, self._outFile, self._linkFile = inFile, outFile, linkFile
    # How the headers are processed.
    self._format, self._step, self._prefix = format, step, prefix
    self._whichCluster = whichCluster
    # Which side of an alignment gets renamed.
    self.replace_query, self.replace_subject = replace_query, replace_subject
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
57
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
58
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help(self):
    """Print the command-line usage message on standard output.

    The message starts and ends with an empty line. It lists every option
    parsed by setAttributesFromCmdLine(), including '-v', and every input
    format accepted by checkAttributes().
    """
    usage = [
        "",
        "usage: %s.py [ options ]" % (self._name),
        "options:",
        " -h: this help",
        " -i: name of the input file",
        " -f: format of the input file (fasta/newick/align/path/tab/axt/lastz/psl/chain)",
        " -s: step (1: shorten headers / 2: retrieve initial headers)",
        " -p: prefix of new headers (with '-s 1', default='seq')",
        " -l: name of the 'link' file (with '-s 2', format=map)",
        " -w: header formatting type (A: after LTRharvest, B: for ClusterConsensus, not specified: no change)",
        " -o: name of the output file (default=inFile+'.newH'/'.initH')",
        " -v: verbosity level (default=0)",
        "",
    ]
    # A single parenthesised argument prints the same text whether 'print'
    # is a statement (Python 2) or a function (Python 3).
    for text in usage:
        print(text)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
72
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
73
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setAttributesFromCmdLine(self):
    """Read the command-line options and store them through the setters.

    An option getopt rejects is reported on standard error, followed by the
    usage, and the program exits with status 1. '-h' prints the usage and
    exits with status 0.
    """
    try:
        parsed, _ = getopt.getopt(sys.argv[1:], "hi:f:s:p:l:w:o:v:")
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % (str(err)))
        self.help()
        sys.exit(1)

    # One setter per value-carrying option.
    setters = {
        "-i": self.setInputFile,
        "-f": self.setFormat,
        "-s": self.setStep,
        "-p": self.setPrefix,
        "-l": self.setLinkFile,
        "-w": self.setWhichcluster,
        "-o": self.setOutputFile,
        "-v": self.setVerbosityLevel,
    }
    for flag, value in parsed:
        if flag == "-h":
            self.help()
            sys.exit(0)
        if flag in setters:
            setters[flag](value)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
99
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
100
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setInputFile(self, inFile):
    """Set the path of the input file."""
    self._inFile = inFile
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
103
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setFormat(self, format):
    """Set the format of the input file."""
    self._format = format
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
106
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setPrefix(self, prefix):
    """Set the prefix used to build the new headers."""
    self._prefix = prefix
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
109
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setStep(self, step):
    """Set the processing step; the value is converted with int()."""
    self._step = int(step)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
112
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setLinkFile(self, linkFile):
    """Set the path of the link file (new header -> initial header)."""
    self._linkFile = linkFile
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
115
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setWhichcluster(self, whichCluster):
    """Set the header formatting type given with the '-w' option."""
    self._whichCluster = whichCluster
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
118
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setOutputFile(self, outFile):
    """Set the path of the output file."""
    self._outFile = outFile
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
121
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def setVerbosityLevel(self, verbose):
    """Set the verbosity level; the value is converted with int()."""
    self._verbose = int(verbose)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
124
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
125
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def checkAttributes(self):
    """Validate the settings and fill in the default output file name.

    At the first invalid setting, an error message is written to standard
    error, the usage is printed and the program exits with status 1.
    When no output file was given, it defaults to the input file name
    followed by '.newH' (step 1) or '.initH' (step 2).
    """
    def abort(message):
        # Report the problem, show the usage and stop.
        sys.stderr.write(message)
        self.help()
        sys.exit(1)

    if self._inFile == "":
        abort("ERROR: missing input file name (-i)\n")
    if not os.path.exists(self._inFile):
        abort("ERROR: input file doesn't exist (-i)\n")
    if self._format not in ["fasta", "newick", "align", "path", "tab", "axt", "lastz", "psl", "chain"]:
        # The offending value is interpolated; without it the message
        # would show a literal '%s'.
        abort("ERROR: unrecognized format '%s' (-f)\n" % (self._format,))
    if self._step not in [1, 2]:
        abort("ERROR: missing step (-s)\n")
    if self._step == 1 and self._prefix == "":
        abort("ERROR: missing prefix (-p)\n")
    if self._step == 2:
        # Retrieving the initial headers needs an existing link file.
        if self._linkFile == "":
            abort("ERROR: missing link file name (-l)\n")
        if not os.path.exists(self._linkFile):
            abort("ERROR: link file doesn't exist (-l)\n")
    if self._whichCluster not in ["A", "B", ""]:
        abort("ERROR: formatting type not available (-w option): %s\n" % self._whichCluster)
    if self._outFile == "":
        if self._step == 1:
            self._outFile = "%s.newH" % (self._inFile)
        elif self._step == 2:
            self._outFile = "%s.initH" % (self._inFile)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
157
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
158
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def shortenSequenceHeadersForFastaFile(self):
    """Replace every fasta header by '<prefix><rank>' and record the mapping.

    The sequences are copied to the output file under their new headers.
    The link file receives one line per sequence, with four tab-separated
    fields: new header, initial header, 1 and the sequence length.

    Every sequence gets its own complete link line, including a sequence
    of length 0, and an input without any header produces an empty link
    file. Trailing '\\r' and '\\n' are removed from the recorded header and
    are not counted in the sequence length.
    """
    if self._verbose > 0:
        print("shorten sequence headers for fasta file...")
        sys.stdout.flush()
    if self._verbose > 1:
        print("save sequences in '%s'" % (self._outFile))
    countSeq = 0
    lengthSeq = 0
    # The context managers close the three files even if an error occurs.
    with open(self._inFile, "r") as inFileHandler, \
            open(self._linkFile, "w") as linkFileHandler, \
            open(self._outFile, "w") as outFileHandler:
        for line in inFileHandler:
            if line.startswith(">"):
                # Terminate the link line of the previous sequence, even
                # when that sequence is empty.
                if countSeq > 0:
                    linkFileHandler.write("\t%i\t%i\n" % (1, lengthSeq))
                countSeq += 1
                lengthSeq = 0
                initHeader = line[1:].rstrip("\r\n")
                newHeader = "%s%i" % (self._prefix, countSeq)
                if self._verbose > 1:
                    print("initial '%s' -> new '%s'" % (initHeader, newHeader))
                outFileHandler.write(">%s\n" % (newHeader))
                linkFileHandler.write("%s\t%s" % (newHeader, initHeader))
            else:
                lengthSeq += len(line.rstrip("\r\n"))
                outFileHandler.write(line)
        # Terminate the link line of the last sequence, if there is one.
        if countSeq > 0:
            linkFileHandler.write("\t%i\t%i\n" % (1, lengthSeq))
    if self._verbose > 0:
        print("nb of sequences: %i" % (countSeq))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
193
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
194
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def getLinksNewToInitialHeaders(self):
    """Load the link file into a dictionary {new header: initial header}.

    A line must hold either four tab-separated fields, or two (in which
    case the end of line is removed from the second one). Any other line
    is reported on standard error and the program exits with status 1.
    """
    if self._verbose > 0:
        print("retrieve the links new->initial headers")
        sys.stdout.flush()
    links = {}
    source = open(self._linkFile, "r")
    for record in source:
        fields = record.split("\t")
        nbFields = len(fields)
        if nbFields == 4:
            links[fields[0]] = fields[1]
        elif nbFields == 2:
            links[fields[0]] = fields[1].split("\n")[0]
        else:
            sys.stderr.write("ERROR: link file is badly formatted\n")
            sys.exit(1)
    source.close()
    if self._verbose > 0:
        print("nb of links: %i" % (len(links)))
        sys.stdout.flush()
    return links
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
216
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
217
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForFastaFile(self, dNew2Init):
    """Copy a fasta file, putting back the initial sequence headers.

    dNew2Init maps each shortened header to its initial header.

    With no formatting type (''), the whole header is looked up in
    dNew2Init. Otherwise the header is split on '_' and its second field
    is looked up:
      - type 'A' writes '<first field>_<initial header>';
      - type 'B' writes '<classif>_<pattern><clusterId>_<consensusName>',
        where classif is the text before the first '_' of the initial
        header, consensusName is the rest of it, pattern is 'Blc' or 'MCL'
        depending on the first field, and clusterId is the text between
        'Cluster' and 'Mb' in the first field.

    Raises KeyError when a header is not a key of dNew2Init. The number of
    headers processed is reported when the verbosity is above 0.
    """
    if self._verbose > 0:
        print("retrieve initial headers for fasta file")
        sys.stdout.flush()
    countSeq = 0
    # The context managers close both files even if a lookup fails.
    with open(self._inFile, "r") as inFileHandler, \
            open(self._outFile, "w") as outFileHandler:
        for line in inFileHandler:
            if not line.startswith(">"):
                outFileHandler.write(line)
                continue
            countSeq += 1
            # Strip the end of line only, so a last header without a
            # newline keeps its final character.
            shortHeader = line[1:].rstrip("\r\n")
            if self._whichCluster == "":
                restored = dNew2Init[shortHeader]
            else:
                tokens = shortHeader.split("_")
                initHeader = dNew2Init[tokens[1]]

                pattern = ""
                if "BlastclustCluster" in tokens[0]:
                    pattern = "Blc"
                if "MCLCluster" in tokens[0]:
                    pattern = "MCL"

                if self._whichCluster == "A":
                    restored = "%s_%s" % (tokens[0], initHeader)
                elif self._whichCluster == "B":
                    classif = initHeader.split("_")[0]
                    consensusName = "_".join(initHeader.split("_")[1:])
                    clusterId = tokens[0].split("Cluster")[1].split("Mb")[0]
                    restored = "%s_%s%s_%s" % (classif, pattern, clusterId, consensusName)

            outFileHandler.write(">%s\n" % restored)
    if self._verbose > 0:
        print("nb of sequences: %i" % (countSeq))
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
258
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
259
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForNewickFile(self, dNew2Init):
    """Copy a newick file, putting back the initial sequence names.

    dNew2Init maps each shortened name to its initial header. Every
    occurrence of '<shortened name>:' in the tree is replaced by the
    initial header followed by ':'. In the inserted header, spaces become
    '_' and '::', ':' and ',' become '-'.

    All names are substituted in one pass over the tree, so text inserted
    for one name is never rewritten by the substitution of another name.
    """
    import re  # local import: only this method needs it

    with open(self._inFile, "r") as inF:
        tree = inF.read()

    if dNew2Init:
        # Initial headers made safe for the newick syntax.
        safeNames = {}
        for newH in dNew2Init:
            safeNames[newH] = dNew2Init[newH].replace(" ", "_").replace("::", "-").replace(":", "-").replace(",", "-")
        # Longest names first, so that the alternation never stops on a
        # shorter name when a longer one matches at the same position.
        alternatives = "|".join(
            re.escape(newH) for newH in sorted(dNew2Init, key=len, reverse=True)
        )
        pattern = re.compile("(%s):" % alternatives)
        tree = pattern.sub(lambda found: safeNames[found.group(1)] + ":", tree)

    with open(self._outFile, "w") as outF:
        outF.write(tree)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
270
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
271
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForAlignFile(self, dNew2Init):
    """Copy an 'align' file, putting back the initial sequence names.

    Each line is split on tabs and loaded into an Align instance. Its
    query name, then its subject name, is replaced when it is a key of
    dNew2Init; other names are kept. The record is then written to the
    output file.
    """
    source = open(self._inFile, "r")
    target = open(self._outFile, "w")
    record = Align()
    for row in source:
        record.setFromTuple(row.split("\t"))
        # Query first, then subject, each looked up independently.
        for side in ("range_query", "range_subject"):
            coords = getattr(record, side)
            shortName = coords.seqname
            if shortName in dNew2Init:
                coords.seqname = dNew2Init[shortName]
        record.write(target)
    source.close()
    target.close()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
290
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
291
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForPathFile(self, dNew2Init):
    """Copy a 'path' file, putting back the initial sequence names.

    Each line is split on tabs and loaded into a Path instance. Its query
    name, then its subject name, is replaced when it is a key of
    dNew2Init; other names are kept. The record is then written to the
    output file.
    """
    source = open(self._inFile, "r")
    target = open(self._outFile, "w")
    record = Path()
    for row in source:
        record.setFromTuple(row.split("\t"))
        # Query first, then subject, each looked up independently.
        for side in ("range_query", "range_subject"):
            coords = getattr(record, side)
            shortName = coords.seqname
            if shortName in dNew2Init:
                coords.seqname = dNew2Init[shortName]
        record.write(target)
    source.close()
    target.close()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
310
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
311
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForMatchFile(self, dNew2Init):
    """Copy a 'tab' (match) file, putting back the initial sequence names.

    Lines starting with 'query.name' are skipped and not copied. Every
    other line is split on tabs and loaded into a Match instance. Its
    query name, then its subject name, is replaced when it is a key of
    dNew2Init; other names are kept. The record is then written to the
    output file.
    """
    source = open(self._inFile, "r")
    target = open(self._outFile, "w")
    record = Match()
    for row in source:
        if row.startswith("query.name"):
            continue
        record.setFromTuple(row.split("\t"))
        # Query first, then subject, each looked up independently.
        for side in ("range_query", "range_subject"):
            coords = getattr(record, side)
            shortName = coords.seqname
            if shortName in dNew2Init:
                coords.seqname = dNew2Init[shortName]
        record.write(target)
    source.close()
    target.close()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
332
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForAxtFile( self, dNew2Init):
    """Restore the initial sequence names in the summary lines of an AXT file.

    Reads self._inFile and writes self._outFile.  Lines that do not contain
    the substring "seq" are copied verbatim.  The other lines are split on
    single spaces: field 1 is handled as the subject name and field 4 as the
    query name.  A name is replaced by its entry in dNew2Init only when the
    matching flag (self.replace_subject / self.replace_query) is true and the
    name is a key of the dictionary.  Only the first nine fields of such a
    line are written back, joined by single spaces.

    dNew2Init -- dictionary mapping a shortened header to its initial header
    """
    with open( self._inFile, "r" ) as inFileHandler:
        with open( self._outFile, "w" ) as outFileHandler:
            for line in inFileHandler:
                if "seq" not in line:
                    outFileHandler.write( line )
                    continue

                # Remove the line terminator before splitting; otherwise the
                # last field keeps its "\n" and the rewritten line ends up
                # followed by a spurious empty line.
                elems = line.rstrip( "\r\n" ).split( " " )
                if len( elems ) < 9:
                    # Too few fields to be rewritten: keep the line as it is
                    # instead of silently dropping it from the output.
                    outFileHandler.write( line )
                    continue

                subject_seqname = elems[1]
                if self.replace_subject and subject_seqname in dNew2Init:
                    subject_seqname = dNew2Init[ subject_seqname ]
                subject_seqname = subject_seqname.strip('\n').strip('\r')

                query_seqname = elems[4]
                if self.replace_query and query_seqname in dNew2Init:
                    query_seqname = dNew2Init[ query_seqname ]
                query_seqname = query_seqname.strip('\n').strip('\r')

                modedelems = elems[0:1] + [ subject_seqname ] + elems[2:4] + [ query_seqname ] + elems[5:9]
                newLine = " ".join( modedelems )
                outFileHandler.write( "%s\n" % newLine )
                if self._verbose > 0:
                    print("query", query_seqname, "subject", subject_seqname)
                    print("Output axt_line : line %s " % newLine)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
370
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForPslFile( self, dNew2Init):
    """Restore the initial sequence names in the alignment lines of a PSL file.

    Reads self._inFile and writes self._outFile.  Lines that do not contain
    the substring "seq" are copied verbatim.  The other lines are split on
    any whitespace: field 9 is handled as the query name and field 13 as the
    subject name.  A name is replaced by its entry in dNew2Init only when the
    matching flag (self.replace_query / self.replace_subject) is true and the
    name is a key of the dictionary.  At most the first 21 fields are written
    back, joined by tabulations.

    dNew2Init -- dictionary mapping a shortened header to its initial header
    """
    with open( self._inFile, "r" ) as inFileHandler:
        with open( self._outFile, "w" ) as outFileHandler:
            for line in inFileHandler:
                if "seq" not in line:
                    outFileHandler.write( line )
                    continue

                elems = line.split()
                if len( elems ) < 14:
                    # Both name columns must exist; otherwise keep the line
                    # as it is instead of silently dropping it.
                    outFileHandler.write( line )
                    continue

                subject_seqname = elems[13]
                if self.replace_subject and subject_seqname in dNew2Init:
                    subject_seqname = dNew2Init[ subject_seqname ]
                subject_seqname = subject_seqname.strip('\n').strip('\r')

                query_seqname = elems[9]
                if self.replace_query and query_seqname in dNew2Init:
                    query_seqname = dNew2Init[ query_seqname ]
                query_seqname = query_seqname.strip('\n').strip('\r')

                modedelems = elems[0:9] + [ query_seqname ] + elems[10:13] + [ subject_seqname ] + elems[14:21]
                newLine = "\t".join( modedelems )
                outFileHandler.write( "%s\n" % newLine )
                if self._verbose > 0:
                    print("query", query_seqname, "subject", subject_seqname)
                    print("Output psl_line : line %s " % newLine)
            # Push the verbose messages out before the files are closed.
            sys.stdout.flush()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
412
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
413
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForLastZFile( self, dNew2Init):
    """Restore the initial sequence names in a tabulated LASTZ output file.

    Reads self._inFile and writes self._outFile.  Lines that do not contain
    the substring "seq" are copied verbatim.  The other lines are split on
    tabulations: column 1 is handled as the subject name and column 6 as the
    query name.  A name is replaced by its entry in dNew2Init only when the
    matching flag (self.replace_subject / self.replace_query) is true and the
    name is a key of the dictionary.  Exactly the first fifteen columns are
    written back, joined by tabulations.

    Column layout noted by the original author:
    score, name1, strand1, size1, zstart1, end1, name2, strand2, size2,
    zstart2, end2, identity, coverage
    NOTE(review): that list has 13 names while 15 columns are read;
    presumably identity and coverage each span two columns -- to be confirmed.

    dNew2Init -- dictionary mapping a shortened header to its initial header
    """
    with open( self._inFile, "r" ) as inFileHandler:
        with open( self._outFile, "w" ) as outFileHandler:
            for line in inFileHandler:
                if "seq" not in line:
                    outFileHandler.write( line )
                    continue

                elems = line.split( "\t" )
                if len( elems ) < 15:
                    # Fifteen columns are needed to rebuild the line; keep a
                    # shorter line as it is instead of silently dropping it.
                    outFileHandler.write( line )
                    continue

                subject_seqname = elems[1]
                if self.replace_subject and subject_seqname in dNew2Init:
                    subject_seqname = dNew2Init[ subject_seqname ]
                subject_seqname = subject_seqname.strip('\n').strip('\r')

                query_seqname = elems[6]
                if self.replace_query and query_seqname in dNew2Init:
                    query_seqname = dNew2Init[ query_seqname ]
                query_seqname = query_seqname.strip('\n').strip('\r')

                # The fifteenth column may still carry the line terminator.
                lastElem = elems[14].strip('\n').strip('\r')
                modedelems = elems[0:1] + [ subject_seqname ] + elems[2:6] + [ query_seqname ] + elems[7:14] + [ lastElem ]
                newLine = "\t".join( modedelems )
                outFileHandler.write( "%s\n" % newLine )
                if self._verbose > 0:
                    print("query", query_seqname, "subject", subject_seqname)
                    print("Output lastz_line : line %s " % newLine)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
452
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def retrieveInitialSequenceHeadersForChainFile( self, dNew2Init):
    """Restore the initial sequence names in the header lines of a chain file.

    Header layout noted by the original author:
    chain score tName tSize tStrand tStart tEnd qName qSize qStrand qStart qEnd id

    Reads self._inFile and writes self._outFile.  Lines that do not contain
    the substring "seq" are copied verbatim.  The other lines are split on
    single spaces: field 2 is handled as the subject name and field 7 as the
    query name.  A name is replaced by its entry in dNew2Init only when the
    matching flag (self.replace_subject / self.replace_query) is true and the
    name is a key of the dictionary.  All fields are written back, joined by
    single spaces.

    dNew2Init -- dictionary mapping a shortened header to its initial header
    """
    with open( self._inFile, "r" ) as inFileHandler:
        with open( self._outFile, "w" ) as outFileHandler:
            for line in inFileHandler:
                if "seq" not in line:
                    outFileHandler.write( line )
                    continue

                # Remove the line terminator before splitting; otherwise the
                # last field keeps its "\n" and the rewritten header ends up
                # followed by a spurious empty line.
                elems = line.rstrip( "\r\n" ).split( " " )
                if len( elems ) < 8:
                    # Both name fields must exist; otherwise keep the line
                    # as it is instead of silently dropping it.
                    outFileHandler.write( line )
                    continue

                subject_seqname = elems[2]
                if self.replace_subject and subject_seqname in dNew2Init:
                    subject_seqname = dNew2Init[ subject_seqname ]
                subject_seqname = subject_seqname.strip('\n').strip('\r')

                query_seqname = elems[7]
                if self.replace_query and query_seqname in dNew2Init:
                    query_seqname = dNew2Init[ query_seqname ]
                query_seqname = query_seqname.strip('\n').strip('\r')

                modedelems = elems[:]
                modedelems[2] = subject_seqname
                modedelems[7] = query_seqname
                newLine = " ".join( modedelems )
                outFileHandler.write( "%s\n" % newLine )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
490
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
491
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def run( self ):
    """Check the attributes, then execute the step selected by self._step.

    Step 1: give the link file its default name ("<input file>.newHlink")
    when none was set, then shorten the headers if the format is "fasta".
    Step 2: load the links from new to initial headers and hand them to the
    restoring routine that matches self._format; a format without a routine
    results in no further action.
    """
    self.checkAttributes()

    if self._step == 1:
        if self._linkFile == "":
            self._linkFile = "%s.newHlink" % ( self._inFile )
        if self._format == "fasta":
            self.shortenSequenceHeadersForFastaFile()

    if self._step == 2:
        # Routines are referenced by name so that only the selected one is
        # looked up on the instance.
        restorerNameByFormat = {
            "fasta": "retrieveInitialSequenceHeadersForFastaFile",
            "newick": "retrieveInitialSequenceHeadersForNewickFile",
            "align": "retrieveInitialSequenceHeadersForAlignFile",
            "path": "retrieveInitialSequenceHeadersForPathFile",
            "axt": "retrieveInitialSequenceHeadersForAxtFile",
            "psl": "retrieveInitialSequenceHeadersForPslFile",
            "lastz": "retrieveInitialSequenceHeadersForLastZFile",
            "chain": "retrieveInitialSequenceHeadersForChainFile",
            "tab": "retrieveInitialSequenceHeadersForMatchFile",
            "match": "retrieveInitialSequenceHeadersForMatchFile",
        }
        dNew2Init = self.getLinksNewToInitialHeaders()
        restorerName = restorerNameByFormat.get( self._format )
        if restorerName is not None:
            getattr( self, restorerName )( dNew2Init )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
519
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
520
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Script entry point: build the program object, configure it from the
# command-line arguments and launch it.
if __name__ == "__main__":
    headerChanger = ChangeSequenceHeaders()
    headerChanger.setAttributesFromCmdLine()
    headerChanger.run()