Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/SpliceTEsFromGenome.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys | |
4 import os | |
5 import getopt | |
6 | |
7 from commons.core.sql.DbMySql import DbMySql | |
8 from commons.core.seq.FastaUtils import FastaUtils | |
9 from commons.core.coord.MapUtils import MapUtils | |
10 from commons.core.coord.AlignUtils import AlignUtils | |
11 from commons.core.coord.PathUtils import PathUtils | |
12 | |
13 | |
class SpliceTEsFromGenome( object ):
    """Command-line tool that splices TE copies out of a genome.

    TE coordinates are read from a file or from a MySQL table ('map',
    'align' or 'path' format), converted to a 'map' file if needed, merged
    with MapUtils.mergeCoordsInFile, and finally handed to
    FastaUtils.spliceFromCoords together with the genome fasta file.

    The code is written so that it runs unchanged on Python 2.6+ and 3.
    """

    def __init__( self ):
        """Initialise every option to its 'unset' value."""
        self._inputData = ""
        self._formatData = ""
        self._genomeFile = ""
        self._configFile = ""
        self._outFile = ""
        self._verbose = 0
        # Stays None unless the input data turns out to be a table.
        self._db = None


    def help( self ):
        """Print the command-line usage on standard output."""
        usage = [
            "",
            "usage: SpliceTEsFromGenome.py [ options ]",
            "options:",
            " -h: this help",
            " -i: input TE coordinates (can be file or table)",
            " TEs as subjects if align or path format",
            " -f: format of the data (map/align/path)",
            " -g: genome file (format=fasta)",
            " -C: configuration file (if table as input)",
            " -o: output fasta file (default=genomeFile+'.splice')",
            " -v: verbosity level (default=0/1)",
            "",
        ]
        # A single call with one string argument behaves the same with the
        # Python 2 print statement and the Python 3 print function.
        print( "\n".join( usage ) )


    def _exitWithError( self, msg, showHelp=True ):
        """Write 'msg' on stderr, optionally print the usage, and exit with status 1."""
        sys.stderr.write( "%s\n" % msg )
        if showHelp:
            self.help()
        sys.exit( 1 )


    def setAttributesFromCmdLine( self ):
        """Fill the attributes from sys.argv.

        Exits with status 0 after '-h', and with status 1 (after printing
        the usage) on an unknown option or a non-integer '-v' value.
        """
        try:
            opts, _ = getopt.getopt( sys.argv[1:], "hi:f:g:C:o:v:" )
        except getopt.GetoptError as err:
            self._exitWithError( str( err ) )
        for opt, value in opts:
            if opt == "-h":
                self.help()
                sys.exit( 0 )
            elif opt == "-i":
                self._inputData = value
            elif opt == "-f":
                self._formatData = value
            elif opt == "-g":
                self._genomeFile = value
            elif opt == "-C":
                self._configFile = value
            elif opt == "-o":
                self._outFile = value
            elif opt == "-v":
                try:
                    self._verbose = int( value )
                except ValueError:
                    # Report a usage error instead of a raw traceback.
                    self._exitWithError( "ERROR: verbosity level (-v) should be an integer, not '%s'" % ( value ) )


    def checkAttributes( self ):
        """Validate the attributes; exit with status 1 on the first problem.

        When the input data is not an existing file it is looked up as a
        table: the database connection is then opened and kept in self._db.
        When no output file was given, it defaults to '<genomeFile>.splice'.
        """
        if self._inputData == "":
            self._exitWithError( "ERROR: missing input data (-i)" )
        if not os.path.exists( self._inputData ):
            # Not a file, so it has to be a table, which requires a config file.
            if not os.path.exists( self._configFile ):
                self._exitWithError( "ERROR: neither input file '%s' nor configuration file '%s'" % ( self._inputData, self._configFile ) )
            self._db = DbMySql( cfgFileName=self._configFile )
            if not self._db.doesTableExist( self._inputData ):
                self._exitWithError( "ERROR: can't find table '%s'" % ( self._inputData ) )
        if self._formatData == "":
            self._exitWithError( "ERROR: need to precise format (-f)" )
        if self._formatData not in [ "map", "align", "path" ]:
            self._exitWithError( "ERROR: format '%s' not yet supported" % ( self._formatData ) )
        if self._genomeFile == "":
            self._exitWithError( "ERROR: missing genome file (-g)" )
        if not os.path.exists( self._genomeFile ):
            self._exitWithError( "ERROR: can't find genome file '%s'" % ( self._genomeFile ) )
        if self._outFile == "":
            self._outFile = "%s.splice" % ( self._genomeFile )
        if self._verbose > 0:
            print( "output fasta file: %s" % self._outFile )


    def getCoordsAsMapFile( self ):
        """Return the name of a 'map' file holding the TE coordinates.

        If the input is a table, it is first exported with
        srptExportTable.py into '<table>.<format>', which becomes the new
        input data. 'align' and 'path' inputs are converted into
        '<input>.map'; a 'map' input is returned as is.
        """
        if self._verbose > 0:
            print( "get TE coordinates as 'Map' file" )
            sys.stdout.flush()
        if self._db is not None:
            # Imported here because the module-level imports do not provide it.
            import subprocess
            exportFile = "%s.%s" % ( self._inputData, self._formatData )
            # An argument list (no shell) keeps table and file names containing
            # spaces or shell metacharacters from being re-interpreted.
            cmd = [ "srptExportTable.py",
                    "-i", self._inputData,
                    "-C", self._configFile,
                    "-o", exportFile ]
            try:
                returnStatus = subprocess.call( cmd )
            except OSError:
                # Program missing or not executable: same outcome as a failed export.
                returnStatus = 1
            if returnStatus != 0:
                self._exitWithError( "ERROR while exporting data from table", showHelp=False )
            self._inputData = exportFile

        if self._formatData == "map":
            return self._inputData
        elif self._formatData == "align":
            mapFile = "%s.map" % ( self._inputData )
            AlignUtils.convertAlignFileIntoMapFileWithSubjectsOnQueries( self._inputData, mapFile )
            return mapFile
        elif self._formatData == "path":
            mapFile = "%s.map" % ( self._inputData )
            PathUtils.convertPathFileIntoMapFileWithSubjectsOnQueries( self._inputData, mapFile )
            return mapFile


    def mergeCoordsInMapFile( self, mapFile ):
        """Merge the coordinates of 'mapFile' into '<mapFile>.merge' and return that name."""
        if self._verbose > 0:
            print( "merge TE coordinates" )
            sys.stdout.flush()
        mergeFile = "%s.merge" % ( mapFile )
        MapUtils.mergeCoordsInFile( mapFile, mergeFile )
        # Only delete the map file when this script produced it (conversion or
        # table export); a 'map' file given directly by the user is kept.
        if self._formatData != "map" or self._db is not None:
            os.remove( mapFile )
        return mergeFile


    def spliceFastaFromCoords( self, mergeFile ):
        """Call FastaUtils.spliceFromCoords on the genome, then delete 'mergeFile'."""
        if self._verbose > 0:
            print( "splice TE copies from the genome" )
            sys.stdout.flush()
        FastaUtils.spliceFromCoords( self._genomeFile,
                                     mergeFile,
                                     self._outFile )

        os.remove( mergeFile )


    def start( self ):
        """Check the attributes and, if verbose, announce the start of the run."""
        self.checkAttributes()
        if self._verbose > 0:
            print( "START SpliceTEsFromGenome.py" )
            sys.stdout.flush()


    def end( self ):
        """Close the database connection if one was opened and, if verbose, announce the end."""
        if self._db is not None:
            self._db.close()
        if self._verbose > 0:
            print( "END SpliceTEsFromGenome.py" )
            sys.stdout.flush()


    def run( self ):
        """Run the whole pipeline: check, get the map file, merge, splice."""
        self.start()

        mapFile = self.getCoordsAsMapFile()

        mergeFile = self.mergeCoordsInMapFile( mapFile )

        self.spliceFastaFromCoords( mergeFile )

        self.end()
189 | |
if __name__ == "__main__":
    # Script entry point: read the options from the command line, then run.
    splicer = SpliceTEsFromGenome()
    splicer.setAttributesFromCmdLine()
    splicer.run()