annotate commons/tools/SpliceTEsFromGenome.py @ 19:9bcfa7936eec

Deleted selected files
author m-zytnicki
date Mon, 29 Apr 2013 03:23:29 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 from commons.core.sql.DbMySql import DbMySql
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 from commons.core.seq.FastaUtils import FastaUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 from commons.core.coord.MapUtils import MapUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 from commons.core.coord.AlignUtils import AlignUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 from commons.core.coord.PathUtils import PathUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class SpliceTEsFromGenome( object ):
    """Command-line driver that splices TE copies out of a genome fasta file.

    The TE coordinates come either from a file or from a database table
    (reached through DbMySql when the '-i' value is not an existing file),
    in 'map', 'align' or 'path' format.  The pipeline run by run() is:
    obtain the coordinates as a 'map' file, merge them with
    MapUtils.mergeCoordsInFile, then hand the merged file to
    FastaUtils.spliceFromCoords together with the genome and output names.
    """

    def __init__( self ):
        """Initialise every option to its 'not set' value."""
        self._inputData = ""    # -i: file name or table name holding the TE coordinates
        self._formatData = ""   # -f: one of "map", "align", "path"
        self._genomeFile = ""   # -g: genome fasta file
        self._configFile = ""   # -C: configuration file, needed for table input
        self._outFile = ""      # -o: output file, defaults to "<genomeFile>.splice"
        self._verbose = 0       # -v: messages are printed when > 0
        self._db = None         # DbMySql handle; stays None when the input is a file


    def help( self ):
        """Print the usage message on standard output."""
        usageLines = (
            "",
            "usage: SpliceTEsFromGenome.py [ options ]",
            "options:",
            " -h: this help",
            " -i: input TE coordinates (can be file or table)",
            " TEs as subjects if align or path format",
            " -f: format of the data (map/align/path)",
            " -g: genome file (format=fasta)",
            " -C: configuration file (if table as input)",
            " -o: output fasta file (default=genomeFile+'.splice')",
            " -v: verbosity level (default=0/1)",
            "",
        )
        for line in usageLines:
            # print(<one string>) behaves identically on Python 2 and 3
            print( line )


    def _exitWithError( self, msg, showHelp=True ):
        """Write msg on stderr, optionally print the usage, and exit with status 1."""
        sys.stderr.write( "%s\n" % msg )
        if showHelp:
            self.help()
        sys.exit(1)


    def _closeDb( self ):
        """Close the database handle if one is open; safe to call repeatedly."""
        if self._db is not None:
            self._db.close()
            self._db = None


    def setAttributesFromCmdLine( self ):
        """Fill the attributes from sys.argv.

        Exits with status 0 after '-h', and with status 1 (after printing
        the usage) on an unknown option, a missing option argument, or a
        non-integer '-v' value.
        """
        try:
            opts, _ = getopt.getopt( sys.argv[1:], "hi:f:g:C:o:v:" )
        except getopt.GetoptError as err:
            self._exitWithError( "%s" % str(err) )
        for o, a in opts:
            if o == "-h":
                self.help()
                sys.exit(0)
            elif o == "-i":
                self._inputData = a
            elif o == "-f":
                self._formatData = a
            elif o == "-g":
                self._genomeFile = a
            elif o == "-C":
                self._configFile = a
            elif o == "-o":
                self._outFile = a
            elif o == "-v":
                try:
                    self._verbose = int(a)
                except ValueError:
                    # report a usage error instead of dying with a traceback
                    self._exitWithError( "ERROR: verbosity level (-v) should be an integer, not '%s'" % ( a ) )


    def checkAttributes( self ):
        """Validate the options, exiting with status 1 on the first problem.

        When the '-i' value is not an existing path it is treated as a table
        name: the configuration file must exist, a DbMySql handle is opened
        and stored in self._db, and the table must exist.  When no output
        file was given, it defaults to "<genomeFile>.splice".
        """
        if self._inputData == "":
            self._exitWithError( "ERROR: missing input data (-i)" )
        if not os.path.exists( self._inputData ):
            # not a file: the input can only be a table, which needs the config file
            if not os.path.exists( self._configFile ):
                self._exitWithError( "ERROR: neither input file '%s' nor configuration file '%s'" % ( self._inputData, self._configFile ) )
            self._db = DbMySql( cfgFileName=self._configFile )
            if not self._db.doesTableExist( self._inputData ):
                tableName = self._inputData
                self._closeDb()  # do not leave the connection open on the way out
                self._exitWithError( "ERROR: can't find table '%s'" % ( tableName ) )
        if self._formatData == "":
            self._exitWithError( "ERROR: need to precise format (-f)" )
        if self._formatData not in [ "map", "align", "path" ]:
            self._exitWithError( "ERROR: format '%s' not yet supported" % ( self._formatData ) )
        if self._genomeFile == "":
            self._exitWithError( "ERROR: missing genome file (-g)" )
        if not os.path.exists( self._genomeFile ):
            self._exitWithError( "ERROR: can't find genome file '%s'" % ( self._genomeFile ) )
        if self._outFile == "":
            self._outFile = "%s.splice" % ( self._genomeFile )
            if self._verbose > 0:
                print( "output fasta file: %s" % self._outFile )


    def getCoordsAsMapFile( self ):
        """Return the name of a 'map' file holding the TE coordinates.

        For table input, the table is first exported with the external
        'srptExportTable.py' program into "<table>.<format>", and
        self._inputData is updated to that file name; the process exits with
        status 1 if the export fails.  'map' input is returned as is;
        'align' and 'path' inputs are converted into "<input>.map" via
        AlignUtils / PathUtils.  Returns None for any other format
        (checkAttributes() rejects those beforehand).
        """
        if self._verbose > 0:
            print( "get TE coordinates as 'Map' file" )
            sys.stdout.flush()
        if self._db is not None:
            import subprocess
            exportedFile = "%s.%s" % ( self._inputData, self._formatData )
            # argument list, no shell: names with spaces or shell
            # metacharacters are passed through unchanged
            cmd = [ "srptExportTable.py",
                    "-i", self._inputData,
                    "-C", self._configFile,
                    "-o", exportedFile ]
            try:
                returnStatus = subprocess.call( cmd )
            except OSError:
                # program not found or not executable: same outcome as a failed export
                returnStatus = 1
            if returnStatus != 0:
                self._exitWithError( "ERROR while exporting data from table", showHelp=False )
            self._inputData = exportedFile

        if self._formatData == "map":
            return self._inputData
        elif self._formatData == "align":
            mapFile = "%s.map" % ( self._inputData )
            AlignUtils.convertAlignFileIntoMapFileWithSubjectsOnQueries( self._inputData, mapFile )
            return mapFile
        elif self._formatData == "path":
            mapFile = "%s.map" % ( self._inputData )
            PathUtils.convertPathFileIntoMapFileWithSubjectsOnQueries( self._inputData, mapFile )
            return mapFile


    def mergeCoordsInMapFile( self, mapFile ):
        """Merge the coordinates of mapFile into "<mapFile>.merge" and return that name.

        mapFile is deleted afterwards unless it is the user's own 'map'
        input file (i.e. it was neither converted nor exported from a table).
        """
        if self._verbose > 0:
            print( "merge TE coordinates" )
            sys.stdout.flush()
        mergeFile = "%s.merge" % ( mapFile )
        MapUtils.mergeCoordsInFile( mapFile, mergeFile )
        if self._formatData != "map" or self._db is not None:
            os.remove( mapFile )
        return mergeFile


    def spliceFastaFromCoords( self, mergeFile ):
        """Call FastaUtils.spliceFromCoords on the genome, then delete mergeFile."""
        if self._verbose > 0:
            print( "splice TE copies from the genome" )
            sys.stdout.flush()
        FastaUtils.spliceFromCoords( self._genomeFile,
                                     mergeFile,
                                     self._outFile )

        os.remove( mergeFile )


    def start( self ):
        """Validate the options and announce the start when verbose."""
        self.checkAttributes()
        if self._verbose > 0:
            print( "START SpliceTEsFromGenome.py" )
            sys.stdout.flush()


    def end( self ):
        """Close the database handle, if any, and announce the end when verbose."""
        self._closeDb()
        if self._verbose > 0:
            print( "END SpliceTEsFromGenome.py" )
            sys.stdout.flush()


    def run( self ):
        """Run the whole pipeline: check, get map file, merge, splice, clean up."""
        self.start()

        try:
            mapFile = self.getCoordsAsMapFile()

            mergeFile = self.mergeCoordsInMapFile( mapFile )

            self.spliceFastaFromCoords( mergeFile )
        finally:
            # release the database connection even when a step fails
            self._closeDb()

        self.end()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
188
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
189
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the options from the command line, then run the pipeline.
    splicer = SpliceTEsFromGenome()
    splicer.setAttributesFromCmdLine()
    splicer.run()