Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/getCumulLengthFromTEannot.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ##@file | |
4 # usage: getCumulLengthFromTEannot.py [ options ] | |
5 # options: | |
6 # -h: this help | |
7 # -i: table with the annotations (format=path) | |
8 # -r: name of a TE reference sequence (if empty, all subjects are considered) | |
9 # -g: length of the genome (in bp) | |
10 # -C: configuration file | |
11 # -c: clean | |
12 # -v: verbosity level (default=0/1) | |
13 | |
14 | |
15 import sys | |
16 import os | |
17 import getopt | |
18 from commons.core.sql.DbMySql import DbMySql | |
19 from commons.core.sql.TablePathAdaptator import TablePathAdaptator | |
20 | |
21 | |
22 class getCumulLengthFromTEannot( object ): | |
23 """ | |
24 Give the cumulative length of TE annotations (subjects mapped on queries). | |
25 """ | |
26 | |
27 def __init__( self ): | |
28 """ | |
29 Constructor. | |
30 """ | |
31 self._tableName = "" | |
32 self._TErefseq = "" | |
33 self._genomeLength = 0 | |
34 self._configFileName = "" | |
35 self._clean = False | |
36 self._verbose = 0 | |
37 self._db = None | |
38 self._tpA = None | |
39 | |
40 | |
41 def help( self ): | |
42 """ | |
43 Display the help on stdout. | |
44 """ | |
45 print | |
46 print "usage: getCumulLengthFromTEannot.py [ options ]" | |
47 print "options:" | |
48 print " -h: this help" | |
49 print " -i: table with the annotations (format=path)" | |
50 print " -r: name of a TE reference sequence (if empty, all subjects are considered)" | |
51 print " -g: length of the genome (in bp)" | |
52 print " -C: configuration file" | |
53 print " -c: clean" | |
54 print " -v: verbosity level (default=0/1)" | |
55 print | |
56 | |
57 | |
58 def setAttributesFromCmdLine( self ): | |
59 """ | |
60 Set the attributes from the command-line. | |
61 """ | |
62 try: | |
63 opts, args = getopt.getopt(sys.argv[1:],"hi:r:g:C:cv:") | |
64 except getopt.GetoptError, err: | |
65 print str(err); self.help(); sys.exit(1) | |
66 for o,a in opts: | |
67 if o == "-h": | |
68 self.help(); sys.exit(0) | |
69 elif o == "-i": | |
70 self.setInputTable( a ) | |
71 elif o == "-r": | |
72 self.setTErefseq( a ) | |
73 elif o == "-g": | |
74 self.setGenomeLength( a ) | |
75 elif o == "-C": | |
76 self.setConfigFileName( a ) | |
77 elif o == "-c": | |
78 self.setClean() | |
79 elif o == "-v": | |
80 self.setVerbosityLevel( a ) | |
81 | |
82 | |
83 def setInputTable( self, inTable ): | |
84 self._tableName = inTable | |
85 | |
86 def setTErefseq( self, a ): | |
87 self._TErefseq = a | |
88 | |
89 def setGenomeLength( self, genomeLength ): | |
90 self._genomeLength = int(genomeLength) | |
91 | |
92 def setConfigFileName( self, configFileName ): | |
93 self._configFileName = configFileName | |
94 | |
95 def setClean( self ): | |
96 self._clean = True | |
97 | |
98 def setVerbosityLevel( self, verbose ): | |
99 self._verbose = int(verbose) | |
100 | |
101 def checkAttributes( self ): | |
102 """ | |
103 Check the attributes are valid before running the algorithm. | |
104 """ | |
105 if self._tableName == "": | |
106 print "ERROR: missing input table"; self.help(); sys.exit(1) | |
107 | |
108 | |
109 def setAdaptatorToTable( self ): | |
110 self._db = DbMySql( cfgFileName=self._configFileName ) | |
111 self._tpA = TablePathAdaptator( self._db, self._tableName ) | |
112 | |
113 | |
114 def getAllSubjectsAsMapOfQueries( self ): | |
115 mapFileName = "%s.map" % self._tableName | |
116 mapFile = open( mapFileName, "w" ) | |
117 if self._TErefseq != "": | |
118 lPathnums = self._tpA.getIdListFromSubject( self._TErefseq ) | |
119 else: | |
120 lPathnums = self._tpA.getIdList() | |
121 if self._verbose > 0: | |
122 print "nb of paths: %i" % ( len(lPathnums) ) | |
123 for pathnum in lPathnums: | |
124 lPaths = self._tpA.getPathListFromId( pathnum ) | |
125 for path in lPaths: | |
126 map = path.getSubjectAsMapOfQuery() | |
127 map.write( mapFile ) | |
128 mapFile.close() | |
129 return mapFileName | |
130 | |
131 | |
132 def mergeRanges( self, mapFileName ): | |
133 mergeFileName = "%s.merge" % mapFileName | |
134 prg = os.environ["REPET_PATH"] + "/bin/mapOp" | |
135 cmd = prg | |
136 cmd += " -q %s" % ( mapFileName ) | |
137 cmd += " -m" | |
138 cmd += " 2>&1 > /dev/null" | |
139 log = os.system( cmd ) | |
140 if log != 0: | |
141 print "*** Error: %s returned %i" % ( prg, log ) | |
142 sys.exit(1) | |
143 if self._clean: | |
144 os.remove( mapFileName ) | |
145 return mergeFileName | |
146 | |
147 | |
148 def getCumulLength( self, mergeFileName ): | |
149 mergeFile = open( mergeFileName, "r" ) | |
150 total = 0 | |
151 while True: | |
152 line = mergeFile.readline() | |
153 if line == "": | |
154 break | |
155 tok = line.split("\t") | |
156 total += abs( int(tok[3]) - int(tok[2]) ) + 1 | |
157 mergeFile.close() | |
158 if self._clean: | |
159 os.remove( mergeFileName ) | |
160 return total | |
161 | |
162 | |
163 def start( self ): | |
164 """ | |
165 Useful commands before running the program. | |
166 """ | |
167 self.checkAttributes() | |
168 if self._verbose > 0: | |
169 print "START %s" % ( type(self).__name__ ); sys.stdout.flush() | |
170 self.setAdaptatorToTable() | |
171 | |
172 | |
173 def end( self, mapFileName, mergeFileName ): | |
174 """ | |
175 Useful commands before ending the program. | |
176 """ | |
177 self._db.close() | |
178 if self._verbose > 0: | |
179 print "END %s" % ( type(self).__name__ ); sys.stdout.flush() | |
180 | |
181 | |
182 def run( self ): | |
183 """ | |
184 Run the program. | |
185 """ | |
186 self.start() | |
187 | |
188 mapFileName = self.getAllSubjectsAsMapOfQueries() | |
189 mergeFileName = self.mergeRanges( mapFileName ) | |
190 total = self.getCumulLength( mergeFileName ) | |
191 print "cumulative length: %i bp" % total | |
192 if self._genomeLength > 0: | |
193 print "TE content: %.2f%%" % ( 100 * total / float(self._genomeLength) ) | |
194 | |
195 self.end( mapFileName, mergeFileName ) | |
196 | |
197 | |
198 if __name__ == "__main__": | |
199 i = getCumulLengthFromTEannot() | |
200 i.setAttributesFromCmdLine() | |
201 i.run() |