Mercurial > repos > yufei-luo > s_mart
comparison commons/tools/CalcCoordCumulLength.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Calculate the cumulative length of coordinates data in the L{Map<commons.coreMap>} format. | |
5 """ | |
6 | |
7 import os | |
8 import sys | |
9 import getopt | |
10 from pyRepet.launcher.programLauncher import programLauncher | |
11 from pyRepet.util.Stat import Stat | |
12 from commons.core.checker.CheckerUtils import CheckerUtils | |
13 | |
14 | |
15 class CalcCoordCumulLength( object ): | |
16 """ | |
17 Compute the coverage of coordinates data in the L{Map<commons.core.ccommons.core """ | |
18 | |
19 | |
20 def __init__( self ): | |
21 """ | |
22 Constructor. | |
23 """ | |
24 self._inFileName = "" | |
25 self._outFileName = "" | |
26 self._verbose = 0 | |
27 | |
28 | |
29 def help( self ): | |
30 """ | |
31 Display the help on stdout. | |
32 """ | |
33 print | |
34 print "usage:",sys.argv[0]," [ options ]" | |
35 print "options:" | |
36 print " -h: this help" | |
37 print " -i: name of the input file (format='map')" | |
38 print " -o: name of the output file (default=inFileName+'.coverage')" | |
39 print | |
40 | |
41 | |
42 def setAttributesFromCmdLine( self ): | |
43 """ | |
44 Set the attributes from the command-line. | |
45 """ | |
46 try: | |
47 opts, args = getopt.getopt(sys.argv[1:],"hi:o:v:") | |
48 except getopt.GetoptError, err: | |
49 print str(err); self.help(); sys.exit(1) | |
50 for o,a in opts: | |
51 if o == "-h": | |
52 self.help(); sys.exit(0) | |
53 elif o == "-i": | |
54 self.setInputFileName( a ) | |
55 elif o == "-o": | |
56 self._outFileName = a | |
57 elif o == "-v": | |
58 self._verbose = int(a) | |
59 | |
60 | |
61 def setInputFileName( self, inFileName ): | |
62 self._inFileName = inFileName | |
63 | |
64 def setVerbose( self, verbose ): | |
65 self._verbose = int(verbose) | |
66 | |
67 def checkAttributes( self ): | |
68 """ | |
69 Check the attributes are valid before running the algorithm. | |
70 """ | |
71 if self._inFileName == "": | |
72 print "ERROR: missing input file" | |
73 self.help(); sys.exit(1) | |
74 if not os.path.exists( self._inFileName ): | |
75 print "ERROR: can't find file '%s'" % ( self._inFileName ) | |
76 self.help(); sys.exit(1) | |
77 if self._outFileName == "": | |
78 self._outFileName = "%s.coverage" % ( self._inFileName ) | |
79 | |
80 | |
81 def mergeCoordinates( self ): | |
82 """ | |
83 Merge the coordinates with 'mapOp'. | |
84 """ | |
85 if self._verbose > 0: | |
86 print "merge the coordinates with mapOp..."; sys.stdout.flush() | |
87 if not CheckerUtils.isExecutableInUserPath( "mapOp" ): | |
88 msg = "ERROR: 'mapOp' is not in your PATH" | |
89 sys.stderr.write( "%s\n" % msg ) | |
90 sys.exit(1) | |
91 pL = programLauncher() | |
92 prg = os.environ["REPET_PATH"] + "/bin/mapOp" | |
93 cmd = prg | |
94 cmd += " -q %s" % ( self._inFileName ) | |
95 cmd += " -m" | |
96 cmd += " > /dev/null" | |
97 pL.launch( prg, cmd, self._verbose - 1 ) | |
98 if self._verbose > 0: | |
99 print "coordinates merged !"; sys.stdout.flush() | |
100 mergeFileName = "%s.merge" % ( self._inFileName ) | |
101 inPath, inName = os.path.split( self._inFileName ) | |
102 if inPath != "": | |
103 os.system( "mv %s.merge %s" % ( inName, inPath ) ) | |
104 return mergeFileName | |
105 | |
106 | |
107 def getStatsPerChr( self, mergeFileName ): | |
108 """ | |
109 Return summary statistics on the coordinates, per chromosome. | |
110 @param mergeFileName: L{Map<commons.core.coord.Macommons.coreype mergeFileName: string | |
111 @return: dictionary whose keys are the chromosomes of the 'map file and values are L{Stat<pyRepet.util.Stat>} instances | |
112 """ | |
113 dChr2Stats = {} | |
114 if self._verbose > 0: | |
115 print "compute the coverage of the coordinates..."; sys.stdout.flush() | |
116 mergeF = open( mergeFileName, "r" ) | |
117 line = mergeF.readline() | |
118 while True: | |
119 if line == "": break | |
120 tokens = line[:-1].split("\t") | |
121 if int(tokens[2]) < int(tokens[3]): | |
122 matchLength = int(tokens[3]) - int(tokens[2]) + 1 | |
123 elif int(tokens[2]) > int(tokens[3]): | |
124 matchLength = int(tokens[2]) - int(tokens[3]) + 1 | |
125 if not dChr2Stats.has_key( tokens[1] ): | |
126 dChr2Stats[ tokens[1] ] = Stat() | |
127 dChr2Stats[ tokens[1] ].add( matchLength ) | |
128 line = mergeF.readline() | |
129 mergeF.close() | |
130 os.remove( mergeFileName ) | |
131 return dChr2Stats | |
132 | |
133 | |
134 def saveCumulLength( self, dChr2Stats ): | |
135 """ | |
136 Write the stats in the output file. | |
137 """ | |
138 outF = open( self._outFileName, "w" ) | |
139 totalLength = 0 | |
140 for i in dChr2Stats.keys(): | |
141 totalLength += dChr2Stats[i].sum | |
142 string = "cumulative length (in bp) on '%s': %i" % ( i, dChr2Stats[i].sum ) | |
143 outF.write( "%s\n" % ( string ) ) | |
144 if self._verbose > 0: print string | |
145 string = "total cumulative length (in bp): %i" % ( totalLength ) | |
146 outF.write( "%s\n" % ( string ) ) | |
147 if self._verbose > 0: print string | |
148 outF.close() | |
149 sys.stdout.flush() | |
150 | |
151 | |
152 def start( self ): | |
153 """ | |
154 Useful commands before running the program. | |
155 """ | |
156 if self._verbose > 0: | |
157 print "beginning of %s" % (sys.argv[0].split("/")[-1]); sys.stdout.flush() | |
158 self.checkAttributes() | |
159 if self._verbose > 0: | |
160 print "input file : '%s'" % ( self._inFileName ) | |
161 sys.stdout.flush() | |
162 | |
163 | |
164 def end( self ): | |
165 """ | |
166 Useful commands before ending the program. | |
167 """ | |
168 if self._verbose > 0: | |
169 print "%s finished successfully" % (sys.argv[0].split("/")[-1]); sys.stdout.flush() | |
170 | |
171 | |
172 def run( self ): | |
173 """ | |
174 Run the program. | |
175 """ | |
176 self.start() | |
177 mergeFileName = self.mergeCoordinates() | |
178 dChr2Stats = self.getStatsPerChr( mergeFileName ) | |
179 self.saveCumulLength( dChr2Stats ) | |
180 self.end() | |
181 | |
182 | |
183 if __name__ == '__main__': | |
184 i = CalcCoordCumulLength() | |
185 i.setAttributesFromCmdLine() | |
186 i.run() |