annotate commons/tools/CalcCoordCumulLength.py @ 19:9bcfa7936eec

Deleted selected files
author m-zytnicki
date Mon, 29 Apr 2013 03:23:29 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 Calculate the cumulative length of coordinates data in the L{Map<commons.core.coord.Map>} format.
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 """
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 from pyRepet.launcher.programLauncher import programLauncher
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 from pyRepet.util.Stat import Stat
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 from commons.core.checker.CheckerUtils import CheckerUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class CalcCoordCumulLength( object ):
    """
    Compute the cumulative length (coverage) of coordinates data given in the 'map' format.

    The coordinates are first merged with the external program 'mapOp', then
    the lengths of the merged ranges are summed per chromosome and written,
    together with the grand total, in the output file.
    """


    def __init__( self ):
        """
        Constructor.
        """
        self._inFileName = ""
        self._outFileName = ""
        self._verbose = 0


    def help( self ):
        """
        Display the help on stdout.
        """
        # single-argument print( ... ) behaves the same under Python 2 and Python 3
        print( "" )
        print( "usage: %s  [ options ]" % ( sys.argv[0] ) )
        print( "options:" )
        print( " -h: this help" )
        print( " -i: name of the input file (format='map')" )
        print( " -o: name of the output file (default=inFileName+'.coverage')" )
        print( " -v: verbosity level (default=0)" )
        print( "" )


    def setAttributesFromCmdLine( self ):
        """
        Set the attributes from the command-line.

        Exit with status 1 (after displaying the help) on an unknown option
        or on a non-integer verbosity, and with status 0 on '-h'.
        """
        try:
            opts, _ = getopt.getopt( sys.argv[1:], "hi:o:v:" )
        except getopt.GetoptError as err:
            print( str(err) )
            self.help()
            sys.exit(1)
        for o, a in opts:
            if o == "-h":
                self.help()
                sys.exit(0)
            elif o == "-i":
                self.setInputFileName( a )
            elif o == "-o":
                self._outFileName = a
            elif o == "-v":
                try:
                    self.setVerbose( a )
                except ValueError:
                    # report a usage error rather than a raw traceback
                    print( "ERROR: verbosity '%s' is not an integer" % ( a ) )
                    self.help()
                    sys.exit(1)


    def setInputFileName( self, inFileName ):
        """
        Set the name of the input file.
        @param inFileName: name of the input file (format='map')
        @type inFileName: string
        """
        self._inFileName = inFileName


    def setVerbose( self, verbose ):
        """
        Set the verbosity level.
        @param verbose: verbosity level, converted with int()
        @type verbose: integer or string
        """
        self._verbose = int(verbose)


    def checkAttributes( self ):
        """
        Check the attributes are valid before running the algorithm.

        Exit with status 1 if the input file is missing or can't be found.
        Set the default name of the output file if none was given.
        """
        if self._inFileName == "":
            print( "ERROR: missing input file" )
            self.help()
            sys.exit(1)
        if not os.path.exists( self._inFileName ):
            print( "ERROR: can't find file '%s'" % ( self._inFileName ) )
            self.help()
            sys.exit(1)
        if self._outFileName == "":
            self._outFileName = "%s.coverage" % ( self._inFileName )


    def mergeCoordinates( self ):
        """
        Merge the coordinates with 'mapOp'.

        @return: name of the file with the merged coordinates (name of the input file + '.merge')
        @rtype: string
        """
        # local import: only this method needs it, so the file-level imports stay untouched
        import shutil
        if self._verbose > 0:
            print( "merge the coordinates with mapOp..." )
            sys.stdout.flush()
        if not CheckerUtils.isExecutableInUserPath( "mapOp" ):
            msg = "ERROR: 'mapOp' is not in your PATH"
            sys.stderr.write( "%s\n" % msg )
            sys.exit(1)
        pL = programLauncher()
        # Use the binary under $REPET_PATH when the variable is set; otherwise
        # fall back on the 'mapOp' found in the PATH (checked just above)
        # instead of dying on a KeyError.
        repetPath = os.environ.get( "REPET_PATH", "" )
        if repetPath != "":
            prg = repetPath + "/bin/mapOp"
        else:
            prg = "mapOp"
        cmd = prg
        cmd += " -q %s" % ( self._inFileName )
        cmd += " -m"
        cmd += " > /dev/null"
        pL.launch( prg, cmd, self._verbose - 1 )
        if self._verbose > 0:
            print( "coordinates merged !" )
            sys.stdout.flush()
        mergeFileName = "%s.merge" % ( self._inFileName )
        inPath, inName = os.path.split( self._inFileName )
        if inPath != "":
            # NOTE(review): presumably 'mapOp' writes '<inName>.merge' in the
            # current directory, hence the move next to the input file -- confirm.
            localMergeFileName = "%s.merge" % ( inName )
            # no shell involved, so names with spaces are safe; skip the move
            # when the input file already lives in the current directory
            if os.path.abspath( localMergeFileName ) != os.path.abspath( mergeFileName ):
                shutil.move( localMergeFileName, mergeFileName )
        return mergeFileName


    @staticmethod
    def _getMatchLength( start, end ):
        """
        Return the length (in bp) of a range given by its two inclusive coordinates.

        The coordinates can be in any order, and a range whose start equals
        its end has a length of 1.
        @param start: first coordinate
        @type start: integer or string
        @param end: second coordinate
        @type end: integer or string
        @rtype: integer
        """
        return abs( int(end) - int(start) ) + 1


    def getStatsPerChr( self, mergeFileName ):
        """
        Return summary statistics on the coordinates, per chromosome.

        Each non-empty line is split on tabulations: the 2nd field is used as
        the chromosome, the 3rd and 4th fields as the coordinates of the range.
        The file is removed once it has been read.
        @param mergeFileName: name of the file with the merged coordinates
        @type mergeFileName: string
        @return: dictionary whose keys are the chromosomes of the 'map' file and values are L{Stat<pyRepet.util.Stat>} instances
        """
        dChr2Stats = {}
        if self._verbose > 0:
            print( "compute the coverage of the coordinates..." )
            sys.stdout.flush()
        with open( mergeFileName, "r" ) as mergeF:
            for line in mergeF:
                # strip only the end-of-line: the last line may lack one, and
                # chopping its last character would corrupt the end coordinate
                line = line.rstrip( "\r\n" )
                if line == "":
                    continue
                tokens = line.split( "\t" )
                matchLength = self._getMatchLength( tokens[2], tokens[3] )
                if tokens[1] not in dChr2Stats:
                    dChr2Stats[ tokens[1] ] = Stat()
                dChr2Stats[ tokens[1] ].add( matchLength )
        os.remove( mergeFileName )
        return dChr2Stats


    def saveCumulLength( self, dChr2Stats ):
        """
        Write the stats in the output file.

        One line per chromosome (sorted by name, so that the output is
        reproducible), then one line with the total.
        @param dChr2Stats: dictionary whose keys are chromosomes and values are objects with a 'sum' attribute
        @type dChr2Stats: dictionary
        """
        totalLength = 0
        with open( self._outFileName, "w" ) as outF:
            for chrName in sorted( dChr2Stats ):
                chrLength = dChr2Stats[ chrName ].sum
                totalLength += chrLength
                string = "cumulative length (in bp) on '%s': %i" % ( chrName, chrLength )
                outF.write( "%s\n" % ( string ) )
                if self._verbose > 0:
                    print( string )
            string = "total cumulative length (in bp): %i" % ( totalLength )
            outF.write( "%s\n" % ( string ) )
            if self._verbose > 0:
                print( string )
        sys.stdout.flush()


    def start( self ):
        """
        Useful commands before running the program.
        """
        if self._verbose > 0:
            print( "beginning of %s" % ( sys.argv[0].split("/")[-1] ) )
            sys.stdout.flush()
        self.checkAttributes()
        if self._verbose > 0:
            print( "input file : '%s'" % ( self._inFileName ) )
            sys.stdout.flush()


    def end( self ):
        """
        Useful commands before ending the program.
        """
        if self._verbose > 0:
            print( "%s finished successfully" % ( sys.argv[0].split("/")[-1] ) )
            sys.stdout.flush()


    def run( self ):
        """
        Run the program.
        """
        self.start()
        mergeFileName = self.mergeCoordinates()
        dChr2Stats = self.getStatsPerChr( mergeFileName )
        self.saveCumulLength( dChr2Stats )
        self.end()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
181
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
182
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the options from the command-line, then run the whole pipeline.
    calculator = CalcCoordCumulLength()
    calculator.setAttributesFromCmdLine()
    calculator.run()