18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Calculate the cumulative length of coordinates data in the L{Map<commons.coreMap>} format.
|
|
5 """
|
|
6
|
|
7 import os
|
|
8 import sys
|
|
9 import getopt
|
|
10 from pyRepet.launcher.programLauncher import programLauncher
|
|
11 from pyRepet.util.Stat import Stat
|
|
12 from commons.core.checker.CheckerUtils import CheckerUtils
|
|
13
|
|
14
|
|
15 class CalcCoordCumulLength( object ):
|
|
16 """
|
|
17 Compute the coverage of coordinates data in the L{Map<commons.core.ccommons.core """
|
|
18
|
|
19
|
|
20 def __init__( self ):
|
|
21 """
|
|
22 Constructor.
|
|
23 """
|
|
24 self._inFileName = ""
|
|
25 self._outFileName = ""
|
|
26 self._verbose = 0
|
|
27
|
|
28
|
|
29 def help( self ):
|
|
30 """
|
|
31 Display the help on stdout.
|
|
32 """
|
|
33 print
|
|
34 print "usage:",sys.argv[0]," [ options ]"
|
|
35 print "options:"
|
|
36 print " -h: this help"
|
|
37 print " -i: name of the input file (format='map')"
|
|
38 print " -o: name of the output file (default=inFileName+'.coverage')"
|
|
39 print
|
|
40
|
|
41
|
|
42 def setAttributesFromCmdLine( self ):
|
|
43 """
|
|
44 Set the attributes from the command-line.
|
|
45 """
|
|
46 try:
|
|
47 opts, args = getopt.getopt(sys.argv[1:],"hi:o:v:")
|
|
48 except getopt.GetoptError, err:
|
|
49 print str(err); self.help(); sys.exit(1)
|
|
50 for o,a in opts:
|
|
51 if o == "-h":
|
|
52 self.help(); sys.exit(0)
|
|
53 elif o == "-i":
|
|
54 self.setInputFileName( a )
|
|
55 elif o == "-o":
|
|
56 self._outFileName = a
|
|
57 elif o == "-v":
|
|
58 self._verbose = int(a)
|
|
59
|
|
60
|
|
61 def setInputFileName( self, inFileName ):
|
|
62 self._inFileName = inFileName
|
|
63
|
|
64 def setVerbose( self, verbose ):
|
|
65 self._verbose = int(verbose)
|
|
66
|
|
67 def checkAttributes( self ):
|
|
68 """
|
|
69 Check the attributes are valid before running the algorithm.
|
|
70 """
|
|
71 if self._inFileName == "":
|
|
72 print "ERROR: missing input file"
|
|
73 self.help(); sys.exit(1)
|
|
74 if not os.path.exists( self._inFileName ):
|
|
75 print "ERROR: can't find file '%s'" % ( self._inFileName )
|
|
76 self.help(); sys.exit(1)
|
|
77 if self._outFileName == "":
|
|
78 self._outFileName = "%s.coverage" % ( self._inFileName )
|
|
79
|
|
80
|
|
81 def mergeCoordinates( self ):
|
|
82 """
|
|
83 Merge the coordinates with 'mapOp'.
|
|
84 """
|
|
85 if self._verbose > 0:
|
|
86 print "merge the coordinates with mapOp..."; sys.stdout.flush()
|
|
87 if not CheckerUtils.isExecutableInUserPath( "mapOp" ):
|
|
88 msg = "ERROR: 'mapOp' is not in your PATH"
|
|
89 sys.stderr.write( "%s\n" % msg )
|
|
90 sys.exit(1)
|
|
91 pL = programLauncher()
|
|
92 prg = os.environ["REPET_PATH"] + "/bin/mapOp"
|
|
93 cmd = prg
|
|
94 cmd += " -q %s" % ( self._inFileName )
|
|
95 cmd += " -m"
|
|
96 cmd += " > /dev/null"
|
|
97 pL.launch( prg, cmd, self._verbose - 1 )
|
|
98 if self._verbose > 0:
|
|
99 print "coordinates merged !"; sys.stdout.flush()
|
|
100 mergeFileName = "%s.merge" % ( self._inFileName )
|
|
101 inPath, inName = os.path.split( self._inFileName )
|
|
102 if inPath != "":
|
|
103 os.system( "mv %s.merge %s" % ( inName, inPath ) )
|
|
104 return mergeFileName
|
|
105
|
|
106
|
|
107 def getStatsPerChr( self, mergeFileName ):
|
|
108 """
|
|
109 Return summary statistics on the coordinates, per chromosome.
|
|
110 @param mergeFileName: L{Map<commons.core.coord.Macommons.coreype mergeFileName: string
|
|
111 @return: dictionary whose keys are the chromosomes of the 'map file and values are L{Stat<pyRepet.util.Stat>} instances
|
|
112 """
|
|
113 dChr2Stats = {}
|
|
114 if self._verbose > 0:
|
|
115 print "compute the coverage of the coordinates..."; sys.stdout.flush()
|
|
116 mergeF = open( mergeFileName, "r" )
|
|
117 line = mergeF.readline()
|
|
118 while True:
|
|
119 if line == "": break
|
|
120 tokens = line[:-1].split("\t")
|
|
121 if int(tokens[2]) < int(tokens[3]):
|
|
122 matchLength = int(tokens[3]) - int(tokens[2]) + 1
|
|
123 elif int(tokens[2]) > int(tokens[3]):
|
|
124 matchLength = int(tokens[2]) - int(tokens[3]) + 1
|
|
125 if not dChr2Stats.has_key( tokens[1] ):
|
|
126 dChr2Stats[ tokens[1] ] = Stat()
|
|
127 dChr2Stats[ tokens[1] ].add( matchLength )
|
|
128 line = mergeF.readline()
|
|
129 mergeF.close()
|
|
130 os.remove( mergeFileName )
|
|
131 return dChr2Stats
|
|
132
|
|
133
|
|
134 def saveCumulLength( self, dChr2Stats ):
|
|
135 """
|
|
136 Write the stats in the output file.
|
|
137 """
|
|
138 outF = open( self._outFileName, "w" )
|
|
139 totalLength = 0
|
|
140 for i in dChr2Stats.keys():
|
|
141 totalLength += dChr2Stats[i].sum
|
|
142 string = "cumulative length (in bp) on '%s': %i" % ( i, dChr2Stats[i].sum )
|
|
143 outF.write( "%s\n" % ( string ) )
|
|
144 if self._verbose > 0: print string
|
|
145 string = "total cumulative length (in bp): %i" % ( totalLength )
|
|
146 outF.write( "%s\n" % ( string ) )
|
|
147 if self._verbose > 0: print string
|
|
148 outF.close()
|
|
149 sys.stdout.flush()
|
|
150
|
|
151
|
|
152 def start( self ):
|
|
153 """
|
|
154 Useful commands before running the program.
|
|
155 """
|
|
156 if self._verbose > 0:
|
|
157 print "beginning of %s" % (sys.argv[0].split("/")[-1]); sys.stdout.flush()
|
|
158 self.checkAttributes()
|
|
159 if self._verbose > 0:
|
|
160 print "input file : '%s'" % ( self._inFileName )
|
|
161 sys.stdout.flush()
|
|
162
|
|
163
|
|
164 def end( self ):
|
|
165 """
|
|
166 Useful commands before ending the program.
|
|
167 """
|
|
168 if self._verbose > 0:
|
|
169 print "%s finished successfully" % (sys.argv[0].split("/")[-1]); sys.stdout.flush()
|
|
170
|
|
171
|
|
172 def run( self ):
|
|
173 """
|
|
174 Run the program.
|
|
175 """
|
|
176 self.start()
|
|
177 mergeFileName = self.mergeCoordinates()
|
|
178 dChr2Stats = self.getStatsPerChr( mergeFileName )
|
|
179 self.saveCumulLength( dChr2Stats )
|
|
180 self.end()
|
|
181
|
|
182
|
|
183 if __name__ == '__main__':
|
|
184 i = CalcCoordCumulLength()
|
|
185 i.setAttributesFromCmdLine()
|
|
186 i.run()
|