Mercurial > repos > yufei-luo > s_mart
view commons/tools/CalcCoordCumulLength.py @ 19:9bcfa7936eec
Deleted selected files
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:23:29 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
#!/usr/bin/env python """ Calculate the cumulative length of coordinates data in the L{Map<commons.coreMap>} format. """ import os import sys import getopt from pyRepet.launcher.programLauncher import programLauncher from pyRepet.util.Stat import Stat from commons.core.checker.CheckerUtils import CheckerUtils class CalcCoordCumulLength( object ): """ Compute the coverage of coordinates data in the L{Map<commons.core.ccommons.core """ def __init__( self ): """ Constructor. """ self._inFileName = "" self._outFileName = "" self._verbose = 0 def help( self ): """ Display the help on stdout. """ print print "usage:",sys.argv[0]," [ options ]" print "options:" print " -h: this help" print " -i: name of the input file (format='map')" print " -o: name of the output file (default=inFileName+'.coverage')" print def setAttributesFromCmdLine( self ): """ Set the attributes from the command-line. """ try: opts, args = getopt.getopt(sys.argv[1:],"hi:o:v:") except getopt.GetoptError, err: print str(err); self.help(); sys.exit(1) for o,a in opts: if o == "-h": self.help(); sys.exit(0) elif o == "-i": self.setInputFileName( a ) elif o == "-o": self._outFileName = a elif o == "-v": self._verbose = int(a) def setInputFileName( self, inFileName ): self._inFileName = inFileName def setVerbose( self, verbose ): self._verbose = int(verbose) def checkAttributes( self ): """ Check the attributes are valid before running the algorithm. """ if self._inFileName == "": print "ERROR: missing input file" self.help(); sys.exit(1) if not os.path.exists( self._inFileName ): print "ERROR: can't find file '%s'" % ( self._inFileName ) self.help(); sys.exit(1) if self._outFileName == "": self._outFileName = "%s.coverage" % ( self._inFileName ) def mergeCoordinates( self ): """ Merge the coordinates with 'mapOp'. """ if self._verbose > 0: print "merge the coordinates with mapOp..."; sys.stdout.flush() if not CheckerUtils.isExecutableInUserPath( "mapOp" ): msg = "ERROR: 'mapOp' is not in your PATH" sys.stderr.write( "%s\n" % msg ) sys.exit(1) pL = programLauncher() prg = os.environ["REPET_PATH"] + "/bin/mapOp" cmd = prg cmd += " -q %s" % ( self._inFileName ) cmd += " -m" cmd += " > /dev/null" pL.launch( prg, cmd, self._verbose - 1 ) if self._verbose > 0: print "coordinates merged !"; sys.stdout.flush() mergeFileName = "%s.merge" % ( self._inFileName ) inPath, inName = os.path.split( self._inFileName ) if inPath != "": os.system( "mv %s.merge %s" % ( inName, inPath ) ) return mergeFileName def getStatsPerChr( self, mergeFileName ): """ Return summary statistics on the coordinates, per chromosome. @param mergeFileName: L{Map<commons.core.coord.Macommons.coreype mergeFileName: string @return: dictionary whose keys are the chromosomes of the 'map file and values are L{Stat<pyRepet.util.Stat>} instances """ dChr2Stats = {} if self._verbose > 0: print "compute the coverage of the coordinates..."; sys.stdout.flush() mergeF = open( mergeFileName, "r" ) line = mergeF.readline() while True: if line == "": break tokens = line[:-1].split("\t") if int(tokens[2]) < int(tokens[3]): matchLength = int(tokens[3]) - int(tokens[2]) + 1 elif int(tokens[2]) > int(tokens[3]): matchLength = int(tokens[2]) - int(tokens[3]) + 1 if not dChr2Stats.has_key( tokens[1] ): dChr2Stats[ tokens[1] ] = Stat() dChr2Stats[ tokens[1] ].add( matchLength ) line = mergeF.readline() mergeF.close() os.remove( mergeFileName ) return dChr2Stats def saveCumulLength( self, dChr2Stats ): """ Write the stats in the output file. """ outF = open( self._outFileName, "w" ) totalLength = 0 for i in dChr2Stats.keys(): totalLength += dChr2Stats[i].sum string = "cumulative length (in bp) on '%s': %i" % ( i, dChr2Stats[i].sum ) outF.write( "%s\n" % ( string ) ) if self._verbose > 0: print string string = "total cumulative length (in bp): %i" % ( totalLength ) outF.write( "%s\n" % ( string ) ) if self._verbose > 0: print string outF.close() sys.stdout.flush() def start( self ): """ Useful commands before running the program. """ if self._verbose > 0: print "beginning of %s" % (sys.argv[0].split("/")[-1]); sys.stdout.flush() self.checkAttributes() if self._verbose > 0: print "input file : '%s'" % ( self._inFileName ) sys.stdout.flush() def end( self ): """ Useful commands before ending the program. """ if self._verbose > 0: print "%s finished successfully" % (sys.argv[0].split("/")[-1]); sys.stdout.flush() def run( self ): """ Run the program. """ self.start() mergeFileName = self.mergeCoordinates() dChr2Stats = self.getStatsPerChr( mergeFileName ) self.saveCumulLength( dChr2Stats ) self.end() if __name__ == '__main__': i = CalcCoordCumulLength() i.setAttributesFromCmdLine() i.run()