diff commons/tools/CalcCoordCumulLength.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/tools/CalcCoordCumulLength.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+
+"""
+Calculate the cumulative length of coordinates data in the L{Map<commons.coreMap>} format.
+"""
+
+import os
+import sys
+import getopt
+from pyRepet.launcher.programLauncher import programLauncher
+from pyRepet.util.Stat import Stat
+from commons.core.checker.CheckerUtils import CheckerUtils
+
+
+class CalcCoordCumulLength( object ):
+    """
+    Compute the coverage of coordinates data in the L{Map<commons.core.ccommons.core    """
+    
+    
+    def __init__( self ):
+        """
+        Constructor.
+        """
+        self._inFileName = ""
+        self._outFileName = ""
+        self._verbose = 0
+        
+        
+    def help( self ):
+        """
+        Display the help on stdout.
+        """
+        print
+        print "usage:",sys.argv[0]," [ options ]"
+        print "options:"
+        print "     -h: this help"
+        print "     -i: name of the input file (format='map')"
+        print "     -o: name of the output file (default=inFileName+'.coverage')"
+        print
+        
+        
+    def setAttributesFromCmdLine( self ):
+        """
+        Set the attributes from the command-line.
+        """
+        try:
+            opts, args = getopt.getopt(sys.argv[1:],"hi:o:v:")
+        except getopt.GetoptError, err:
+            print str(err); self.help(); sys.exit(1)
+        for o,a in opts:
+            if o == "-h":
+                self.help(); sys.exit(0)
+            elif o == "-i":
+                self.setInputFileName( a )
+            elif o == "-o":
+                self._outFileName = a
+            elif o == "-v":
+                self._verbose = int(a)
+                
+                
+    def setInputFileName( self, inFileName ):
+        self._inFileName = inFileName
+        
+    def setVerbose( self, verbose ):
+        self._verbose = int(verbose)
+        
+    def checkAttributes( self ):
+        """
+        Check the attributes are valid before running the algorithm.
+        """
+        if self._inFileName == "":
+            print "ERROR: missing input file"
+            self.help(); sys.exit(1)
+        if not os.path.exists( self._inFileName ):
+            print "ERROR: can't find file '%s'" % ( self._inFileName )
+            self.help(); sys.exit(1)
+        if self._outFileName == "":
+            self._outFileName = "%s.coverage" % ( self._inFileName )
+            
+            
+    def mergeCoordinates( self ):
+        """
+        Merge the coordinates with 'mapOp'.
+        """
+        if self._verbose > 0:
+            print "merge the coordinates with mapOp..."; sys.stdout.flush()
+        if not CheckerUtils.isExecutableInUserPath( "mapOp" ):
+            msg = "ERROR: 'mapOp' is not in your PATH"
+            sys.stderr.write( "%s\n" % msg )
+            sys.exit(1)
+        pL = programLauncher()
+        prg = os.environ["REPET_PATH"] + "/bin/mapOp"
+        cmd = prg
+        cmd += " -q %s" % ( self._inFileName )
+        cmd += " -m"
+        cmd += " > /dev/null"
+        pL.launch( prg, cmd, self._verbose - 1 )
+        if self._verbose > 0:
+            print "coordinates merged !"; sys.stdout.flush()
+        mergeFileName = "%s.merge" % ( self._inFileName )
+        inPath, inName = os.path.split( self._inFileName )
+        if inPath != "":
+            os.system( "mv %s.merge %s" % ( inName, inPath ) )
+        return mergeFileName
+    
+    
+    def getStatsPerChr( self, mergeFileName ):
+        """
+        Return summary statistics on the coordinates, per chromosome.
+        @param mergeFileName: L{Map<commons.core.coord.Macommons.coreype mergeFileName: string
+        @return: dictionary whose keys are the chromosomes of the 'map file and values are L{Stat<pyRepet.util.Stat>} instances
+        """
+        dChr2Stats = {}
+        if self._verbose > 0:
+            print "compute the coverage of the coordinates..."; sys.stdout.flush()
+        mergeF = open( mergeFileName, "r" )
+        line = mergeF.readline()
+        while True:
+            if line == "": break
+            tokens = line[:-1].split("\t")
+            if int(tokens[2]) < int(tokens[3]):
+                matchLength = int(tokens[3]) - int(tokens[2]) + 1
+            elif int(tokens[2]) > int(tokens[3]):
+                matchLength = int(tokens[2]) - int(tokens[3]) + 1
+            if not dChr2Stats.has_key( tokens[1] ):
+                dChr2Stats[ tokens[1] ] = Stat()
+            dChr2Stats[ tokens[1] ].add( matchLength )
+            line = mergeF.readline()
+        mergeF.close()
+        os.remove( mergeFileName )
+        return dChr2Stats
+    
+    
+    def saveCumulLength( self, dChr2Stats ):
+        """
+        Write the stats in the output file.
+        """
+        outF = open( self._outFileName, "w" )
+        totalLength = 0
+        for i in dChr2Stats.keys():
+            totalLength += dChr2Stats[i].sum
+            string = "cumulative length (in bp) on '%s': %i" % ( i, dChr2Stats[i].sum )
+            outF.write( "%s\n" % ( string ) )
+            if self._verbose > 0: print string
+        string = "total cumulative length (in bp): %i" % ( totalLength )
+        outF.write( "%s\n" % ( string ) )
+        if self._verbose > 0: print string
+        outF.close()
+        sys.stdout.flush()
+        
+        
+    def start( self ):
+        """
+        Useful commands before running the program.
+        """
+        if self._verbose > 0:
+            print "beginning of %s" % (sys.argv[0].split("/")[-1]); sys.stdout.flush()
+        self.checkAttributes()
+        if self._verbose > 0:
+            print "input file : '%s'" % ( self._inFileName )
+            sys.stdout.flush()
+            
+            
+    def end( self ):
+        """
+        Useful commands before ending the program.
+        """
+        if self._verbose > 0:
+            print "%s finished successfully" % (sys.argv[0].split("/")[-1]); sys.stdout.flush()
+            
+            
+    def run( self ):
+        """
+        Run the program.
+        """
+        self.start()
+        mergeFileName = self.mergeCoordinates()
+        dChr2Stats = self.getStatsPerChr( mergeFileName )
+        self.saveCumulLength( dChr2Stats )
+        self.end()
+        
+        
+if __name__ == '__main__':
+    i = CalcCoordCumulLength()
+    i.setAttributesFromCmdLine()
+    i.run()