Mercurial > repos > yufei-luo > s_mart
view commons/tools/SpliceTEsFromGenome.py @ 19:9bcfa7936eec
Deleted selected files
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:23:29 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Command-line tool that splices TE coordinates out of a genome fasta file.

TE coordinates are read either from a file or from a database table
(accessed through ``DbMySql``), in 'map', 'align' or 'path' format.  They are
converted to a 'map' file when needed, merged with
``MapUtils.mergeCoordsInFile``, and passed with the genome file to
``FastaUtils.spliceFromCoords``, which writes the output fasta file.

The syntax used in this file parses under both Python 2.6+ and Python 3.
"""

import sys
import os
import getopt
import subprocess

from commons.core.sql.DbMySql import DbMySql
from commons.core.seq.FastaUtils import FastaUtils
from commons.core.coord.MapUtils import MapUtils
from commons.core.coord.AlignUtils import AlignUtils
from commons.core.coord.PathUtils import PathUtils


class SpliceTEsFromGenome(object):
    """Driver object: parse options, validate them, then run the splicing.

    Typical use is ``setAttributesFromCmdLine()`` followed by ``run()``.
    """

    # Values accepted by the -f option.
    _SUPPORTED_FORMATS = ("map", "align", "path")

    def __init__(self):
        """Initialise every option to its 'not provided' value."""
        self._inputData = ""    # -i: coordinates file name or table name
        self._formatData = ""   # -f: one of _SUPPORTED_FORMATS
        self._genomeFile = ""   # -g: genome fasta file
        self._configFile = ""   # -C: configuration file, used when -i is a table
        self._outFile = ""      # -o: output fasta file
        self._verbose = 0       # -v: verbosity level
        self._db = None         # DbMySql handle, only set when -i is a table

    def help(self):
        """Print the command-line usage on standard output."""
        print("")
        print("usage: SpliceTEsFromGenome.py [ options ]")
        print("options:")
        print(" -h: this help")
        print(" -i: input TE coordinates (can be file or table)")
        print(" TEs as subjects if align or path format")
        print(" -f: format of the data (map/align/path)")
        print(" -g: genome file (format=fasta)")
        print(" -C: configuration file (if table as input)")
        print(" -o: output fasta file (default=genomeFile+'.splice')")
        print(" -v: verbosity level (default=0/1)")
        print("")

    def _exitWithError(self, msg, showHelp=True):
        """Write `msg` on stderr, optionally print the help, and exit(1)."""
        sys.stderr.write("%s\n" % msg)
        if showHelp:
            self.help()
        sys.exit(1)

    def setAttributesFromCmdLine(self):
        """Fill the attributes from ``sys.argv``.

        Exits with status 1 (after printing the help) on an unknown option
        or a non-integer verbosity, and with status 0 on ``-h``.
        """
        try:
            opts, _ = getopt.getopt(sys.argv[1:], "hi:f:g:C:o:v:")
        except getopt.GetoptError as err:
            self._exitWithError(str(err))
        for opt, value in opts:
            if opt == "-h":
                self.help()
                sys.exit(0)
            elif opt == "-i":
                self._inputData = value
            elif opt == "-f":
                self._formatData = value
            elif opt == "-g":
                self._genomeFile = value
            elif opt == "-C":
                self._configFile = value
            elif opt == "-o":
                self._outFile = value
            elif opt == "-v":
                # A non-numeric level is a usage error, not a crash.
                try:
                    self._verbose = int(value)
                except ValueError:
                    self._exitWithError(
                        "ERROR: verbosity level (-v) should be an integer,"
                        " got '%s'" % value)

    def checkAttributes(self):
        """Validate the options; exit(1) with a message on the first problem.

        When the input data is not an existing file it is treated as a table
        name: the configuration file must then exist, a ``DbMySql``
        connection is opened and kept in ``self._db``, and the table must
        exist.  Also sets the default output file name.
        """
        if self._inputData == "":
            self._exitWithError("ERROR: missing input data (-i)")

        if not os.path.exists(self._inputData):
            # Not a file on disk, so the input has to be a table.
            if not os.path.exists(self._configFile):
                self._exitWithError(
                    "ERROR: neither input file '%s' nor configuration file '%s'"
                    % (self._inputData, self._configFile))
            self._db = DbMySql(cfgFileName=self._configFile)
            if not self._db.doesTableExist(self._inputData):
                self._exitWithError(
                    "ERROR: can't find table '%s'" % (self._inputData))

        if self._formatData == "":
            self._exitWithError("ERROR: need to precise format (-f)")
        if self._formatData not in self._SUPPORTED_FORMATS:
            self._exitWithError(
                "ERROR: format '%s' not yet supported" % (self._formatData))

        if self._genomeFile == "":
            self._exitWithError("ERROR: missing genome file (-g)")
        if not os.path.exists(self._genomeFile):
            self._exitWithError(
                "ERROR: can't find genome file '%s'" % (self._genomeFile))

        if self._outFile == "":
            self._outFile = "%s.splice" % (self._genomeFile)
            if self._verbose > 0:
                print("output fasta file: %s" % self._outFile)

    def getCoordsAsMapFile(self):
        """Return the name of a 'map' file holding the TE coordinates.

        If the input is a table, it is first exported to
        ``<table>.<format>`` with the external ``srptExportTable.py`` script
        and ``self._inputData`` is updated to that file name.  'align' and
        'path' data are then converted into ``<input>.map``.
        """
        if self._verbose > 0:
            print("get TE coordinates as 'Map' file")
            sys.stdout.flush()

        if self._db is not None:
            exportFile = "%s.%s" % (self._inputData, self._formatData)
            # Argument list (no shell): names containing spaces or shell
            # metacharacters are passed through untouched.
            cmd = ["srptExportTable.py",
                   "-i", self._inputData,
                   "-C", self._configFile,
                   "-o", exportFile]
            try:
                returnStatus = subprocess.call(cmd)
            except OSError:
                # The script could not be launched at all.
                returnStatus = 1
            if returnStatus != 0:
                self._exitWithError("ERROR while exporting data from table",
                                    showHelp=False)
            self._inputData = exportFile

        if self._formatData == "map":
            return self._inputData
        elif self._formatData == "align":
            mapFile = "%s.map" % (self._inputData)
            AlignUtils.convertAlignFileIntoMapFileWithSubjectsOnQueries(
                self._inputData, mapFile)
            return mapFile
        elif self._formatData == "path":
            mapFile = "%s.map" % (self._inputData)
            PathUtils.convertPathFileIntoMapFileWithSubjectsOnQueries(
                self._inputData, mapFile)
            return mapFile

    def mergeCoordsInMapFile(self, mapFile):
        """Merge the coordinates of `mapFile` into ``<mapFile>.merge``.

        `mapFile` is deleted afterwards unless it is the user's own input
        file (format 'map' read from a file rather than a table).

        Returns the name of the merged file.
        """
        if self._verbose > 0:
            print("merge TE coordinates")
            sys.stdout.flush()
        mergeFile = "%s.merge" % (mapFile)
        MapUtils.mergeCoordsInFile(mapFile, mergeFile)
        if self._formatData != "map" or self._db is not None:
            os.remove(mapFile)
        return mergeFile

    def spliceFastaFromCoords(self, mergeFile):
        """Run ``FastaUtils.spliceFromCoords`` and delete `mergeFile`."""
        if self._verbose > 0:
            print("splice TE copies from the genome")
            sys.stdout.flush()
        FastaUtils.spliceFromCoords(self._genomeFile, mergeFile, self._outFile)
        os.remove(mergeFile)

    def start(self):
        """Validate the attributes and, if verbose, announce the start."""
        self.checkAttributes()
        if self._verbose > 0:
            print("START SpliceTEsFromGenome.py")
            sys.stdout.flush()

    def end(self):
        """Close the database connection, if any, and announce the end."""
        if self._db is not None:
            self._db.close()
        if self._verbose > 0:
            print("END SpliceTEsFromGenome.py")
            sys.stdout.flush()

    def run(self):
        """Run the whole pipeline: check, get map file, merge, splice."""
        self.start()
        mapFile = self.getCoordsAsMapFile()
        mergeFile = self.mergeCoordsInMapFile(mapFile)
        self.spliceFastaFromCoords(mergeFile)
        self.end()


if __name__ == "__main__":
    i = SpliceTEsFromGenome()
    i.setAttributesFromCmdLine()
    i.run()