Mercurial > repos > yufei-luo > s_mart
view commons/launcher/launchTEclass.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Launch TEclass on a fasta file and summarize its classification as a 'map' file.

The code is written so that it runs unchanged on Python 2.6+ and Python 3:
every ``print`` call takes exactly one argument.
"""

import os
import sys
import getopt
import glob
import shutil
import subprocess


def help():
    """Print the command-line usage on standard output."""
    print("")
    print("usage: %s [ options ]" % (os.path.basename(sys.argv[0])))
    print("options:")
    print(" -h: this help")
    print(" -i: name of the input file (format='fasta')")
    # The default really is '<input>.TEclass.map' (see main()).
    print(" -o: name of the output file (format='map', default=inFileName+'.TEclass.map')")
    print(" -c: clean")
    print(" -v: verbosity level (default=0/1)")
    print("")


def _writeMapRecord(outHandler, classif, header, seqLength):
    """Write one 'map' line for a sequence, unless the sequence is empty."""
    if seqLength > 0:
        outHandler.write("%s\t%s\t%i\t%i\n" % (classif, header, 1, seqLength))


def parseFastaFileFromTEclass(inFile, outFile, verbose=0):
    """
    Convert the fasta file written by TEclass into a 'map' file.

    Headers are expected to look like '>name|<label>: <classif>|...': the
    sequence name is the text before the first '|', and the classification is
    the text between the first ': ' and the following '|'.

    One line 'classif<TAB>name<TAB>1<TAB>length' is written per sequence. The
    length is summed over all the lines of the sequence, so sequences wrapped
    over several lines yield a single record. Sequences of length 0 are not
    reported.

    inFile:  path to the fasta file produced by TEclass
    outFile: path to the 'map' file to write
    verbose: if > 0, print the number of sequences per classification

    Raises IndexError if a header does not contain ': '.
    """
    dClassif2Count = {}
    header = ""
    classif = ""
    seqLength = 0

    # Nested 'with' blocks guarantee both files are closed even on error
    # (and keep Python 2.6 compatibility).
    with open(inFile, "r") as inHandler:
        with open(outFile, "w") as outHandler:
            for line in inHandler:
                # Strip only the line terminator: the last line of a file may
                # have none, and blindly dropping one character would shorten
                # the sequence.
                line = line.rstrip("\r\n")
                if line.startswith(">"):
                    # A new header closes the previous sequence.
                    _writeMapRecord(outHandler, classif, header, seqLength)
                    description = line[1:]
                    header = description.split("|")[0]
                    classif = description.split(": ")[1].split("|")[0]
                    dClassif2Count[classif] = dClassif2Count.get(classif, 0) + 1
                    seqLength = 0
                else:
                    seqLength += len(line)
            # Do not forget the last sequence of the file.
            _writeMapRecord(outHandler, classif, header, seqLength)

    if verbose > 0:
        # Sorted so that the report does not depend on dictionary ordering.
        for classif in sorted(dClassif2Count):
            print("%s: %i sequences" % (classif, dClassif2Count[classif]))
        sys.stdout.flush()


def _findTEclassOutputDir(inFileName):
    """
    Return the directory '<inFileName>_*' holding the TEclass results.

    The directory is recognized by containing exactly 4 entries, one of them
    being '<inFileName>.lib'. Return "" if no such directory exists.
    """
    for candidate in glob.glob("%s_*" % (inFileName)):
        if not os.path.isdir(candidate):
            continue
        lFiles = glob.glob("%s/*" % (candidate))
        if len(lFiles) == 4 and "%s/%s.lib" % (candidate, inFileName) in lFiles:
            return candidate
    return ""


def main():
    """
    Launch TEclass to classify TE sequences.

    Parse the command line, run 'test_consensi_2.1.pl' on the input file,
    locate the result directory, convert its '.lib' file into the 'map'
    output file and, with '-c', remove the result directory.

    Return 0 on success; exit with status 1 on any error.
    """
    inFileName = ""
    outFileName = ""
    clean = False
    verbose = 0

    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:o:cv:")
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            try:
                verbose = int(a)
            except ValueError:
                # Report a bad value like any other usage error instead of
                # dying with a traceback.
                print("ERROR: verbosity level (-v) should be an integer, not '%s'" % (a))
                help()
                sys.exit(1)

    if inFileName == "":
        print("ERROR: missing input file (-i)")
        help()
        sys.exit(1)
    if not os.path.exists(inFileName):
        print("ERROR: can't find input file '%s'" % (inFileName))
        help()
        sys.exit(1)
    if outFileName == "":
        outFileName = "%s.TEclass.map" % (inFileName)

    progName = os.path.basename(sys.argv[0])

    if verbose > 0:
        print("START %s" % (progName))
        print("launch TEclass...")
        # Flush before spawning the child so that messages stay in order.
        sys.stdout.flush()

    prg = "test_consensi_2.1.pl"
    # An argument list (no shell) keeps file names containing spaces or shell
    # metacharacters intact and cannot be abused to inject commands.
    try:
        returnValue = subprocess.call([prg, inFileName])
    except OSError as err:
        print("ERROR: can't launch '%s': %s" % (prg, err))
        sys.exit(1)
    if returnValue != 0:
        print("ERROR: '%s' returned %i" % (prg, returnValue))
        sys.exit(1)

    outDir = _findTEclassOutputDir(inFileName)
    if outDir == "":
        print("ERROR: can't find output directory")
        sys.exit(1)

    if verbose > 0:
        print("parse the results...")
        sys.stdout.flush()
    # Read the results in place and write the map file straight to its final
    # destination: no chdir() side effect, no external 'mv' whose failure
    # would go unnoticed, and an absolute '-o' path is honored.
    parseFastaFileFromTEclass("%s/%s.lib" % (outDir, inFileName), outFileName, verbose)

    if clean:
        if verbose > 0:
            print("clean the temporary files...")
            sys.stdout.flush()
        shutil.rmtree(outDir)

    if verbose > 0:
        print("END %s" % (progName))
        sys.stdout.flush()

    return 0


if __name__ == "__main__":
    sys.exit(main())