Mercurial > repos > yufei-luo > s_mart
diff commons/launcher/launchTEclass.py @ 31:0ab839023fe4
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:33:21 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line diff
#!/usr/bin/env python
# Reconstructed from the changeset diff of
# commons/launcher/launchTEclass.py (file added, @@ -0,0 +1,138 @@).
"""Launch TEclass on a fasta file and convert its result into a 'map' file."""

import getopt
import glob
import os
import shutil
import subprocess
import sys


def help():
    """Print the command-line usage on standard output."""
    print("")
    print("usage: %s [ options ]" % (os.path.basename(sys.argv[0])))
    print("options:")
    print(" -h: this help")
    print(" -i: name of the input file (format='fasta')")
    # The default below matches what main() really uses.
    print(" -o: name of the output file (format='map', default=inFileName+'.TEclass.map')")
    print(" -c: clean")
    print(" -v: verbosity level (default=0/1)")
    print("")


def _parseTEclassHeader(line):
    """
    Split a fasta header line (starting with '>', no trailing newline).

    Returns a tuple (header, classif) where 'header' is the text between
    '>' and the first '|', and 'classif' is the text found after the first
    ': ' up to the next '|'.
    Raises ValueError when the line contains no ': ' separator.
    """
    content = line[1:]
    header = content.split("|")[0]
    lParts = content.split(": ")
    if len(lParts) < 2:
        raise ValueError("can't find a classification (': ') in header '%s'" % (line))
    classif = lParts[1].split("|")[0]
    return header, classif


def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
    """
    Convert a fasta file with TEclass-style headers into a 'map' file.

    For each sequence, one line "classif<TAB>header<TAB>1<TAB>length" is
    written to 'outFile', where 'length' is the total number of residues of
    the sequence (all its lines summed, line endings excluded). Headers
    that are not followed by any sequence data produce no record.

    inFile: path of the fasta file to read
    outFile: path of the map file to write (overwritten if it exists)
    verbose: if > 0, print the number of sequences per classification

    Raises ValueError if a header line has no ': ' separator.
    """
    mapFormat = "%s\t%s\t%i\t%i\n"
    dClassif2Count = {}
    header = ""
    classif = ""
    seqLength = 0
    hasSequence = False

    # Nested 'with' blocks (rather than a single one) keep Python 2.6 happy
    # and guarantee both handlers are closed even when parsing fails.
    with open( inFile, "r" ) as inHandler:
        with open( outFile, "w" ) as outHandler:
            for line in inHandler:
                # Strip only the line ending, so that a last line without
                # a trailing newline does not lose its final character.
                line = line.rstrip( "\r\n" )
                if line.startswith( ">" ):
                    if hasSequence:
                        outHandler.write( mapFormat % ( classif, header, 1, seqLength ) )
                    header, classif = _parseTEclassHeader( line )
                    dClassif2Count[ classif ] = dClassif2Count.get( classif, 0 ) + 1
                    seqLength = 0
                    hasSequence = False
                elif line:
                    # A sequence may span several lines: accumulate.
                    seqLength += len( line )
                    hasSequence = True
            if hasSequence:
                outHandler.write( mapFormat % ( classif, header, 1, seqLength ) )

    if verbose > 0:
        # Sorted so that the report is the same from one run to the next.
        for classif in sorted( dClassif2Count ):
            print("%s: %i sequences" % ( classif, dClassif2Count[ classif ] ))
        sys.stdout.flush()


def _findTEclassOutputDir( inFileName ):
    """
    Return the directory holding the TEclass results, or "" if not found.

    A candidate is a directory named '<inFileName>_*' which contains exactly
    4 entries, one of them being '<inFileName>.lib'.
    NOTE(review): the count of 4 presumably reflects what TEclass writes in
    its output directory -- confirm against the TEclass version in use.
    """
    for candidate in glob.glob( "%s_*" % ( inFileName ) ):
        if not os.path.isdir( candidate ):
            continue
        lContent = glob.glob( "%s/*" % ( candidate ) )
        if len(lContent) == 4 and "%s/%s.lib" % ( candidate, inFileName ) in lContent:
            return candidate
    return ""


def main():
    """
    Launch TEclass to classify TE sequences.

    Steps: parse the command line, run 'test_consensi_2.1.pl' on the input
    file, locate the output directory it created, convert its '.lib' file
    into a map file and, with '-c', remove the output directory.
    Exits with status 1 on any error; returns 0 on success.
    """
    inFileName = ""
    outFileName = ""
    clean = False
    verbose = 0

    try:
        opts, args = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            try:
                verbose = int(a)
            except ValueError:
                print("ERROR: verbosity level (-v) should be an integer, got '%s'" % ( a ))
                help()
                sys.exit(1)

    if inFileName == "":
        print("ERROR: missing input file (-i)")
        help()
        sys.exit(1)
    if not os.path.exists( inFileName ):
        print("ERROR: can't find input file '%s'" % ( inFileName ))
        help()
        sys.exit(1)
    if outFileName == "":
        outFileName = "%s.TEclass.map" % ( inFileName )

    progName = os.path.basename( sys.argv[0] )
    if verbose > 0:
        print("START %s" % ( progName ))
        print("launch TEclass...")
        sys.stdout.flush()

    prg = "test_consensi_2.1.pl"
    # An argument list (no shell) keeps file names with spaces or shell
    # metacharacters intact.
    try:
        returnValue = subprocess.call( [ prg, inFileName ] )
    except OSError as err:
        print("ERROR: can't launch '%s' (%s)" % ( prg, err ))
        sys.exit(1)
    if returnValue != 0:
        print("ERROR: '%s' returned %i" % ( prg, returnValue ))
        sys.exit(1)

    outDir = _findTEclassOutputDir( inFileName )
    if outDir == "":
        print("ERROR: can't find output directory")
        sys.exit(1)

    if verbose > 0:
        print("parse the results...")
        sys.stdout.flush()
    # Read the '.lib' file in place and write the map file straight to its
    # destination: no need to change directory nor to move the file after.
    parseFastaFileFromTEclass( "%s/%s.lib" % ( outDir, inFileName ),
                               outFileName,
                               verbose )

    if clean:
        if verbose > 0:
            print("clean the temporary files...")
            sys.stdout.flush()
        shutil.rmtree( outDir )

    if verbose > 0:
        print("END %s" % ( progName ))
        sys.stdout.flush()

    return 0


if __name__ == "__main__":
    sys.exit( main() )