18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import getopt
|
|
6 import glob
|
|
7 import shutil
|
|
8
|
|
9
|
|
def help():
    """
    Print the command-line usage of this program on standard output.

    The program name shown in the usage line is the base name of
    sys.argv[0]. The text is written with sys.stdout.write so that the
    function behaves the same under Python 2 and Python 3.
    """
    progName = os.path.basename( sys.argv[0] )
    lUsageLines = [
        "",
        "usage: %s [ options ]" % ( progName ),
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        # the default documented here must match the one applied in main()
        " -o: name of the output file (format='map', default=inFileName+'.TEclass.map')",
        " -c: clean",
        " -v: verbosity level (default=0/1)",
        "",
    ]
    sys.stdout.write( "\n".join( lUsageLines ) + "\n" )
|
|
20
|
|
def parseFastaFileFromTEclass( inFile, outFile, verbose=0 ):
    """
    Convert a fasta file annotated by TEclass into a tab-delimited 'map' file.

    Each header line (starting with '>') is split as follows:
      - the sequence name is the text between '>' and the first '|';
      - the classification is the text following the first ': ', up to
        the next '|'.
    One line "classification<TAB>name<TAB>1<TAB>length" is written per
    record, where length is the total number of residues of the record
    (sequences wrapped over several lines are summed, blank lines are
    ignored, and a missing newline at the end of the file is handled).
    Records without any sequence line produce no output line.

    @param inFile: name of the input fasta file
    @param outFile: name of the output map file (overwritten if it exists)
    @param verbose: if > 0, print the number of sequences per classification
    @raise IndexError: if a header line does not contain ': '
    """
    mapFormat = "%s\t%s\t%i\t%i\n"
    dClassif2Count = {}
    header = ""
    classif = ""
    seqLength = 0
    hasSequence = False   # True once the current record has a sequence line

    # 'with' guarantees both files are closed even if a header is malformed
    with open( inFile, "r" ) as inHandler:
        with open( outFile, "w" ) as outHandler:
            for line in inHandler:
                # strip the end-of-line explicitly instead of slicing off
                # the last character, which may be a real residue
                line = line.rstrip( "\r\n" )
                if line.startswith( ">" ):
                    if hasSequence:
                        outHandler.write( mapFormat % ( classif, header, 1, seqLength ) )
                    header = line[1:].split( "|" )[0]
                    classif = line[1:].split( ": " )[1].split( "|" )[0]
                    dClassif2Count[ classif ] = dClassif2Count.get( classif, 0 ) + 1
                    seqLength = 0
                    hasSequence = False
                elif line != "":
                    seqLength += len( line )
                    hasSequence = True
            # flush the last record of the file
            if hasSequence:
                outHandler.write( mapFormat % ( classif, header, 1, seqLength ) )

    if verbose > 0:
        # sorted so that the summary is reproducible from run to run
        for classif in sorted( dClassif2Count ):
            print( "%s: %i sequences" % ( classif, dClassif2Count[ classif ] ) )
        sys.stdout.flush()
|
|
46
|
|
47
|
|
def main():
    """
    Launch TEclass to classify TE sequences.

    Steps:
      1. parse the command-line options (see help());
      2. run 'test_consensi_2.1.pl' on the input fasta file;
      3. look for the output directory, i.e. a directory matching
         '<inFileName>_*' holding exactly 4 entries, one of them being
         '<inFileName>.lib';
      4. convert '<inFileName>.lib' into the output map file;
      5. with '-c', remove the output directory.

    Exits with status 1 on a usage error, when the input file is missing,
    when the TEclass program fails, or when no output directory is found.

    @return: 0 on success
    """
    # local import: this is the only function needing it
    import subprocess

    inFileName = ""
    outFileName = ""
    clean = False
    verbose = 0

    try:
        opts, _args = getopt.getopt( sys.argv[1:], "hi:o:cv:" )
    except getopt.GetoptError as err:
        print( str(err) )
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-o":
            outFileName = a
        elif o == "-c":
            clean = True
        elif o == "-v":
            try:
                verbose = int(a)
            except ValueError:
                # report a usage error rather than crash with a traceback
                print( "ERROR: verbosity level (-v) should be an integer, not '%s'" % ( a ) )
                help()
                sys.exit(1)

    if inFileName == "":
        print( "ERROR: missing input file (-i)" )
        help()
        sys.exit(1)
    if not os.path.exists( inFileName ):
        print( "ERROR: can't find input file '%s'" % ( inFileName ) )
        help()
        sys.exit(1)
    if outFileName == "":
        outFileName = "%s.TEclass.map" % ( inFileName )

    progName = os.path.basename( sys.argv[0] )

    def report( message ):
        # progress messages are shown only in verbose mode
        if verbose > 0:
            print( message )
            sys.stdout.flush()

    report( "START %s" % ( progName ) )

    report( "launch TEclass..." )
    prg = "test_consensi_2.1.pl"
    # the arguments are given as a list, without any shell, so that a file
    # name holding spaces or shell metacharacters is passed untouched
    try:
        returnValue = subprocess.call( [ prg, inFileName ] )
    except OSError as err:
        print( "ERROR: can't launch '%s': %s" % ( prg, err ) )
        sys.exit(1)
    if returnValue != 0:
        print( "ERROR: '%s' returned %i" % ( prg, returnValue ) )
        sys.exit(1)

    libFileName = "%s.lib" % ( inFileName )
    outDir = ""
    for candidate in glob.glob( "%s_*" % ( inFileName ) ):
        if not os.path.isdir( candidate ):
            continue
        lContent = glob.glob( "%s/*" % ( candidate ) )
        if len(lContent) == 4 and os.path.join( candidate, libFileName ) in lContent:
            outDir = candidate
            break
    if outDir == "":
        print( "ERROR: can't find output directory" )
        sys.exit(1)

    report( "parse the results..." )
    # read the library inside the output directory and write the map file
    # straight to its final place: no change of working directory and no
    # 'mv' needed, hence '-o' can also be a path
    parseFastaFileFromTEclass( os.path.join( outDir, libFileName ),
                               outFileName,
                               verbose )

    if clean:
        report( "clean the temporary files..." )
        shutil.rmtree( outDir )

    report( "END %s" % ( progName ) )

    return 0
|
|
135
|
|
136
|
|
# Script entry point: the value returned by main() (0 on success) becomes
# the exit status of the process instead of being discarded.
if __name__ == "__main__":
    sys.exit( main() )
|