4
|
1 #!/usr/bin/env python
|
|
2 """
|
|
3 Parse the FR-HIT output file format.
|
|
4 """
|
|
5
|
|
6 from __future__ import division
|
|
7 import sys, getopt
|
|
8
|
|
class FRHITFragment:
    """
    One line of FR-HIT output: a fragment matched to a reference genome.

    Attributes, all taken from the whitespace-separated columns of the line:

    name     -- first column, kept as text.
    refseq   -- fourth '|'-separated piece of the ninth column.
    location -- tenth column, as an int.
    identity -- eighth column with its last character dropped (presumably a
                trailing '%'), as a float.
    length   -- fourth column, as an int.
    """

    def __init__(self, frhit_string):
        """
        Build a fragment from one raw line of FR-HIT output.

        Raises IndexError if the line has too few columns (or the ninth
        column has fewer than four '|'-separated pieces), and ValueError if
        a numeric column cannot be converted.
        """
        # split() with no separator already ignores leading/trailing
        # whitespace, including the line terminator.
        columns = frhit_string.split()

        self.name = columns[0]

        reference_parts = columns[8].split('|')
        self.refseq = reference_parts[3]

        self.location = int(columns[9])

        identity_text = columns[7]
        self.identity = float(identity_text[:-1])

        self.length = int(columns[3])

    def __repr__(self):
        """Return a three-line summary: name, location and identity."""
        name_line = "Name: %s\n" % self.name
        location_line = "Location: %d\n" % self.location
        identity_line = "Identity: %4.1f\n" % self.identity
        return name_line + location_line + identity_line
|
|
29
|
|
30
|
|
def FRHITFile(filename):
    """
    Parse an FR-HIT output file, yielding one FRHITFragment per line.

    This is a generator, so the full list of fragments never has to be held
    in memory unless the caller chooses to collect it.

    filename -- path of the FR-HIT output file to read.

    The file is opened when iteration starts. It is closed when the last
    line has been consumed, when the generator is closed or discarded early,
    or when a line fails to parse; any parsing exception propagates to the
    caller.
    """
    # Text mode yields str lines on both Python 2 and Python 3; the fragment
    # parser splits its fields using str separators.
    with open(filename, 'r') as handle:
        for line in handle:
            yield FRHITFragment(line)
|
|
45
|
|
46
|
|
if __name__ == '__main__':
    # Command-line entry point: print every fragment of the FR-HIT output
    # file named by the first positional argument.
    #
    # Exit status: 0 after printing usage for -h, 1 for an unrecognised
    # option or a missing file argument.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'h')
    except getopt.GetoptError as err:
        # getopt raises for any option other than -h, so unknown options
        # have to be reported here rather than inside the loop below.
        print('unhandled option: %s' % err)
        sys.exit(1)

    for opt, _ in opts:
        if opt == '-h':
            print('Usage:')
            print(' %s <fr-hit_output_file>' % sys.argv[0])
            sys.exit(0)

    if not args:
        print('Specify an FR-HIT output file as an argument')
        sys.exit(1)

    # Only the first positional argument is read; any extras are ignored.
    for frag in FRHITFile(args[0]):
        print(frag)
|