comparison FRHIT_parser.py @ 4:398c3753c358 draft

Uploaded
author schang
date Tue, 02 Dec 2014 19:54:00 -0500
parents
children
comparison
equal deleted inserted replaced
3:451df50fb4d4 4:398c3753c358
1 #!/usr/bin/env python
2 """
3 Parse the FR-HIT output file format.
4 """
5
6 from __future__ import division
7 import sys, getopt
8
class FRHITFragment:
    """
    Represents a fragment match to a reference genome.

    An instance is built from one whitespace-separated line of FR-HIT
    output.  The following 0-based columns are read:

        0  -> name      (kept as a string)
        3  -> length    (int)
        7  -> identity  (float; a trailing '%' is removed)
        8  -> refseq    (fourth '|'-separated part when present,
                         otherwise the whole column)
        9  -> location  (int)

    Raises IndexError if the line has fewer than ten columns and
    ValueError if a numeric column cannot be converted.
    """

    def __init__(self, frhit_string):
        fields = frhit_string.rstrip().split()

        self.name = fields[0]

        # Reference names shaped like "gi|123|ref|NC_000913.2|" carry the
        # identifier in the fourth pipe-separated slot.  Names that do not
        # have that many slots are kept whole instead of raising IndexError.
        ref_parts = fields[8].split('|')
        self.refseq = ref_parts[3] if len(ref_parts) > 3 else fields[8]

        self.location = int(fields[9])

        # Identity is written as a percentage ("98.67%").  Strip only an
        # actual '%' so a bare number does not silently lose its last digit.
        self.identity = float(fields[7].rstrip('%'))
        self.length = int(fields[3])

    def __repr__(self):
        """Return a three-line summary: name, location and identity."""
        return "Name: %s\nLocation: %d\nIdentity: %4.1f\n" % (
            self.name, self.location, self.identity)
29
30
def FRHITFile(filename):
    """
    Parses the fr-hit format file, returning FRHITFragment objects.

    This is a generator, so that there is no need to store a full list of
    fragments in memory if it's not necessary to do so.

    filename -- path of the FR-HIT output file to read.

    Yields one FRHITFragment per non-blank line, in file order.  The file
    is closed when the generator is exhausted, closed early, or when
    parsing a line raises.
    """

    # The with-block guarantees the handle is released even if the caller
    # stops iterating early or FRHITFragment raises on a malformed line;
    # a trailing f.close() would only run after full exhaustion.
    with open(filename, 'rb') as handle:
        for line in handle:
            # Blank lines (e.g. a trailing newline) hold no record and
            # would otherwise fail with IndexError inside the parser.
            if not line.strip():
                continue
            yield FRHITFragment(line)
45
46
if __name__ == '__main__':
    # Command-line entry point: print every fragment parsed from the
    # FR-HIT output file given as the first positional argument.
    #
    # getopt raises GetoptError for any option other than -h, so the
    # "unhandled option" message has to be emitted from the except clause;
    # an else-branch inside the option loop can never be reached.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'h')
    except getopt.GetoptError:
        print('unhandled option')
        sys.exit(1)

    # Single-argument print(...) behaves the same with or without
    # print_function, so these calls need no __future__ import.
    for opt, _ in opts:
        if opt == '-h':
            print('Usage:')
            print(' %s <fr-hit_output_file>' % sys.argv[0])
            sys.exit(0)

    if not args:
        print('Specify an FR-HIT output file as an argument')
        sys.exit(1)

    for frag in FRHITFile(args[0]):
        print(frag)