annotate FRHIT_parser.py @ 7:6d94241370cc draft default tip

Uploaded
author schang
date Tue, 02 Dec 2014 21:06:35 -0500
parents 398c3753c358
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
398c3753c358 Uploaded
schang
parents:
diff changeset
1 #!/usr/bin/env python
398c3753c358 Uploaded
schang
parents:
diff changeset
2 """
398c3753c358 Uploaded
schang
parents:
diff changeset
3 Parse the FR-HIT output file format.
398c3753c358 Uploaded
schang
parents:
diff changeset
4 """
398c3753c358 Uploaded
schang
parents:
diff changeset
5
398c3753c358 Uploaded
schang
parents:
diff changeset
6 from __future__ import division
398c3753c358 Uploaded
schang
parents:
diff changeset
7 import sys, getopt
398c3753c358 Uploaded
schang
parents:
diff changeset
8
398c3753c358 Uploaded
schang
parents:
diff changeset
class FRHITFragment(object):
    """
    Represents a fragment match to a reference genome.

    An instance is built from one whitespace-delimited line of FR-HIT
    output and exposes these attributes:

    name     -- field 0, taken verbatim.
    length   -- field 3, converted to int.
    identity -- field 7 as a float, with any trailing '%' removed.
    refseq   -- part 3 of field 8 when that field has at least four
                '|'-separated parts (presumably an NCBI-style
                "gi|...|ref|ACCESSION|" name -- confirm against real
                data); otherwise the whole of field 8.
    location -- field 9, converted to int.
    """

    def __init__(self, frhit_string):
        """
        Parse a single FR-HIT output line.

        frhit_string -- the line as text; Python 3 bytes (as produced by
                        a file opened in binary mode) are decoded as UTF-8.

        Raises IndexError if the line has fewer than 10 fields and
        ValueError if a numeric field cannot be converted.
        """
        # On Python 2, bytes is str and needs no decoding; on Python 3 a
        # bytes line would otherwise fail in the str-based split('|') below.
        if isinstance(frhit_string, bytes) and not isinstance(frhit_string, str):
            frhit_string = frhit_string.decode('utf-8')

        fields = frhit_string.rstrip().split()

        # Same exception type as plain indexing would raise, but with a
        # message that identifies the offending line.
        if len(fields) < 10:
            raise IndexError(
                "expected at least 10 whitespace-separated fields, "
                "got %d: %r" % (len(fields), frhit_string))

        self.name = fields[0]

        # Fall back to the full reference name when it does not have the
        # four pipe-separated parts the accession lookup relies on.
        ref_parts = fields[8].split('|')
        if len(ref_parts) > 3:
            self.refseq = ref_parts[3]
        else:
            self.refseq = fields[8]

        self.location = int(fields[9])
        # Strip the percent sign only when present, so a bare number such
        # as "100" is not truncated to "10".
        self.identity = float(fields[7].rstrip('%'))
        self.length = int(fields[3])

    def __repr__(self):
        """Return a three-line summary: name, location and identity."""
        out = []
        out.append("Name: %s\n" % self.name)
        out.append("Location: %d\n" % self.location)
        out.append("Identity: %4.1f\n" % self.identity)
        return ''.join(out)
398c3753c358 Uploaded
schang
parents:
diff changeset
29
398c3753c358 Uploaded
schang
parents:
diff changeset
30
398c3753c358 Uploaded
schang
parents:
diff changeset
def FRHITFile(filename):
    """
    Parses the fr-hit format file, returning FRHITFragment objects.

    This is a generator, so that there is no need to store a full list of
    fragments in memory if it's not necessary to do so.

    filename -- path of the FR-HIT output file to read.

    Yields one FRHITFragment per non-blank line, in file order. Any
    exception raised while constructing a fragment propagates to the
    consumer.
    """

    # Text mode keeps lines as str on Python 3, which the str-based field
    # parsing needs. The with-block closes the handle even if the consumer
    # stops iterating early or a line fails to parse.
    with open(filename, 'r') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) carry
            # no fields and would otherwise abort the whole parse.
            if not line.strip():
                continue
            yield FRHITFragment(line)
398c3753c358 Uploaded
schang
parents:
diff changeset
45
398c3753c358 Uploaded
schang
parents:
diff changeset
46
398c3753c358 Uploaded
schang
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: print every fragment parsed from the
    # FR-HIT output file named by the first positional argument.
    #
    # getopt raises GetoptError for any option other than -h, so unknown
    # options are reported here rather than in the option loop.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'h')
    except getopt.GetoptError:
        print('unhandled option')
        print('Usage:')
        print(' %s <fr-hit_output_file>' % sys.argv[0])
        sys.exit(1)

    for o, _ in opts:
        if o == '-h':
            print('Usage:')
            print(' %s <fr-hit_output_file>' % sys.argv[0])
            sys.exit(0)

    if not args:
        print('Specify an FR-HIT output file as an argument')
        sys.exit(1)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    for frag in FRHITFile(args[0]):
        print(frag)