annotate LocationFile.py @ 9:22fe0154fa54

added support for heterochromatic regions
author Richard Burhans <burhans@bx.psu.edu>
date Tue, 10 Jul 2012 11:41:22 -0400
parents 2c498d40ecde
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def die( message ):
    """Write *message* to standard error and terminate with exit status 1.

    The message is followed by a single newline.  Termination is done with
    ``sys.exit(1)``, i.e. by raising ``SystemExit``, so this function never
    returns normally.
    """
    # sys.stderr.write() instead of the Python-2-only ``print >>`` statement,
    # so the module parses under both Python 2.6+ and Python 3.
    sys.stderr.write( '{0}\n'.format( message ) )
    sys.exit( 1 )
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def open_or_die( filename, mode='r', message=None ):
    """Open *filename* and return the file object, or exit on failure.

    filename -- path of the file to open
    mode     -- mode string passed straight to open() (default 'r')
    message  -- prefix for the error report; defaults to
                'Error opening <filename>'

    If open() raises IOError, die() is called with
    '<message>: <strerror>', which prints to stderr and exits with status 1.
    """
    if message is None:
        message = 'Error opening {0}'.format( filename )
    try:
        fh = open( filename, mode )
    # ``except ... as`` is accepted by Python 2.6+ and Python 3; the old
    # ``except IOError, err`` spelling is a syntax error on Python 3.
    except IOError as err:
        die( '{0}: {1}'.format( message, err.strerror ) )
    return fh
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
17
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
class LocationFile( object ):
    """Key/value lookup table loaded from a delimited text file.

    Each line that does not start with one of the comment prefixes is split
    on ``delimiter``.  The field at index ``key_column`` becomes the key and
    the remaining fields, in their original order, become that key's values.

    The whole file is read when the object is constructed.  Any problem
    (unreadable file, a line with too few columns, a key repeated with
    different values) is reported through die(), which prints to stderr and
    exits the process with status 1.
    """

    def __init__( self, filename, comment_chars=None, delimiter='\t', key_column=0 ):
        """Load *filename* into memory.

        filename      -- path of the location file
        comment_chars -- iterable of line prefixes marking comment lines;
                         None means the single prefix '#'
        delimiter     -- field separator (default: tab)
        key_column    -- zero-based index of the field used as the key
        """
        self.filename = filename
        if comment_chars is None:
            # A real one-element tuple, matching the type produced by the
            # tuple() branch below (the bare ( '#' ) is just the string '#').
            self.comment_chars = ( '#', )
        else:
            self.comment_chars = tuple( comment_chars )
        self.delimiter = delimiter
        self.key_column = key_column
        self._map = {}
        self._populate_map()

    def _populate_map( self ):
        """Read the location file and fill ``self._map``.

        Exits through die() when the file raises IOError, when a line has
        no field at index ``key_column``, or when a key reappears with
        values different from those already stored.  A repeated key with
        identical values is accepted silently.
        """
        try:
            with open( self.filename ) as fh:
                # Line numbers are 1-based for the error messages.
                for line_number, line in enumerate( fh, 1 ):
                    line = line.rstrip( '\r\n' )
                    # str.startswith accepts a tuple of prefixes.
                    if line.startswith( self.comment_chars ):
                        continue
                    elems = line.split( self.delimiter )
                    if len( elems ) <= self.key_column:
                        die( 'Location file {0} line {1}: less than {2} columns'.format( self.filename, line_number, self.key_column + 1 ) )
                        # die() exits; the continue only keeps this branch
                        # from falling through, like the original else.
                        continue
                    # pop() removes the key, leaving only the value fields.
                    key = elems.pop( self.key_column )
                    if key not in self._map:
                        self._map[key] = elems
                    elif self._map[key] != elems:
                        die( 'Location file {0} line {1}: duplicate key "{2}"'.format( self.filename, line_number, key ) )
        # ``except ... as`` parses on Python 2.6+ and Python 3.
        except IOError as err:
            die( 'Error opening location file {0}: {1}'.format( self.filename, err.strerror ) )

    @staticmethod
    def _unwrap( values ):
        """Return the only element of a one-element list, else the list itself."""
        if len( values ) == 1:
            return values[0]
        return values

    def get_values( self, key ):
        """Return the values stored for *key*, exiting if it is unknown.

        A key with exactly one value yields that value as a string; any
        other count yields the list of values.  A missing key is reported
        through die().
        """
        if key in self._map:
            return self._unwrap( self._map[key] )
        die( 'key "{0}" not found in location file {1}'.format( key, self.filename ) )

    def get_values_if_exists( self, key ):
        """Return the values stored for *key*, or None if it is unknown.

        The return shape for a known key is the same as in get_values().
        """
        if key in self._map:
            return self._unwrap( self._map[key] )
        return None