annotate LocationFile.py @ 28:184d14e4270d

Update to Miller Lab devshed revision 4ede22dd5500
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 17 Jul 2013 12:46:46 -0400
parents 4b6590dd7250
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
2
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
3 import sys
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
4
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
def die( message ):
    """Write *message* to standard error and terminate with exit status 1.

    The message is followed by a single newline.  This function does not
    return: sys.exit() raises SystemExit.
    """
    # sys.stderr.write() behaves the same on Python 2 and Python 3, unlike
    # the Python 2-only "print >> sys.stderr, message" statement form.
    sys.stderr.write( '{0}\n'.format( message ) )
    sys.exit(1)
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
8
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
def open_or_die( filename, mode='r', message=None ):
    """Open *filename* and return the file object, or exit on failure.

    filename -- path handed to the built-in open().
    mode     -- mode string handed to open(); defaults to 'r'.
    message  -- prefix for the error report; when None it defaults to
                'Error opening <filename>'.

    If open() raises IOError, die() is called with
    '<message>: <strerror>', which prints the text to standard error and
    terminates the process.
    """
    if message is None:
        message = 'Error opening {0}'.format( filename )
    try:
        fh = open( filename, mode )
    # "except ... as ..." is accepted by Python 2.6+ and Python 3; the older
    # "except IOError, err" spelling is a syntax error on Python 3.
    except IOError as err:
        die( '{0}: {1}'.format( message, err.strerror ) )
    return fh
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
17
4b6590dd7250 Uploaded
miller-lab
parents:
diff changeset
class LocationFile( object ):
    """Key/value lookup table loaded from a delimited text file.

    Every line that does not start with one of the comment prefixes is split
    on the delimiter.  The field at ``key_column`` becomes the key and the
    remaining fields, in their original order, become that key's values.
    The file is read once, when the object is constructed.

    All failures (unreadable file, a line with too few columns, a key that
    reappears with different values, a missing key in get_values) are
    reported through die(), which terminates the process.
    """

    def __init__( self, filename, comment_chars=None, delimiter='\t', key_column=0 ):
        """Read *filename* and build the lookup table.

        filename      -- path of the location file.
        comment_chars -- iterable of line prefixes that mark a comment line;
                         None selects the default, '#'.
        delimiter     -- field separator passed to str.split().
        key_column    -- zero-based index of the field used as the key.
        """
        self.filename = filename
        if comment_chars is None:
            # A real one-element tuple, so the attribute has the same type
            # as in the else branch ( '#' without the comma is just a str ).
            self.comment_chars = ( '#', )
        else:
            self.comment_chars = tuple( comment_chars )
        self.delimiter = delimiter
        self.key_column = key_column
        self._map = {}
        self._populate_map()

    def _populate_map( self ):
        """Parse the location file into self._map (key -> list of values)."""
        try:
            with open( self.filename ) as fh:
                for line_number, line in enumerate( fh, 1 ):
                    line = line.rstrip( '\r\n' )
                    # str.startswith() accepts a tuple of prefixes.
                    if line.startswith( self.comment_chars ):
                        continue
                    elems = line.split( self.delimiter )
                    if len( elems ) <= self.key_column:
                        die( 'Location file {0} line {1}: less than {2} columns'.format( self.filename, line_number, self.key_column + 1 ) )
                    # Removing the key leaves only the value fields in elems.
                    key = elems.pop( self.key_column )
                    if key not in self._map:
                        self._map[key] = elems
                    elif self._map[key] != elems:
                        # An exact repeat of an earlier line is tolerated;
                        # only a conflicting redefinition is fatal.
                        die( 'Location file {0} line {1}: duplicate key "{2}"'.format( self.filename, line_number, key ) )
        # "except ... as ..." works on Python 2.6+ and Python 3.
        except IOError as err:
            die( 'Error opening location file {0}: {1}'.format( self.filename, err.strerror ) )

    def _unwrap( self, values ):
        """Return the lone element of a one-item list, otherwise the list itself."""
        return values[0] if len( values ) == 1 else values

    def get_values( self, key ):
        """Return the values stored for *key*, exiting via die() if it is absent.

        A key with exactly one value yields that value itself; any other
        count (including zero) yields the list of values.
        """
        if key in self._map:
            return self._unwrap( self._map[key] )
        die( 'key "{0}" not found in location file {1}'.format( key, self.filename ) )

    def get_values_if_exists( self, key ):
        """Return the values stored for *key*, or None if it is absent.

        A key with exactly one value yields that value itself; any other
        count (including zero) yields the list of values.
        """
        if key in self._map:
            return self._unwrap( self._map[key] )
        return None