annotate LocationFile.py @ 7:e29f4d801bb0

change wsf -> snp; wpf -> sap
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 18 Apr 2012 11:12:21 -0400
parents 2c498d40ecde
children 22fe0154fa54
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def die( message ):
    """Report *message* on standard error and terminate the process.

    Args:
        message: Text to report.  It is rendered through ``str.format``,
            so non-string objects are accepted.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    # sys.stderr.write() is valid syntax on both Python 2 and Python 3,
    # unlike the Python-2-only ``print >> sys.stderr, ...`` statement.
    sys.stderr.write( '{0}\n'.format( message ) )
    sys.exit( 1 )
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def open_or_die( filename, mode='r', message=None ):
    """Open *filename* and return the file object, exiting on failure.

    Args:
        filename: Path of the file to open.
        mode: Mode string passed straight to the built-in ``open``.
        message: Prefix for the error report.  When ``None`` it defaults
            to ``'Error opening <filename>'``.

    Returns:
        The open file object.  Closing it is left to the caller.

    If ``open`` raises ``IOError``, the prefix and the error's
    ``strerror`` are handed to ``die``, which terminates the process.
    """
    if message is None:
        message = 'Error opening {0}'.format( filename )
    try:
        return open( filename, mode )
    # ``except X as name`` works on Python 2.6+ and Python 3; the old
    # ``except X, name`` spelling is a syntax error on Python 3.
    except IOError as err:
        die( '{0}: {1}'.format( message, err.strerror ) )
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
17
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
class LocationFile( object ):
    """Key/value lookup table loaded from a delimited text file.

    Every line that does not start with one of the comment prefixes is
    split on ``delimiter``.  The field at index ``key_column`` becomes the
    key and the remaining fields, in their original order, become that
    key's values.  All problems (unreadable file, too few columns,
    conflicting duplicate key, unknown key) are reported through ``die``,
    which terminates the process.
    """

    def __init__( self, filename, comment_chars=None, delimiter='\t', key_column=0 ):
        """Store the settings and load *filename* immediately.

        Args:
            filename: Path of the location file to read.
            comment_chars: Iterable of line prefixes that mark a comment
                line.  ``None`` selects the single prefix ``'#'``.
            delimiter: Field separator passed to ``str.split``.
            key_column: Zero-based index of the field used as the key.
        """
        self.filename = filename
        if comment_chars is None:
            # The trailing comma makes this a real one-element tuple, the
            # same type the else-branch produces; ``( '#' )`` is a str.
            self.comment_chars = ( '#', )
        else:
            self.comment_chars = tuple( comment_chars )
        self.delimiter = delimiter
        self.key_column = key_column
        self._map = {}
        self._populate_map()

    def _populate_map( self ):
        """Read ``self.filename`` and fill ``self._map``.

        A key may appear more than once only if every occurrence carries
        exactly the same values; a conflicting repeat is fatal.  Note that
        an empty line is not skipped: it splits into one empty field.
        """
        try:
            with open( self.filename ) as fh:
                # Line numbers are 1-based for the error messages.
                for line_number, line in enumerate( fh, 1 ):
                    line = line.rstrip( '\r\n' )
                    if line.startswith( self.comment_chars ):
                        continue
                    elems = line.split( self.delimiter )
                    if len( elems ) <= self.key_column:
                        # die() exits, so the code below is not reached.
                        die( 'Location file {0} line {1}: less than {2} columns'.format( self.filename, line_number, self.key_column + 1 ) )
                    # pop() removes the key so elems holds only the values.
                    key = elems.pop( self.key_column )
                    if key not in self._map:
                        self._map[key] = elems
                    elif self._map[key] != elems:
                        die( 'Location file {0} line {1}: duplicate key "{2}"'.format( self.filename, line_number, key ) )
        # ``except X as name`` works on Python 2.6+ and Python 3; the old
        # ``except X, name`` spelling is a syntax error on Python 3.
        except IOError as err:
            die( 'Error opening location file {0}: {1}'.format( self.filename, err.strerror ) )

    def get_values( self, key ):
        """Return the values stored for *key*.

        Returns:
            The single value itself when the key has exactly one value,
            otherwise the list of values (which may be empty).

        An unknown key is reported through ``die``, which exits.
        """
        if key not in self._map:
            die( 'key "{0}" not found in location file {1}'.format( key, self.filename ) )
        rval = self._map[key]
        if len( rval ) == 1:
            return rval[0]
        return rval