annotate merge_columns_with_delimiter/merge_columns_with_delimiter.py @ 0:eaf7c9b0a1a4 draft default tip

Uploaded
author saket-choudhary
date Tue, 07 Oct 2014 19:38:23 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
1 import sys, re
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
2
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Args:
        msg: Text written verbatim to standard error. No newline is added,
            so callers include one when they want it.

    Raises:
        SystemExit: Always, with exit status 1, so that shells and workflow
            engines that inspect the return code see the run as failed.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() would report status 0 (success) even though an
    # error message was just written.
    sys.exit(1)
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
6
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
def _merge_rows(rows, sink, col_indexes, separator):
    """Write each data row to *sink* with one extra, merged column appended.

    Args:
        rows: Iterable of text lines (for example an open input file).
        sink: Object with a ``write`` method that receives the output text.
        col_indexes: Zero-based indexes of the fields to merge, in order.
        separator: String placed between the merged field values.

    Returns:
        The number of data rows in which at least one requested column was
        missing. Such rows are still written, merged from the columns that
        do exist.
    """
    incomplete = 0
    for row in rows:
        row = row.rstrip('\r\n')
        # Blank lines and '#' comment lines are not copied to the output.
        if not row or row.startswith('#'):
            continue
        fields = row.split('\t')
        # Reject negative indexes explicitly: column "0" would otherwise
        # wrap around and silently select the last field.
        picked = [fields[idx] for idx in col_indexes
                  if 0 <= idx < len(fields)]
        if len(picked) != len(col_indexes):
            incomplete += 1
        # join() places the separator only between values that are present,
        # so a missing column never leaves a dangling delimiter.
        sink.write(row + '\t' + separator.join(picked) + '\n')
    return incomplete


def __main__():
    """Append a merged column to every data row of a tab-separated file.

    Command line arguments (read from ``sys.argv``):
        1: path of the tab-separated input file
        2: path of the output file to create
        3: delimiter code, one of the keys of ``char_dict`` below
        4+: one-based column numbers to merge, in output order

    Each non-blank, non-comment input line is written to the output followed
    by a tab and the selected column values joined with the delimiter. When
    some rows lack a requested column, a summary count is written to
    standard output.

    All argument and file errors are reported through ``stop_err``.
    """
    # Validate the arguments before touching the file system, so a bad
    # invocation neither creates nor truncates the output file.
    if len(sys.argv) < 5:
        stop_err('No columns to merge')

    in_path, out_path, delimiter = sys.argv[1:4]
    cols = sys.argv[4:]

    # Delimiter codes accepted on the command line and the literal text each
    # one inserts. These are plain strings, not regular expressions, so no
    # character is backslash-escaped (apart from the backslash itself).
    char_dict = {
        'T': '\t',
        's': ' ',
        'Dt': '.',
        'Sl': '\\',
        'Sr': '/',
        'C': ',',
        'D': '-',
        'U': '_',
        'P': '|',
        'Co': ':',
        'Sc': ';',
        'Ep': '',
    }
    if delimiter not in char_dict:
        stop_err('Unknown delimiter code: %s\n' % delimiter)

    # Convert the one-based column numbers once instead of once per row.
    try:
        col_indexes = [int(col) - 1 for col in cols]
    except ValueError:
        stop_err('Column numbers must be integers\n')

    try:
        infile = open(in_path, 'r')
    except (IOError, OSError):
        stop_err('Cannot open or create a file\n')
    with infile:
        try:
            outfile = open(out_path, 'w')
        except (IOError, OSError):
            stop_err('Cannot open or create a file\n')
        with outfile:
            skipped_lines = _merge_rows(
                infile, outfile, col_indexes, char_dict[delimiter])

    if skipped_lines > 0:
        sys.stdout.write('Skipped %d invalid lines\n' % skipped_lines)
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
55
eaf7c9b0a1a4 Uploaded
saket-choudhary
parents:
diff changeset
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()