0
|
1 import sys, re
|
|
2
|
|
def stop_err(msg):
    """Report a fatal error on standard error and terminate the process.

    Args:
        msg: Text written verbatim to ``sys.stderr``; no newline is appended,
            so callers include one if they want it.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    sys.stderr.write(msg)
    # Exit non-zero: a bare sys.exit() reports status 0, which tells any caller
    # that checks the exit code that the failed run succeeded.
    sys.exit(1)
|
|
6
|
|
def __main__():
    """Append a merged column to each data line of a tab-separated file.

    Command line arguments (read from ``sys.argv``):

    1. path of the input file
    2. path of the output file (created or truncated)
    3. delimiter code, one of the keys of ``char_dict`` below
    4. and onwards: 1-based column numbers to merge, in the order given

    Blank lines and lines starting with ``#`` are not copied to the output.
    Every other line is written back followed by a tab and the selected
    fields joined with the chosen delimiter.  A line that lacks any of the
    requested columns is left out of the output and counted once; the count
    is reported on standard output when it is non-zero.

    Invalid arguments (unopenable files, no columns, an unknown delimiter
    code when more than one column is merged, non-integer or non-positive
    column numbers) are reported through ``stop_err``.
    """
    try:
        infile = open(sys.argv[1], 'r')
        outfile = open(sys.argv[2], 'w')
    except (IOError, OSError, IndexError):
        # IndexError covers a command line that omits the file arguments.
        stop_err('Cannot open or create a file\n')

    # Delimiter code -> literal text placed between merged values.  The values
    # are plain strings, not regular expressions, so no backslash escaping is
    # wanted here ('Sl' is the one entry that really is a backslash).
    char_dict = {
        'T': '\t',
        's': ' ',
        'Dt': '.',
        'Sl': '\\',
        'Sr': '/',
        'C': ',',
        'D': '-',
        'U': '_',
        'P': '|',
        'Co': ':',
        'Sc': ';',
        'Ep': '',
    }

    skipped_lines = 0
    try:
        if len(sys.argv) < 5:
            stop_err('No columns to merge')
        delimiter = sys.argv[3]
        cols = sys.argv[4:]

        # The delimiter is only ever used between two values, so a single
        # column does not need a valid code.
        if len(cols) > 1 and delimiter not in char_dict:
            stop_err('Unknown delimiter code: %s\n' % delimiter)
        separator = char_dict.get(delimiter, '')

        # Parse the column numbers once instead of once per input line.
        try:
            indexes = [int(col) - 1 for col in cols]
        except ValueError:
            stop_err('Column numbers must be integers\n')
        # Column 0 or below would become a negative index and silently pick
        # fields counted from the end of the row.
        if min(indexes) < 0:
            stop_err('Column numbers must be 1 or greater\n')

        for line in infile:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            fields = line.split('\t')
            try:
                merged = separator.join([fields[index] for index in indexes])
            except IndexError:
                # Row is too short for a requested column: skip it entirely
                # rather than emitting a partially merged value.
                skipped_lines += 1
                continue
            outfile.write('%s\t%s\n' % (line, merged))
    finally:
        # Runs on normal completion and when stop_err raises SystemExit.
        infile.close()
        outfile.close()

    if skipped_lines > 0:
        sys.stdout.write('Skipped %d invalid lines\n' % skipped_lines)
|
|
55
|
|
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()
|