comparison rearrange_columns/editColumnHeadings.py @ 0:d2cdffd27293 draft

Uploaded
author immport-devteam
date Mon, 27 Feb 2017 13:05:18 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d2cdffd27293
1 #!/usr/bin/env python
2
3 ######################################################################
4 # Copyright (c) 2016 Northrop Grumman.
5 # All rights reserved.
6 ######################################################################
7
8 from __future__ import print_function
9 import sys
10
11 from argparse import ArgumentParser
12
13
def is_integer(s):
    """Report whether ``int(s)`` succeeds without raising ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
20
21
def rearrange_file(input_file, col_order, col_names, output_file):
    """Write a cut, reordered and/or renamed copy of a tab-separated file.

    The first line of ``input_file`` is treated as the header row; every
    following line is treated as a data row.

    Args:
        input_file: Path of the tab-separated file to read.
        col_order: Zero-based indices of the columns to keep, in output
            order. An empty list keeps every header column in its
            original order.
        col_names: Replacement header names, one per output column. A
            blank entry keeps the existing name for that position. An
            empty list leaves the header names unchanged.
        output_file: Path of the file to write.

    Raises:
        SystemExit: With status 4 when ``col_names`` is given without
            ``col_order`` and its length differs from the number of
            header columns.
        IndexError: When a selected index is beyond the header row or a
            data row has fewer cells than the selection requires.
    """
    with open(input_file, "r") as infl, open(output_file, "w") as outf:
        # headers
        # Strip only the line terminator: a bare strip() would also eat
        # leading/trailing tabs and thereby drop empty first/last cells.
        hdrs = infl.readline().rstrip("\r\n")
        current_hdrs = hdrs.split("\t")
        if col_names and not col_order and len(col_names) != len(current_hdrs):
            sys.stderr.write("There are " + str(len(current_hdrs)) + " columns but " + str(len(col_names)) + " marker names were provided\n")
            sys.exit(4)

        if col_names:
            tmp_hdr = []
            for i, name in enumerate(col_names):
                name = name.strip()
                if name:
                    tmp_hdr.append(name)
                elif col_order:
                    # Blank name: fall back to the header of the selected column.
                    tmp_hdr.append(current_hdrs[col_order[i]])
                else:
                    tmp_hdr.append(current_hdrs[i])
            hdrs = "\t".join(tmp_hdr)
        elif col_order:
            hdrs = "\t".join([current_hdrs[j] for j in col_order])

        outf.write(hdrs + "\n")

        # columns
        # Resolve the default selection once instead of on every row.
        out_order = col_order or list(range(len(current_hdrs)))
        for line in infl:
            # Same terminator-only stripping as the header so that empty
            # cells at either end of a row keep their position.
            cols = line.rstrip("\r\n").split("\t")
            outf.write("\t".join([cols[c] for c in out_order]) + "\n")
56
57
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate the column
    # selection and names, then rewrite the file.
    parser = ArgumentParser(
        prog="editColumnHeadings",
        description="Cut, rearrange and rename columns in a tab-separated file.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the text file.")

    parser.add_argument(
        '-c',
        dest="columns",
        help="Columns to keep in the order to keep them in.")

    parser.add_argument(
        '-n',
        dest="column_names",
        help="Column names if renaming.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    args = parser.parse_args()

    # check column indices
    # These literal values are treated as "no column selection given".
    default_value_col = ["i.e.:1,5,2", "default", "Default"]
    col_order = []
    if args.columns and args.columns not in default_value_col:
        tmp_col = [c.strip() for c in args.columns.split(",")]
        # A lone blank entry selects nothing, i.e. every column is kept.
        if tmp_col != [""]:
            # Exit status 2 reports a bad single index, 3 a bad entry in a list.
            bad_index_status = 2 if len(tmp_col) == 1 else 3
            for c in tmp_col:
                # Indices are 1-based on the command line. Zero or a negative
                # number would become a negative list index and silently pick
                # columns counted from the end, so reject it as invalid.
                if not is_integer(c) or int(c) < 1:
                    sys.stderr.write("Invalid column index '" + c + "': expected a positive integer\n")
                    sys.exit(bad_index_status)
                col_order.append(int(c) - 1)

    # check column names
    # These literal values are treated as "no renaming requested".
    default_value_nms = ["i.e.:Marker1,,Marker4", "default", "Default"]
    col_names = []
    if args.column_names and args.column_names not in default_value_nms:
        col_names = args.column_names.split(",")
        # With an explicit selection there must be one name slot per column.
        if col_order and len(col_order) != len(col_names):
            sys.stderr.write("There are " + str(len(col_order)) + " columns selected and " + str(len(col_names)) + " marker names\n")
            sys.exit(4)

    rearrange_file(args.input_file, col_order, col_names, args.output_file)

    sys.exit(0)