comparison editColumnHeadings.py @ 1:ce206587d42f draft default tip

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/rearrange_columns commit 3289c1f97444f45f946ff92d47c490c9d58f3d94"
author azomics
date Thu, 16 Jul 2020 07:22:06 -0400
parents
children
comparison
equal deleted inserted replaced
0:d2cdffd27293 1:ce206587d42f
1 #!/usr/bin/env python
2
3 ######################################################################
4 # Copyright (c) 2016 Northrop Grumman.
5 # All rights reserved.
6 ######################################################################
7 #
8 # Cristel Thomas - May 2018
9 # Version 2 -- with Pandas!
10 #
11
12 import sys
13
14 from argparse import ArgumentParser
15 import pandas as pd
16
17
def is_integer(s):
    """Return True when ``int(s)`` succeeds, False otherwise.

    Used to decide whether a user-supplied column reference is a numeric
    position or a column name.

    Args:
        s: Value to test, normally a string taken from the command line.

    Returns:
        bool: True if ``s`` can be converted with ``int()``.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        # ValueError: non-numeric text such as "CD4" or "".
        # TypeError: values int() cannot accept at all, e.g. None.
        return False
    return True
24
25
def rearrange_file(input_file, output_file, new_cols, new_order, flag_text):
    """Rename, reorder and/or subset the columns of a tab-separated file.

    Args:
        input_file: Path of the tab-separated file to read; its first row
            is used as the header.
        output_file: Path the resulting tab-separated file is written to.
        new_cols: Mapping of existing column name -> replacement name.
            May be empty.
        new_order: Columns to keep, in output order. Column names (as they
            are after renaming) when ``flag_text`` is true, otherwise
            0-based positions. May be empty.
        flag_text: True when ``new_order`` holds names rather than
            positions.

    Exits (via ``sys.exit``):
        6: ``new_cols`` or ``new_order`` has more entries than the file
            has columns.
        9: ``new_order`` refers to a column name or position that is not
            in the file.
        10: raised after the output has been written, when a key of
            ``new_cols`` is not one of the original column names.
    """
    df = pd.read_table(input_file)
    original_columns = list(df.columns)

    if new_cols:
        if len(new_cols) > len(original_columns):
            sys.exit(6)
        # Columns without a replacement keep their current name.
        df.columns = [new_cols.get(col, col) for col in original_columns]

    if new_order:
        existing_cols = list(df.columns)
        if len(new_order) > len(existing_cols):
            sys.exit(6)

        if flag_text:
            known = set(existing_cols)
            # dict.fromkeys de-duplicates while keeping the order given.
            unknown_elements = [
                col for col in dict.fromkeys(new_order) if col not in known
            ]
            if unknown_elements:
                print("%s of the provided columns for reorder is/are not in the input file." % len(unknown_elements), file=sys.stderr)
                print("Existing columns:", file=sys.stderr)
                for col in existing_cols:
                    print(col, file=sys.stderr)
                print("Provided columns for new order which are not in the original list:", file=sys.stderr)
                for col in unknown_elements:
                    print(col, file=sys.stderr)
                sys.exit(9)
            subset = new_order
        else:
            # Reject positions outside the file. A negative index would
            # otherwise wrap around silently (a requested column "0"
            # arrives here as -1 and would pick the last column), and one
            # past the end would crash with an IndexError traceback.
            n_cols = len(existing_cols)
            out_of_range = [
                idx for idx in dict.fromkeys(new_order)
                if not 0 <= idx < n_cols
            ]
            if out_of_range:
                print("%s of the provided column positions for reorder is/are out of range." % len(out_of_range), file=sys.stderr)
                print("The input file has %s columns." % n_cols, file=sys.stderr)
                print("Provided positions (1-based) which are out of range:", file=sys.stderr)
                for idx in out_of_range:
                    print(idx + 1, file=sys.stderr)
                sys.exit(9)
            subset = [existing_cols[idx] for idx in new_order]
        df = df[subset]

    df.to_csv(output_file, sep="\t", index=False)

    # The output is written before this check, so exit code 10 signals a
    # rename key that matched no original column while still leaving the
    # file on disk.
    if new_cols:
        for col in new_cols:
            if col not in original_columns:
                sys.exit(10)
66
# Command-line entry point: parse the options, turn them into the
# new_cols / new_order / flag_text values, then call rearrange_file.
# NOTE(review): indentation is not visible in this view of the file, so the
# nesting of the if/else branches described below should be confirmed
# against the properly indented source.
67 if __name__ == "__main__":
68 parser = ArgumentParser(
69 prog="editColumnHeadings",
70 description="Cut, rearrange and rename columns in a tab-separated file.")
71
72 parser.add_argument(
73 '-i',
74 dest="input_file",
75 required=True,
76 help="File location for the text file.")
77
# -r and -w use action="append", so each may be given several times.
78 parser.add_argument(
79 '-r',
80 dest="columns",
81 action="append",
82 help="Columns to replace.")
83
84 parser.add_argument(
85 '-w',
86 dest="replace_with",
87 action="append",
88 help="new column headers.")
89
90 parser.add_argument(
91 '-n',
92 dest="new_order",
93 help="New column order if re-ordering or subsetting.")
94
95 parser.add_argument(
96 '-o',
97 dest="output_file",
98 required=True,
99 help="Name of the output file.")
100
101 args = parser.parse_args()
102
103
# Values handed to rearrange_file; both start empty and are filled in below.
104 new_order = []
105 new_cols = {}
106 # flag = False
107 # exit_codes = [3,4,7,8,9,10,2]
# Option values listed here are treated as "nothing provided".
108 defaults = ["i.e.:TLR 6, TLR6PE", "i.e.:TLR6", "i.e.:1,2,5 or CD3,CD4,CCR3", "default", "Default", ""]
109 flag_text = False
110
# -n: comma-separated list. If every item passes is_integer the items are
# converted with int(x)-1; otherwise flag_text is set and every item is kept
# as a string.
# NOTE(review): the else / sys.exit(8) presumably pairs with the
# "not in defaults" test (placeholder value given) - confirm nesting.
111 if args.new_order:
112 if args.new_order not in defaults:
113 nwor = [x.strip() for x in args.new_order.strip().split(",")]
114 check_integer = [is_integer(x) for x in nwor]
115 if sum(check_integer) != len(check_integer):
116 flag_text = True
117 new_order = [str(x) if flag_text else int(x)-1 for x in nwor]
118 else:
119 sys.exit(8)
120
# -r / -w: each -r value is split on commas and every listed column name is
# mapped to the -w value at the same position in new_cols. Placeholder values
# become None. Exit 7 is used when the number of non-placeholder -r and -w
# values differs, and presumably when only one of the two options was given;
# exits 3 and 4 presumably flag a missing column list and a missing
# replacement respectively - confirm nesting.
# The loop only visits the first check_col positions.
121 if args.columns:
122 if args.replace_with:
123 cols_to_change = [c.strip().split(",") if c not in defaults else None for c in args.columns]
124 replacements = [r.strip() if r not in defaults else None for r in args.replace_with]
125 check_col = sum([True if x is not None else False for x in cols_to_change])
126 check_rep = sum([True if x is not None else False for x in replacements])
127 if check_col != check_rep:
128 sys.exit(7)
129 for i in range(0, check_col):
130 if cols_to_change[i]:
131 if replacements[i]:
132 for c in cols_to_change[i]:
133 new_cols[c.strip()] = replacements[i]
134 else:
135 sys.exit(4)
136 else:
137 sys.exit(3)
138 else:
139 sys.exit(7)
140 else:
141 if args.replace_with:
142 sys.exit(7)
143
# Neither a reorder nor a rename was requested: exit with code 2.
144 if not new_order and not new_cols:
145 sys.exit(2)
146
147 rearrange_file(args.input_file, args.output_file, new_cols, new_order, flag_text)