Mercurial > repos > immport-devteam > rearrange_columns
comparison editColumnHeadings.py @ 1:ce206587d42f draft default tip
"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/rearrange_columns commit 3289c1f97444f45f946ff92d47c490c9d58f3d94"
author | azomics |
---|---|
date | Thu, 16 Jul 2020 07:22:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d2cdffd27293 | 1:ce206587d42f |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ###################################################################### | |
4 # Copyright (c) 2016 Northrop Grumman. | |
5 # All rights reserved. | |
6 ###################################################################### | |
7 # | |
8 # Cristel Thomas - May 2018 | |
9 # Version 2 -- with Pandas! | |
10 # | |
11 | |
12 import sys | |
13 | |
14 from argparse import ArgumentParser | |
15 import pandas as pd | |
16 | |
17 | |
def is_integer(s):
    """Return True when ``int(s)`` succeeds, False otherwise.

    Args:
        s: Value to test; the script passes the comma-separated tokens of
            the ``-n`` option (strings).

    Returns:
        bool: True if ``s`` can be converted with ``int()``.
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        # ValueError: non-numeric text such as "CD3" or "".
        # TypeError: values int() cannot take at all (None, lists, ...).
        # OverflowError: float infinity.
        return False
    return True
24 | |
25 | |
def rearrange_file(input_file, output_file, new_cols, new_order, flag_text):
    """Rename, reorder and/or subset the columns of a tab-separated file.

    Args:
        input_file: Path or file-like object holding the tab-separated
            input; it is read with ``pandas.read_table``.
        output_file: Path or file-like object the tab-separated result is
            written to (without the index column).
        new_cols: Mapping of existing header -> replacement header. A falsy
            value skips renaming.
        new_order: Columns to keep, in output order. Header names when
            ``flag_text`` is true, 0-based positions otherwise. A falsy
            value keeps every column in place.
        flag_text: True when ``new_order`` holds header names.

    Exit statuses (raised through ``sys.exit``):
        6: ``new_cols`` or ``new_order`` has more entries than the file has
            columns.
        9: ``new_order`` refers to a column that does not exist, either an
            unknown header name or a position outside the file.
        10: a key of ``new_cols`` is not a header of the input file; this
            is checked after the output has been written.
    """
    df = pd.read_table(input_file)
    original_columns = list(df.columns)

    if new_cols:
        if len(new_cols) > len(df.columns):
            sys.exit(6)
        # Headers without a replacement are kept unchanged.
        df.columns = [new_cols.get(col, col) for col in df.columns]

    if new_order:
        if len(new_order) > len(df.columns):
            sys.exit(6)
        if flag_text:
            # Names are matched against the headers *after* renaming.
            existing_cols = list(df.columns)
            known = set(existing_cols)
            # dict.fromkeys de-duplicates while keeping the order given by
            # the caller, so the report below is deterministic.
            unknown_elements = [col for col in dict.fromkeys(new_order)
                                if col not in known]
            if unknown_elements:
                print("%s of the provided columns for reorder is/are not in the input file." % len(unknown_elements), file=sys.stderr)
                print("Existing columns:", file=sys.stderr)
                for col in existing_cols:
                    print(col, file=sys.stderr)
                print("Provided columns for new order which are not in the original list:", file=sys.stderr)
                for col in unknown_elements:
                    print(col, file=sys.stderr)
                sys.exit(9)
            df = df[new_order]
        else:
            n_cols = len(df.columns)
            # Reject positions outside the file: a negative index would
            # silently count from the end and a too-large one would raise
            # IndexError.
            out_of_range = [pos for pos in new_order
                            if not 0 <= pos < n_cols]
            if out_of_range:
                print("%s of the provided column positions for reorder is/are not in the input file." % len(out_of_range), file=sys.stderr)
                print("Number of columns in the input file: %s" % n_cols, file=sys.stderr)
                print("Provided positions which are out of range:", file=sys.stderr)
                for pos in out_of_range:
                    # Report 1-based, the convention of the -n option.
                    print(pos + 1, file=sys.stderr)
                sys.exit(9)
            # Select by position so that headers made identical by the
            # renaming step cannot pull in extra columns.
            df = df.iloc[:, new_order]

    df.to_csv(output_file, sep="\t", index=False)

    # Checked only after the output has been written, so the result file
    # exists even when status 10 is reported.
    if new_cols and any(col not in original_columns for col in new_cols):
        sys.exit(10)
66 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options, validate them and hand
    # the result to rearrange_file(). Every rejected combination of options
    # ends the process with its own exit status (2, 3, 4, 7 or 8).
    parser = ArgumentParser(
        prog="editColumnHeadings",
        description="Cut, rearrange and rename columns in a tab-separated file.")

    # (flag, keyword arguments) for each option, registered in this order.
    option_specs = (
        ('-i', dict(dest="input_file", required=True,
                    help="File location for the text file.")),
        ('-r', dict(dest="columns", action="append",
                    help="Columns to replace.")),
        ('-w', dict(dest="replace_with", action="append",
                    help="new column headers.")),
        ('-n', dict(dest="new_order",
                    help="New column order if re-ordering or subsetting.")),
        ('-o', dict(dest="output_file", required=True,
                    help="Name of the output file.")),
    )
    for option_flag, option_kwargs in option_specs:
        parser.add_argument(option_flag, **option_kwargs)

    args = parser.parse_args()

    new_order = []
    new_cols = {}
    flag_text = False
    # Placeholder values: a -n value found here aborts with status 8, a
    # -r / -w value found here is treated as missing (None).
    defaults = ["i.e.:TLR 6, TLR6PE", "i.e.:TLR6", "i.e.:1,2,5 or CD3,CD4,CCR3", "default", "Default", ""]

    # --- new column order (-n) ------------------------------------------
    if args.new_order:
        if args.new_order in defaults:
            sys.exit(8)
        order_tokens = [tok.strip()
                        for tok in args.new_order.strip().split(",")]
        # A single non-integer entry switches the whole list to header names.
        flag_text = not all([is_integer(tok) for tok in order_tokens])
        if flag_text:
            new_order = [str(tok) for tok in order_tokens]
        else:
            # Positions given on the command line are 1-based.
            new_order = [int(tok) - 1 for tok in order_tokens]

    # --- header replacements (-r paired with -w) ------------------------
    if args.columns and args.replace_with:
        cols_to_change = [None if entry in defaults
                          else entry.strip().split(",")
                          for entry in args.columns]
        replacements = [None if entry in defaults else entry.strip()
                        for entry in args.replace_with]
        check_col = len([grp for grp in cols_to_change if grp is not None])
        check_rep = len([rep for rep in replacements if rep is not None])
        if check_col != check_rep:
            sys.exit(7)
        # Only the first check_col pairs are examined.
        for old_names, new_name in zip(cols_to_change[:check_col],
                                       replacements[:check_col]):
            if not old_names:
                sys.exit(3)
            if not new_name:
                sys.exit(4)
            for old_name in old_names:
                new_cols[old_name.strip()] = new_name
    elif args.columns or args.replace_with:
        # One of -r / -w was given without the other.
        sys.exit(7)

    # Nothing to rename and nothing to reorder.
    if not (new_order or new_cols):
        sys.exit(2)

    rearrange_file(args.input_file, args.output_file, new_cols, new_order, flag_text)