Mercurial > repos > immport-devteam > rearrange_columns
view rearrange_columns/editColumnHeadings.py @ 0:d2cdffd27293 draft
Uploaded
author | immport-devteam |
---|---|
date | Mon, 27 Feb 2017 13:05:18 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
######################################################################
#                  Copyright (c) 2016 Northrop Grumman.
#                          All rights reserved.
######################################################################
# Cut, rearrange and rename the columns of a tab-separated text file.
#
# Exit codes used by this script:
#   0  success
#   2  the single value given to -c is not a positive integer
#   3  one of several values given to -c is not a positive integer
#   4  the number of names does not match the number of columns
#   5  a selected column is missing from the header or from a data row
from __future__ import print_function
import sys

from argparse import ArgumentParser

# Option values that are treated exactly like "option not given".
# NOTE(review): these look like placeholder texts sent by the calling
# tool form - confirm against the tool wrapper.
DEFAULT_COLUMN_VALUES = ("i.e.:1,5,2", "default", "Default")
DEFAULT_NAME_VALUES = ("i.e.:Marker1,,Marker4", "default", "Default")


def _fail(message, code):
    """Write *message* to stderr and terminate with exit status *code*."""
    sys.stderr.write(message)
    sys.exit(code)


def is_integer(s):
    """Return True when ``int(s)`` succeeds, False otherwise.

    ``None`` and other values ``int()`` rejects with a TypeError are
    reported as False instead of raising.
    """
    try:
        int(s)
    except (TypeError, ValueError):
        return False
    return True


def rearrange_file(input_file, col_order, col_names, output_file):
    """Copy a tab-separated file, selecting, reordering and renaming columns.

    The first line of *input_file* is treated as the header.

    Args:
        input_file: path of the tab-separated file to read.
        col_order: zero-based indices of the columns to keep, in output
            order. An empty value keeps every header column in place.
        col_names: new header names, one per output column. A blank
            entry keeps the original header of that column. An empty
            value keeps all original headers.
        output_file: path of the file to write.

    Exits:
        4 - *col_names* is given without *col_order* and its length
            differs from the number of header columns.
        5 - a column needed for the header or for a data row does not
            exist.
    """
    with open(input_file, "r") as infl, open(output_file, "w") as outf:
        # Remove only the line terminator. str.strip() would also remove
        # leading/trailing tabs and thereby drop empty first/last cells.
        current_hdrs = infl.readline().rstrip("\r\n").split("\t")

        if col_names and not col_order:
            if len(col_names) != len(current_hdrs):
                _fail("There are " + str(len(current_hdrs)) +
                      " columns but " + str(len(col_names)) +
                      " marker names were provided\n", 4)

        # headers
        try:
            if col_names:
                new_hdrs = []
                for pos, name in enumerate(col_names):
                    name = name.strip()
                    if not name:
                        # Blank name: fall back to the original header.
                        # It is looked up only here, so a header shorter
                        # than the data is fine when every name is given.
                        source = col_order[pos] if col_order else pos
                        name = current_hdrs[source]
                    new_hdrs.append(name)
            elif col_order:
                new_hdrs = [current_hdrs[j] for j in col_order]
            else:
                new_hdrs = current_hdrs
        except IndexError:
            _fail("The header has only %d columns, so the selected "
                  "columns cannot be named\n" % len(current_hdrs), 5)
        outf.write("\t".join(new_hdrs) + "\n")

        # columns
        # The default order depends only on the header, so build it once.
        order = col_order if col_order else list(range(len(current_hdrs)))
        for line_no, line in enumerate(infl, start=2):
            cells = line.rstrip("\r\n").split("\t")
            try:
                picked = [cells[c] for c in order]
            except IndexError:
                _fail("Line %d has only %d columns, so the selected "
                      "columns cannot be extracted\n"
                      % (line_no, len(cells)), 5)
            outf.write("\t".join(picked) + "\n")


def parse_column_order(columns):
    """Turn the ``-c`` option text into a list of zero-based indices.

    *columns* is a comma-separated list of one-based column numbers.
    ``None``, blank text and the values in DEFAULT_COLUMN_VALUES mean
    "no selection" and give an empty list.

    Exits with status 2 (single entry) or 3 (several entries) when an
    entry is not an integer, or is smaller than 1. The latter would
    otherwise wrap around and silently select a column counted from the
    end of the row.
    """
    if not columns or columns in DEFAULT_COLUMN_VALUES:
        return []

    tokens = [tok.strip() for tok in columns.split(",")]
    if len(tokens) == 1 and not tokens[0]:
        return []
    bad_value_code = 2 if len(tokens) == 1 else 3

    col_order = []
    for tok in tokens:
        if not is_integer(tok) or int(tok) < 1:
            _fail("Column indices must be positive integers, got '" +
                  tok + "'\n", bad_value_code)
        col_order.append(int(tok) - 1)
    return col_order


def parse_column_names(column_names, col_order):
    """Turn the ``-n`` option text into a list of header names.

    ``None``, empty text and the values in DEFAULT_NAME_VALUES give an
    empty list. Exits with status 4 when columns were selected and the
    number of names differs from the number of selected columns.
    """
    if not column_names or column_names in DEFAULT_NAME_VALUES:
        return []

    col_names = column_names.split(",")
    if col_order and len(col_order) != len(col_names):
        _fail("There are " + str(len(col_order)) +
              " columns selected and " + str(len(col_names)) +
              " marker names\n", 4)
    return col_names


def main():
    """Parse the command line and rewrite the file accordingly."""
    parser = ArgumentParser(
        prog="editColumnHeadings",
        description="Cut, rearrange and rename columns in a tab-separated file.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the text file.")

    parser.add_argument(
        '-c',
        dest="columns",
        help="Columns to keep in the order to keep them in.")

    parser.add_argument(
        '-n',
        dest="column_names",
        help="Column names if renaming.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    args = parser.parse_args()

    # Validate both options before any file is opened.
    col_order = parse_column_order(args.columns)
    col_names = parse_column_names(args.column_names, col_order)

    rearrange_file(args.input_file, col_order, col_names, args.output_file)


if __name__ == "__main__":
    main()
    sys.exit(0)