view editColumnHeadings.py @ 1:ce206587d42f draft default tip

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/rearrange_columns commit 3289c1f97444f45f946ff92d47c490c9d58f3d94"
author azomics
date Thu, 16 Jul 2020 07:22:06 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python

######################################################################
#                  Copyright (c) 2016 Northrop Grumman.
#                          All rights reserved.
######################################################################
#
# Cristel Thomas - May 2018
# Version 2 -- with Pandas!
#

import sys

from argparse import ArgumentParser
import pandas as pd


def is_integer(s):
    """Return True when ``int(s)`` succeeds, False when it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True


def rearrange_file(input_file, output_file, new_cols, new_order, flag_text):
    """Rename and/or reorder the columns of a tab-separated file.

    Args:
        input_file: Path of the tab-separated input; its first row is read
            as the column headers.
        output_file: Path the resulting tab-separated table is written to.
        new_cols: Mapping of existing header -> replacement header. An empty
            mapping skips the renaming step.
        new_order: Columns to keep, in output order. Header names when
            ``flag_text`` is true (matched against the headers *after*
            renaming), otherwise 0-based column positions. An empty list
            keeps every column in its current position.
        flag_text: True when ``new_order`` holds header names rather than
            positions.

    Exits (through ``sys.exit``):
        6: more renames, or more ordered columns, were requested than the
            input has columns.
        9: ``new_order`` refers to a header name or a position that does
            not exist in the input; details are printed to stderr.
        10: a key of ``new_cols`` is not one of the original headers. This
            check runs after the output file has been written.
    """
    df = pd.read_table(input_file)
    original_columns = list(df.columns)

    if new_cols:
        if len(new_cols) > len(original_columns):
            sys.exit(6)
        df.columns = [new_cols[col] if col in new_cols else col
                      for col in original_columns]

    if new_order:
        n_columns = len(df.columns)
        if len(new_order) > n_columns:
            sys.exit(6)
        if flag_text:
            existing_cols = list(df.columns)
            known = set(existing_cols)
            # De-duplicate while keeping the order the caller supplied, so
            # the report is deterministic.
            unknown_elements = list(dict.fromkeys(
                col for col in new_order if col not in known))
            if unknown_elements:
                print("%s of the provided columns for reorder is/are not in the input file." % len(unknown_elements), file=sys.stderr)
                print("Existing columns:", file=sys.stderr)
                for col in existing_cols:
                    print(col, file=sys.stderr)
                print("Provided columns for new order which are not in the original list:", file=sys.stderr)
                for col in unknown_elements:
                    print(col, file=sys.stderr)
                sys.exit(9)
            df = df[list(new_order)]
        else:
            # Reject positions outside [0, n_columns). A negative value would
            # otherwise silently count from the end (a command-line "0"
            # arrives here as -1), and a too-large one would raise an
            # uncaught IndexError.
            out_of_range = list(dict.fromkeys(
                pos for pos in new_order if not 0 <= pos < n_columns))
            if out_of_range:
                print("%s of the provided column positions for reorder is/are out of range." % len(out_of_range), file=sys.stderr)
                print("Number of columns in the input file: %s" % n_columns, file=sys.stderr)
                # Reported 1-based, matching what the command line accepts.
                print("Provided positions (1-based) which are out of range:", file=sys.stderr)
                for pos in out_of_range:
                    print(pos + 1, file=sys.stderr)
                sys.exit(9)
            # Select by position, not by label, so repeated header names
            # (possible after renaming) cannot pull in extra columns.
            df = df.iloc[:, list(new_order)]

    df.to_csv(output_file, sep="\t", index=False)

    # Checked only after writing: the output file exists even when a rename
    # key matched no original header and exit code 10 is returned.
    if any(col not in original_columns for col in new_cols):
        sys.exit(10)

if __name__ == "__main__":
    # Command-line interface. -i and -o are required; -r and -w may each be
    # given several times and are paired up by position; -n is one
    # comma-separated list.
    parser = ArgumentParser(
        prog="editColumnHeadings",
        description="Cut, rearrange and rename columns in a tab-separated file.")
    parser.add_argument('-i', dest="input_file", required=True,
                        help="File location for the text file.")
    parser.add_argument('-r', dest="columns", action="append",
                        help="Columns to replace.")
    parser.add_argument('-w', dest="replace_with", action="append",
                        help="new column headers.")
    parser.add_argument('-n', dest="new_order",
                        help="New column order if re-ordering or subsetting.")
    parser.add_argument('-o', dest="output_file", required=True,
                        help="Name of the output file.")
    args = parser.parse_args()

    # Values treated as "nothing entered" -- presumably the placeholder texts
    # of the calling tool form; confirm against the tool wrapper.
    defaults = ["i.e.:TLR 6, TLR6PE", "i.e.:TLR6", "i.e.:1,2,5 or CD3,CD4,CCR3", "default", "Default", ""]

    new_order = []
    new_cols = {}
    flag_text = False

    # --- New column order (-n) -------------------------------------------
    if args.new_order:
        if args.new_order in defaults:
            sys.exit(8)
        order_tokens = [tok.strip() for tok in args.new_order.strip().split(",")]
        # A single non-integer token switches the whole list to header names.
        flag_text = not all(is_integer(tok) for tok in order_tokens)
        if flag_text:
            new_order = [str(tok) for tok in order_tokens]
        else:
            # Positions are given 1-based; rearrange_file expects 0-based.
            new_order = [int(tok) - 1 for tok in order_tokens]

    # --- Header renames (-r / -w) ----------------------------------------
    # Supplying only one of the two options is an error.
    if bool(args.columns) != bool(args.replace_with):
        sys.exit(7)

    if args.columns:
        cols_to_change = [None if entry in defaults else entry.strip().split(",")
                          for entry in args.columns]
        replacements = [None if entry in defaults else entry.strip()
                        for entry in args.replace_with]
        check_col = sum(1 for entry in cols_to_change if entry is not None)
        check_rep = sum(1 for entry in replacements if entry is not None)
        if check_col != check_rep:
            sys.exit(7)
        # Only the first check_col positional pairs are examined.
        paired = zip(cols_to_change[:check_col], replacements[:check_col])
        for old_names, replacement in paired:
            if not old_names:
                sys.exit(3)
            if not replacement:
                sys.exit(4)
            for old_name in old_names:
                new_cols[old_name.strip()] = replacement

    # Nothing to rename and nothing to reorder.
    if not (new_order or new_cols):
        sys.exit(2)

    rearrange_file(args.input_file, args.output_file, new_cols, new_order, flag_text)