diff editColumnHeadings.py @ 1:ce206587d42f draft default tip

"planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/rearrange_columns commit 3289c1f97444f45f946ff92d47c490c9d58f3d94"
author azomics
date Thu, 16 Jul 2020 07:22:06 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/editColumnHeadings.py	Thu Jul 16 07:22:06 2020 -0400
@@ -0,0 +1,147 @@
+#!/usr/bin/env python
+
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+#
+# Cristel Thomas - May 2018
+# Version 2 -- with Pandas!
+#
+
+import sys
+
+from argparse import ArgumentParser
+import pandas as pd
+
+
+def is_integer(s):
+    try:
+        int(s)
+        return True
+    except ValueError:
+        return False
+
+
+def rearrange_file(input_file, output_file, new_cols, new_order, flag_text):
+    df = pd.read_table(input_file)
+    original_columns = [x for x in df.columns]
+    if new_cols:
+        edited_cols = []
+        if len(new_cols) > len(df.columns):
+            sys.exit(6)
+        for i in range(0, len(df.columns)):
+            if df.columns[i] in new_cols:
+                edited_cols.append(new_cols[df.columns[i]])
+            else:
+                edited_cols.append(df.columns[i])
+        df.columns = edited_cols
+
+    if new_order:
+        if len(new_order) > len(df.columns):
+            sys.exit(6)
+        subset = []
+        if flag_text:
+            existing_cols = list(df.columns)
+            unknown_elements = list(set(new_order) - set(existing_cols))
+            if len(unknown_elements):
+                print("%s of the provided columns for reorder is/are not in the input file." % len(unknown_elements), file=sys.stderr)
+                print("Existing columns:", file=sys.stderr)
+                for col in existing_cols:
+                    print(col, file=sys.stderr)
+                print("Provided columns for new order which are not in the original list:", file=sys.stderr)
+                for col in unknown_elements:
+                    print(col, file=sys.stderr)
+                sys.exit(9)
+            subset = new_order
+        else:
+            subset = [df.columns[x] for x in new_order]
+        df = df[subset]
+
+    df.to_csv(output_file, sep="\t", index=False)
+    if new_cols:
+        for c in new_cols:
+            if c not in original_columns:
+                sys.exit(10)
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+             prog="editColumnHeadings",
+             description="Cut, rearrange and rename columns in a tab-separated file.")
+
+    parser.add_argument(
+            '-i',
+            dest="input_file",
+            required=True,
+            help="File location for the text file.")
+
+    parser.add_argument(
+            '-r',
+            dest="columns",
+            action="append",
+            help="Columns to replace.")
+
+    parser.add_argument(
+            '-w',
+            dest="replace_with",
+            action="append",
+            help="new column headers.")
+
+    parser.add_argument(
+            '-n',
+            dest="new_order",
+            help="New column order if re-ordering or subsetting.")
+
+    parser.add_argument(
+            '-o',
+            dest="output_file",
+            required=True,
+            help="Name of the output file.")
+
+    args = parser.parse_args()
+
+
+    new_order = []
+    new_cols = {}
+#    flag = False
+#    exit_codes = [3,4,7,8,9,10,2]
+    defaults = ["i.e.:TLR 6, TLR6PE", "i.e.:TLR6", "i.e.:1,2,5 or CD3,CD4,CCR3", "default", "Default", ""]
+    flag_text = False
+
+    if args.new_order:
+        if args.new_order not in defaults:
+            nwor = [x.strip() for x in args.new_order.strip().split(",")]
+            check_integer = [is_integer(x) for x in nwor]
+            if sum(check_integer) != len(check_integer):
+                flag_text = True
+            new_order = [str(x) if flag_text else int(x)-1 for x in nwor]
+        else:
+            sys.exit(8)
+
+    if args.columns:
+        if args.replace_with:
+            cols_to_change = [c.strip().split(",") if c not in defaults else None for c in args.columns]
+            replacements = [r.strip() if r not in defaults else None for r in args.replace_with]
+            check_col = sum([True if x is not None else False for x in cols_to_change])
+            check_rep = sum([True if x is not None else False for x in replacements])
+            if check_col != check_rep:
+                sys.exit(7)
+            for i in range(0, check_col):
+                if cols_to_change[i]:
+                    if replacements[i]:
+                        for c in cols_to_change[i]:
+                            new_cols[c.strip()] = replacements[i]
+                    else:
+                        sys.exit(4)
+                else:
+                    sys.exit(3)
+        else:
+            sys.exit(7)
+    else:
+        if args.replace_with:
+            sys.exit(7)
+
+    if not new_order and not new_cols:
+        sys.exit(2)
+
+    rearrange_file(args.input_file, args.output_file, new_cols, new_order, flag_text)