#!/usr/bin/env python

# Provenance (header of the HG changeset patch this script was taken from):
#   User azomics
#   Date 1594898526 14400
#   Node ID ce206587d42f80a3fe96f241bf604bf2add2bb05
#   Parent  d2cdffd27293aa67c85ddc13dddac15400a28102
#   "planemo upload for repository
#    https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/rearrange_columns
#    commit 3289c1f97444f45f946ff92d47c490c9d58f3d94"
#   File: editColumnHeadings.py

######################################################################
#                  Copyright (c) 2016 Northrop Grumman.
#                          All rights reserved.
######################################################################
#
# Cristel Thomas - May 2018
# Version 2 -- with Pandas!
#
"""Cut, rearrange and rename columns in a tab-separated file.

Exit codes used by this script:
    2   neither a new order nor any renaming was requested
    3   a "columns to replace" entry is empty or a placeholder
    4   a "replace with" entry is empty or a placeholder
    6   more columns were given (to rename or to order) than the file has
    7   "-r" and "-w" were not supplied in matching numbers
    8   the new order was left at its placeholder value
    9   a column requested for the new order does not exist in the file
    10  a column requested for renaming is not in the input file
        (the output file is still written)
"""

import sys

from argparse import ArgumentParser
import pandas as pd


def is_integer(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    ``None`` and other non-convertible objects yield False instead of
    raising ``TypeError``.
    """
    try:
        int(s)
    except (ValueError, TypeError):
        return False
    return True


def rearrange_file(input_file, output_file, new_cols, new_order, flag_text):
    """Rename and/or reorder the columns of a tab-separated file.

    Parameters:
        input_file: path of the tab-separated input (first row = headers).
        output_file: path the tab-separated result is written to.
        new_cols: dict mapping current header -> new header (may be empty).
        new_order: columns to keep, in output order (may be empty).  Either
            header names (``flag_text`` true) or 0-based positions
            (``flag_text`` false).  Names are matched against the headers
            *after* renaming.
        flag_text: whether ``new_order`` holds names rather than positions.

    Exits (via ``sys.exit``) with:
        6  if ``new_cols`` or ``new_order`` has more entries than the file
           has columns;
        9  if ``new_order`` names a column, or gives a position, that does
           not exist in the file;
        10 after writing the output, if a key of ``new_cols`` is not one of
           the input file's headers.
    """
    df = pd.read_table(input_file)
    original_columns = list(df.columns)

    if new_cols:
        if len(new_cols) > len(df.columns):
            sys.exit(6)
        # Headers without an entry in new_cols are kept unchanged.
        df.columns = [new_cols.get(col, col) for col in df.columns]

    if new_order:
        if len(new_order) > len(df.columns):
            sys.exit(6)
        if flag_text:
            existing_cols = list(df.columns)
            unknown_elements = list(set(new_order) - set(existing_cols))
            if unknown_elements:
                print("%s of the provided columns for reorder is/are not in the input file." % len(unknown_elements), file=sys.stderr)
                print("Existing columns:", file=sys.stderr)
                for col in existing_cols:
                    print(col, file=sys.stderr)
                print("Provided columns for new order which are not in the original list:", file=sys.stderr)
                for col in unknown_elements:
                    print(col, file=sys.stderr)
                sys.exit(9)
            subset = new_order
        else:
            # Positions must be validated explicitly: a negative value
            # (e.g. a user-supplied "0" turned into -1) would otherwise
            # silently select a column counted from the end, and a value
            # past the last column would raise a bare IndexError.
            n_cols = len(df.columns)
            out_of_range = [idx for idx in new_order
                            if not 0 <= idx < n_cols]
            if out_of_range:
                print("%s of the provided column positions for reorder is/are out of range." % len(out_of_range), file=sys.stderr)
                print("Number of columns in the input file: %s" % n_cols, file=sys.stderr)
                print("Provided (1-based) positions which are out of range:", file=sys.stderr)
                for idx in out_of_range:
                    print(idx + 1, file=sys.stderr)
                sys.exit(9)
            subset = [df.columns[idx] for idx in new_order]
        df = df[subset]

    df.to_csv(output_file, sep="\t", index=False)

    # Checked only after the output has been written: the result is still
    # produced, and exit code 10 reports that a requested rename did not
    # match any header of the input file.
    if new_cols:
        for c in new_cols:
            if c not in original_columns:
                sys.exit(10)


if __name__ == "__main__":
    parser = ArgumentParser(
        prog="editColumnHeadings",
        description="Cut, rearrange and rename columns in a tab-separated file.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the text file.")

    parser.add_argument(
        '-r',
        dest="columns",
        action="append",
        help="Columns to replace.")

    parser.add_argument(
        '-w',
        dest="replace_with",
        action="append",
        help="new column headers.")

    parser.add_argument(
        '-n',
        dest="new_order",
        help="New column order if re-ordering or subsetting.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    args = parser.parse_args()

    new_order = []
    new_cols = {}
    # Values treated as "not filled in" rather than as real column input.
    defaults = ["i.e.:TLR 6, TLR6PE", "i.e.:TLR6", "i.e.:1,2,5 or CD3,CD4,CCR3", "default", "Default", ""]
    flag_text = False

    if args.new_order:
        if args.new_order in defaults:
            sys.exit(8)
        nwor = [x.strip() for x in args.new_order.strip().split(",")]
        # A single non-integer entry switches the whole list to name mode.
        flag_text = not all(is_integer(x) for x in nwor)
        # Positions are given 1-based on the command line.
        new_order = [str(x) if flag_text else int(x) - 1 for x in nwor]

    if args.columns:
        if not args.replace_with:
            sys.exit(7)
        cols_to_change = [c.strip().split(",") if c not in defaults else None for c in args.columns]
        replacements = [r.strip() if r not in defaults else None for r in args.replace_with]
        check_col = sum(x is not None for x in cols_to_change)
        check_rep = sum(x is not None for x in replacements)
        if check_col != check_rep:
            sys.exit(7)
        # Only the first `check_col` pairs are examined, so placeholder
        # entries are tolerated at the end of the lists but not before a
        # real entry.
        for cols, replacement in zip(cols_to_change[:check_col],
                                     replacements[:check_col]):
            if not cols:
                sys.exit(3)
            if not replacement:
                sys.exit(4)
            for c in cols:
                new_cols[c.strip()] = replacement
    elif args.replace_with:
        sys.exit(7)

    if not new_order and not new_cols:
        sys.exit(2)

    rearrange_file(args.input_file, args.output_file, new_cols, new_order, flag_text)
-###################################################################### - -from __future__ import print_function -import sys - -from argparse import ArgumentParser - - -def is_integer(s): - try: - int(s) - return True - except ValueError: - return False - - -def rearrange_file(input_file, col_order, col_names, output_file): - with open(input_file, "r") as infl, open(output_file, "w") as outf: - # headers - hdrs = infl.readline().strip() - current_hdrs = hdrs.split("\t") - if not col_order and col_names: - if len(col_names) != len(current_hdrs): - sys.stderr.write("There are " + str(len(current_hdrs)) + " columns but " + str(len(col_names)) + " marker names were provided\n") - sys.exit(4) - if col_names: - tmp_hdr = [] - for i in range(0, len(col_names)): - if col_names[i].strip(): - tmp_hdr.append(col_names[i].strip()) - else: - if col_order: - tmp_hdr.append(current_hdrs[col_order[i]]) - else: - tmp_hdr.append(current_hdrs[i]) - hdrs = ("\t".join(tmp_hdr)) - elif col_order: - tp_hdr = [] - for j in col_order: - tp_hdr.append(current_hdrs[j]) - hdrs = ("\t".join(tp_hdr)) - - outf.write(hdrs + "\n") - - # columns - for lines in infl: - cols = lines.strip().split("\t") - if not col_order: - col_order = [x for x in range(0, len(current_hdrs))] - outf.write("\t".join([cols[c] for c in col_order]) + "\n") - - -if __name__ == "__main__": - parser = ArgumentParser( - prog="editColumnHeadings", - description="Cut, rearrange and rename columns in a tab-separated file.") - - parser.add_argument( - '-i', - dest="input_file", - required=True, - help="File location for the text file.") - - parser.add_argument( - '-c', - dest="columns", - help="Columns to keep in the order to keep them in.") - - parser.add_argument( - '-n', - dest="column_names", - help="Column names if renaming.") - - parser.add_argument( - '-o', - dest="output_file", - required=True, - help="Name of the output file.") - - args = parser.parse_args() - - # check column indices - default_value_col = 
["i.e.:1,5,2", "default", "Default"] - col_order = [] - if args.columns: - if args.columns not in default_value_col: - tmp_col = args.columns.split(",") - if len(tmp_col) == 1: - if not tmp_col[0].strip(): - col_order = [] - elif not is_integer(tmp_col[0].strip()): - sys.exit(2) - else: - col_order.append(int(tmp_col[0].strip()) - 1) - else: - for c in range(0, len(tmp_col)): - if not is_integer(tmp_col[c].strip()): - sys.exit(3) - else: - col_order.append(int(tmp_col[c].strip()) - 1) - - # check column names - default_value_nms = ["i.e.:Marker1,,Marker4", "default", "Default"] - col_names = [] - if args.column_names: - if args.column_names not in default_value_nms: - col_names = args.column_names.split(",") - if col_order: - if len(col_order) != len(col_names): - sys.stderr.write("There are " + str(len(col_order)) + " columns selected and " + str(len(col_names)) + " marker names\n") - sys.exit(4) - - rearrange_file(args.input_file, col_order, col_names, args.output_file) - - sys.exit(0) diff -r d2cdffd27293 -r ce206587d42f rearrange_columns/editColumnHeadings.xml --- a/rearrange_columns/editColumnHeadings.xml Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ - - in txt-converted FCS files. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r d2cdffd27293 -r ce206587d42f rearrange_columns/test-data/input1.txt --- a/rearrange_columns/test-data/input1.txt Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 -449 157 551 129 169 292 -894 1023 199 277 320 227 -262 73 437 69 0 146 -340 115 509 268 0 74 -316 76 50 0 60 129 -394 144 83 138 335 194 -383 139 499 0 0 224 -800 1023 239 284 288 280 -388 97 534 111 83 177 diff -r d2cdffd27293 -r ce206587d42f rearrange_columns/test-data/input2.txt --- a/rearrange_columns/test-data/input2.txt Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 -363 76 550 200 0 127 -372 126 519 44 51 148 -1023 1023 289 401 362 254 -770 1023 175 361 225 237 -384 111 525 121 0 138 -602 578 385 286 222 131 -788 1023 216 310 270 294 -420 211 552 479 0 62 -668 1019 73 193 227 132 diff -r d2cdffd27293 -r ce206587d42f rearrange_columns/test-data/input3.txt --- a/rearrange_columns/test-data/input3.txt Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA -289 56 438 0 626 480 -352 153 30 147 483 386 -383 190 156 228 734 408 -261 62 432 121 598 555 -451 120 537 338 568 201 -373 104 3 110 621 584 -418 105 561 0 610 562 -358 185 0 292 641 327 -733 970 139 227 293 259 diff -r d2cdffd27293 -r ce206587d42f rearrange_columns/test-data/output1.flowtext --- a/rearrange_columns/test-data/output1.flowtext Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -CD4 CCR3 CD8 CCR7 -551 129 169 292 -199 277 320 227 -437 69 0 146 -509 268 0 74 -50 0 60 129 -83 138 335 194 -499 0 0 224 -239 284 288 280 -534 111 83 177 diff -r d2cdffd27293 -r ce206587d42f 
rearrange_columns/test-data/output2.flowtext --- a/rearrange_columns/test-data/output2.flowtext Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -Side Scatter FITC CD4 APC CCR5 Forward Scatter -76 550 127 363 -126 519 148 372 -1023 289 254 1023 -1023 175 237 770 -111 525 138 384 -578 385 131 602 -1023 216 294 788 -211 552 62 420 -1019 73 132 668 diff -r d2cdffd27293 -r ce206587d42f rearrange_columns/test-data/output3.flowtext --- a/rearrange_columns/test-data/output3.flowtext Mon Feb 27 13:05:18 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -M1 M2 M3 M4 M5 M6 -289 56 438 0 626 480 -352 153 30 147 483 386 -383 190 156 228 734 408 -261 62 432 121 598 555 -451 120 537 338 568 201 -373 104 3 110 621 584 -418 105 561 0 610 562 -358 185 0 292 641 327 -733 970 139 227 293 259 diff -r d2cdffd27293 -r ce206587d42f test-data/input1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input1.txt Thu Jul 16 07:22:06 2020 -0400 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 +449 157 551 129 169 292 +894 1023 199 277 320 227 +262 73 437 69 0 146 +340 115 509 268 0 74 +316 76 50 0 60 129 +394 144 83 138 335 194 +383 139 499 0 0 224 +800 1023 239 284 288 280 +388 97 534 111 83 177 diff -r d2cdffd27293 -r ce206587d42f test-data/input2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input2.txt Thu Jul 16 07:22:06 2020 -0400 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 +363 76 550 200 0 127 +372 126 519 44 51 148 +1023 1023 289 401 362 254 +770 1023 175 361 225 237 +384 111 525 121 0 138 +602 578 385 286 222 131 +788 1023 216 310 270 294 +420 211 552 479 0 62 +668 1019 73 193 227 132 diff -r d2cdffd27293 -r ce206587d42f test-data/input3.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input3.txt Thu Jul 16 07:22:06 2020 -0400 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC 
CD45RA +289 56 438 0 626 480 +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 201 +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259 diff -r d2cdffd27293 -r ce206587d42f test-data/output1.flowtext --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output1.flowtext Thu Jul 16 07:22:06 2020 -0400 @@ -0,0 +1,10 @@ +CD4 CCR3 CD8 CCR7 +551 129 169 292 +199 277 320 227 +437 69 0 146 +509 268 0 74 +50 0 60 129 +83 138 335 194 +499 0 0 224 +239 284 288 280 +534 111 83 177 diff -r d2cdffd27293 -r ce206587d42f test-data/output2.flowtext --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output2.flowtext Thu Jul 16 07:22:06 2020 -0400 @@ -0,0 +1,10 @@ +Side Scatter FITC CD4 APC CCR5 Forward Scatter +76 550 127 363 +126 519 148 372 +1023 289 254 1023 +1023 175 237 770 +111 525 138 384 +578 385 131 602 +1023 216 294 788 +211 552 62 420 +1019 73 132 668 diff -r d2cdffd27293 -r ce206587d42f test-data/output3.flowtext --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output3.flowtext Thu Jul 16 07:22:06 2020 -0400 @@ -0,0 +1,10 @@ +M1 M2 M3 M4 M5 M6 +289 56 438 0 626 480 +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 201 +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259