Mercurial > repos > immport-devteam > rearrange_columns
changeset 0:d2cdffd27293 draft
Uploaded
author | immport-devteam |
---|---|
date | Mon, 27 Feb 2017 13:05:18 -0500 |
parents | |
children | ce206587d42f |
files | rearrange_columns/editColumnHeadings.py rearrange_columns/editColumnHeadings.xml rearrange_columns/test-data/input1.txt rearrange_columns/test-data/input2.txt rearrange_columns/test-data/input3.txt rearrange_columns/test-data/output1.flowtext rearrange_columns/test-data/output2.flowtext rearrange_columns/test-data/output3.flowtext |
diffstat | 8 files changed, 314 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python

######################################################################
#                  Copyright (c) 2016 Northrop Grumman.
#                          All rights reserved.
######################################################################
"""Cut, rearrange and/or rename the columns of a tab-separated text file.

Source path in the repository: rearrange_columns/editColumnHeadings.py

Exit codes used by the command line interface:
    2 -- a single, non-integer column index was supplied
    3 -- a list of column indices contains a non-integer entry, or an index
         does not refer to an existing column of the input file
    4 -- the number of column names does not match the number of columns
"""

from __future__ import print_function
import sys

from argparse import ArgumentParser

# Values of the -c / -n options that mean "option not set" (the first entry
# of each tuple is the placeholder text pre-filled in the tool form).
_DEFAULT_COLUMNS = ("i.e.:1,5,2", "default", "Default")
_DEFAULT_NAMES = ("i.e.:Marker1,,Marker4", "default", "Default")


def is_integer(s):
    """Return True if ``int(s)`` succeeds, False if it raises ValueError."""
    try:
        int(s)
        return True
    except ValueError:
        return False


def rearrange_file(input_file, col_order, col_names, output_file):
    """Write a copy of ``input_file`` with columns selected/reordered/renamed.

    Args:
        input_file: path of the tab-separated input; its first line is the
            header.
        col_order: zero-based indices of the columns to keep, in output
            order.  An empty list keeps every column in its current order.
        col_names: new headings, one per output column.  An entry that is
            empty (after stripping whitespace) keeps the original heading
            of that column.  An empty list keeps all original headings.
        output_file: path of the file to write.

    Exits (via ``sys.exit``):
        3 if an index in ``col_order`` is outside the header's column range.
        4 if ``col_names`` does not have one entry per output column.
    """
    with open(input_file, "r") as infl, open(output_file, "w") as outf:
        # Only the line terminator is removed: a plain strip() would also
        # eat leading/trailing tabs and thereby drop empty first/last cells.
        current_hdrs = infl.readline().rstrip("\r\n").split("\t")
        n_cols = len(current_hdrs)

        if col_order:
            # Negative indices would silently wrap around to the end of the
            # row and too large ones would raise IndexError; reject both.
            for idx in col_order:
                if not 0 <= idx < n_cols:
                    sys.stderr.write(
                        "Column " + str(idx + 1) + " does not exist, the "
                        "file has " + str(n_cols) + " columns\n")
                    sys.exit(3)
            if col_names and len(col_names) != len(col_order):
                sys.stderr.write(
                    "There are " + str(len(col_order)) + " columns selected "
                    "and " + str(len(col_names)) + " marker names\n")
                sys.exit(4)
            selected = list(col_order)
        else:
            if col_names and len(col_names) != n_cols:
                sys.stderr.write(
                    "There are " + str(n_cols) + " columns but " +
                    str(len(col_names)) + " marker names were provided\n")
                sys.exit(4)
            selected = list(range(n_cols))

        # headers
        if col_names:
            # A blank name falls back to the heading of the source column.
            new_hdrs = [name.strip() or current_hdrs[src]
                        for name, src in zip(col_names, selected)]
        else:
            new_hdrs = [current_hdrs[src] for src in selected]
        outf.write("\t".join(new_hdrs) + "\n")

        # columns
        for line in infl:
            cols = line.rstrip("\r\n").split("\t")
            outf.write("\t".join([cols[src] for src in selected]) + "\n")


def _parse_column_order(columns):
    """Turn the -c option (one-based, comma-separated) into zero-based indices.

    Returns an empty list when the option is missing, blank, or one of the
    default placeholder values.  Exits with code 2 when a single value is
    not an integer and with code 3 when a list contains a non-integer.
    """
    if not columns or columns in _DEFAULT_COLUMNS:
        return []
    fields = [field.strip() for field in columns.split(",")]
    if len(fields) == 1:
        if not fields[0]:
            return []
        if not is_integer(fields[0]):
            sys.exit(2)
    elif not all(is_integer(field) for field in fields):
        sys.exit(3)
    return [int(field) - 1 for field in fields]


def _parse_column_names(column_names):
    """Split the -n option on commas; placeholder/missing values give []."""
    if not column_names or column_names in _DEFAULT_NAMES:
        return []
    return column_names.split(",")


def _main():
    """Parse the command line, validate the options and run the conversion."""
    parser = ArgumentParser(
        prog="editColumnHeadings",
        description="Cut, rearrange and rename columns in a tab-separated file.")

    parser.add_argument(
        '-i',
        dest="input_file",
        required=True,
        help="File location for the text file.")

    parser.add_argument(
        '-c',
        dest="columns",
        help="Columns to keep in the order to keep them in.")

    parser.add_argument(
        '-n',
        dest="column_names",
        help="Column names if renaming.")

    parser.add_argument(
        '-o',
        dest="output_file",
        required=True,
        help="Name of the output file.")

    args = parser.parse_args()

    # check column indices
    col_order = _parse_column_order(args.columns)

    # check column names
    col_names = _parse_column_names(args.column_names)
    if col_order and col_names and len(col_order) != len(col_names):
        sys.stderr.write(
            "There are " + str(len(col_order)) + " columns selected and " +
            str(len(col_names)) + " marker names\n")
        sys.exit(4)

    rearrange_file(args.input_file, col_order, col_names, args.output_file)

    sys.exit(0)


if __name__ == "__main__":
    _main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/editColumnHeadings.xml Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,134 @@ +<tool id="edit_rearrange_columns" name="Remove, rearrange and/or rename columns" version="1.1"> + <description>in txt-converted FCS files.</description> + <stdio> + <exit_code range="2" level="fatal" description="Please provide a comma separated list of integers for columns you want to keep." /> + <exit_code range="3" level="fatal" description="Please provide integers for columns you want to keep." /> + <exit_code range="4" level="fatal" description="List of column headings and list of selected columns must match. For instance for columns 1,3,4: Marker1,,Marker3." /> + </stdio> + <command><![CDATA[ + python $__tool_directory__/editColumnHeadings.py -o "${output_file}" -i "${input}" + #if $columns + -c "${columns}" + #end if + #if $colnames + -n "${colnames}" + #end if + ]]> + </command> + <inputs> + <param format="flowtext" name="input" type="data" label="Text file"/> + <param name="columns" type="text" label="Column order:" value="i.e.:1,5,2" optional="true" help="By default, will keep all columns in the same order."/> + <param name="colnames" type="text" label="New column headings:" value="i.e.:Marker1,,Marker4" optional="true" help="By default, will not change the column headings. 
Check below for more details."> + </param> + </inputs> + <outputs> + <data format="flowtext" name="output_file" label="Rearranged ${input.name}"/> + </outputs> + <tests> + <test> + <param name="input" value="input1.txt"/> + <param name="columns" value="3,4,5,6"/> + <param name="colnames" value="CD4,CCR3,CD8,CCR7"/> + <output name="output_file" file="output1.flowtext"/> + </test> + <test> + <param name="input" value="input2.txt"/> + <param name="columns" value="2,3,6,1"/> + <param name="colnames" value="i.e.:Marker1,,Marker4"/> + <output name="output_file" file="output2.flowtext"/> + </test> + <test> + <param name="input" value="input3.txt"/> + <param name="columns" value="i.e.:1,5,2"/> + <param name="colnames" value="M1,M2,M3,M4,M5,M6"/> + <output name="output_file" file="output3.flowtext"/> + </test> + </tests> + <help><![CDATA[ + This tool enables the removal, rearrangement and/or renaming of text file columns. + +----- + +**Input files** + +This tool requires txt, flowtext or tabular files as input. + +**Column order** + +Please indicate columns to keep in the order in which they should be (comma-separated list). +This field is optional. + +**Column names** + +Please indicate the new column headings in the order in which they should appear in the output file (comma-separated list). The number of headings should match the number of columns in the output. +This field is optional. + +.. class:: warningmark + +When providing column order AND new column headings the column count for each must match. See below for an example. + +**Output file** + +The output flowtext file is a copy of the input file with rearranged and/or renamed columns. 
+ +----- + +**Examples** + +**Input file**:: + + Marker1 Marker2 Marker3 Marker4 Marker5 + 4 45 123 1956 62534 + 3 65 104 1254 36576 + 7 26 767 4124 42235 + 4 56 323 7623 74634 + 5 83 532 6256 34763 + 4 15 877 9312 21265 + +*Example 1* + +- Column order: 5,3,2,4 +- Column names: Default + +*Output1*:: + + Marker5 Marker3 Marker2 Marker4 + 62534 123 45 1956 + 36576 104 65 1254 + 42235 767 26 4124 + 74634 323 56 7623 + 34763 532 83 6256 + 21265 877 15 9312 + +*Example 2* + +- Column order: 5,3,2,4 +- Column names: Mar34,,Mar7, + +*Output2*:: + + Mar34 Marker3 Mar7 Marker4 + 62534 123 45 1956 + 36576 104 65 1254 + 42235 767 26 4124 + 74634 323 56 7623 + 34763 532 83 6256 + 21265 877 15 9312 + +*Example 3* + +- Column order: Default +- Column names: Mar23,,,Mar7,Mar8 + +*Output3*:: + + Mar23 Marker2 Marker3 Mar7 Mar8 + 4 45 123 1956 62534 + 3 65 104 1254 36576 + 7 26 767 4124 42235 + 4 56 323 7623 74634 + 5 83 532 6256 34763 + 4 15 877 9312 21265 + ]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/test-data/input1.txt Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CCR3 PP CD8 APC CCR4 +449 157 551 129 169 292 +894 1023 199 277 320 227 +262 73 437 69 0 146 +340 115 509 268 0 74 +316 76 50 0 60 129 +394 144 83 138 335 194 +383 139 499 0 0 224 +800 1023 239 284 288 280 +388 97 534 111 83 177
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/test-data/input2.txt Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CXCR3 PP CD8 APC CCR5 +363 76 550 200 0 127 +372 126 519 44 51 148 +1023 1023 289 401 362 254 +770 1023 175 361 225 237 +384 111 525 121 0 138 +602 578 385 286 222 131 +788 1023 216 310 270 294 +420 211 552 479 0 62 +668 1019 73 193 227 132
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/test-data/input3.txt Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,10 @@ +Forward Scatter Side Scatter FITC CD4 PE CD25 PP CD3 APC CD45RA +289 56 438 0 626 480 +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 201 +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/test-data/output1.flowtext Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,10 @@ +CD4 CCR3 CD8 CCR7 +551 129 169 292 +199 277 320 227 +437 69 0 146 +509 268 0 74 +50 0 60 129 +83 138 335 194 +499 0 0 224 +239 284 288 280 +534 111 83 177
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/test-data/output2.flowtext Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,10 @@ +Side Scatter FITC CD4 APC CCR5 Forward Scatter +76 550 127 363 +126 519 148 372 +1023 289 254 1023 +1023 175 237 770 +111 525 138 384 +578 385 131 602 +1023 216 294 788 +211 552 62 420 +1019 73 132 668
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rearrange_columns/test-data/output3.flowtext Mon Feb 27 13:05:18 2017 -0500 @@ -0,0 +1,10 @@ +M1 M2 M3 M4 M5 M6 +289 56 438 0 626 480 +352 153 30 147 483 386 +383 190 156 228 734 408 +261 62 432 121 598 555 +451 120 537 338 568 201 +373 104 3 110 621 584 +418 105 561 0 610 562 +358 185 0 292 641 327 +733 970 139 227 293 259