changeset 0:d2cdffd27293 draft

Uploaded
author immport-devteam
date Mon, 27 Feb 2017 13:05:18 -0500
parents
children ce206587d42f
files rearrange_columns/editColumnHeadings.py rearrange_columns/editColumnHeadings.xml rearrange_columns/test-data/input1.txt rearrange_columns/test-data/input2.txt rearrange_columns/test-data/input3.txt rearrange_columns/test-data/output1.flowtext rearrange_columns/test-data/output2.flowtext rearrange_columns/test-data/output3.flowtext
diffstat 8 files changed, 314 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/editColumnHeadings.py	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+
+######################################################################
+#                  Copyright (c) 2016 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+
+from __future__ import print_function
+import sys
+
+from argparse import ArgumentParser
+
+
+def is_integer(s):
+    """Return True if int(s) succeeds, False if it raises ValueError."""
+    try:
+        return int(s) is not None  # int() never yields None: success
+    except ValueError:
+        return False
+
+
+def rearrange_file(input_file, col_order, col_names, output_file):
+    """Copy a tab-separated file, keeping/reordering/renaming its columns.
+
+    col_order lists 0-based column indices to keep, in output order (empty:
+    keep all). col_names lists output headings (empty list: keep originals;
+    a blank entry keeps that column's original heading). Exits with status
+    3 on an out-of-range index and 4 on a heading/column count mismatch.
+    """
+    with open(input_file, "r") as infl, open(output_file, "w") as outf:
+        # Drop only the line ending: strip() would also eat empty edge cells.
+        current_hdrs = infl.readline().rstrip("\r\n").split("\t")
+        ncol = len(current_hdrs)
+        bad = [c + 1 for c in (col_order or []) if not 0 <= c < ncol]
+        if bad:
+            sys.stderr.write("Column " + str(bad[0]) + " is out of range: the file has " + str(ncol) + " columns\n")
+            sys.exit(3)
+        if col_names and not col_order and len(col_names) != ncol:
+            sys.stderr.write("There are " + str(ncol) + " columns but " + str(len(col_names)) + " marker names were provided\n")
+            sys.exit(4)
+        if col_names and col_order and len(col_names) != len(col_order):
+            sys.stderr.write("There are " + str(len(col_order)) + " columns selected and " + str(len(col_names)) + " marker names\n")
+            sys.exit(4)
+        # Resolve the default order once instead of on every data row.
+        order = list(col_order) if col_order else list(range(ncol))
+        hdrs = [current_hdrs[c] for c in order]
+        if col_names:
+            hdrs = [new.strip() or old for new, old in zip(col_names, hdrs)]
+        outf.write("\t".join(hdrs) + "\n")
+        for line in infl:
+            line = line.rstrip("\r\n")
+            if not line:
+                continue  # a blank (e.g. trailing) line has no cells to pick
+            cols = line.split("\t")
+            outf.write("\t".join([cols[c] for c in order]) + "\n")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, validate the column
+    # selection and the new headings, then rewrite the file.
+    parser = ArgumentParser(
+             prog="editColumnHeadings",
+             description="Cut, rearrange and rename columns in a tab-separated file.")
+
+    parser.add_argument(
+            '-i',
+            dest="input_file",
+            required=True,
+            help="File location for the text file.")
+
+    parser.add_argument(
+            '-c',
+            dest="columns",
+            help="Columns to keep in the order to keep them in.")
+
+    parser.add_argument(
+            '-n',
+            dest="column_names",
+            help="Column names if renaming.")
+
+    parser.add_argument(
+            '-o',
+            dest="output_file",
+            required=True,
+            help="Name of the output file.")
+
+    args = parser.parse_args()
+
+    # check column indices
+    # The pre-filled example text of the tool form and "default"/"Default"
+    # all mean "keep every column in its current order".
+    default_value_col = ["i.e.:1,5,2", "default", "Default"]
+    col_order = []
+    if args.columns and args.columns not in default_value_col:
+        tmp_col = [c.strip() for c in args.columns.split(",")]
+        # Exit status 2 flags a bad single value, 3 a bad entry in a list.
+        bad_status = 2 if len(tmp_col) == 1 else 3
+        if tmp_col != [""]:
+            for c in tmp_col:
+                # Columns are numbered from 1: anything below would turn
+                # into a negative index and silently count from the end.
+                if not is_integer(c) or int(c) < 1:
+                    sys.stderr.write("'" + c + "' is not a valid column number\n")
+                    sys.exit(bad_status)
+                col_order.append(int(c) - 1)
+
+    # check column names
+    default_value_nms = ["i.e.:Marker1,,Marker4", "default", "Default"]
+    col_names = []
+    if args.column_names and args.column_names not in default_value_nms:
+        # Blank entries are kept: they mean "leave this heading unchanged".
+        col_names = args.column_names.split(",")
+        # One heading is required per selected column.
+        if col_order and len(col_order) != len(col_names):
+            sys.stderr.write("There are " + str(len(col_order)) + " columns selected and " + str(len(col_names)) + " marker names\n")
+            sys.exit(4)
+
+    rearrange_file(args.input_file, col_order, col_names, args.output_file)
+
+    sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/editColumnHeadings.xml	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,134 @@
+<tool id="edit_rearrange_columns" name="Remove, rearrange and/or rename columns" version="1.1">
+  <description>in txt-converted FCS files.</description>
+  <stdio>
+    <exit_code range="2" level="fatal" description="Please provide a comma separated list of integers for columns you want to keep." />
+    <exit_code range="3" level="fatal" description="Please provide integers for columns you want to keep." />
+    <exit_code range="4" level="fatal" description="List of column headings and list of selected columns must match. For instance for columns 1,3,4: Marker1,,Marker3." />
+  </stdio>
+  <command><![CDATA[
+    python $__tool_directory__/editColumnHeadings.py -o "${output_file}" -i "${input}"
+ #if $columns
+    -c "${columns}"
+ #end if
+ #if $colnames
+    -n "${colnames}"
+ #end if
+  ]]>
+  </command>
+  <inputs>
+    <param format="flowtext" name="input" type="data" label="Text file"/>
+    <param name="columns" type="text" label="Column order:" value="i.e.:1,5,2" optional="true" help="By default, will keep all columns in the same order."/>
+    <param name="colnames" type="text" label="New column headings:" value="i.e.:Marker1,,Marker4" optional="true" help="By default, will not change the column headings. Check below for more details.">
+    </param>
+  </inputs>
+  <outputs>
+    <data format="flowtext" name="output_file" label="Rearranged ${input.name}"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input" value="input1.txt"/>
+      <param name="columns" value="3,4,5,6"/>
+      <param name="colnames" value="CD4,CCR3,CD8,CCR7"/>
+      <output name="output_file" file="output1.flowtext"/>
+    </test>
+    <test>
+      <param name="input" value="input2.txt"/>
+      <param name="columns" value="2,3,6,1"/>
+      <param name="colnames" value="i.e.:Marker1,,Marker4"/>
+      <output name="output_file" file="output2.flowtext"/>
+    </test>
+    <test>
+      <param name="input" value="input3.txt"/>
+      <param name="columns" value="i.e.:1,5,2"/>
+      <param name="colnames" value="M1,M2,M3,M4,M5,M6"/>
+      <output name="output_file" file="output3.flowtext"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+   This tool enables the removal, rearrangement and/or renaming of text file columns.
+
+-----
+
+**Input files**
+
+This tool requires txt, flowtext or tabular files as input.
+
+**Column order**
+
+Please indicate columns to keep in the order in which they should be (comma-separated list).
+This field is optional.
+
+**Column names**
+
+Please indicate the new column headings in the order in which they should appear in the output file (comma-separated list). The number of headings should match the number of columns in the output.
+This field is optional.
+
+.. class:: warningmark
+
+When providing column order AND new column headings the column count for each must match. See below for an example.
+
+**Output file**
+
+The output flowtext file is a copy of the input file with rearranged and/or renamed columns.
+
+-----
+
+**Examples**
+
+**Input file**::
+
+   Marker1 Marker2 Marker3 Marker4 Marker5
+   4       45      123     1956    62534
+   3       65      104     1254    36576
+   7       26      767     4124    42235
+   4       56      323     7623    74634
+   5       83      532     6256    34763
+   4       15      877     9312    21265
+
+*Example 1*
+
+- Column order: 5,3,2,4
+- Column names: Default
+
+*Output1*::
+
+   Marker5 Marker3 Marker2 Marker4
+   62534   123     45      1956
+   36576   104     65      1254
+   42235   767     26      4124
+   74634   323     56      7623
+   34763   532     83      6256
+   21265   877     15      9312
+
+*Example 2*
+
+- Column order: 5,3,2,4
+- Column names: Mar34,,Mar7,
+
+*Output2*::
+
+   Mar34 Marker3 Mar7 Marker4
+   62534 123     45   1956
+   36576 104     65   1254
+   42235 767     26   4124
+   74634 323     56   7623
+   34763 532     83   6256
+   21265 877     15   9312
+
+*Example 3*
+
+- Column order: Default
+- Column names: Mar23,,,Mar7,Mar8
+
+*Output3*::
+
+   Mar23 Marker2 Marker3 Mar7 Mar8
+   4     45      123     1956 62534
+   3     65      104     1254 36576
+   7     26      767     4124 42235
+   4     56      323     7623 74634
+   5     83      532     6256 34763
+   4     15      877     9312 21265
+  ]]>
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/input1.txt	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CCR3	PP CD8	APC CCR4
+449	157	551	129	169	292
+894	1023	199	277	320	227
+262	73	437	69	0	146
+340	115	509	268	0	74
+316	76	50	0	60	129
+394	144	83	138	335	194
+383	139	499	0	0	224
+800	1023	239	284	288	280
+388	97	534	111	83	177
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/input2.txt	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CXCR3	PP CD8	APC CCR5
+363	76	550	200	0	127
+372	126	519	44	51	148
+1023	1023	289	401	362	254
+770	1023	175	361	225	237
+384	111	525	121	0	138
+602	578	385	286	222	131
+788	1023	216	310	270	294
+420	211	552	479	0	62
+668	1019	73	193	227	132
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/input3.txt	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,10 @@
+Forward Scatter	Side Scatter	FITC CD4	PE CD25	PP CD3	APC CD45RA
+289	56	438	0	626	480
+352	153	30	147	483	386
+383	190	156	228	734	408
+261	62	432	121	598	555
+451	120	537	338	568	201
+373	104	3	110	621	584
+418	105	561	0	610	562
+358	185	0	292	641	327
+733	970	139	227	293	259
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/output1.flowtext	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,10 @@
+CD4	CCR3	CD8	CCR7
+551	129	169	292
+199	277	320	227
+437	69	0	146
+509	268	0	74
+50	0	60	129
+83	138	335	194
+499	0	0	224
+239	284	288	280
+534	111	83	177
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/output2.flowtext	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,10 @@
+Side Scatter	FITC CD4	APC CCR5	Forward Scatter
+76	550	127	363
+126	519	148	372
+1023	289	254	1023
+1023	175	237	770
+111	525	138	384
+578	385	131	602
+1023	216	294	788
+211	552	62	420
+1019	73	132	668
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rearrange_columns/test-data/output3.flowtext	Mon Feb 27 13:05:18 2017 -0500
@@ -0,0 +1,10 @@
+M1	M2	M3	M4	M5	M6
+289	56	438	0	626	480
+352	153	30	147	483	386
+383	190	156	228	734	408
+261	62	432	121	598	555
+451	120	537	338	568	201
+373	104	3	110	621	584
+418	105	561	0	610	562
+358	185	0	292	641	327
+733	970	139	227	293	259