Mercurial > repos > bgruening > column_arrange_by_header
changeset 1:6c6d26ff01ff draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/column_arrange_by_header commit b6e0b2de32ddb91085235397728623a35ad13f42
author | bgruening |
---|---|
date | Fri, 15 Feb 2019 07:45:03 -0500 |
parents | f18f67056946 |
children | |
files | columnArrange.xml column_arrange.py column_arrange_by_header.tar.gz tool_dependencies.xml |
diffstat | 4 files changed, 73 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/columnArrange.xml Fri Oct 16 14:31:13 2015 -0400 +++ b/columnArrange.xml Fri Feb 15 07:45:03 2019 -0500 @@ -1,17 +1,15 @@ -<tool id="bg_column_arrange_by_header" name="Column arrange" version="0.1"> +<tool id="bg_column_arrange_by_header" name="Column arrange" version="0.2"> <description>by header name</description> - <requirements> - <requirement type="package" version="0.14.1">pandas</requirement> - </requirements> <command interpreter="python"> <![CDATA[ column_arrange.py --columns #for token in $rep_param_columns: - "$token.param_column" + '$token.param_column' #end for - --in $param_input - --out $output + --in '$param_input' + --out '$output' + $discard_remaining ]]> </command> <inputs> @@ -22,9 +20,15 @@ <valid initial="string.printable"> <remove value="'"/> </valid> + <mapping initial="none"> + <add source="'" target="'"'"'" /> + </mapping> </sanitizer> </param> </repeat> + <param name="discard_remaining" type="boolean" truevalue="--discard" falsevalue="" checked="false" + label="Discard unspecified columns?" + help="Columns not explicitly specified above for rearrangement can be appended after the last specified column in their original order (the default) or be discarded from the output." 
/> </inputs> <outputs> <data format="tabular" name="output" /> @@ -38,7 +42,36 @@ <repeat name="rep_param_columns"> <param name="param_column" value="age"/> </repeat> - <output name="out" file="columnarrange_result1.tab"/> + <output name="output" file="columnarrange_result1.tab"/> + </test> + <test> + <param name="param_input" value="columnarrange_input1.tab"/> + <repeat name="rep_param_columns"> + <param name="param_column" value="fname"/> + </repeat> + <param name="discard_remaining" value="true" /> + <output name="output"> + <assert_contents> + <has_n_columns n="1" /> + <has_line line="fname" /> + </assert_contents> + </output> + </test> + <test> + <param name="param_input" value="columnarrange_input1.tab"/> + <repeat name="rep_param_columns"> + <param name="param_column" value="fname"/> + </repeat> + <repeat name="rep_param_columns"> + <param name="param_column" value="age"/> + </repeat> + <param name="discard_remaining" value="true" /> + <output name="output"> + <assert_contents> + <has_n_columns n="2" /> + <has_line line="fname	age" /> + </assert_contents> + </output> </test> <test> <param name="param_input" value="columnarrange_input2.tab"/> @@ -48,14 +81,16 @@ <repeat name="rep_param_columns"> <param name="param_column" value="nationality"/> </repeat> - <output name="out" file="columnarrange_result2.tab"/> + <output name="output" file="columnarrange_result2.tab"/> </test> </tests> <help> **What it does** -With this tool you can specify (by naming the header) which columns need to be leftmost. -The columns which are not specified will be ordered as before, right of the columns which were specified. +With this tool you can specify - by name - the order of columns for tabular +data. +Columns not specified will remain ordered as before and be moved to the right +of the specified columns, as shown in the following example. 
Input file:: @@ -63,12 +98,15 @@ a b c d a b c d -Specifying **CHeader** and **BHeader**, as the columns that should be leftmost, generates:: +Specifying **CHeader** and **BHeader**, as the columns that should be leftmost, +generates:: CHeader BHeader AHeader DHeader c b a d c b a d +Alternatively, you can choose to retain *only* the specified columns in their +new arrangement and discard all other columns. </help> <citations> <citation type="bibtex">
--- a/column_arrange.py Fri Oct 16 14:31:13 2015 -0400
+++ b/column_arrange.py Fri Feb 15 07:45:03 2019 -0500
@@ -1,18 +1,30 @@
-#!/usr/bin/env python
-import pandas as pd
+#!/usr/bin/env python
+
 import argparse
 
 parser = argparse.ArgumentParser()
 parser.add_argument('-i', '--input', help='Tabular Input File Name')
 parser.add_argument('-o','--output', help='Tabular Output File')
-parser.add_argument('-c', '--columns', nargs='+', help='Column Headers to Sort By')
+parser.add_argument(
+    '-c', '--columns', nargs='+', help='Column Headers to Sort By'
+)
+parser.add_argument(
+    '-d', '--discard', action='store_true',
+    help='Discard remaining columns'
+)
+
 args=parser.parse_args()
 
-cols = args.columns
-table = pd.read_csv(args.input, sep='\t')
-blist = list(table.columns)
-for token in cols:
-    blist.remove(token)
-sorted_table = table[args.columns + blist]
-# write without index, seperated by tabs
-sorted_table.to_csv(args.output, sep='\t', index=False)
+with open(args.input) as data:
+    hdr = next(data)
+    columns = hdr.rstrip('\n').split('\t')
+    idx = [columns.index(name) for name in args.columns]
+    if not args.discard:
+        idx += [i for i in range(len(columns)) if i not in idx]
+    rearranged_cols = [columns[i] for i in idx]
+    with open(args.output, 'w') as out:
+        out.write('\t'.join(rearranged_cols) + '\n')
+        for line in data:
+            columns = line.rstrip('\n').split('\t')
+            rearranged_cols = [columns[i] for i in idx]
+            out.write('\t'.join(rearranged_cols) + '\n')
--- a/tool_dependencies.xml Fri Oct 16 14:31:13 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="pandas" version="0.14.1">
-        <repository changeset_revision="ac9f317487a9" name="package_pandas_0_14" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>