Mercurial > repos > bgruening > column_arrange_by_header
changeset 0:f18f67056946 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/column_arrange_by_header commit 088e73e958b55dc765778641b8a84080cc289f85-dirty
author | bgruening |
---|---|
date | Fri, 16 Oct 2015 14:31:13 -0400 |
parents | |
children | 6c6d26ff01ff |
files | columnArrange.xml column_arrange.py column_arrange_by_header.tar.gz test-data/columnarrange_input1.tab test-data/columnarrange_input2.tab test-data/columnarrange_result1.tab test-data/columnarrange_result2.tab tool_dependencies.xml |
diffstat | 8 files changed, 127 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- columnArrange.xml: Galaxy wrapper for column_arrange.py. Moves the columns
     named by the user to the left of a tabular dataset and keeps the remaining
     columns in their original order. -->
<tool id="bg_column_arrange_by_header" name="Column arrange" version="0.1">
    <description>by header name</description>
    <requirements>
        <requirement type="package" version="0.14.1">pandas</requirement>
    </requirements>
    <!-- Every interpolated value is wrapped in single quotes. The sanitizer on
         param_column strips the single-quote character, so a column name cannot
         terminate the quoting, and the shell performs no expansion of dollar
         signs, backticks or backslashes inside single quotes. The script's full
         option names are spelled out rather than relying on argparse prefix
         matching. -->
    <command interpreter="python">
<![CDATA[
        column_arrange.py
            --columns
            #for token in $rep_param_columns:
                '$token.param_column'
            #end for
            --input '$param_input'
            --output '$output'
]]>
    </command>
    <inputs>
        <param format="tabular" name="param_input" type="data" label="file to rearrange" />
        <repeat name="rep_param_columns" min="1" title="Specify the first few columns by name">
            <param name="param_column" type="text" value="" label="column">
                <!-- Allow any printable character in a header name except the
                     single quote, which is the delimiter used in the command. -->
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                </sanitizer>
            </param>
        </repeat>
    </inputs>
    <outputs>
        <data format="tabular" name="output" />
    </outputs>
    <tests>
        <!-- The output name in each test must match the data element declared
             in the outputs section ("output"). -->
        <test>
            <param name="param_input" value="columnarrange_input1.tab"/>
            <repeat name="rep_param_columns">
                <param name="param_column" value="fname"/>
            </repeat>
            <repeat name="rep_param_columns">
                <param name="param_column" value="age"/>
            </repeat>
            <output name="output" file="columnarrange_result1.tab"/>
        </test>
        <!-- Header names that contain a space. -->
        <test>
            <param name="param_input" value="columnarrange_input2.tab"/>
            <repeat name="rep_param_columns">
                <param name="param_column" value="first name"/>
            </repeat>
            <repeat name="rep_param_columns">
                <param name="param_column" value="nationality"/>
            </repeat>
            <output name="output" file="columnarrange_result2.tab"/>
        </test>
    </tests>
    <help>
**What it does**

With this tool you can specify (by naming the header) which columns need to be leftmost.
The columns which are not specified will be ordered as before, right of the columns which were specified.

Input file::

    AHeader BHeader CHeader DHeader
    a b c d
    a b c d

Specifying **CHeader** and **BHeader**, as the columns that should be leftmost, generates::

    CHeader BHeader AHeader DHeader
    c b a d
    c b a d

    </help>
    <citations>
        <citation type="bibtex">
            @ARTICLE{bgruening_galaxytools,
                Author = {Björn Grüning, Cameron Smith, Torsten Houwaart, Nicola Soranzo, Eric Rasche},
                keywords = {bioinformatics, ngs, galaxy, cheminformatics, rna},
                title = {{Galaxy Tools - A collection of bioinformatics and cheminformatics tools for the Galaxy environment}},
                url = {https://github.com/bgruening/galaxytools}
            }
        </citation>
    </citations>
</tool>
#!/usr/bin/env python
"""column_arrange.py: move selected columns of a tab-separated table leftmost.

The columns named with --columns are written first, in the order given; every
other column follows in its original order. Cell values are copied through
unchanged.

Usage:
    column_arrange.py --input IN.tab --output OUT.tab --columns NAME [NAME ...]
"""
import argparse
import sys

import pandas as pd


def reorder_header(header, leading):
    """Return the column order with ``leading`` first.

    header  -- list of column names as they appear in the input table.
    leading -- column names that must come first, in the desired order.
               A name listed more than once is placed at its first position.

    Returns a new list: the requested names followed by the remaining names of
    ``header`` in their original relative order.

    Raises ValueError naming every requested column that is not in ``header``.
    """
    missing = [name for name in leading if name not in header]
    if missing:
        raise ValueError(
            'Column(s) not found in the input header: %s (available: %s)'
            % (', '.join(repr(name) for name in missing),
               ', '.join(repr(name) for name in header)))

    chosen = []
    taken = set()
    for name in leading:
        # Skip repeats so a column requested twice is not emitted twice.
        if name not in taken:
            taken.add(name)
            chosen.append(name)
    remainder = [name for name in header if name not in taken]
    return chosen + remainder


def main(argv=None):
    """Parse the command line, rearrange the table and write the result.

    argv -- argument list without the program name; defaults to sys.argv[1:].

    Exits with a message on stderr and a non-zero status when a requested
    column does not exist in the input file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help='Tabular Input File Name')
    parser.add_argument('-o', '--output', help='Tabular Output File')
    parser.add_argument('-c', '--columns', nargs='+', help='Column Headers to Sort By')
    args = parser.parse_args(argv)

    # Read every cell as text and disable NA detection: with type inference
    # pandas would rewrite values on the way through (e.g. "007" -> 7,
    # "NA" -> empty), but this tool must only change the column order.
    table = pd.read_csv(args.input, sep='\t', dtype=object, keep_default_na=False)

    try:
        ordered = reorder_header(list(table.columns), args.columns)
    except ValueError as err:
        sys.exit(str(err))

    # write without index, separated by tabs
    table[ordered].to_csv(args.output, sep='\t', index=False)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_input1.tab Fri Oct 16 14:31:13 2015 -0400 @@ -0,0 +1,5 @@ +name fname age nationality +Woryt Heiko 22 german +Humte Alfons 49 austrian +Witz Gerald 12 french +Koulibaly Ansgard 33 nigerian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_input2.tab Fri Oct 16 14:31:13 2015 -0400 @@ -0,0 +1,5 @@ +second name age nationality first name +Woryt 22 german Heiko +Humte 49 austrian Alfons +Witz 12 french Gerald +Koulibaly 33 nigerian Ansgard
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_result1.tab Fri Oct 16 14:31:13 2015 -0400 @@ -0,0 +1,5 @@ +fname age name nationality +Heiko 22 Woryt german +Alfons 49 Humte austrian +Gerald 12 Witz french +Ansgard 33 Koulibaly nigerian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_result2.tab Fri Oct 16 14:31:13 2015 -0400 @@ -0,0 +1,5 @@ +first name nationality second name age +Heiko german Woryt 22 +Alfons austrian Humte 49 +Gerald french Witz 12 +Ansgard nigerian Koulibaly 33
<?xml version="1.0"?>
<!-- tool_dependencies.xml: declares the pandas 0.14.1 package, provided by the
     package_pandas_0_14 repository owned by iuc on the listed Tool Shed. -->
<tool_dependency>
    <package name="pandas" version="0.14.1">
        <repository
            changeset_revision="ac9f317487a9"
            name="package_pandas_0_14"
            owner="iuc"
            toolshed="https://toolshed.g2.bx.psu.edu"/>
    </package>
</tool_dependency>