<!--
view split_file_on_column.xml @ 1:0ba6d09a71d6 draft

Uploaded
author bgruening
date Wed, 18 Dec 2013 10:57:09 -0500
parents ad6a989924ac
children 90b52d8d6f2c
line wrap: on
line source
-->

<tool id="tp_split_on_column" name="Split file" force_history_refresh="True" version="0.1.1">
    <description>according to the values of a column</description>
    <!-- GNU awk performs the actual split; the tool's command invokes `awk`
         and redirects each input line to a per-value file. -->
    <requirements>
        <requirement version="4.1.0"
                     type="package">gnu_awk</requirement>
    </requirements>
    <!--
        Splits the input on tabs and appends every line to a file named
        primary_<output id>_<column value>_visible_<input extension> in the
        current working directory, then writes a summary (a count line followed
        by a file listing) to the regular output dataset.

        Why it is written this way:
        * The redirection target is wrapped in parentheses. An unparenthesized
          concatenation after ">" is ambiguous in awk and is rejected by some
          implementations.
        * The count is taken from the same filtered listing that is appended
          below it, so the number agrees with the list. A long-format listing
          would also count its leading "total" line and the hidden
          *_stdout / *_stderr files.
        * Dataset paths are quoted so that paths containing whitespace survive
          shell word splitting.
        * Backticks are kept for command substitution because the
          dollar-parenthesis form would be read as a Cheetah placeholder.

        NOTE(review): column values are used verbatim in the file names. Values
        containing "/" or "_" may break the file name pattern; confirm whether
        the data fed to this tool can contain such values.
    -->
    <command><![CDATA[
        awk -F'\t' '{ print > ("primary_${outfile.id}_" \$$column "_visible_${infile.ext}") }' '$infile';
        echo 'Created' `ls -1 --hide="*_stdout" --hide="*_stderr" | wc -l` 'files:' > '$outfile';
        ls -1 --hide="*_stdout" --hide="*_stderr" >> '$outfile';
    ]]></command>

    <inputs>
        <!-- Dataset to be split. -->
        <!-- NOTE(review): the command splits on tabs and the column selector
             below refers to this dataset; "tabular" may be the more fitting
             format restriction than "txt". Confirm before changing. -->
        <param name="infile"
               type="data"
               format="txt"
               label="File to select" />
        <!-- Column whose values decide which output file a line goes to. -->
        <param name="column"
               type="data_column"
               data_ref="infile"
               accept_default="true"
               label="on column" />
    </inputs>

    <outputs>
        <!-- Receives the summary written by the command: a "Created N files:"
             line followed by a listing of the working directory. -->
        <!-- NOTE(review): this dataset holds a plain-text report, yet it is
             declared with the input's format and metadata. Presumably these
             settings matter for the additional per-value files; verify against
             the target Galaxy release before switching to format="txt". -->
        <data name="outfile"
              format="input"
              metadata_source="infile"
              label="${tool.name} on ${on_string}"/>
    </outputs>
    <!-- TODO: the single test case below is empty; it declares no input
         parameters and no expected outputs, so it verifies nothing. Add an
         input file, a column value and the expected output once test data is
         available. -->
    <tests>
        <test>
        </test>
    </tests>
    <help>

**What it does**

This tool splits a file into several smaller files according to the values found in a chosen column.
It works like the group tool, but every group is saved to its own file.

-----

**Example**

Splitting the following tab-separated file on column 4 (here ``cluster 1`` / ``cluster 2``)::

    chr7  56632  56652  cluster 1
    chr7  56736  56756  cluster 1
    chr7  56761  56781  cluster 2
    chr7  56772  56792  cluster 2
    chr7  56775  56795  cluster 2

will produce two files, one per cluster::

    chr7  56632  56652  cluster 1
    chr7  56736  56756  cluster 1


    chr7  56761  56781  cluster 2
    chr7  56772  56792  cluster 2
    chr7  56775  56795  cluster 2


    </help>
</tool>