Mercurial > repos > bgruening > split_file_on_column
diff split_file_on_column.xml @ 5:d4b5b70e82cb draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit df81dd26ed1cf67a0d95b9614738b1d59667773f
author | bgruening |
---|---|
date | Mon, 04 Jul 2022 12:26:46 +0000 |
parents | 37a53100b67e |
children | ff2a81aa3f08 |
line wrap: on
line diff
--- a/split_file_on_column.xml Thu Feb 25 15:54:13 2021 +0000 +++ b/split_file_on_column.xml Mon Jul 04 12:26:46 2022 +0000 @@ -1,5 +1,4 @@ -<tool id="tp_split_on_column" name="Split file" version="0.4"> - <description>according to the values of a column</description> +<tool id="tp_split_on_column" name="Split by group" version="0.5"> <requirements> <requirement type="package" version="5.0.1">gawk</requirement> </requirements> @@ -14,14 +13,14 @@ ]]> </command> <inputs> - <param format="tabular" name="infile" type="data" label="File to select" /> + <param format="tabular" name="infile" type="data" label="File to split" /> <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> - <param name="include_header" type="boolean" label="Include the header in all splitted files?" - help="Include the first line (the assumed header line) in all splitted files." /> + <param name="include_header" type="boolean" label="Include header in splits?" + help="Include the first line (the assumed header line) in all split files." /> </inputs> <outputs> - <collection name="split_output" type="list" label="Table split on first column"> + <collection name="split_output" type="list" label="Split by group collection"> <discover_datasets pattern="__name_and_ext__" directory="tmp_out" /> </collection> </outputs> @@ -85,11 +84,19 @@ <help> <![CDATA[ -**What it does** +======== +Synopsis +======== + +Given a single input dataset this tool splits the file on unique values from a specified column. -This tool splits a file into different smaller files using a specific column. -It will work like the group tool, but every group is saved to its own file. -You have the option to include the header (first line) in all splitted files. +=========== +Description +=========== + +This tool splits a file into a collection based on unique values of a specific column. +It performs a grouping operation with every group saved as a separate collection element. 
+You have the option to include the header (first line) in all splits. If you have a header and don't want to keep it, please remove it before you use this tool. For example with the "Remove beginning of a file" tool. @@ -97,23 +104,28 @@ **Example** -Splitting a file without header on column 5 from this:: +Splitting this file on column 1:: - chr7 56632 56652 cluster 1 - chr7 56736 56756 cluster 1 - chr7 56761 56781 cluster 2 - chr7 56772 56792 cluster 2 - chr7 56775 56795 cluster 2 - -will produce 2 files with different clusters:: - - chr7 56632 56652 cluster 1 - chr7 56736 56756 cluster 1 + chr1 10 20 + chr1 30 40 + chr2 40 70 + chr4 60 80 - chr7 56761 56781 cluster 2 - chr7 56772 56792 cluster 2 - chr7 56775 56795 cluster 2 +will produce a collection with 3 elements:: + + chr1 10 20 + chr1 30 40 + + chr2 40 70 + + chr4 60 80 + +------ + +.. image:: $PATH_TO_IMAGES/split_by_group.svg + :width: 800 + :alt: Split by group ]]> </help>