Mercurial > repos > bgruening > split_file_on_column
changeset 4:37a53100b67e draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author | bgruening |
---|---|
date | Thu, 25 Feb 2021 15:54:13 +0000 |
parents | b60f2452580e |
children | d4b5b70e82cb |
files | split_file_on_column.tar.gz split_file_on_column.xml test-data/5cols-with-header.tabular test-data/5cols.tabular tool_dependencies.xml |
diffstat | 5 files changed, 74 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/split_file_on_column.xml Wed Dec 23 03:50:48 2015 -0500 +++ b/split_file_on_column.xml Thu Feb 25 15:54:13 2021 +0000 @@ -1,17 +1,24 @@ -<tool id="tp_split_on_column" name="Split file" version="0.2"> +<tool id="tp_split_on_column" name="Split file" version="0.4"> <description>according to the values of a column</description> <requirements> - <requirement type="package" version="4.1.0">gnu_awk</requirement> + <requirement type="package" version="5.0.1">gawk</requirement> </requirements> <command> <![CDATA[ - mkdir tmp_out && - awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' $infile + mkdir tmp_out && + #if $include_header + awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile + #else + awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' + #end if ]]> </command> <inputs> <param format="tabular" name="infile" type="data" label="File to select" /> <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> + + <param name="include_header" type="boolean" label="Include the header in all splitted files?" + help="Include the first line (the assumed header line) in all splitted files." 
/> </inputs> <outputs> <collection name="split_output" type="list" label="Table split on first column"> @@ -22,6 +29,45 @@ <test> <param name="infile" value="5cols.tabular" ftype="tabular"/> <param name="column" value="5" /> + <param name="include_header" value="false"/> + <output_collection name="split_output" type="list"> + <element name="1"> + <assert_contents> + <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> + </assert_contents> + </element> + <element name="2"> + <assert_contents> + <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <param name="infile" value="5cols-with-header.tabular" ftype="tabular" /> + <param name="column" value="5" /> + <param name="include_header" value="true"/> + <output_collection name="split_output" type="list"> + <element name="1"> + <assert_contents> + <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> + <has_n_lines n="3" /> + <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> + </assert_contents> + </element> + <element name="2"> + <assert_contents> + <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> + <has_n_lines n="4" /> + <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <param name="infile" value="5cols-with-header.tabular" ftype="tabular" /> + <param name="column" value="5" /> + <param name="include_header" value="false"/> <output_collection name="split_output" type="list"> <element name="1"> <assert_contents> @@ -43,12 +89,15 @@ This tool splits a file into different smaller files using a specific column. It will work like the group tool, but every group is saved to its own file. +You have the option to include the header (first line) in all split files. +If you have a header and don't want to keep it, please remove it before you use this tool. 
+For example with the "Remove beginning of a file" tool. ----- **Example** -Splitting on column 5 from this:: +Splitting a file without header on column 5 from this:: chr7 56632 56652 cluster 1 chr7 56736 56756 cluster 1 @@ -66,7 +115,19 @@ chr7 56772 56792 cluster 2 chr7 56775 56795 cluster 2 - ]]> </help> + <citations> + <citation type="bibtex"> +@misc{githubsplit_file_on_column, + author = {Gruening, Bjoern}, + year = {2015}, + title = {split_file_on_column}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/bgruening/galaxytools}, + } + </citation> + </citations> </tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/5cols-with-header.tabular Thu Feb 25 15:54:13 2021 +0000 @@ -0,0 +1,6 @@ +Column1 Column2 Column3 Column4 Column5 +chr7 56632 56652 cluster 1 +chr7 56736 56756 cluster 1 +chr7 56761 56781 cluster 2 +chr7 56772 56792 cluster 2 +chr7 56775 56795 cluster 2
--- a/test-data/5cols.tabular Wed Dec 23 03:50:48 2015 -0500 +++ b/test-data/5cols.tabular Thu Feb 25 15:54:13 2021 +0000 @@ -1,5 +1,5 @@ chr7 56632 56652 cluster 1 -chr7 56736 56756 cluster 1 +chr7 56736 56756 cluster 1 chr7 56761 56781 cluster 2 chr7 56772 56792 cluster 2 chr7 56775 56795 cluster 2
--- a/tool_dependencies.xml Wed Dec 23 03:50:48 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="gnu_awk" version="4.1.0"> - <repository changeset_revision="f145f856ec57" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>