split_file_on_column: split_file_on

comparison split_file_on_column.xml @ 4:37a53100b67e draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"

author	bgruening
date	Thu, 25 Feb 2021 15:54:13 +0000
parents	b60f2452580e
children	d4b5b70e82cb

comparison

equal deleted inserted replaced

-:b60f2452580e
+:37a53100b67e
-<tool id="tp_split_on_column" name="Split file" version="0.2">
+<tool id="tp_split_on_column" name="Split file" version="0.4">
 <description>according to the values of a column</description>
 <requirements>
-<requirement type="package" version="4.1.0">gnu_awk</requirement>
+<requirement type="package" version="5.0.1">gawk</requirement>
 </requirements>
 <command>
 <![CDATA[
 mkdir tmp_out &&
-awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' $infile
+## Split the input into one file per distinct value of the selected column.
+## Output files are named "<value>.<input extension>" inside tmp_out.
+#if $include_header
+## Header mode: remember line 1 and write it exactly once at the top of each
+## output file. Only one output file is kept open at a time. Everything is
+## written in append mode (tmp_out is freshly created above), so reopening a
+## file for a value that reappears later in unsorted input never truncates
+## rows that were already written.
+awk -F '\t' 'NR==1{hdr=$0;next} {o="tmp_out/"\$$column".$infile.ext"} o!=f{if(f) close(f); f=o; if(!(f in seen)){seen[f]=1; print hdr >> f}} {print >> f}' '$infile'
+#else
+## No header: every line goes to the file named after its column value.
+awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile'
+#end if
 ]]>
 </command>
 <inputs>
 <param format="tabular" name="infile" type="data" label="File to select" />
 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
+<param name="include_header" type="boolean" label="Include the header in all splitted files?"
+help="Include the first line (the assumed header line) in all splitted files." />
 </inputs>
 <outputs>
 <collection name="split_output" type="list" label="Table split on first column">
 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" />
 </collection>
 </outputs>
 <tests>
 <test>
 <param name="infile" value="5cols.tabular" ftype="tabular"/>
 <param name="column" value="5" />
+<param name="include_header" value="false"/>
+<output_collection name="split_output" type="list">
+<element name="1">
+<assert_contents>
+<has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
+</assert_contents>
+</element>
+<element name="2">
+<assert_contents>
+<has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
+</assert_contents>
+</element>
+</output_collection>
+</test>
+<test>
+<param name="infile" value="5cols-with-header.tabular" ftype="tabular" />
+<param name="column" value="5" />
+<param name="include_header" value="true"/>
+<output_collection name="split_output" type="list">
+<element name="1">
+<assert_contents>
+<has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
+<has_n_lines n="3" />
+<has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
+</assert_contents>
+</element>
+<element name="2">
+<assert_contents>
+<has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
+<has_n_lines n="4" />
+<has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
+</assert_contents>
+</element>
+</output_collection>
+</test>
+<test>
+<param name="infile" value="5cols-with-header.tabular" ftype="tabular" />
+<param name="column" value="5" />
+<param name="include_header" value="false"/>
 <output_collection name="split_output" type="list">
 <element name="1">
 <assert_contents>
 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
 </assert_contents>
 **What it does**
 This tool splits a file into different smaller files using a specific column.
 It will work like the group tool, but every group is saved to its own file.
+You have the option to include the header (first line) in all split files.
+If you have a header and don't want to keep it, please remove it before you use this tool,
+for example with the "Remove beginning of a file" tool.
 -----
 **Example**
-Splitting on column 5 from this::
+Splitting a file without header on column 5 from this::
 chr7  56632  56652  cluster 1
 chr7  56736  56756  cluster 1
 chr7  56761  56781  cluster 2
 chr7  56772  56792  cluster 2
 chr7  56761  56781  cluster 2
 chr7  56772  56792  cluster 2
 chr7  56775  56795  cluster 2
 ]]>
 </help>
+<citations>
+<citation type="bibtex">
+@misc{githubsplit_file_on_column,
+author = {Gruening, Bjoern},
+year = {2015},
+title = {split_file_on_column},
+publisher = {GitHub},
+journal = {GitHub repository},
+url = {https://github.com/bgruening/galaxytools},
+}
+</citation>
+</citations>
 </tool>

Mercurial > repos > bgruening > split_file_on_column

comparison split_file_on_column.xml @ 4:37a53100b67e draft