Mercurial > repos > bgruening > split_file_on_column

Binary file split_file_on_column.tar.gz has changed
--- a/split_file_on_column.xml	Wed Dec 18 10:57:09 2013 -0500
+++ b/split_file_on_column.xml	Wed Aug 26 07:32:06 2015 -0400
@@ -1,38 +1,56 @@
-<tool id="tp_split_on_column" name="Split file" force_history_refresh="True" version="0.1.1">
+<tool id="tp_split_on_column" name="Split file" version="0.2">
     <description>according to the values of a column</description>
     <requirements>
         <requirement type="package" version="4.1.0">gnu_awk</requirement>
     </requirements>
     <command>
-        awk -F'\t' '{print > "primary_${outfile.id}_" \$$column "_visible_${infile.ext}" }' $infile;
-        echo 'Created' `ls -l | wc -l` 'files:' > $outfile;
-        ls -1 --hide="*_stdout" --hide="*_stderr" >> $outfile;
+<![CDATA[## Write every input row to a file named (value of the selected column).(input extension); the parentheses make the concatenated redirection target unambiguous and the input path is quoted for the shell.
+        awk -F'\t' '{ print > (\$$column ".$infile.ext") }' '$infile'
+]]>
     </command>
-
     <inputs>
-        <param format="txt" name="infile" type="data" label="File to select" />
+        <param format="tabular" name="infile" type="data" label="File to select" />
         <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
     </inputs>
-
     <outputs>
+        <collection name="split_output" type="list" label="${tool.name} on ${on_string}"> <!-- list collection filled from the files the command writes; the split column is user-selected, so the label does not name a fixed column -->
+            <discover_datasets pattern="__name_and_ext__" directory="." /> <!-- scans the job working directory; files are expected to be named value.ext -->
+        </collection>
+    </outputs>
+    <!--outputs>
         <data format="input" name="outfile" metadata_source="infile" label="${tool.name} on ${on_string}"/>
-    </outputs>
+    </outputs-->
     <tests>
         <test>
+            <param name="infile" value="5cols.tabular" ftype="tabular"/> <!-- fixture with five tab-separated columns; the last column holds 1 or 2 -->
+            <param name="column" value="5" /> <!-- split on the last column of the fixture -->
+            <output_collection name="split_output" type="list"> <!-- one element is expected per distinct value in column 5 -->
+                <element name="1"> <!-- expected to hold the rows whose column 5 is 1 -->
+                    <assert_contents>
+                        <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
+                    </assert_contents>
+                </element>
+                <element name="2"> <!-- expected to hold the rows whose column 5 is 2 -->
+                    <assert_contents>
+                        <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
+                    </assert_contents>
+                </element>
+            </output_collection>
         </test>
     </tests>
     <help>
+<![CDATA[

 **What it does**

-This tool splits a file into different smaller files using a specific column.
+This tool splits a file into several smaller files, one for each distinct value found in the selected column.
 It will work like the group tool, but every group is saved to its own file.

 -----

 **Example**

-Splitting on column 4 from this::
+Splitting on column 5 from this::

     chr7  56632  56652  cluster 1
     chr7  56736  56756  cluster 1
@@ -51,5 +69,6 @@
     chr7  56775  56795  cluster 2


+]]>
     </help>
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/5cols.tabular	Wed Aug 26 07:32:06 2015 -0400
@@ -0,0 +1,5 @@
+chr7	56632	56652	cluster	1
+chr7	56736	56756	cluster	1
+chr7	56761	56781	cluster	2
+chr7	56772	56792	cluster	2
+chr7	56775	56795	cluster	2
--- a/tool_dependencies.xml	Wed Dec 18 10:57:09 2013 -0500
+++ b/tool_dependencies.xml	Wed Aug 26 07:32:06 2015 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="gnu_awk" version="4.1.0">
-        <repository changeset_revision="ec65852050ef" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="52a8268bb49f" name="package_gnu_awk_4_1_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>