Mercurial > repos > iuc > datamash_transpose

--- a/datamash-transpose.xml	Fri Jul 01 16:17:42 2022 +0000
+++ b/datamash-transpose.xml	Thu Mar 23 20:47:02 2023 +0000
@@ -3,19 +3,57 @@
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements" />
-    <expand macro="stdio" />
+    <edam_topics>
+        <edam_topic>topic_3570</edam_topic> <!-- Pure math / linear algebra -->
+    </edam_topics>
+    <edam_operations>
+         <!-- <edam_operation>operation_1234</edam_operation> -->
+    </edam_operations>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
     <command><![CDATA[
-        datamash transpose
-        @FIELD_SEPARATOR@
-        < $in_file > $out_file
+        #import os
+        #set file_size_MB = os.path.getsize(str($in_file)) / (1024 * 1024)
+        #set size_threshold_MB = 1024
+        #if $file_size_MB <= $size_threshold_MB:
+            datamash transpose @FIELD_SEPARATOR@ < '$in_file' > '$out_file'
+        #else:
+            ## Input matrix is very big: divide and conquer, because datamash runs out of memory
+            ## on such inputs (much earlier than file size ~ available RAM). Split the rows into manageable
+            ## chunks (-e drops empty ones), transpose each chunk (abort on the first failure), then juxtapose the column blocks.
+            #set num_chunks = 1 + int(file_size_MB/size_threshold_MB)
+            echo Huge matrix detected, processing in $num_chunks chunks. &&
+            split -e -n l/$num_chunks '$in_file' split_input_ &&
+            for chunk in split_input_*; do
+                datamash transpose @FIELD_SEPARATOR@ < "\$chunk" > "\${chunk}_transposed" || exit 1;
+            done &&
+            paste split_input_*_transposed > '$out_file'
+        #end if
     ]]></command>
-    <expand macro="inputs_outputs" />
+    <expand macro="inputs_outputs"/>
     <tests>
+        <test expect_num_outputs="1">
+            <param name="in_file" value="datamash_transpose_input.txt"/>
+            <output file="datamash_transpose_output.txt" name="out_file"/>
+        </test>
+        <!-- Test for transposing an extremely big input matrix.
+        Disabled to keep the repository size reasonable.
+        To run it, manually download the pathological input and expected output from:
+        https://usegalaxy.eu/u/tunc/h/very-big-scrna-matrix
+        -->
+        <!--
         <test>
-            <param name="in_file" value="datamash_transpose_input.txt" />
-            <output file="datamash_transpose_output.txt" name="out_file" />
+            <param name="in_file" value="big.tabular"/>
+            <output file="transposed_big.tabular" name="out_file"/>
         </test>
+        -->
+        <!-- Round trip on the same big files (also disabled): transpose(transpose(A)) = A -->
+        <!--
+        <test>
+            <param name="in_file" value="transposed_big.tabular"/>
+            <output file="big.tabular" name="out_file"/>
+        </test>
+        -->
     </tests>
     <help>
 <![CDATA[
@@ -45,4 +83,5 @@
 @HELP_FOOTER@
 ]]>
     </help>
+    <expand macro="citation"/>
 </tool>
--- a/macros.xml	Fri Jul 01 16:17:42 2022 +0000
+++ b/macros.xml	Thu Mar 23 20:47:02 2023 +0000
@@ -1,7 +1,7 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.1.0</token>
-    <token name="@VERSION_SUFFIX@">2</token>
-    <token name="@PROFILE@">21.01</token>
+    <token name="@TOOL_VERSION@">1.8</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">22.01</token>
     <xml name="inputs_outputs">
         <inputs>
             <param name="in_file" type="data" format="tabular,csv,tsv" label="Input tabular dataset" help="" />
@@ -44,4 +44,14 @@

 -----
     </token>
+    <xml name="citation"> <!-- Citation block for GNU Datamash; pulled into a tool with <expand macro="citation"/> -->
+        <citations>
+            <citation type="bibtex">
+                @ONLINE{datamash,
+                    title = {GNU Datamash},
+                    url = {https://www.gnu.org/software/datamash/}
+                }
+            </citation>
+        </citations>
+    </xml>
 </macros>