Mercurial > repos > iuc > datamash_transpose
changeset 5:374cb875d38a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/datamash commit 9edd054251bc74685d627360bb0fbe8ea60fa6a2
author | iuc |
---|---|
date | Thu, 23 Mar 2023 20:47:02 +0000 |
parents | ac092723240d |
children | a513e3fbb4c5 |
files | datamash-transpose.xml macros.xml |
diffstat | 2 files changed, 60 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/datamash-transpose.xml Fri Jul 01 16:17:42 2022 +0000 +++ b/datamash-transpose.xml Thu Mar 23 20:47:02 2023 +0000 @@ -3,19 +3,57 @@ <macros> <import>macros.xml</import> </macros> - <expand macro="requirements" /> - <expand macro="stdio" /> + <edam_topics> + <edam_topic>topic_3570</edam_topic> <!-- Pure math / linear algebra --> + </edam_topics> + <edam_operations> + <!-- <edam_operation>operation_1234</edam_operation> --> + </edam_operations> + <expand macro="requirements"/> + <expand macro="stdio"/> <command><![CDATA[ - datamash transpose - @FIELD_SEPARATOR@ - < $in_file > $out_file + #import os + #set file_size_MB = os.path.getsize(str($in_file)) / (1024 * 1024) + #set size_threshold_MB = 1024 + #if $file_size_MB <= $size_threshold_MB: + datamash transpose @FIELD_SEPARATOR@ < $in_file > $out_file + #else: + ## Input matrix is very big: divide and conquer + ## If the input file is very big, datamash runs out of memory (much earlier than file size ~ available RAM). + ## Split into manageable chunks of row vectors, transpose the chunks and juxtapose column vector chunks. + #set num_chunks = 1 + int(file_size_MB/size_threshold_MB) + echo Huge matrix detected, processing in $num_chunks chunks. && + split -n l/$num_chunks $in_file split_input_ && + for chunk in \$(ls split_input*); do + datamash transpose @FIELD_SEPARATOR@ < \$chunk > \${chunk}_transposed; + done && + paste split_input_*_transposed > $out_file + #end if ]]></command> - <expand macro="inputs_outputs" /> + <expand macro="inputs_outputs"/> <tests> + <test expect_num_outputs="1"> + <param name="in_file" value="datamash_transpose_input.txt"/> + <output file="datamash_transpose_output.txt" name="out_file"/> + </test> + <!-- Test for transposing an extremely big input matrix + Disabled to keep the repository size reasonable. 
+ For testing, manually download a pathological in- and output from: + https://usegalaxy.eu/u/tunc/h/very-big-scrna-matrix + --> + <!-- <test> - <param name="in_file" value="datamash_transpose_input.txt" /> - <output file="datamash_transpose_output.txt" name="out_file" /> + <param name="in_file" value="big.tabular"/> + <output file="transposed_big.tabular" name="out_file"/> </test> + --> + <!-- transpose(transpose(A)) = A --> + <!-- + <test> + <param name="in_file" value="transposed_big.tabular"/> + <output file="big.tabular" name="out_file"/> + </test> + --> </tests> <help> <![CDATA[ @@ -45,4 +83,5 @@ @HELP_FOOTER@ ]]> </help> + <expand macro="citation"/> </tool>
--- a/macros.xml Fri Jul 01 16:17:42 2022 +0000 +++ b/macros.xml Thu Mar 23 20:47:02 2023 +0000 @@ -1,7 +1,7 @@ <macros> - <token name="@TOOL_VERSION@">1.1.0</token> - <token name="@VERSION_SUFFIX@">2</token> - <token name="@PROFILE@">21.01</token> + <token name="@TOOL_VERSION@">1.8</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">22.01</token> <xml name="inputs_outputs"> <inputs> <param name="in_file" type="data" format="tabular,csv,tsv" label="Input tabular dataset" help="" /> @@ -44,4 +44,14 @@ ----- </token> + <xml name="citation"> + <citations> + <citation type="bibtex"> + @ONLINE{datamash, + title = {GNU Datamash}, + url = {https://www.gnu.org/software/datamash/} + } + </citation> + </citations> + </xml> </macros>