Mercurial > repos > iuc > khmer_normalize_by_median

--- a/macros.xml	Fri Sep 07 11:01:41 2018 -0400
+++ b/macros.xml	Wed Dec 18 16:01:09 2019 -0500
@@ -1,8 +1,10 @@
 <macros>
-    <token name="@WRAPPER_VERSION@">3.0.0a1</token>
+    <token name="@WRAPPER_VERSION@">3.0.0a3</token>
+    <token name="@TOOL_VERSION@">+galaxy1</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@WRAPPER_VERSION@">khmer</requirement>
+            <yield/>
         </requirements>
     </xml>
     <xml name="version">
@@ -35,7 +37,7 @@
                 <option value="specific">Show</option>
             </param>
             <when value="simple">
-                <param name="tablesize" type="select" label="Sample Type" display="radio">
+                <param argument="" name="tablesize" type="select" label="Sample Type" display="radio">
                     <option value="1e9" selected="true">Microbial Genome</option>
                     <option value="2e9">Animal Transcriptome</option>
                     <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
@@ -43,15 +45,32 @@
                 </param>
             </when>
             <when value="specific">
-                <param name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
-                <param name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
-                <param name="tablesize_specific" type="text" value="1000000.0"
+                <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
+                <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
+                <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0"
                     label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
             </when>
         </conditional>
     </xml>
+    <token name="@LINK_SEQUENCES@">
+<![CDATA[
+#import re
+mkdir input/ &&
+#set gzip=""
+#for $num, $input in enumerate($inputs)
+    ln -s '${input}' 'input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' &&
+    #if $input.ext.endswith(".gz"):
+        #set gzip="--gzip"
+    #end if
+#end for]]></token><!-- Symlinks every dataset in $inputs into input/ under a sanitised "identifier.ext" name and sets $gzip to the gzip flag when any input has a compressed datatype. The check uses the datatype extension because the dataset path itself normally does not end in .gz. -->
+    <token name="@USE_SEQUENCES@">
+<![CDATA[
+#for $num, $input in enumerate($inputs)
+    '../input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext'
+#end for]]></token><!-- Expands to one quoted ../input/ path per dataset in $inputs, using the same sanitising expression as the ln -s targets in @LINK_SEQUENCES@. This token does not import re itself, so the template that expands it must already have done so. -->
+
     <xml name="input_sequences_filenames">
-        <param  name="inputs" multiple="true" type="data" format="fasta,fastq"
+        <param  name="inputs" multiple="true" type="data" format="fasta,fastq,fasta.gz,fastq.gz"
             label="Sequences in FASTA or FASTQ format"
             help="Put in order of precedence such as longest reads first." />
     </xml>
@@ -66,27 +85,26 @@
     </xml>
     <xml name="abundance-histogram-output">
         <data name="output_histogram_filename" format="txt"
-            label="${tool.name} k-mer abundance histogram. The
+            label="${tool.name} on ${on_string}: k-mer abundance histogram. The
                 columns are: (1) k-mer abundance, (2) k-mer count, (3)
                 cumulative count, (4) fraction of total distinct k-mers." />
     </xml>
-    <xml name="output_sequences">
-        <data name="output" format_source="inputs"
-            label="${tool.name} processed nucleotide sequence file">
-            <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true"/>
-        </data>
+    <xml name="output_sequences" token_extension="">
+        <collection name="sequences" type="list">
+            <discover_datasets pattern="(?P&lt;name&gt;.*)\.(?P&lt;ext&gt;fast[aq](\.gz)?)\.@EXTENSION@" directory="output" />
+        </collection>
     </xml>
     <xml name="output_sequences_single">
         <data name="output" format_source="input_sequence_filename"
-            label="${tool.name} processed nucleotide sequence file" />
+            label="${tool.name} on ${on_string}: processed nucleotide sequence file" />
     </xml>
     <xml name="input_zero">
-        <param name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
-            help="Output zero count bins (--no-zero)" />
+        <param argument="--no-zero" name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
+            help="Output zero count bins" />
     </xml>
     <xml name="input_bigcount">
-        <param  name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
-            checked="true" help="Count k-mers past 255 occurences (--no-bigcount)" />
+        <param  argument="--no-bigcount" name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
+            checked="true" help="Count k-mers past 255 occurrences" />
     </xml>
     <token name="@HELP_FOOTER@"><![CDATA[
 (from the khmer project: http://khmer.readthedocs.org/en/v2.0/ )]]></token>
--- a/normalize-by-median.xml	Fri Sep 07 11:01:41 2018 -0400
+++ b/normalize-by-median.xml	Wed Dec 18 16:01:09 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0">
+<tool id="khmer_normalize_by_median" name="khmer: Normalize By Median" version="@WRAPPER_VERSION@@TOOL_VERSION@">
     <description>Filter reads using digital normalization via k-mer abundances</description>
     <macros>
         <token name="@BINARY@">normalize-by-median.py</token>
@@ -8,12 +8,12 @@
     <expand macro="stdio" />
     <expand macro="version" />
     <command><![CDATA[
-set -xu &&
-#for $num, $input in enumerate($inputs)
-    ln -s ${input} sequence-${num} &&
-#end for
+#import re
+set -u &&
 mkdir output &&
-cd output &&
+
+@LINK_SEQUENCES@
+cd output/ &&
 normalize-by-median.py
 ${paired_switch}
 ${force_single_switch}
@@ -29,25 +29,26 @@
     --loadgraph=${countgraph_to_load}
 #end if
 --report=${report}
-../sequence-*
+$gzip
+@USE_SEQUENCES@
 ]]>
     </command>
     <inputs>
         <expand macro="input_sequences_filenames" />
-        <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
+        <param argument="--paired" name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
             label="Require all sequences be properly paired?"
-            help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
-        <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
+            help="The tool will fail if given improperly paired reads and this option is selected." />
+        <param argument="--force_single" name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
             label="Ignore all pairing information?"
-            help="(--paired) By default this tool process reads in a pair-aware manner. This option disables that behavior." />
-        <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
+            help="By default this tool processes reads in a pair-aware manner. This option disables that behavior." />
+        <param argument="--unpaired-reads" name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
             label="Extra unpaired reads"
-            help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
-        <param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
+            help="If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
+        <param argument="--loadgraph" name="countgraph_to_load" type="data" format="oxlicg" optional="true"
             label="Optional k-mer countgraph"
-            help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
-        <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
-        <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
+            help="The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
+        <param argument="--savegraph" name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="" />
+        <param argument="--cutoff" name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="" />
         <expand macro="tableinputs" />
     </inputs>
     <outputs>
@@ -55,19 +56,17 @@
             <filter>save_countgraph == True</filter>
         </data>
         <data name="report" format="txt" label="${tool.name} report" />
-        <collection name="sequences" type="list">
-            <discover_datasets pattern="__name__" directory="output" />
-        </collection>
+        <expand macro="output_sequences" extension="keep"/>
     </outputs>
     <tests>
         <test>
-            <param name="inputs" value="test-abund-read-2.fa"/>
+            <param name="inputs" value="test-abund-read-2.fa" ftype="fasta"/>
             <param name="type" value="specific" />
             <param name="cutoff" value="1" />
             <param name="ksize" value="17" />
             <output name="report" file="normalize-by-median.report.txt" />
             <output_collection name="sequences" type="list">
-                <element name="sequence-0.keep">
+                <element name="test-abund-read-2.fa" ftype="fasta">
                     <assert_contents>
                         <has_text text="GGTTGACGGGGCTCAGGGGG" />
                     </assert_contents>
@@ -75,13 +74,13 @@
             </output_collection>
         </test>
         <test>
-            <param name="inputs" value="test-abund-read-2.fa" />
+            <param name="inputs" value="test-abund-read-2.fa.gz"  ftype="fasta.gz"/>
             <param name="type" value="specific" />
             <param name="cutoff" value="2" />
             <param name="ksize" value="17" />
             <output name="report" file="normalize-by-median.c2.report.txt" />
             <output_collection name="sequences" type="list">
-                <element name="sequence-0.keep">
+                <element name="test-abund-read-2.fa.gz" ftype="fasta.gz">
                     <assert_contents>
                         <has_text text="GGTTGACGGGGCTCAGGGGG" />
                         <has_text text="GGTTGACGGGGCTCAGGG" />
@@ -90,14 +89,14 @@
             </output_collection>
         </test>
         <test>
-            <param name="inputs" value="test-abund-read-paired.fa" />
+            <param name="inputs" value="test-abund-read-paired.fa" ftype="fasta"/>
             <param name="type" value="specific" />
             <param name="cutoff" value="1" />
             <param name="ksize" value="17" />
             <param name="paired" value="true" />
             <output name="report" file="normalize-by-median.paired.report.txt" />
             <output_collection name="sequences" type="list">
-                <element name="sequence-0.keep">
+                <element name="test-abund-read-paired.fa" ftype="fasta">
                     <assert_contents>
                         <has_text text="GGTTGACGGGGCTCAGGGGG" />
                         <has_text text="GGTTGACGGGGCTCAGGG" />
Binary file test-data/test-abund-read-2.fa.gz has changed