Mercurial > repos > iuc > khmer_normalize_by_median
changeset 7:557cc16931f4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit 7de685f4763d988a5a9abce4a9c2b4714daaf165"
author | iuc |
---|---|
date | Wed, 18 Dec 2019 16:01:09 -0500 |
parents | bfd859f04a89 |
children | e84073b420a8 |
files | macros.xml normalize-by-median.xml test-data/test-abund-read-2.fa.gz |
diffstat | 3 files changed, 60 insertions(+), 43 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Fri Sep 07 11:01:41 2018 -0400 +++ b/macros.xml Wed Dec 18 16:01:09 2019 -0500 @@ -1,8 +1,10 @@ <macros> - <token name="@WRAPPER_VERSION@">3.0.0a1</token> + <token name="@WRAPPER_VERSION@">3.0.0a3</token> + <token name="@TOOL_VERSION@">+galaxy1</token> <xml name="requirements"> <requirements> <requirement type="package" version="@WRAPPER_VERSION@">khmer</requirement> + <yield/> </requirements> </xml> <xml name="version"> @@ -35,7 +37,7 @@ <option value="specific">Show</option> </param> <when value="simple"> - <param name="tablesize" type="select" label="Sample Type" display="radio"> + <param argument="" name="tablesize" type="select" label="Sample Type" display="radio"> <option value="1e9" selected="true">Microbial Genome</option> <option value="2e9">Animal Transcriptome</option> <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option> @@ -43,15 +45,32 @@ </param> </when> <when value="specific"> - <param name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" /> - <param name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" /> - <param name="tablesize_specific" type="text" value="1000000.0" + <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" /> + <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" /> + <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0" label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" /> </when> </conditional> </xml> + <token name="@LINK_SEQUENCES@"> +<![CDATA[ +#import re +mkdir input/ && +#set gzip="" +#for $num, $input in enumerate($inputs) + ln -s '${input}' 'input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' && + #if str($input).endswith(".gz"): + #set gzip="--gzip" + #end if +#end for]]></token> + <token name="@USE_SEQUENCES@"> +<![CDATA[ +#for $num, $input in enumerate($inputs) + '../input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' +#end for]]></token> + <xml name="input_sequences_filenames"> - <param name="inputs" multiple="true" type="data" format="fasta,fastq" + <param name="inputs" multiple="true" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Sequences in FASTA or FASTQ format" help="Put in order of precedence such as longest reads first." /> </xml> @@ -66,27 +85,26 @@ </xml> <xml name="abundance-histogram-output"> <data name="output_histogram_filename" format="txt" - label="${tool.name} k-mer abundance histogram. The + label="${tool.name} on ${on_string}: k-mer abundance histogram. The columns are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers." /> </xml> - <xml name="output_sequences"> - <data name="output" format_source="inputs" - label="${tool.name} processed nucleotide sequence file"> - <discover_datasets pattern="__designation_and_ext__" directory="output" visible="true"/> - </data> + <xml name="output_sequences" token_extension=""> + <collection name="sequences" type="list"> + <discover_datasets pattern="(?P<name>.*)\.(?P<ext>fast[aq](\.gz)?)\.@EXTENSION@" directory="output" /> + </collection> </xml> <xml name="output_sequences_single"> <data name="output" format_source="input_sequence_filename" - label="${tool.name} processed nucleotide sequence file" /> + label="${tool.name} on ${on_string}: processed nucleotide sequence file" /> </xml> <xml name="input_zero"> - <param name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true" - help="Output zero count bins (--no-zero)" /> + <param argument="--no-zero" name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true" + help="Output zero count bins" /> </xml> <xml name="input_bigcount"> - <param name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount" - checked="true" help="Count k-mers past 255 occurences (--no-bigcount)" /> + <param argument="--no-bigcount" name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount" + checked="true" help="Count k-mers past 255 occurences" /> </xml> <token name="@HELP_FOOTER@"><![CDATA[ (from the khmer project: http://khmer.readthedocs.org/en/v2.0/ )]]></token>
--- a/normalize-by-median.xml Fri Sep 07 11:01:41 2018 -0400 +++ b/normalize-by-median.xml Wed Dec 18 16:01:09 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="khmer_normalize_by_median" name="Normalize By Median" version="@WRAPPER_VERSION@.0"> +<tool id="khmer_normalize_by_median" name="khmer: Normalize By Median" version="@WRAPPER_VERSION@@TOOL_VERSION@"> <description>Filter reads using digital normalization via k-mer abundances</description> <macros> <token name="@BINARY@">normalize-by-median.py</token> @@ -8,12 +8,12 @@ <expand macro="stdio" /> <expand macro="version" /> <command><![CDATA[ -set -xu && -#for $num, $input in enumerate($inputs) - ln -s ${input} sequence-${num} && -#end for +#import re +set -u && mkdir output && -cd output && + +@LINK_SEQUENCES@ +cd output/ && normalize-by-median.py ${paired_switch} ${force_single_switch} @@ -29,25 +29,26 @@ --loadgraph=${countgraph_to_load} #end if --report=${report} -../sequence-* +$gzip +@USE_SEQUENCES@ ]]> </command> <inputs> <expand macro="input_sequences_filenames" /> - <param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue="" + <param argument="--paired" name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue="" label="Require all sequences be properly paired?" - help="(--paired) The tool will fail if given improperly paired reads and this option is selected." /> - <param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue="" + help="The tool will fail if given improperly paired reads and this option is selected." /> + <param argument="--force_single" name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue="" label="Ignore all pairing information?" - help="(--paired) By default this tool process reads in a pair-aware manner. This option disables that behavior." /> - <param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true" + help="By default this tool process reads in a pair-aware manner. This option disables that behavior." /> + <param argument="--unpaired-reads" name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true" label="Extra unpaired reads" - help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." /> - <param name="countgraph_to_load" type="data" format="oxlicg" optional="true" + help="If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." /> + <param argument="--loadgraph" name="countgraph_to_load" type="data" format="oxlicg" optional="true" label="Optional k-mer countgraph" - help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." /> - <param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" /> - <param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" /> + help="The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." /> + <param argument="--savegraph" name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="" /> + <param argument="--cutoff" name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="" /> <expand macro="tableinputs" /> </inputs> <outputs> @@ -55,19 +56,17 @@ <filter>save_countgraph == True</filter> </data> <data name="report" format="txt" label="${tool.name} report" /> - <collection name="sequences" type="list"> - <discover_datasets pattern="__name__" directory="output" /> - </collection> + <expand macro="output_sequences" extension="keep"/> </outputs> <tests> <test> - <param name="inputs" value="test-abund-read-2.fa"/> + <param name="inputs" value="test-abund-read-2.fa" ftype="fasta"/> <param name="type" value="specific" /> <param name="cutoff" value="1" /> <param name="ksize" value="17" /> <output name="report" file="normalize-by-median.report.txt" /> <output_collection name="sequences" type="list"> - <element name="sequence-0.keep"> + <element name="test-abund-read-2.fa" ftype="fasta"> <assert_contents> <has_text text="GGTTGACGGGGCTCAGGGGG" /> </assert_contents> @@ -75,13 +74,13 @@ </output_collection> </test> <test> - <param name="inputs" value="test-abund-read-2.fa" /> + <param name="inputs" value="test-abund-read-2.fa.gz" ftype="fasta.gz"/> <param name="type" value="specific" /> <param name="cutoff" value="2" /> <param name="ksize" value="17" /> <output name="report" file="normalize-by-median.c2.report.txt" /> <output_collection name="sequences" type="list"> - <element name="sequence-0.keep"> + <element name="test-abund-read-2.fa.gz" ftype="fasta.gz"> <assert_contents> <has_text text="GGTTGACGGGGCTCAGGGGG" /> <has_text text="GGTTGACGGGGCTCAGGG" /> @@ -90,14 +89,14 @@ </output_collection> </test> <test> - <param name="inputs" value="test-abund-read-paired.fa" /> + <param name="inputs" value="test-abund-read-paired.fa" ftype="fasta"/> <param name="type" value="specific" /> <param name="cutoff" value="1" /> <param name="ksize" value="17" /> <param name="paired" value="true" /> <output name="report" file="normalize-by-median.paired.report.txt" /> <output_collection name="sequences" type="list"> - <element name="sequence-0.keep"> + <element name="test-abund-read-paired.fa" ftype="fasta"> <assert_contents> <has_text text="GGTTGACGGGGCTCAGGGGG" /> <has_text text="GGTTGACGGGGCTCAGGG" />