Mercurial > repos > iuc > khmer_normalize_by_median
view normalize-by-median.xml @ 10:4d23ab83ea29 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit 238d0992c63de53623c4fc05eec8bd8d67001997
author | iuc |
---|---|
date | Thu, 03 Oct 2024 13:46:42 +0000 |
parents | b1fe2ef3d244 |
children |
line wrap: on
line source
<tool id="khmer_normalize_by_median" name="khmer: Normalize By Median" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Filter reads using digital normalization via k-mer abundances</description>
    <!--
        Galaxy wrapper around khmer's normalize-by-median.py script.
        The @...@ tokens and the macros expanded below are not defined in this
        file (apart from @BINARY@); they are expected to come from the imported
        macros.xml.
    -->
    <macros>
        <token name="@BINARY@">normalize-by-median.py</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!--
        Cheetah template for the job command line. Directives (#import, #if,
        #end if) and the macro tokens are kept on their own lines. Dataset
        paths are wrapped in single quotes before being handed to the shell.
    -->
    <command><![CDATA[
#import re
set -u &&
mkdir output &&
@LINK_SEQUENCES@
cd output/ &&
normalize-by-median.py
${paired_switch}
${force_single_switch}
@TABLEPARAMS@
--cutoff=${cutoff}
## The optional arguments below are only emitted when the matching input is set.
#if $unpaired_reads_filename
    --unpaired-reads='${unpaired_reads_filename}'
#end if
#if $save_countgraph
    --savegraph='${countgraph}'
#end if
#if $countgraph_to_load
    --loadgraph='${countgraph_to_load}'
#end if
--report='${report}'
$gzip
@USE_SEQUENCES@
]]>
    </command>
    <inputs>
        <expand macro="input_sequences_filenames" />
        <!-- The two boolean switches expand to the literal flag (or nothing) in the command above. -->
        <param argument="--paired"
               name="paired_switch"
               type="boolean"
               checked="false"
               truevalue="--paired"
               falsevalue=""
               label="Require all sequences be properly paired?"
               help="The tool will fail if given improperly paired reads and this option is selected." />
        <param argument="--force_single"
               name="force_single_switch"
               type="boolean"
               checked="false"
               truevalue="--force_single"
               falsevalue=""
               label="Ignore all pairing information?"
               help="By default this tool processes reads in a pair-aware manner. This option disables that behavior." />
        <param argument="--unpaired-reads"
               name="unpaired_reads_filename"
               type="data"
               format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina"
               optional="true"
               label="Extra unpaired reads"
               help="If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
        <param argument="--loadgraph"
               name="countgraph_to_load"
               type="data"
               format="oxlicg"
               optional="true"
               label="Optional k-mer countgraph"
               help="The input file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
        <!-- Controls both the savegraph argument in the command and the filter on the countgraph output. -->
        <param argument="--savegraph"
               name="save_countgraph"
               type="boolean"
               checked="false"
               label="Save the k-mer countgraph(s) in a file"
               help="" />
        <param argument="--cutoff" type="integer" min="1" value="20" label="Cutoff" help="" />
        <expand macro="tableinputs" />
    </inputs>
    <outputs>
        <!-- Only created when the user asked for the countgraph to be saved. -->
        <data name="countgraph" format="oxlicg" label="${tool.name} on ${on_string}: k-mer countgraph">
            <filter>save_countgraph == True</filter>
        </data>
        <data name="report" format="csv" label="${tool.name} on ${on_string}: report" />
        <expand macro="output_sequences" extension="keep"/>
    </outputs>
    <tests>
        <!-- Single uncompressed FASTA input, cutoff 1. -->
        <test expect_num_outputs="2">
            <param name="inputs" value="test-abund-read-2.fa" ftype="fasta"/>
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="test-abund-read-2.fa" ftype="fasta">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Gzipped FASTA input, cutoff 2. -->
        <test expect_num_outputs="2">
            <param name="inputs" value="test-abund-read-2.fa.gz" ftype="fasta.gz"/>
            <param name="type" value="specific" />
            <param name="cutoff" value="2" />
            <param name="ksize" value="17" />
            <output name="report" file="normalize-by-median.c2.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="test-abund-read-2.fa.gz" ftype="fasta.gz">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <!-- Paired input with the pairing switch enabled and the countgraph saved (third output). -->
        <test expect_num_outputs="3">
            <param name="inputs" value="test-abund-read-paired.fa" ftype="fasta"/>
            <param name="type" value="specific" />
            <param name="cutoff" value="1" />
            <param name="ksize" value="17" />
            <!-- Must use the declared parameter name (paired_switch), not the argument name. -->
            <param name="paired_switch" value="true" />
            <param name="save_countgraph" value="true"/>
            <output name="report" file="normalize-by-median.paired.report.txt" />
            <output_collection name="sequences" type="list">
                <element name="test-abund-read-paired.fa" ftype="fasta">
                    <assert_contents>
                        <has_text text="GGTTGACGGGGCTCAGGGGG" />
                        <has_text text="GGTTGACGGGGCTCAGGG" />
                    </assert_contents>
                </element>
            </output_collection>
            <output name="countgraph">
                <assert_contents>
                    <has_size size="1k"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Do digital normalization (remove mostly redundant sequences)

Discard sequences based on whether or not their median k-mer abundance lies
above a specified cutoff. Kept sequences will be placed in <fileN>.keep.

By default, paired-end reads will be considered together; if either read will
be kept, then both will be kept. (This keeps both reads from a fragment, and
helps with retention of repeats.) Unpaired reads are treated individually.

If `--paired` is set then proper pairing is required and the tool will exit on
unpaired reads, although `--unpaired-reads` can be used to supply a file of
orphan reads to be read after the paired reads.

`--force_single` will ignore all pairing information and treat reads
individually.

With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
file after all sequences have been processed. `--loadgraph` will load the
specified k-mer countgraph before processing the specified files. Note that
the countgraph is in the same format as those produced by
`load-into-counting.py` and consumed by `abundance-dist.py`.

@HELP_FOOTER@
]]>
    </help>
    <citations>
        <expand macro="software-citation" />
        <expand macro="diginorm-citation" />
    </citations>
</tool>