Mercurial > repos > iuc > purge_dups
view purge_dups.xml @ 2:17b378303f2d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/purge_dups commit 3199d3f9c401663ffe45a679c3918489d4f6d149"
author | iuc |
---|---|
date | Tue, 27 Apr 2021 20:48:51 +0000 |
parents | 29151e779524 |
children | 76d4cbefff85 |
line wrap: on
line source
<tool id="purge_dups" name="Purge overlaps" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <description>and haplotigs in an assembly based on read depth (purge_dups)</description>
    <!--
        Galaxy wrapper exposing six programs of the purge_dups package
        (purge_dups, split_fa, pbcstat, ngscstat, calcuts, get_seqs) behind a
        single "function_select" conditional. Each branch of the command
        template below renders exactly one program invocation, and each output
        is filtered on the selected function.
    -->
    <macros>
        <token name="@TOOL_VERSION@">1.2.5</token>
        <token name="@VERSION_SUFFIX@">2</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">purge_dups</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Optional numeric options are tested with str($param) != "" rather
        ## than plain truthiness, so that an explicitly entered 0 is still
        ## passed to the program instead of being silently dropped.

        #if $function_select.functions == "purge_dups":
            ## Stage every input as <index>.gz in the working directory:
            ## uncompressed PAF is gzipped, anything else is symlinked as-is.
            #for $i, $file in enumerate($function_select.input):
                #if $file.is_of_type("paf"):
                    gzip -c '${file}' > '${i}.gz' &&
                #else
                    ln -s '${file}' '${i}.gz' &&
                #end if
            #end for
            purge_dups
            #if $function_select.coverage:
                -c '$function_select.coverage'
            #end if
            #if $function_select.cutoffs:
                -T '$function_select.cutoffs'
            #end if
            #if str($function_select.min_bad) != "":
                -f $function_select.min_bad
            #end if
            #if str($function_select.min_align) != "":
                -a $function_select.min_align
            #end if
            #if str($function_select.min_match) != "":
                -b $function_select.min_match
            #end if
            #if str($function_select.min_chain) != "":
                -m $function_select.min_chain
            #end if
            #if str($function_select.max_gap) != "":
                -M $function_select.max_gap
            #end if
            #if $function_select.double_chain.chaining_rounds == "two":
                -2
                #if str($function_select.double_chain.max_gap_2) != "":
                    -G $function_select.double_chain.max_gap_2
                #end if
            #end if
            #if str($function_select.min_chain_score) != "":
                -l $function_select.min_chain_score
            #end if
            #if str($function_select.max_extend) != "":
                -E $function_select.max_extend
            #end if
            ## Positional arguments: the staged files, in input order.
            #for $i, $file in enumerate($function_select.input):
                '${i}.gz'
            #end for
            > dups.bed
            2> purge_dups.log

        #else if $function_select.functions == "split_fa":
            split_fa
            ## The boolean already renders as "-n" (truevalue) or "" (falsevalue),
            ## so it is emitted once on its own; prefixing it with a literal -n
            ## would put the flag on the command line twice.
            $function_select.split
            '$function_select.input'
            > split.fasta

        #else if $function_select.functions == "pbcstat":
            ## Same staging scheme as in the purge_dups branch.
            #for $i, $file in enumerate($function_select.input):
                #if $file.is_of_type("paf"):
                    gzip -c '${file}' > '${i}.gz' &&
                #else
                    ln -s '${file}' '${i}.gz' &&
                #end if
            #end for
            pbcstat
            #if str($function_select.max_cov) != "":
                -M $function_select.max_cov
            #end if
            #if str($function_select.min_map_ratio) != "":
                -f $function_select.min_map_ratio
            #end if
            #if str($function_select.min_map_qual) != "":
                -q $function_select.min_map_qual
            #end if
            #if str($function_select.flank) != "":
                -l $function_select.flank
            #end if
            $function_select.primary_alignments
            #for $i, $file in enumerate($function_select.input):
                '${i}.gz'
            #end for

        #else if $function_select.functions == "ngscstat":
            ngscstat
            #if str($function_select.min_align_qual) != "":
                -q $function_select.min_align_qual
            #end if
            ## Disabled together with the commented-out max_depth parameter below.
            ## #if $function_select.max_depth:
            ##     -M $function_select.max_depth
            ## #end if
            #if str($function_select.max_insert) != "":
                -L $function_select.max_insert
            #end if
            '$function_select.input'

        #else if $function_select.functions == "calcuts":
            calcuts
            #if str($function_select.min_depth) != "":
                -f $function_select.min_depth
            #end if
            #if str($function_select.low_depth) != "":
                -l $function_select.low_depth
            #end if
            #if str($function_select.transition) != "":
                -m $function_select.transition
            #end if
            #if str($function_select.upper_depth) != "":
                -u $function_select.upper_depth
            #end if
            ## The select value carries the flag itself ("-d 0" or "-d 1").
            $function_select.ploidy
            '$function_select.input'
            > cutoffs.tsv
            2>calcuts.log

        #else if $function_select.functions == "get_seqs":
            get_seqs
            ## Boolean switches render as their flag or as an empty string.
            $function_select.coverage
            $function_select.haplotigs
            $function_select.end_trim
            $function_select.split
            #if str($function_select.length) != "":
                -l $function_select.length
            #end if
            #if str($function_select.min_ratio) != "":
                -m $function_select.min_ratio
            #end if
            #if str($function_select.min_gap) != "":
                -g $function_select.min_gap
            #end if
            '$function_select.bed_input'
            '$function_select.fasta_input'
        #end if
    ]]></command>
    <inputs>
        <conditional name="function_select">
            <param type="select" name="functions" label="Select the purge_dups function">
                <option value="purge_dups">purge haplotigs and overlaps for an assembly</option>
                <option value="split_fa">split FASTA file by 'N's</option>
                <option value="pbcstat">create read depth histogram and base-level read depth for pacbio data</option>
                <option value="ngscstat">create read depth histogram and base-level read depth for illumina data</option>
                <option value="calcuts">calculate coverage cutoffs</option>
                <option value="get_seqs">obtain sequences after purging</option>
            </param>
            <when value="purge_dups">
                <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/>
                <param name="coverage" type="data" format="tabular" optional="true" argument="-c" label="Base-level coverage file"/>
                <param name="cutoffs" type="data" format="tabular" optional="true" argument="-T" label="Cutoffs file"/>
                <param name="min_bad" type="float" min="0" max="1" argument="-f" optional="true" label="Minimum fraction of haploid/diploid/bad/repetitive bases in a sequence" help="Default = 0.8"/>
                <param name="min_align" type="integer" argument="-a" optional="true" label="Minimum alignment score"/>
                <param name="min_match" type="integer" argument="-b" optional="true" label="Minimum max match score"/>
                <param name="min_chain" type="integer" argument="-m" optional="true" label="Minimum matching bases for chaining"/>
                <param name="max_gap" type="integer" argument="-M" optional="true" label="Maximum gap size for chaining"/>
                <!-- Selecting two rounds adds -2 and unlocks the second-round gap size. -->
                <conditional name="double_chain">
                    <param type="select" name="chaining_rounds" label="Rounds of chaining">
                        <option value="one">1 round</option>
                        <option value="two">2 rounds</option>
                    </param>
                    <when value="two">
                        <param name="max_gap_2" type="integer" argument="-G" optional="true" label="Maximum gap size for second round of chaining"/>
                    </when>
                    <when value="one"/>
                </conditional>
                <param name="min_chain_score" type="integer" argument="-l" optional="true" label="Minimum chaining score for a match"/>
                <param name="max_extend" type="integer" argument="-E" optional="true" label="Maximum extension for contig ends"/>
            </when>
            <when value="split_fa">
                <param name="input" type="data" format="fasta" label="FASTA input file"/>
                <!-- NOTE(review): the previous label ("Base-level coverage file") was a
                     copy of another parameter's label. The exact meaning of -n is not
                     visible in this wrapper; replace the label below with the upstream
                     description of the option once confirmed. -->
                <param name="split" type="boolean" truevalue="-n" falsevalue="" checked="false" label="Enable the -n option of split_fa"/>
            </when>
            <when value="pbcstat">
                <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/>
                <param name="max_cov" type="integer" argument="-M" optional="true" label="Maximum coverage"/>
                <param name="min_map_ratio" type="float" min="0" max="1" value="0" argument="-f" label="Minimum mapping length ratio"/>
                <param name="min_map_qual" type="integer" argument="-q" optional="true" label="Minimum mapping quality"/>
                <param name="flank" type="integer" argument="-l" optional="true" label="Flanking space"/>
                <param name="primary_alignments" type="boolean" argument="-p" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments"/>
            </when>
            <when value="ngscstat">
                <param name="input" type="data" format="bam" label="BAM input file"/>
                <param name="min_align_qual" type="integer" argument="-q" optional="true" label="Minimum alignment quality"/>
                <!-- Param exists in help text, but isn't actually part of the code. Maybe in the next release? -->
                <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> -->
                <param name="max_insert" type="integer" argument="-L" optional="true" label="Maximum insert size"/>
            </when>
            <when value="calcuts">
                <param name="input" type="data" format="tabular" label="STAT input file"/>
                <param name="min_depth" type="float" min="0" max="1" argument="-f" optional="true" label="Minimum depth count fraction to maximum depth count" help="Default = 0.1"/>
                <param name="low_depth" type="integer" argument="-l" optional="true" label="Lower bound for read depth"/>
                <param name="transition" type="integer" argument="-m" optional="true" label="Transition between haploid and diploid"/>
                <param name="upper_depth" type="integer" argument="-u" optional="true" label="Upper bound for read depth"/>
                <!-- Option values include the flag so the command can emit them verbatim. -->
                <param name="ploidy" argument="-d" type="select" label="Ploidy">
                    <option value="-d 0" selected="true">Diploid [0]</option>
                    <option value="-d 1">Haploid [1]</option>
                </param>
            </when>
            <when value="get_seqs">
                <param name="fasta_input" type="data" format="fasta" label="Fasta input file"/>
                <param name="bed_input" type="data" format="bed" label="Bed input file"/>
                <param name="coverage" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Keep high coverage contigs in the primary contig set"/>
                <param name="haplotigs" type="boolean" argument="-a" truevalue="-a" falsevalue="" checked="false" label="Do not add prefix to haplotigs"/>
                <param name="length" type="integer" argument="-l" optional="true" label="Minimum primary contig length" help="Default: 1000"/>
                <param name="min_ratio" type="float" min="0" max="1" argument="-m" optional="true" label="Minimum ratio of remaining primary contig length to the original contig length"/>
                <param name="end_trim" type="boolean" argument="-e" truevalue="-e" falsevalue="" checked="true" label="Trim end sequences" help="Only remove sequences at end of haplotigs. If you also want to remove the duplications in the middle, set to false, however that may delete false positive duplications."/>
                <param name="split" type="boolean" argument="-s" truevalue="-s" falsevalue="" checked="false" label="Split contigs"/>
                <param name="min_gap" type="integer" argument="-g" optional="true" help="default=10k" label="Minimum gap size between duplications"/>
            </when>
        </conditional>
    </inputs>
    <!-- Every output is collected from a fixed file name in the job working
         directory and is only created for the function that was selected. -->
    <outputs>
        <!-- Get Seqs -->
        <data name="get_seqs_hap" format="fasta" from_work_dir="hap.fa" label="${tool.name} on ${on_string}: get seqs haplotype fasta">
            <filter>function_select['functions'] == 'get_seqs'</filter>
        </data>
        <data name="get_seqs_purged" format="fasta" from_work_dir="purged.fa" label="${tool.name} on ${on_string}: get seqs purged fasta">
            <filter>function_select['functions'] == 'get_seqs'</filter>
        </data>
        <!-- Split FA -->
        <data name="split_fasta" format="fasta" from_work_dir="split.fasta" label="${tool.name} on ${on_string}: split fasta">
            <filter>function_select['functions'] == 'split_fa'</filter>
        </data>
        <!-- Ngscstat -->
        <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: ngscstat base coverage file">
            <filter>function_select['functions'] == 'ngscstat'</filter>
        </data>
        <data name="ngscstat_stat" format="tabular" from_work_dir="TX.stat" label="${tool.name} on ${on_string}: ngscstat stat file">
            <filter>function_select['functions'] == 'ngscstat'</filter>
        </data>
        <!-- Pbcstat -->
        <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov" label="${tool.name} on ${on_string}: pbcstat base coverage file">
            <filter>function_select['functions'] == 'pbcstat'</filter>
        </data>
        <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: pbcstat base wig file">
            <filter>function_select['functions'] == 'pbcstat'</filter>
        </data>
        <data name="pbcstat_stat" format="tabular" from_work_dir="PB.stat" label="${tool.name} on ${on_string}: stat file">
            <filter>function_select['functions'] == 'pbcstat'</filter>
        </data>
        <!-- Calcuts -->
        <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log file">
            <filter>function_select['functions'] == 'calcuts'</filter>
        </data>
        <data name="calcuts_tab" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff file">
            <filter>function_select['functions'] == 'calcuts'</filter>
        </data>
        <!-- Purge dups -->
        <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log file">
            <filter>function_select['functions'] == 'purge_dups'</filter>
        </data>
        <data name="purge_dups_bed" format="bed" from_work_dir="dups.bed" label="${tool.name} on ${on_string}: purge_dups bed file">
            <filter>function_select['functions'] == 'purge_dups'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Purge dups -->
        <test expect_num_outputs="2">
            <conditional name="function_select">
                <param name="functions" value="purge_dups"/>
                <param name="input" value="test.paf"/>
                <param name="coverage" value="test.cov" ftype="tabular"/>
                <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/>
                <param name="min_bad" value="0.01"/>
                <param name="min_align" value="10"/>
                <param name="min_match" value="100"/>
                <param name="min_chain" value="1"/>
                <param name="max_gap" value="1000"/>
                <conditional name="double_chain">
                    <param name="chaining_rounds" value="two"/>
                    <param name="max_gap_2" value="1001"/>
                </conditional>
                <param name="min_chain_score" value="1"/>
                <param name="max_extend" value="100"/>
            </conditional>
            <output name="purge_dups_bed" value="purge_dups_out.bed"/>
        </test>
        <!-- Purge dups gzip -->
        <test expect_num_outputs="2">
            <conditional name="function_select">
                <param name="functions" value="purge_dups"/>
                <param name="input" value="test.paf.gz" ftype="paf.gz"/>
                <param name="coverage" value="test.cov" ftype="tabular"/>
                <param name="cutoffs" value="cutoffs.tsv" ftype="tabular"/>
                <param name="min_bad" value="0.01"/>
                <param name="min_align" value="10"/>
                <param name="min_match" value="100"/>
                <param name="min_chain" value="1"/>
                <param name="max_gap" value="1000"/>
                <conditional name="double_chain">
                    <param name="chaining_rounds" value="two"/>
                    <param name="max_gap_2" value="1001"/>
                </conditional>
                <param name="min_chain_score" value="1"/>
                <param name="max_extend" value="100"/>
            </conditional>
            <output name="purge_dups_bed" value="purge_dups_out.bed"/>
        </test>
        <!-- Purge dups multiple input -->
        <test expect_num_outputs="2">
            <conditional name="function_select">
                <param name="functions" value="purge_dups"/>
                <param name="input" value="test.paf,test2.paf.gz"/>
            </conditional>
            <output name="purge_dups_bed" value="purge_dups_out_2.bed"/>
        </test>
        <!-- Split fa -->
        <test expect_num_outputs="1">
            <conditional name="function_select">
                <param name="functions" value="split_fa"/>
                <param name="input" value="test.fasta"/>
                <param name="split" value="-n"/>
            </conditional>
            <output name="split_fasta" value="split_out.fasta"/>
        </test>
        <!-- pbcstat -->
        <test expect_num_outputs="3">
            <conditional name="function_select">
                <param name="functions" value="pbcstat"/>
                <param name="input" value="test.paf"/>
                <param name="max_cov" value="1000"/>
                <param name="min_map_ratio" value="0.01"/>
                <param name="min_map_qual" value="1"/>
                <param name="flank" value="1"/>
                <param name="primary_alignments" value="-p"/>
            </conditional>
            <output name="pbcstat_cov" value="out.cov"/>
            <output name="pbcstat_wig" value="out.wig"/>
        </test>
        <!-- pbcstat gzip -->
        <test expect_num_outputs="3">
            <conditional name="function_select">
                <param name="functions" value="pbcstat"/>
                <param name="input" value="test.paf.gz" ftype="paf.gz"/>
                <param name="max_cov" value="1000"/>
                <param name="min_map_ratio" value="0.01"/>
                <param name="min_map_qual" value="1"/>
                <param name="flank" value="1"/>
                <param name="primary_alignments" value="-p"/>
            </conditional>
            <output name="pbcstat_cov" value="out.cov"/>
            <output name="pbcstat_wig" value="out.wig"/>
        </test>
        <!-- Pbcstat multiple input -->
        <test expect_num_outputs="3">
            <conditional name="function_select">
                <param name="functions" value="pbcstat"/>
                <param name="input" value="test.paf,test2.paf.gz"/>
            </conditional>
            <output name="pbcstat_cov" value="out2.cov"/>
            <output name="pbcstat_wig" value="out2.wig"/>
        </test>
        <!-- ngscstat -->
        <test expect_num_outputs="2">
            <conditional name="function_select">
                <param name="functions" value="ngscstat"/>
                <param name="input" value="test.bam"/>
                <param name="min_align_qual" value="10"/>
                <param name="max_insert" value="100"/>
            </conditional>
            <output name="ngscstat_cov" value="ngsc_out.cov"/>
        </test>
        <!-- Calcuts -->
        <test expect_num_outputs="2">
            <conditional name="function_select">
                <param name="functions" value="calcuts"/>
                <param name="input" value="test.stat"/>
                <param name="min_depth" value="0.01"/>
                <param name="low_depth" value="1"/>
                <param name="transition" value="1"/>
                <param name="upper_depth" value="100"/>
                <param name="ploidy" value="-d 0"/>
            </conditional>
            <output name="calcuts_tab" value="calcuts_out.tsv"/>
        </test>
        <!-- Get seqs -->
        <test expect_num_outputs="2">
            <conditional name="function_select">
                <param name="functions" value="get_seqs"/>
                <param name="fasta_input" value="split_out.fasta"/>
                <param name="bed_input" value="dups.bed"/>
                <param name="coverage" value="-c"/>
                <param name="length" value="10"/>
                <param name="haplotigs" value="-a"/>
                <param name="min_ratio" value=".01"/>
                <param name="end_trim" value="-e"/>
                <param name="split" value="-s"/>
                <param name="min_gap" value="100000"/>
            </conditional>
            <output name="get_seqs_purged" value="purged_out.fa"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

The purge_dups tools are designed to remove haplotigs and contig overlaps in a de novo assembly based on read depth.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btaa025</citation>
    </citations>
</tool>