Mercurial > repos > iuc > purge_dups
diff purge_dups.xml @ 3:76d4cbefff85 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/purge_dups commit 5d56aa02b0f905507e1d98a2d74f0629b7591cd3"
author | iuc |
---|---|
date | Mon, 14 Jun 2021 18:01:05 +0000 |
parents | 17b378303f2d |
children | a315c25dc813 |
line wrap: on
line diff
--- a/purge_dups.xml Tue Apr 27 20:48:51 2021 +0000 +++ b/purge_dups.xml Mon Jun 14 18:01:05 2021 +0000 @@ -2,13 +2,72 @@ <description>and haplotigs in an assembly based on read depth (purge_dups)</description> <macros> <token name="@TOOL_VERSION@">1.2.5</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> + <xml name="trimmers"> + <section name="section_hist" title="Histogram plot options" > + <!--<param name="cutoffs_his" type="data" optional="true" format="txt" label="Read depth cutoffs file" />--> + <param argument="--ymin" type="integer" optional="true" min="0" label="Specify a minimum for the Y axis"/> + <param argument="--ymax" type="integer" optional="true" label="Specify a maximum for the Y axis"/> + <param argument="--xmin" type="integer" optional="true" min="0" label="Specify a minimum for the X axis"/> + <param argument="--xmax" type="integer" optional="true" label="Specify a maximum for the X axis"/> + <param argument="--title" type="text" value="Read depth histogram plot" label="Histogram title"/> + </section> + </xml> + <token name="@HIST_PLOT@"><![CDATA[ + python '$__tool_directory__/hist_plot.py' + --cutoffs cutoffs.tsv + #if $function_select.section_hist.ymin + --ymin $function_select.section_hist.ymin + #end if + #if $function_select.section_hist.ymax + --ymax $function_select.section_hist.ymax + #end if + #if $function_select.section_hist.xmin + --xmin $function_select.section_hist.xmin + #end if + #if $function_select.section_hist.xmax + --xmax $function_select.section_hist.xmax + #end if + #if $function_select.section_hist.title + --title '${function_select.section_hist.title}' + #end if + depth.stat hist.png + ]]></token> + <token name="@CALCUTS@"><![CDATA[ + calcuts + #if $function_select.section_calcuts.min_depth: + -f $function_select.section_calcuts.min_depth + #end if + #if $function_select.section_calcuts.low_depth: + -l $function_select.section_calcuts.low_depth + #end if + #if $function_select.section_calcuts.transition: + -m $function_select.section_calcuts.transition + #end if + #if $function_select.section_calcuts.upper_depth: + -u $function_select.section_calcuts.upper_depth + #end if + $function_select.section_calcuts.ploidy + ]]></token> + <xml name="calcuts"> + <section name="section_calcuts" title="Calcuts options"> + <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth coun" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/> + <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/> + <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/> + <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/> + <param name="ploidy" argument="-d" type="select" label="Ploidy"> + <option value="-d 0" selected="true">Diploid [0]</option> + <option value="-d 1">Haploid [1]</option> + </param> + </section> + </xml> </macros> <requirements> <requirement type="package" version="@TOOL_VERSION@">purge_dups</requirement> + <requirement type="package" version="3.4.2">matplotlib-base</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - #if $function_select.functions == "purge_dups": + #if $function_select.functions == 'purge_dups': #for $i, $file in enumerate($function_select.input): #if $file.is_of_type("paf"): gzip -c '${file}' > '${i}.gz' && @@ -18,10 +77,10 @@ #end for purge_dups #if $function_select.coverage: - -c '$function_select.coverage' + -c '${function_select.coverage}' #end if #if $function_select.cutoffs: - -T '$function_select.cutoffs' + -T '${function_select.cutoffs}' #end if #if $function_select.min_bad: -f $function_select.min_bad @@ -38,7 +97,7 @@ #if $function_select.max_gap: -M $function_select.max_gap #end if - #if $function_select.double_chain.chaining_rounds == "two": + #if $function_select.double_chain.chaining_rounds == 'two': -2 #if $function_select.double_chain.max_gap_2: -G $function_select.double_chain.max_gap_2 @@ -54,15 +113,15 @@ '${i}.gz' #end for > dups.bed 2> purge_dups.log - #else if $function_select.functions == "split_fa": + #else if $function_select.functions == 'split_fa': split_fa #if $function_select.split: -n $function_select.split #end if - '$function_select.input' > split.fasta - #else if $function_select.functions == "pbcstat": + '${function_select.input}' > split.fasta + #else if $function_select.functions == 'pbcstat': #for $i, $file in enumerate($function_select.input): - #if $file.is_of_type("paf"): + #if $file.is_of_type('paf'): gzip -c '${file}' > '${i}.gz' && #else ln -s '${file}' '${i}.gz' && @@ -82,10 +141,15 @@ -l $function_select.flank #end if $function_select.primary_alignments + #for $i, $file in enumerate($function_select.input): '${i}.gz' - #end for - #else if $function_select.functions == "ngscstat": + #end for + && mv PB.stat depth.stat + && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log + && @HIST_PLOT@ + + #else if $function_select.functions == 'ngscstat': ngscstat #if $function_select.min_align_qual: -q $function_select.min_align_qual @@ -96,24 +160,15 @@ #if $function_select.max_insert: -L $function_select.max_insert #end if - '$function_select.input' - #else if $function_select.functions == "calcuts": - calcuts - #if $function_select.min_depth: - -f $function_select.min_depth - #end if - #if $function_select.low_depth: - -l $function_select.low_depth - #end if - #if $function_select.transition: - -m $function_select.transition - #end if - #if $function_select.upper_depth: - -u $function_select.upper_depth - #end if - $function_select.ploidy - '$function_select.input' > cutoffs.tsv 2>calcuts.log - #else if $function_select.functions == "get_seqs": + '${function_select.input}' + && mv TX.stat depth.stat + && @CALCUTS@ depth.stat > cutoffs.tsv 2>calcuts.log + && @HIST_PLOT@ + + #else if $function_select.functions == 'calcuts': + @CALCUTS@ '${function_select.input}' > cutoffs.tsv 2>calcuts.log + + #else if $function_select.functions == 'get_seqs': get_seqs $function_select.coverage $function_select.haplotigs @@ -128,18 +183,18 @@ #if $function_select.min_gap: -g $function_select.min_gap #end if - '$function_select.bed_input' '$function_select.fasta_input' + '${function_select.bed_input}' '${function_select.fasta_input}' #end if ]]></command> <inputs> <conditional name="function_select"> <param type="select" name="functions" label="Select the purge_dups function"> - <option value="purge_dups">purge haplotigs and overlaps for an assembly</option> - <option value="split_fa">split FASTA file by 'N's</option> - <option value="pbcstat">create read depth histogram and base-level read depth for pacbio data</option> - <option value="ngscstat">create read depth histogram and base-level read detph for illumina data</option> - <option value="calcuts">calculate coverage cutoffs</option> - <option value="get_seqs">obtain seqeuences after purging</option> + <option value="purge_dups">Purge haplotigs and overlaps for an assembly (purge_dups)</option> + <option value="split_fa">Split FASTA file by 'N's (split_fa)</option> + <option value="pbcstat">Calculate coverage cutoff and create read depth histogram and base-levelread depth for PacBio data (calcuts+pbcstats)</option> + <option value="ngscstat">Calculate coverage cutoff and create read depth histogram and base-level read detph for Illumina data (calcuts+ngscstat)</option> + <option value="calcuts">calculate coverage cutoffs (calcuts)</option> + <option value="get_seqs">Obtain seqeuences after purging (get_seqs)</option> </param> <when value="purge_dups"> <param name="input" type="data" format="paf,paf.gz" multiple="true" label="PAF input file"/> @@ -174,6 +229,8 @@ <param name="min_map_qual" type="integer" argument="-q" optional="true" label="Minimum mapping quality"/> <param name="flank" type="integer" argument="-l" optional="true" label="Flanking space" /> <param name="primary_alignments" argument="-p" type="boolean" truevalue="-p" falsevalue="" checked="true" label="Use only primary alignments" /> + <expand macro="calcuts" /> + <expand macro="trimmers"/> </when> <when value="ngscstat"> <param name="input" type="data" format="bam" label="BAM input file"/> @@ -181,18 +238,15 @@ <!-- Param exists in help text, but isn't actually part of the code. Maybe in the next release? --> <!-- <param name="max_depth" type="integer" label="Maximum read depth" argument="-M" optional="true"/> --> <param name="max_insert" type="integer" argument="-L" optional="true" label="Maximum insert size"/> + <expand macro="calcuts" /> + <expand macro="trimmers"/> </when> + <when value="calcuts"> <param name="input" type="data" format="tabular" label="STAT input file"/> - <param name="min_depth" type="float" label="Minimum depth count fraction to maximum depth coun" min="0" max="1" argument="-f" optional="true" help="Default = 0.1"/> - <param name="low_depth" label="Lower bound for read depth" type="integer" argument="-l" optional="true"/> - <param name="transition" label="Transition between haploid and diploid" type="integer" argument="-m" optional="true"/> - <param name="upper_depth" label="Upper bound for read depth" type="integer" argument="-u" optional="true"/> - <param name="ploidy" argument="-d" type="select" label="Ploidy"> - <option value="-d 0" selected="true">Diploid [0]</option> - <option value="-d 1">Haploid [1]</option> - </param> + <expand macro="calcuts" /> </when> + <when value="get_seqs"> <param name="fasta_input" type="data" format="fasta" label="Fasta input file"/> <param name="bed_input" type="data" format="bed" label="Bed input file"/> @@ -222,8 +276,8 @@ <data name="ngscstat_cov" format="tabular" from_work_dir="TX.base.cov" label="${tool.name} on ${on_string}: ngscstat base coverage file"> <filter>function_select['functions'] == 'ngscstat'</filter> </data> - <data name="ngscstat_stat" format="tabular" from_work_dir="TX.stat" label="${tool.name} on ${on_string}: ngscstat stat file"> - <filter>function_select['functions'] == 'ngscstat'</filter> + <data name="stat_file" format="tabular" from_work_dir="depth.stat" label="${tool.name} on ${on_string}: stat file"> + <filter>function_select['functions'] == 'ngscstat' or function_select['functions'] == 'pbcstat'</filter> </data> <!-- Pbcstat --> <data name="pbcstat_cov" format="tabular" from_work_dir="PB.base.cov" label="${tool.name} on ${on_string}: pbcstat base coverage file"> @@ -232,15 +286,17 @@ <data name="pbcstat_wig" format="wig" from_work_dir="PB.cov.wig" label="${tool.name} on ${on_string}: pbcstat base wig file"> <filter>function_select['functions'] == 'pbcstat'</filter> </data> - <data name="pbcstat_stat" format="tabular" from_work_dir="PB.stat" label="${tool.name} on ${on_string}: stat file"> - <filter>function_select['functions'] == 'pbcstat'</filter> + + <data name="hist" format="png" from_work_dir="hist.png" label="${tool.name} on ${on_string}: histogram plot"> + <filter>function_select['functions'] == 'pbcstat' or function_select['functions'] == 'ngscstat'</filter> </data> + <!-- Calcuts --> <data name="calcuts_log" format="txt" from_work_dir="calcuts.log" label="${tool.name} on ${on_string}: calcuts log file"> - <filter>function_select['functions'] == 'calcuts'</filter> + <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter> </data> <data name="calcuts_tab" format="tabular" from_work_dir="cutoffs.tsv" label="${tool.name} on ${on_string}: calcuts cutoff file"> - <filter>function_select['functions'] == 'calcuts'</filter> + <filter>function_select['functions'] in ('pbcstat', 'ngscstat', 'calcuts')</filter> </data> <!-- Purge dups --> <data name="purge_dups_log" format="txt" from_work_dir="purge_dups.log" label="${tool.name} on ${on_string}: purge_dups log file"> @@ -311,7 +367,7 @@ <output name="split_fasta" value="split_out.fasta"/> </test> <!-- pbcstat --> - <test expect_num_outputs="3"> + <test expect_num_outputs="6"> <conditional name="function_select"> <param name="functions" value="pbcstat"/> <param name="input" value="test.paf"/> @@ -320,12 +376,22 @@ <param name="min_map_qual" value="1"/> <param name="flank" value="1"/> <param name="primary_alignments" value="-p"/> + <section name="section_calcuts"> + <param name="min_depth" value="0.01"/> + <param name="low_depth" value="1"/> + <param name="transition" value="1"/> + <param name="upper_depth" value="100"/> + <param name="ploidy" value="-d 0"/> + </section> </conditional> + <output name="calcuts_tab" value="calcuts_out.tsv"/> <output name="pbcstat_cov" value="out.cov"/> <output name="pbcstat_wig" value="out.wig"/> + <output name="stat_file" value="pbcstats.tabular"/> + <output name="hist" value="hist.png" ftype="png" compare="sim_size"/> </test> <!-- pbcstat gzip --> - <test expect_num_outputs="3"> + <test expect_num_outputs="6"> <conditional name="function_select"> <param name="functions" value="pbcstat"/> <param name="input" value="test.paf.gz" ftype="paf.gz"/> @@ -334,39 +400,68 @@ <param name="min_map_qual" value="1"/> <param name="flank" value="1"/> <param name="primary_alignments" value="-p"/> + <section name="section_calcuts"> + <param name="min_depth" value="0.01"/> + <param name="low_depth" value="1"/> + <param name="transition" value="1"/> + <param name="upper_depth" value="100"/> + <param name="ploidy" value="-d 0"/> + </section> </conditional> + <output name="calcuts_tab" value="calcuts_out.tsv"/> <output name="pbcstat_cov" value="out.cov"/> <output name="pbcstat_wig" value="out.wig"/> </test> - <!-- Pbcstat multiple input --> - <test expect_num_outputs="3"> + <!-- Pbcstat multiple input --> + <test expect_num_outputs="6"> <conditional name="function_select"> <param name="functions" value="pbcstat"/> <param name="input" value="test.paf,test2.paf.gz"/> + <section name="section_calcuts"> + <param name="min_depth" value="0.01"/> + <param name="low_depth" value="1"/> + <param name="transition" value="1"/> + <param name="upper_depth" value="100"/> + <param name="ploidy" value="-d 0"/> + </section> </conditional> + <output name="calcuts_tab" value="calcuts_out.tsv"/> <output name="pbcstat_cov" value="out2.cov"/> - <output name="pbcstat_wig" value="out2.wig"/> + <output name="stat_file" value="pbcstats2.tabular"/> + <output name="pbcstat_wig" value="out2.wig"/> </test> <!-- ngscstat --> - <test expect_num_outputs="2"> + <test expect_num_outputs="5"> <conditional name="function_select"> <param name="functions" value="ngscstat"/> <param name="input" value="test.bam"/> <param name="min_align_qual" value="10"/> <param name="max_insert" value="100"/> + <section name="section_calcuts"> + <param name="min_depth" value="0.01"/> + <param name="low_depth" value="1"/> + <param name="transition" value="1"/> + <param name="upper_depth" value="100"/> + <param name="ploidy" value="-d 0"/> + </section> </conditional> + <output name="calcuts_tab" value="calcuts_out.tsv"/> <output name="ngscstat_cov" value="ngsc_out.cov"/> + <output name="stat_file" value="tx_stats.tabular"/> + <output name="hist" value="hist.png" ftype="png" compare="sim_size"/> </test> <!-- Calcuts --> <test expect_num_outputs="2"> <conditional name="function_select"> <param name="functions" value="calcuts"/> <param name="input" value="test.stat"/> - <param name="min_depth" value="0.01"/> - <param name="low_depth" value="1"/> - <param name="transition" value="1"/> - <param name="upper_depth" value="100"/> - <param name="ploidy" value="-d 0"/> + <section name="section_calcuts"> + <param name="min_depth" value="0.01"/> + <param name="low_depth" value="1"/> + <param name="transition" value="1"/> + <param name="upper_depth" value="100"/> + <param name="ploidy" value="-d 0"/> + </section> </conditional> <output name="calcuts_tab" value="calcuts_out.tsv"/> </test> @@ -386,6 +481,35 @@ </conditional> <output name="get_seqs_purged" value="purged_out.fa"/> </test> + <!-- pbcstat histogram options--> + <test expect_num_outputs="6"> + <conditional name="function_select"> + <param name="functions" value="pbcstat"/> + <param name="input" value="test.paf"/> + <param name="max_cov" value="1000"/> + <param name="min_map_ratio" value="0.01"/> + <param name="min_map_qual" value="1"/> + <param name="flank" value="1"/> + <param name="primary_alignments" value="-p"/> + <section name="section_calcuts"> + <param name="min_depth" value="0.01"/> + <param name="low_depth" value="1"/> + <param name="transition" value="1"/> + <param name="upper_depth" value="100"/> + <param name="ploidy" value="-d 0"/> + </section> + <section name="section_hist"> + <param name="ymax" value="100"/> + <param name="xmax" value="100"/> + <param name="cutoffs_his" value="calcuts_out.tsv"/> + </section> + </conditional> + <output name="calcuts_tab" value="calcuts_out.tsv"/> + <output name="pbcstat_cov" value="out_hist_options.cov"/> + <output name="pbcstat_wig" value="out_hist_options.wig"/> + <output name="stat_file" value="pbcstats_hist_options.tabular"/> + <output name="hist" value="hist_options.png" ftype="png" compare="sim_size"/> + </test> </tests> <help><![CDATA[ .. class:: infomark