Mercurial > repos > iuc > bcftools_stats
view bcftools_stats.xml @ 22:591cc8518033 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit 784611c9caf2680d41414ca2880b93a69d719701
author | iuc |
---|---|
date | Sun, 18 Aug 2024 10:16:43 +0000 |
parents | bf8325a07ce7 |
children |
line wrap: on
line source
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats</description>
    <!-- Only the @EXECUTABLE@ token is defined here; every other @TOKEN@ and macro used below is expected to come from macros.xml. -->
    <macros>
        <token name="@EXECUTABLE@">stats</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools" />
    <!-- tectonic is requested in addition to the shared requirements; presumably it renders the PDF written by plot-vcfstats (confirm against macros.xml). -->
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
        <expand macro="matplotlib_requirement" />
        <requirement type="package" version="0.12.0">tectonic</requirement>
    </expand>
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
## Cheetah directives and comments are line-oriented, so every directive below
## must stay on its own line.
@PREPARE_ENV@

## Collect the primary input and, when given, the comparison input into one list.
#set $input_files = [$input_file]
#if $inputB_file:
    #silent $input_files.append($inputB_file)
#end if
@PREPARE_INPUT_FILES@

## $section is re-pointed before each group of macro tokens; the tokens
## apparently read their parameters from it (confirm against macros.xml).
#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

## Stats section
#set $section = $sec_default.reference_source
@PREPARE_FASTA_REF@
#set $section = $sec_default
@PREPARE_EXONS_FILE@

bcftools @EXECUTABLE@

## Stats section
#set $section = $sec_default.reference_source
@FASTA_REF@
#set $section = $sec_default
@EXONS_FILE@
${section.first_allele_only}
#if $section.depth.set_depth == 'yes':
    --depth ${section.depth.depth_min},${section.depth.depth_max},${section.depth.depth_bin_size}
#end if
#if $section.user_tstv:
    --user-tstv '${section.user_tstv}'
#end if
#if $section.afbins.afbins_select == 'af_bins_list':
    --af-bins '$section.afbins.af_bins_list'
#elif $section.afbins.afbins_select == 'af_bins_file':
    --af-bins '$section.afbins.af_bins_file'
#end if
#if $section.af_tag:
    --af-tag '${section.af_tag}'
#end if
## --split-by-ID is only passed for a single input file.
## $input_vcfs is not defined in this file; presumably @PREPARE_INPUT_FILES@ sets it.
#if len($input_vcfs) == 1:
    ${section.split_by_ID}
#end if
${section.verbose}

## Restrict section
#set $section = $sec_restrict
@APPLY_FILTERS@
@COLLAPSE@
@REGIONS@
@SAMPLES@
@TARGETS@
@INCLUDE@
@EXCLUDE@

## Primary Input/Outputs
@INPUT_FILES@
> '$output_file'
#if $plot_title:
    ## The plot step and its fallback are grouped: '&&' and '||' have equal
    ## precedence, so without the grouping the log dump would also run when
    ## bcftools itself failed and no log exists.
    && (
        plot-vcfstats -p 'plot_tmp/' -T '$plot_title' -s '$output_file'
        || (printf "The content of plot_tmp/plot-vcfstats.log is:\n" >&2 && cat plot_tmp/plot-vcfstats.log >&2 && exit 1)
    )
#end if
    ]]></command>
    <inputs>
        <expand macro="macro_input" />
        <param name="inputB_file" type="data" format="vcf,vcf_bgzip,bcf" optional="true" label="Optional VCF/BCF Data to compare against" help="When this second dataset is also specified, separate stats for intersection and the complements are generated" />
        <!-- Options that limit which samples, sites and regions are considered. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_samples" />
            <expand macro="macro_apply_filters" />
            <expand macro="macro_collapse" />
            <expand macro="macro_restrict" />
            <expand macro="macro_restrict" type="target" label_type="Target" />
            <expand macro="macro_include" />
            <expand macro="macro_exclude" />
        </section>
        <!-- Options specific to the stats subcommand. -->
        <section name="sec_default" expanded="true" title="Stats options">
            <param name="first_allele_only" type="boolean" truevalue="--1st-allele-only" falsevalue="" label="First allele only" help="Include only first allele at multiallelic sites" />
            <!-- The three depth values are emitted together as one comma-separated depth option. -->
            <conditional name="depth">
                <param name="set_depth" type="select" label="Depth distribution">
                    <option value="no">Use depth defaults</option>
                    <option value="yes">Set depth values</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="depth_min" type="integer" value="0" min="0" label="Depth min"/>
                    <param name="depth_max" type="integer" value="500" min="1" label="Depth max" />
                    <param name="depth_bin_size" type="integer" value="1" min="1" label="Depth bin size" />
                </when>
            </conditional>
            <!-- The reference genome is optional: the empty selector value means no reference is used. -->
            <conditional name="reference_source">
                <param name="reference_source_selector" type="select" label="Choose a reference genome">
                    <option value="">Run without a reference genome</option>
                    <option value="cached">Use a built-in genome</option>
                    <option value="history">Use a genome from the history</option>
                </param>
                <when value="" />
                <when value="cached">
                    <param name="fasta_ref" type="select" label="Reference genome">
                        <!-- Built-in genomes are filtered by the dbkey of the primary input. -->
                        <options from_data_table="fasta_indexes">
                            <filter type="data_meta" column="dbkey" key="dbkey" ref="input_file" />
                            <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
                        </options>
                    </param>
                </when>
                <when value="history">
                    <param name="fasta_ref" type="data" format="fasta" label="Reference genome" />
                </when>
            </conditional>
            <expand macro="macro_exons_file" />
            <param name="split_by_ID" type="boolean" truevalue="--split-by-ID" falsevalue="" label="Split by ID (ignored on multiple inputs)" help="Collect stats for sites with ID separately (known vs novel)" />
            <param name="user_tstv" type="text" value="" optional="true" label="User Tstv" help="Collect Ts/Tv stats for any tag using the given binning: TAG[:min:max:binsize]" >
                <!-- The TAG part needs the '+' quantifier; without it only one-character tags pass validation. -->
                <!-- NOTE(review): min, max and binsize are restricted to unsigned integers here; confirm whether bcftools also accepts decimals. -->
                <validator type="regex" message="TAG optionally followed by :min:max:binsize">^([^ \t\n\r\f\v:,]+(:\d+:\d+:\d+)?)?$</validator>
            </param>
            <conditional name="afbins">
                <param name="afbins_select" type="select" label="Set af-bins">
                    <option value="default">Use default</option>
                    <option value="af_bins_list">Enter bins</option>
                    <option value="af_bins_file">Read bins from file</option>
                </param>
                <when value="default"/>
                <when value="af_bins_list">
                    <param name="af_bins_list" type="text" value="0.1,0.5,1" label="List of allele frequency bins" help="e.g. 0.1,0.5,1">
                        <!-- The pattern checks only that the value is a comma-separated list of numbers; it cannot check that they increase. -->
                        <validator type="regex" message="Comma-separated list of floats of increasing value">^[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?(,[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?)*$</validator>
                    </param>
                </when>
                <when value="af_bins_file">
                    <param name="af_bins_file" type="data" format="tabular" label="File listing the allele frequency bins one per line"/>
                </when>
            </conditional>
            <param name="af_tag" type="text" value="" optional="true" label="Allele frequency tag to use, by default estimated from AN,AC or GT">
                <validator type="regex" message="TAG">^\w*$</validator>
            </param>
            <param name="verbose" type="boolean" truevalue="--verbose" falsevalue="" label="Verbose" help="Produce verbose per-site and per-sample output" />
        </section>
        <!-- A non-empty title switches on the plot-vcfstats step and the PDF output. -->
        <param name="plot_title" type="text" value="" optional="true" label="Create a plots pdf with this title">
            <!-- One-character titles are accepted as well as longer ones that start and end with a word character. -->
            <validator type="regex" message="The title must start and end with a letter, digit or underscore">^\w(.*\w)?$</validator>
        </param>
    </inputs>
    <outputs>
        <data name="output_file" format="txt" label="${tool.name} on ${on_string}: txt" />
        <!-- The PDF is only produced when a plot title was given. -->
        <data name="output_pdf" format="pdf" label="${tool.name} on ${on_string}: PDF" from_work_dir="plot_tmp/summary.pdf">
            <filter>plot_title</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test two input files -->
        <test expect_num_outputs="1">
            <param name="input_file" ftype="vcf" value="stats.b.vcf" />
            <param name="inputB_file" ftype="vcf" value="stats.a.vcf" />
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
                    <has_text_matching expression="SN\t1\tnumber of samples:\t3"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test a built-in (cached) reference genome -->
        <test expect_num_outputs="1">
            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
            <section name="sec_default">
                <expand macro="test_using_reference" select_from="cached" ref="mpileup" />
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
                    <has_text_matching expression="SN\t0\tnumber of records:\t4103"/>
                    <has_text_matching expression="ST\t0\tA>C\t16"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test the PDF plot output -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
            <param name="plot_title" value="Plot for mpileup.vcf" />
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
                    <has_text_matching expression="SN\t0\tnumber of records:\t4103"/>
                    <has_text_matching expression="ST\t0\tA>C\t16"/>
                </assert_contents>
            </output>
            <output name="output_pdf" file="summary.pdf" compare="sim_size" delta="25000" />
        </test>
        <!-- Test region overlap option-->
        <test expect_num_outputs="1">
            <param name="input_file" ftype="vcf" value="stats.b.vcf" />
            <param name="inputB_file" ftype="vcf" value="stats.a.vcf" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
                    <has_text_matching expression="SN\t1\tnumber of samples:\t3"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
        <!-- Test VCF.gz input file -> REQUIRES https://github.com/galaxyproject/galaxy/pull/14605
        <test expect_num_outputs="1">
            <param name="input_file" value="mpileup.vcf.gz" />
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="bcftools stats input0.vcf.gz"/>
                    <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
                    <has_text_matching expression="SN\t0\tnumber of records:\t4103"/>
                </assert_contents>
            </output>
        </test>
        -->
        <!-- Test modification in samples option -->
        <test expect_num_outputs="1">
            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
            <section name="sec_restrict">
                <param name="samples" value="-" />
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="bcftools stats --samples - "/>
                    <has_text_matching expression="PSC\t0\tHG00101\t0\t0\t0\t0\t0\t0\t4.8"/>
                    <has_text_matching expression="PSI\t0\tHG00102\t0"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats.
When two files are given, the program generates separate stats for intersection
and the complements. By default only sites are compared, -s/-S must be given to include
also sample columns.

When one VCF file is specified, then stats by non-reference allele frequency, depth distribution, stats by quality and per-sample counts, singleton stats, etc. are printed.

When two VCF files are given, then stats such as concordance (Genotype concordance by non-reference allele frequency, Genotype concordance by sample, Non-Reference Discordance) and correlation are also printed. Per-site discordance (PSD) is also printed in --verbose mode.

@COLLAPSE_HELP@
@REGIONS_HELP@
@TARGETS_HELP@
@EXPRESSIONS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>