Mercurial > repos > iuc > bcftools_stats

--- a/bcftools_stats.xml	Tue Dec 12 13:56:27 2017 -0500
+++ b/bcftools_stats.xml	Thu Feb 21 15:56:16 2019 -0500
@@ -1,5 +1,5 @@
 <?xml version='1.0' encoding='utf-8'?>
-<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@VERSION@">
+<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@">
     <description>Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats</description>
     <macros>
         <token name="@EXECUTABLE@">stats</token>
@@ -7,11 +7,16 @@
     </macros>
     <expand macro="requirements">
         <expand macro="samtools_requirement"/>
-        <requirement type="package" version="2.0.0">matplotlib</requirement>
+        <requirement type="package" version="3.0.2">matplotlib</requirement>
+        <requirement type="package" version="0.1.11">tectonic</requirement>
     </expand>
     <expand macro="version_command" />
     <command detect_errors="aggressive"><![CDATA[
 @PREPARE_ENV@
+#set $input_files = [$input_file]
+#if $inputB_file:
+    #silent $input_files.append($inputB_file)
+#end if
 @PREPARE_INPUT_FILES@
 #set $section = $sec_restrict
 @PREPARE_TARGETS_FILE@
@@ -29,21 +34,21 @@
 @EXONS_FILE@
 ${section.first_allele_only}
 #if $section.depth.set_depth == 'yes':
-  --depth "${section.depth.depth_min},${section.depth.depth_max},${section.depth.depth_bin_size}"
+    --depth ${section.depth.depth_min},${section.depth.depth_max},${section.depth.depth_bin_size}
 #end if
 #if $section.user_tstv:
-  --user-tstv "${section.user_tstv}"
+    --user-tstv '${section.user_tstv}'
 #end if
 #if $section.afbins.afbins_select == 'af_bins_list':
-  --af-bins $section.afbins.af_bins_list
+    --af-bins '$section.afbins.af_bins_list'
 #elif $section.afbins.afbins_select == 'af_bins_file':
-  --af-bins $section.afbins.af_bins_file
+    --af-bins '$section.afbins.af_bins_file'
 #end if
 #if $section.af_tag:
-  --af-tag "${section.af_tag}"
+    --af-tag '${section.af_tag}'
 #end if
 #if len($input_vcfs) == 1:
-${section.split_by_ID}
+    ${section.split_by_ID}
 #end if
 ${section.verbose}

@@ -59,15 +64,19 @@

 ## Primary Input/Outputs
 @INPUT_FILES@
-> $output_file
-## requires: matplotlib and pdflatex
+> '$output_file'
 #if $plot_title:
-&& plot-vcfstats -p 'plot_tmp/' -T $plot_title -s $output_file
+    && (plot-vcfstats
+    -p 'plot_tmp/'
+    -T '$plot_title'
+    -s
+    '$output_file'
+    || (printf "The content of plot_tmp/plot-vcfstats.log is:\n" >&2 && cat plot_tmp/plot-vcfstats.log >&2 && exit 1) )
 #end if
-]]>
-    </command>
+    ]]></command>
     <inputs>
-        <expand macro="macro_inputs" />
+        <expand macro="macro_input" />
+        <param name="inputB_file" type="data" format="vcf,vcf_bgzip,bcf" optional="true" label="Optional VCF/BCF Data to compare against" help="When this second dataset is also specified, separate stats for intersection and the complements are generated" />
         <section name="sec_restrict" expanded="false" title="Restrict to">
             <expand macro="macro_samples" />
             <expand macro="macro_apply_filters" />
@@ -128,13 +137,14 @@
     </inputs>
     <outputs>
         <data name="output_file" format="txt"/>
-        <data name="output_pdf" format="pdf">
+        <data name="output_pdf" format="pdf" from_work_dir="plot_tmp/summary.pdf">
             <filter>plot_title</filter>
         </data>
     </outputs>
     <tests>
         <test>
-            <param name="input_files" ftype="vcf" value="stats.b.vcf,stats.a.vcf" />
+            <param name="input_file" ftype="vcf" value="stats.b.vcf" />
+            <param name="inputB_file" ftype="vcf" value="stats.a.vcf" />
             <output name="output_file">
                 <assert_contents>
                     <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
@@ -143,7 +153,8 @@
             </output>
         </test>
         <test>
-            <param name="input_files" ftype="vcf" value="mpileup.vcf" />
+            <param name="input_file" ftype="vcf" value="mpileup.vcf" />
+            <param name="plot_title" value="Plot for mpileup.vcf" />
             <output name="output_file">
                 <assert_contents>
                     <has_text_matching expression="SN\t0\tnumber of samples:\t3"/>
@@ -151,6 +162,7 @@
                     <has_text_matching expression="ST\t0\tA>C\t16"/>
                 </assert_contents>
             </output>
+            <output name="output_pdf" file="summary.pdf" compare="sim_size" delta="20000" />
         </test>
     </tests>
     <help><![CDATA[
@@ -163,6 +175,8 @@
 When two files are given, the program generates separate stats for intersection and the complements.
 By default only sites are compared, -s/-S must given to include also sample columns.

+When one VCF file is specified, then stats by non-reference allele frequency, depth distribution, stats by quality and per-sample counts, singleton stats, etc. are printed. When two VCF files are given, then stats such as concordance (Genotype concordance by non-reference allele frequency, Genotype concordance by sample, Non-Reference Discordance) and correlation are also printed. Per-site discordance (PSD) is also printed in --verbose mode.
+
 @COLLAPSE_HELP@
 @REGIONS_HELP@
 @TARGETS_HELP@
--- a/macros.xml	Tue Dec 12 13:56:27 2017 -0500
+++ b/macros.xml	Thu Feb 21 15:56:16 2019 -0500
@@ -1,5 +1,5 @@
 <macros>
-  <token name="@VERSION@">1.4.0</token>
+  <token name="@TOOL_VERSION@">1.9</token>
   <xml name="stdio">
     <stdio>
       <exit_code range="1:" />
@@ -10,13 +10,13 @@
   </xml>
   <xml name="requirements">
     <requirements>
-      <requirement type="package" version="1.4">bcftools</requirement>
-      <requirement type="package" version="1.4">htslib</requirement>
+      <requirement type="package" version="@TOOL_VERSION@">bcftools</requirement>
+      <requirement type="package" version="1.9">htslib</requirement>
       <yield />
     </requirements>
   </xml>
   <xml name="samtools_requirement">
-      <requirement type="package" version="1.3.1">samtools</requirement>
+      <requirement type="package" version="1.9">samtools</requirement>
   </xml>
   <xml name="version_command">
     <version_command>bcftools 2&gt;&amp;1 | grep 'Version:'</version_command>
@@ -39,7 +39,7 @@
 ]]>
   </token>
   <xml name="macro_input">
-    <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf,bcf_bgzip" label="VCF/BCF Data" />
+    <param name="input_file" type="data" format="vcf,vcf_bgzip,bcf" label="VCF/BCF Data" />
   </xml>
   <token name="@PREPARE_INPUT_FILE@">
 <![CDATA[
@@ -50,6 +50,11 @@
   bcftools index $input_vcf &&
 #elif $input_file.is_of_type('vcf_bgzip')
   ln -s '$input_file' $input_vcf &&
+  #if $input_file.metadata.tabix_index:
+    ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
+  #else
+    bcftools index $input_vcf &&
+  #end if
 #elif $input_file.is_of_type('bcf')
   #set $input_vcf = 'input.bcf'
   ln -s '$input_file' $input_vcf &&
@@ -58,8 +63,6 @@
   #else
     bcftools index $input_vcf &&
   #end if
-#elif $input_file.is_of_type('bcf_bgzip')
-  ln -s '$input_file' $input_vcf &&
 #end if
 ]]>
   </token>
@@ -68,7 +71,7 @@
   </token>

   <xml name="macro_inputs">
-    <param name="input_files" type="data" format="vcf,bcf" label="Other VCF/BCF Datasets" multiple="True" />
+    <param name="input_files" type="data" format="vcf,vcf_bgzip,bcf" label="Other VCF/BCF Datasets" multiple="True" />
   </xml>
   <token name="@PREPARE_INPUT_FILES@">
 <![CDATA[
@@ -80,8 +83,13 @@
   #if $input_file.is_of_type('vcf')
     bgzip -c '$input_file' > $input_vcf &&
     bcftools index $input_vcf &&
-  #elif $input_file.is_of_type('vcf_bgz')
-    ln -s '$input_file' $input_vcf
+  #elif $input_file.is_of_type('vcf_bgzip')
+    ln -s '$input_file' $input_vcf &&
+    #if $input_file.metadata.tabix_index:
+      ln -s '${input_file.metadata.tabix_index}' ${input_vcf}.tbi &&
+    #else
+      bcftools index $input_vcf &&
+    #end if
   #elif $input_file.is_of_type('bcf')
     #set $input_vcf = 'input' + str($i) + '.bcf.gz'
     ln -s '$input_file' $input_vcf &&
@@ -90,8 +98,6 @@
     #else
       bcftools index $input_vcf &&
     #end if
-  #elif $input_file.is_of_type('bcfvcf_bgz')
-    ln -s '$input_file' $input_vcf &&
   #end if
   echo '$input_vcf' >> $vcfs_list_file &&
   $input_vcfs.append($input_vcf)
@@ -106,7 +112,7 @@
   </token>

   <xml name="macro_fasta_ref">
-    <param name="fasta_ref" argument="--fasta-ref" type="data" format="data" label="Reference sequence in FASTA format" optional="True" />
+    <param name="fasta_ref" argument="--fasta-ref" type="data" format="data" optional="true" label="Reference sequence in FASTA format" />
   </xml>
   <token name="@PREPARE_FASTA_REF@">
 <![CDATA[
@@ -148,7 +154,7 @@


   <xml name="macro_AF_file">
-    <param name="AF_file" argument="--AF-file" type="data" format="tabular" label="Allele frequencies file" optional="True" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" />
+    <param name="AF_file" argument="--AF-file" type="data" format="tabular" optional="true" label="Allele frequencies file" help="Tab-delimited file containing the columns CHR,POS,REF,ALT,AF" />
   </xml>
   <!-- This may need to bgzip and tabix the file -->
   <token name="@PREPARE_AF_FILE@">
@@ -165,7 +171,7 @@
   </token>

   <xml name="macro_estimate_AF">
-      <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" label="Estimate allele frequency" optional="True" help="calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" />
+      <param name="estimate_AF" argument="--estimate-AF" type="data" format="data" optional="true" label="Estimate allele frequency" help="Calculate AC,AN counts on the fly, using either all samples (&quot;-&quot;) or samples listed in &lt;file&gt;" />
   </xml>
   <token name="@ESTIMATE_AF@">
 #if 'estimate_AF' in $section and $section.estimate_AF:
@@ -174,7 +180,7 @@
   </token>

   <xml name="macro_exons_file">
-    <param name="exons_file" type="data" format="tabular" label="exons file" optional="True" help="tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" />
+    <param name="exons_file" type="data" format="tabular" optional="true" label="Exons file" help="Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)" />
   </xml>
   <token name="@PREPARE_EXONS_FILE@">
 <![CDATA[
@@ -193,7 +199,7 @@
   </token>

   <xml name="macro_ploidy_file">
-    <param name="ploidy_file" type="data" format="tabular" label="Ploidy file" optional="True" help="tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
+    <param name="ploidy_file" type="data" format="tabular" optional="true" label="Ploidy file" help="Tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY" />
   </xml>
   <token name="@PLOIDY_FILE@">
 #if 'ploidy_file' in $section and $section.ploidy_file:
@@ -208,7 +214,7 @@
       <option value="id">id - only records with identical ID column are compatible. </option>
   </xml>
   <xml name="macro_collapse">
-    <param name="collapse" type="select" label="Collapse" optional="True" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files">
+    <param name="collapse" type="select" optional="true" label="Collapse" help="Controls how to treat records with duplicate positions and defines compatible records across multiple input files">
       <option value="snps">snps - allow different alleles, as long as they all are SNPs</option>
       <option value="indels">indels - allow different alleles, as long as they all are indels</option>
       <option value="both">both - indels and snps </option>
@@ -224,8 +230,8 @@
   </token>

   <xml name="macro_apply_filters">
-    <param name="apply_filters" type="text" value="" label="Apply Filters" optional="true"
-           help="(-f --apply-filters) Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
+    <param argument="--apply-filters" type="text" value="" optional="true" label="Apply filters"
+           help="Skip sites where FILTER column does not contain any of the strings listed (e.g. &quot;PASS,.&quot;)">
       <validator type="regex" message="FILTER terms separated by commas">^([^ \t\n\r\f\v,]+(,[^ \t\n\r\f\v,]+)*)?$</validator>
     </param>
   </xml>
@@ -271,13 +277,13 @@
         </param>
         <when value="__none__"/>
         <when value="regions">
-            <param name="regions" type="text" value="" label="restrict to comma-separated list of regions" optional="true"
+            <param name="regions" type="text" value="" optional="true" label="Restrict to comma-separated list of regions"
                    help="Each region is specifed as: chr or chr:pos or chr:from-to">
                  <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
             </param>
         </when>
         <when value="regions_file">
-            <param name="regions_file" type="data" format="vcf,bed,tabular" label="Regions File" optional="True" help="restrict to regions listed in a file" />
+            <param name="regions_file" type="data" format="vcf,bed,tabular" optional="true" label="Regions file" help="Restrict to regions listed in a file" />
         </when>
     </conditional>
   </xml>
@@ -346,7 +352,7 @@
         </param>
         <when value="__none__"/>
         <when value="targets">
-            <param name="targets" type="text" value="" label="Restrict to comma-separated list of targets" optional="true"
+            <param name="targets" type="text" value="" optional="true" label="Restrict to comma-separated list of targets"
                    help="Each target is specifed as: chr or chr:pos or chr:from-to">
                  <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
             </param>
@@ -373,16 +379,16 @@
   </token>

   <xml name="macro_samples">
-      <param name="samples" type="text" value="" label="Samples" optional="true"
-             help="(-s) comma separated list of samples to annotate (or exclude)">
+      <param argument="--samples" type="text" value="" optional="true" label="Samples"
+             help="Comma separated list of samples to annotate (or exclude)">
           <validator type="regex" message="">^(\w+(,\w+)*)?$</validator>
       </param>
       <param name="invert_samples" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples"
-             help="inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" />
-      <param name="samples_file" type="data" format="tabular" label="Samples File" optional="True"
-             help="(-S) file of samples to include" />
-      <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples File"
-             help="inverts the query/filtering applied by Samples File" />
+             help="Inverts the query/filtering applied by Samples (adds &quot;^&quot; prefix to exclude)" />
+      <param argument="--samples-file" type="data" format="tabular" optional="true" label="Samples file"
+             help="File of samples to include" />
+      <param name="invert_samples_file" type="boolean" truevalue="^" falsevalue="" checked="false" label="Invert Samples file"
+             help="inverts the query/filtering applied by Samples file" />
   </xml>
   <token name="@SAMPLES@">
 #set $samples_defined = False
@@ -397,7 +403,7 @@
   </token>

   <xml name="macro_sample">
-      <param name="sample" type="text" label="Sample" optional="True" help="apply variants of the given sample" />
+      <param name="sample" type="text" optional="true" label="Sample" help="Apply variants of the given sample" />
   </xml>
   <token name="@SAMPLE@">
 #if $section.sample:
@@ -407,7 +413,7 @@


   <xml name="macro_include">
-    <param name="include" type="text" label="Include" optional="True" help="(-i) select sites for which the expression is true">
+    <param argument="--include" type="text" optional="true" label="Include" help="Select sites for which the expression is true">
         <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
         <sanitizer sanitize="False"/>
     </param>
@@ -419,7 +425,7 @@
   </token>

   <xml name="macro_exclude">
-    <param name="exclude" type="text" label="Exclude" optional="True" help="(-e) exclude sites for which the expression is true">
+    <param argument="--exclude" type="text" optional="true" label="Exclude" help="Exclude sites for which the expression is true">
         <validator type="regex" message="Single quote not allowed">^[^']*$</validator>
         <sanitizer sanitize="False"/>
     </param>
@@ -431,8 +437,8 @@
   </token>

   <xml name="macro_columns">
-    <param name="columns" type="text" value="" label="Columns" optional="true"
-            help="list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
+    <param name="columns" type="text" value="" optional="true" label="Columns"
+            help="List of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details">
         <validator type="regex" message="COLUMN names  separated by commas">^([^,]+(,[^,]+)*)?$</validator>
     </param>
   </xml>
Binary file test-data/summary.pdf has changed
Binary file test-data/view.bcf has changed
Binary file test-data/view.bcf.csi has changed
Binary file test-data/view.vcf_bgzip has changed