Mercurial > repos > iuc > delly_lr

--- a/lr.xml	Thu Oct 29 20:51:54 2020 +0000
+++ b/lr.xml	Fri Jan 22 14:32:45 2021 +0000
@@ -9,7 +9,7 @@
     <command detect_errors="exit_code"><![CDATA[
 ## initialize
 @BAM@
-
+
 ## run
 delly lr
 ## generic options
@@ -26,99 +26,97 @@
 --min-clique-size $discovery.mincliquesize
 --minrefsep $discovery.minrefsep
 --maxreadsep $discovery.maxreadsep
+## consensus options
+--max-reads $consensus.maxreads
+--flank-size $consensus.flanksize
+--flank-quality $consensus.flankquality
 ## genotyping options
-#if $genotyping.vcffile
-    --vcffile '$genotyping.vcffile'
-#end if
 --geno-qual $genotyping.genoqual
 #if 'dump' in $oo.out
     --dump 'dump.tsv.gz'
 #end if
-## samples
-#for $i, $current in enumerate($samples)
-    'sample_${i}.bam'
+## input
+#for $i, $current in enumerate($input)
+    'input_${i}.bam'
 #end for

 ## postprocessing
 @LOG@
+@DUMP@
 @VCF@
-@DUMP@
     ]]></command>
     <inputs>
-        <expand macro="samples"/>
+        <expand macro="input" format="bam" multiple="true" label="Select input file(s)"/>
         <section name="generic" title="Generic options" expanded="true">
-            <expand macro="genome"/>
             <expand macro="svtype"/>
-            <expand macro="exclude"/>
             <param argument="--technology" type="select" label="Select sequencing technology">
                 <option value="ont" selected="true">Oxford Nanopore (ont)</option>
-                <option value="pb">Pacbio (pb)</option>
+                <option value="pb">PacBio (pb)</option>
             </param>
+            <expand macro="genome"/>
+            <expand macro="exclude"/>
         </section>
         <section name="discovery" title="Discovery options" expanded="true">
-            <param argument="--mapqual" type="integer" value="1" label="Set minimum mapping quality"/>
+            <param argument="--mapqual" type="integer" value="10" label="Set minimum mapping quality"/>
             <expand macro="minclip"/>
             <expand macro="mincliquesize"/>
-            <expand macro="minrefsep" defaut="30"/>
-            <expand macro="maxreadsep" defaut="75"/>
+            <expand macro="minrefsep" default="30"/>
+            <expand macro="maxreadsep" default="75"/>
+        </section>
+        <section name="consensus" title="Consensus options" expanded="true">
+            <param name="maxreads" type="integer" value="5" label="Set maximum reads for consensus computation" help="(--max-reads)"/>
+            <param name="flanksize" type="integer" value="400" label="Set minimum flank size" help="(--flank-size)"/>
+            <param name="flankquality" type="float" min="0.0" max="1.0" value="0.9" label="Set minimum flank quality" help="(--flank-quality)"/>
         </section>
         <section name="genotyping" title="Genotyping options" expanded="true">
-            <expand macro="vcffile"/>
             <expand macro="genoqual"/>
         </section>
-        <section name="oo" title="Output options">
+        <section name="oo" title="Output options" expanded="true">
             <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
                 <option value="bcf" selected="true">BCF</option>
-                <option value="vcf">VCF</option>
+                <option value="log">Log</option>
                 <option value="dump">SV-reads</option>
-                <option value="log">Log</option>
+                <option value="vcf">VCF</option>
             </param>
         </section>
     </inputs>
     <outputs>
-        <expand macro="vcf"/>
         <expand macro="bcf"/>
         <expand macro="dump"/>
         <expand macro="log"/>
+        <expand macro="vcf"/>
     </outputs>
     <tests>
-        <!-- no test implemented for parameter vcffile -->
-
         <!-- #1 default, single -->
         <test expect_num_outputs="2">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
             </section>
             <section name="oo">
                 <param name="out" value="vcf,bcf"/>
             </section>
+            <output name="out_bcf">
+                <assert_contents>
+                    <has_size value="1184" delta="10"/>
+                </assert_contents>
+            </output>
             <output name="out_vcf">
                 <assert_contents>
                     <has_size value="3661" delta="10"/>
                     <has_line line="#CHROM&#009;POS&#009;ID&#009;REF&#009;ALT&#009;QUAL&#009;FILTER&#009;INFO&#009;FORMAT&#009;normal"/>
                 </assert_contents>
             </output>
-            <output name="out_bcf">
-                <assert_contents>
-                    <has_size value="1184" delta="10"/>
-                </assert_contents>
-            </output>
         </test>
         <!-- #2 default, multi; test data to small, results are empty -->
         <test expect_num_outputs="3">
-            <param name="samples" value="normal.bam,tumor.bam"/>
+            <param name="input" value="normal.bam,tumor.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
             </section>
             <section name="oo">
                 <param name="out" value="vcf,bcf,log"/>
             </section>
-            <output name="out_vcf">
-                <assert_contents>
-                    <has_size value="3667" delta="10"/>
-                </assert_contents>
-            </output>
             <output name="out_bcf">
                 <assert_contents>
                     <has_size value="1189" delta="10"/>
@@ -129,10 +127,15 @@
                     <has_text_matching expression=".+Done.+"/>
                 </assert_contents>
             </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_size value="3667" delta="10"/>
+                </assert_contents>
+            </output>
         </test>
        <!-- #3 -->
         <test expect_num_outputs="4">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="exclude" value="exclude.tsv"/>
@@ -140,11 +143,6 @@
             <section name="oo">
                 <param name="out" value="vcf,bcf,dump,log"/>
             </section>
-            <output name="out_vcf">
-                <assert_contents>
-                    <has_size value="3661" delta="10"/>
-                </assert_contents>
-            </output>
             <output name="out_bcf">
                 <assert_contents>
                     <has_size value="1186" delta="10"/>
@@ -160,10 +158,15 @@
                     <has_text_matching expression=".+Done.+"/>
                 </assert_contents>
             </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_size value="3661" delta="10"/>
+                </assert_contents>
+            </output>
         </test>
         <!-- #4 -->
         <test expect_num_outputs="4">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="svtype" value="DEL"/>
@@ -178,6 +181,11 @@
                 <param name="minrefsep" value="24"/>
                 <param name="maxreadsep" value="39"/>
             </section>
+            <section name="consensus">
+                <param name="maxreads" value="6"/>
+                <param name="flanksize" value="399"/>
+                <param name="flankquality" value="0.91"/>
+            </section>
             <section name="genotyping">
                 <param name="genoqual" value="4"/>
             </section>
@@ -189,12 +197,6 @@
                     <has_size value="1182" delta="10"/>
                 </assert_contents>
             </output>
-            <output name="out_vcf">
-                <assert_contents>
-                    <has_size value="3661" delta="10"/>
-                    <has_line line="#CHROM&#009;POS&#009;ID&#009;REF&#009;ALT&#009;QUAL&#009;FILTER&#009;INFO&#009;FORMAT&#009;normal"/>
-                </assert_contents>
-            </output>
             <output name="out_dump">
                 <assert_contents>
                     <has_size value="0"/>
@@ -205,10 +207,16 @@
                     <has_text_matching expression=".+"/>
                 </assert_contents>
             </output>
+            <output name="out_vcf">
+                <assert_contents>
+                    <has_size value="3661" delta="10"/>
+                    <has_line line="#CHROM&#009;POS&#009;ID&#009;REF&#009;ALT&#009;QUAL&#009;FILTER&#009;INFO&#009;FORMAT&#009;normal"/>
+                </assert_contents>
+            </output>
         </test>
         <!-- #5 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="svtype" value="INS"/>
@@ -225,7 +233,7 @@
         </test>
         <!-- #6 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="svtype" value="DUP"/>
@@ -241,7 +249,7 @@
         </test>
         <!-- #7 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="svtype" value="INV"/>
@@ -257,7 +265,7 @@
         </test>
         <!-- #8 -->
         <test expect_num_outputs="1">
-            <param name="samples" value="normal.bam"/>
+            <param name="input" value="normal.bam"/>
             <section name="generic">
                 <param name="genome" value="genome.fasta"/>
                 <param name="svtype" value="BND"/>
@@ -279,15 +287,13 @@

 @WID@

-Delly *long-read (lr)* uses the long-read SV discovery mode.
-
 **Input**

-Delly *long-read (lr)* needs a sorted, indexed and duplicate marked BAM file for every input sample. An indexed reference genome is required to identify split-reads. Additionally a VCF/BCF file for genotyping can be applied.
+Delly *long-read (lr)* needs a sorted, indexed and duplicate-marked BAM file for every input sample. An indexed reference genome is required to identify split-reads.

 **Output**

-The output is available in BCF and VCF format. Additionally an output file for SV-reads is provided.
+The output is available in BCF and VCF format. Additionally, an output file for SV-reads and a log file are provided.

 .. class:: infomark
--- a/macros.xml	Thu Oct 29 20:51:54 2020 +0000
+++ b/macros.xml	Fri Jan 22 14:32:45 2021 +0000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@TOOL_VERSION@">0.8.5</token>
+    <token name="@TOOL_VERSION@">0.8.7</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
@@ -17,14 +17,12 @@
         </citations>
     </xml>

-    <!--
-        command
-    -->
+    <!-- command -->

     <token name="@BAM@"><![CDATA[
-#for $i, $current in enumerate($samples)
-    ln -s '${current}' 'sample_${i}.bam' &&
-    ln -s '${current.metadata.bam_index}' 'sample_${i}.bam.bai' &&
+#for $i, $current in enumerate($input)
+    ln -s '${current}' 'input_${i}.bam' &&
+    ln -s '${current.metadata.bam_index}' 'input_${i}.bam.bai' &&
 #end for
     ]]></token>
     <token name="@DUMP@"><![CDATA[
@@ -43,68 +41,79 @@
 #end if
     ]]></token>

-    <!--
-        input
-    -->
+    <!-- input -->

+    <xml name="cnoffset" token_default="">
+        <param name="cnoffset" type="float" min="0.0" max="1.0" value="@DEFAULT@" label="Set minimum CN offset" help="(--cn-offset)"/>
+    </xml>
+    <xml name="coverage" token_label="">
+        <param argument="--coverage" type="integer" value="10" label="@LABEL@"/>
+    </xml>
     <xml name="exclude">
         <param argument="--exclude" type="data" format="tabular" optional="true" label="Select file with regions to exclude"/>
     </xml>
     <xml name="genome">
-        <param argument="--genome" type="data" format="fasta" label="Select genome"/>
+        <param argument="--genome" type="data" format="fasta" label="Select genome file"/>
     </xml>
     <xml name="genoqual">
         <param name="genoqual" type="integer" value="5" label="Set minimum mapping quality for genotyping" help="(--geno-qual)"/>
     </xml>
+    <xml name="input" token_format="" token_multiple="false" token_label="">
+        <param name="input" type="data" format="@FORMAT@" multiple="@MULTIPLE@" label="@LABEL@"/>
+    </xml>
+    <xml name="maxreadsep" token_default="">
+        <param argument="--maxreadsep" type="integer" value="@DEFAULT@" label="Set maximum read separation"/>
+    </xml>
+    <xml name="maxsize" token_default="" token_label="">
+        <param argument="--maxsize" type="integer" value="@DEFAULT@" label="@LABEL@"/>
+    </xml>
     <xml name="minclip">
         <param argument="--minclip" type="integer" value="25" label="Set minimum clipping length"/>
     </xml>
-    <xml name="maxreadsep" token_default="40">
-        <param argument="--maxreadsep" type="integer" value="@DEFAULT@" label="Set maximum read separation"/>
+    <xml name="mincliquesize">
+        <param name="mincliquesize" type="integer" value="2" label="Set minimum paired-end/split-read clique size" help="(--min-clique-size)"/>
     </xml>
-    <xml name="maxsize" token_default="1000000">
-        <param argument="--maxsize" type="integer" value="@DEFAULT@" label="Set maximum SV size"/>
-    </xml>
-    <xml name="mincliquesize">
-        <param name="mincliquesize" type="integer" value="2" label="Set minimum min. PE/SR clique size" help="(--min-clique-size)"/>
-    </xml>
-    <xml name="minrefsep" token_default="25">
+    <xml name="minrefsep" token_default="">
         <param argument="--minrefsep" type="integer" value="@DEFAULT@" label="Set minimum reference separation"/>
     </xml>
-    <xml name="minsize">
-        <param argument="--minsize" type="integer" value="0" label="Set minimum SV size"/>
+    <xml name="minsize" token_default="" token_label="">
+        <param argument="--minsize" type="integer" value="@DEFAULT@" label="@LABEL@"/>
+    </xml>
+    <xml name="pass">
+        <param argument="--pass" type="boolean" truevalue="--pass" falsevalue="" label="Filter sites for PASS?"/>
     </xml>
-    <xml name="samples" token_format="bam" token_multiple="true" token_label="Select sample file(s)">
-        <param name="samples" type="data" format="@FORMAT@" multiple="@MULTIPLE@" label="@LABEL@"/>
+    <xml name="ploidy">
+        <param argument="--ploidy" type="integer" value="2" label="Set baseline ploidy"/>
+    </xml>
+    <xml name="samples">
+        <param argument="--samples" type="data" format="tabular" label="Select sample file" help="Two-column sample file listing sample name and tumor or control."/>
     </xml>
     <xml name="svtype">
         <param argument="--svtype" type="select" label="Select type(s) of structural variants to detect">
             <option value="ALL" selected="true">All types (ALL)</option>
             <option value="DEL">Deletion (DEL)</option>
+            <option value="DUP">Duplication (DUP)</option>
             <option value="INS">Insertion (INS)</option>
-            <option value="DUP">Duplication (DUP)</option>
             <option value="INV">Inversion (INV)</option>
             <option value="BND">Translocation (BND)</option>
         </param>
     </xml>
     <xml name="vcffile">
-        <param argument="--vcffile" type="data" format="vcf,bcf" optional="true" label="Select genotyping file"/>
+        <param argument="--vcffile" type="data" format="bcf,vcf" optional="true" label="Select genotyping file"/>
     </xml>

-    <!--
-        output
-    -->
+    <!-- output -->

+    <xml name="bcf">
+        <data name="out_bcf" format="bcf" from_work_dir="result.bcf" label="${tool.name} on ${on_string}: Result (BCF)">
+            <filter>'bcf' in oo['out']</filter>
+        </data>
+    </xml>
     <xml name="vcf">
         <data name="out_vcf" format="vcf" from_work_dir="result.vcf" label="${tool.name} on ${on_string}: Result (VCF)">
             <filter>'vcf' in oo['out']</filter>
         </data>
     </xml>
-     <xml name="bcf">
-        <data name="out_bcf" format="bcf" from_work_dir="result.bcf" label="${tool.name} on ${on_string}: Result (BCF)">
-            <filter>'bcf' in oo['out']</filter>
-        </data>
-    </xml>
     <xml name="dump">
         <data name="out_dump" format="tabular" from_work_dir="dump.tsv" label="${tool.name} on ${on_string}: SV-reads">
             <filter>'dump' in oo['out']</filter>
@@ -116,12 +125,25 @@
         </data>
     </xml>

-    <!--
-        Help
-    -->
+    <!-- help -->

     <token name="@WID@"><![CDATA[
 Delly is an integrated structural variant (SV) prediction method that can discover, genotype and visualize deletions, tandem duplications, inversions and translocations at single-nucleotide resolution in short-read massively parallel sequencing data. It uses paired-ends, split-reads and read-depth to sensitively and accurately delineate genomic rearrangements throughout the genome.
+
+Short-read SV calling
+
+- *call* to discover and genotype structural variants
+- *merge* structural variants across VCF/BCF files and within a single VCF/BCF file
+- *filter* somatic or germline structural variants
+
+Long-read SV calling
+
+- *lr* for long-read SV discovery
+
+Copy-number variant calling
+
+- *cnv* to discover and genotype copy-number variants
+- *classify* somatic or germline copy-number variants
     ]]></token>
     <token name="@REFERENCES@"><![CDATA[
 More information are available on `GitHub <https://github.com/dellytools/delly>`_.