diff snp.xml @ 0:179342c7b86c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 4d3dfd4bcb567178107dcfd808ff03f9fec0bdbd
author iuc
date Wed, 12 Oct 2022 07:43:59 +0000
parents
children 630e6aeeb7e8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snp.xml	Wed Oct 12 07:43:59 2022 +0000
@@ -0,0 +1,169 @@
+<tool id="medaka_snp" name="medaka SNP tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>decodes probabilities to SNPs</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+## initialize
+@REF_FASTA@
+
+
+#if $pool.pool_mode == "Yes":
+    ## run
+    medaka snp
+    ## optional
+    --debug
+    #if $regions
+        --regions '${regions}'
+    #end if
+    --threshold $threshold
+    #if $ref_vcf
+        --ref_vcf '$ref_vcf'
+    #end if
+    $verbose
+    ## required
+    reference.fa
+    #for $current in $pool.inputs
+        '$current'
+    #end for
+#elif $pool.pool_mode == "No":
+    ## run
+    medaka snp
+    ## optional
+    --debug
+    #if $regions
+        --regions '${regions}'
+    #end if
+    --threshold $threshold
+    #if $ref_vcf
+        --ref_vcf '$ref_vcf'
+    #end if
+    $verbose
+    ## required
+    reference.fa
+    '$pool.input'
+#end if
+#if str($output_annotated.output_annotated_select) == 'false':
+    '$out_SNPs' ##output
+    2>&1 | tee '$out_log'
+#else
+    raw.vcf ##output of medaka snp
+    2>&1 | tee '$out_log'
+    && ln -s '$output_annotated.in_bam' in.bam
+    && ln -s '$output_annotated.in_bam.metadata.bam_index' in.bai
+    && medaka tools annotate --dpsp --pad $output_annotated.pad raw.vcf reference.fa in.bam tmp.vcf
+    && python '$__tool_directory__/convert_VCF_info_fields.py' tmp.vcf '$out_SNPs'
+#end if
+    ]]></command>
+    <inputs>
+        <conditional name="pool">
+            <param name="pool_mode"  type="select" label="Are you pooling HDF5 datasets?">
+                <option value="No" selected="true">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="Yes">
+                <param name="inputs" type="data" format="h5" multiple="true" label="Select consensus file(s)"/>
+            </when>
+            <when value="No">
+                <param name="input" type="data" format="h5" label="Select consensus file(s)"/>
+            </when>
+        </conditional>
+        <expand macro="reference"/>
+        <param argument="--regions" type="text" value="" optional="true" label="Set reference names to limit SNP calling" help="Separated by ','.">
+            <sanitizer invalid_char="">
+                <valid initial="string.ascii_letters,string.digits">
+                    <add value="_"/>
+                    <add value=","/>
+                    <add value="."/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--threshold" type="float" label="Threshold for considering secondary calls" value="0.04" min="0" max="1" help="A value of 1 will result in haploid decoding" optional="true"/>
+        <param name="ref_vcf" type="data" format="vcf" optional="true" label="Reference vcf"/>        
+        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" label="Populate VCF info fields?"/>
+        <conditional name="output_annotated">
+            <param name="output_annotated_select" type="select"
+            label="Type of VCF to generate"
+            help="SNP INFO fields in the VCF can be extended to include allele frequency, depth of coverage, etc., but this requires a BAM dataset to calculate those values from.">
+                <option value="true" selected="true">Write annotated VCF with extended INFO</option>
+                <option value="false">Write original decoded VCF with minimal INFO field</option>
+            </param>
+            <when value="true">
+                <param name="in_bam" type="data" format="bam" optional="false" label="BAM to caclulate additional INFO fields from"/>
+                <param name="pad" type="integer" min="1" value="25"
+                label="Padding width on either side of SNP for realignment"
+                help="To calculate the additional INFO fields the tool will run medaka tools anntotate, which performs local realignment of the region +- this width around each SNP. All calculated new fields will depend on the width chosen, so only change this value if you know what you are doing." />
+            </when>
+            <when value="false"/>
+        </conditional>
+        <param name="output_log_bool" type="boolean" label="Output log file?" checked="true"/>
+    </inputs>
+    <outputs>
+        <data name="out_SNPs" format="vcf" label="${tool.name} on ${on_string}: called SNPs"/>
+        <data name="out_log" format="tabular" label="${tool.name} on ${on_string}: Log">
+            <filter>output_log_bool</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!--No annotation or log-->
+        <test expect_num_outputs="1">
+            <conditional name="pool">
+                <param name="pool_mode" value="No"/>
+                <param name="input" value="medaka_test.hdf"/>
+            </conditional>
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="ref.fasta"/>
+            </conditional>
+            <conditional name="output_annotated">
+                <param name="output_annotated_select" value="false"/>
+            </conditional>
+            <param name="output_log_bool" value="false"/>
+            <output name="out_SNPs">
+                <assert_contents>
+                    <has_n_lines n="7"/>
+                    <has_line line="##fileformat=VCFv4.1" />
+                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+This module decodes probabilities to SNPs but NOT indels.
+
+For a more general solution see the medaka *variant* tool.
+----
+
+.. class:: infomark
+
+**Input**
+
+- reference sequence (FASTA)
+- (several) consensus files (H5/HDF)
+
+----
+
+.. class:: infomark
+
+**Output**
+
+- decoded SNP probabilities (VCF)
+
+----
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>