Mercurial > repos > iuc > medaka_snp
diff snp.xml @ 0:179342c7b86c draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 4d3dfd4bcb567178107dcfd808ff03f9fec0bdbd
author | iuc |
---|---|
date | Wed, 12 Oct 2022 07:43:59 +0000 |
parents | |
children | 630e6aeeb7e8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snp.xml Wed Oct 12 07:43:59 2022 +0000
@@ -0,0 +1,167 @@
+<tool id="medaka_snp" name="medaka SNP tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>decodes probabilities to SNPs</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <!-- Runs "medaka snp" on one or several (pooled) HDF5 inputs and, on request, annotates the calls with "medaka tools annotate". -->
+    <command detect_errors="exit_code"><![CDATA[
+## medaka snp is piped into tee below; without pipefail the pipeline would
+## report the exit code of tee and a failing medaka run would go unnoticed
+set -o pipefail;
+
+## initialize
+@REF_FASTA@
+
+## run
+medaka snp
+    ## optional
+    --debug
+    #if $regions
+        --regions '${regions}'
+    #end if
+    ## the threshold parameter is optional, so only pass it when a value is set
+    #if str($threshold) != ''
+        --threshold $threshold
+    #end if
+    #if $ref_vcf
+        --ref_vcf '$ref_vcf'
+    #end if
+    $verbose
+    ## required
+    reference.fa
+    #if $pool.pool_mode == "Yes":
+        #for $current in $pool.inputs
+            '$current'
+        #end for
+    #elif $pool.pool_mode == "No":
+        '$pool.input'
+    #end if
+#if str($output_annotated.output_annotated_select) == 'false':
+    ## output: the decoded VCF is the final result
+    '$out_SNPs'
+    2>&1 | tee '$out_log'
+#else
+    ## output of medaka snp, annotated and converted below
+    raw.vcf
+    2>&1 | tee '$out_log'
+    && ln -s '$output_annotated.in_bam' in.bam
+    && ln -s '$output_annotated.in_bam.metadata.bam_index' in.bai
+    && medaka tools annotate --dpsp --pad $output_annotated.pad raw.vcf reference.fa in.bam tmp.vcf
+    && python '$__tool_directory__/convert_VCF_info_fields.py' tmp.vcf '$out_SNPs'
+#end if
+    ]]></command>
+    <inputs>
+        <conditional name="pool">
+            <param name="pool_mode" type="select" label="Are you pooling HDF5 datasets?">
+                <option value="No" selected="true">No</option>
+                <option value="Yes">Yes</option>
+            </param>
+            <when value="Yes">
+                <param name="inputs" type="data" format="h5" multiple="true" label="Select consensus file(s)"/>
+            </when>
+            <when value="No">
+                <param name="input" type="data" format="h5" label="Select consensus file(s)"/>
+            </when>
+        </conditional>
+        <expand macro="reference"/>
+        <param argument="--regions" type="text" value="" optional="true" label="Set reference names to limit SNP calling" help="Separated by ','.">
+            <!-- Whitelist of letters, digits, '_', ',' and '.'; every other character is removed from the value. -->
+            <sanitizer invalid_char="">
+                <valid initial="string.ascii_letters,string.digits">
+                    <add value="_"/>
+                    <add value=","/>
+                    <add value="."/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param argument="--threshold" type="float" label="Threshold for considering secondary calls" value="0.04" min="0" max="1" help="A value of 1 will result in haploid decoding" optional="true"/>
+        <param name="ref_vcf" type="data" format="vcf" optional="true" label="Reference vcf"/>
+        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" label="Populate VCF info fields?"/>
+        <conditional name="output_annotated">
+            <param name="output_annotated_select" type="select"
+                   label="Type of VCF to generate"
+                   help="SNP INFO fields in the VCF can be extended to include allele frequency, depth of coverage, etc., but this requires a BAM dataset to calculate those values from.">
+                <option value="true" selected="true">Write annotated VCF with extended INFO</option>
+                <option value="false">Write original decoded VCF with minimal INFO field</option>
+            </param>
+            <when value="true">
+                <param name="in_bam" type="data" format="bam" optional="false" label="BAM to calculate additional INFO fields from"/>
+                <param name="pad" type="integer" min="1" value="25"
+                       label="Padding width on either side of SNP for realignment"
+                       help="To calculate the additional INFO fields the tool will run medaka tools annotate, which performs local realignment of the region +- this width around each SNP. All calculated new fields will depend on the width chosen, so only change this value if you know what you are doing." />
+            </when>
+            <when value="false"/>
+        </conditional>
+        <param name="output_log_bool" type="boolean" label="Output log file?" checked="true"/>
+    </inputs>
+    <outputs>
+        <data name="out_SNPs" format="vcf" label="${tool.name} on ${on_string}: called SNPs"/>
+        <!-- The log dataset is filtered on the "Output log file?" switch. -->
+        <data name="out_log" format="tabular" label="${tool.name} on ${on_string}: Log">
+            <filter>output_log_bool</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!--No annotation or log-->
+        <test expect_num_outputs="1">
+            <conditional name="pool">
+                <param name="pool_mode" value="No"/>
+                <param name="input" value="medaka_test.hdf"/>
+            </conditional>
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="ref.fasta"/>
+            </conditional>
+            <conditional name="output_annotated">
+                <param name="output_annotated_select" value="false"/>
+            </conditional>
+            <param name="output_log_bool" value="false"/>
+            <output name="out_SNPs">
+                <assert_contents>
+                    <has_n_lines n="7"/>
+                    <has_line line="##fileformat=VCFv4.1" />
+                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+This module decodes probabilities to SNPs but NOT indels.
+
+For a more general solution see the medaka *variant* tool.
+
+----
+
+.. class:: infomark
+
+**Input**
+
+- reference sequence (FASTA)
+- (several) consensus files (H5/HDF)
+
+----
+
+.. class:: infomark
+
+**Output**
+
+- decoded SNP probabilities (VCF)
+
+----
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>