Mercurial > repos > iuc > vsnp_add_zero_coverage
view vsnp_add_zero_coverage.xml @ 0:0ad85e7db2fc draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
author | iuc |
---|---|
date | Sun, 27 Sep 2020 10:07:44 +0000 |
parents | |
children | 2e863710a2f0 |
line wrap: on
line source
<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.0" profile="@PROFILE@">
    <!--
        Galaxy wrapper around vsnp_add_zero_coverage.py.
        Takes a BAM file plus its associated VCF file (or parallel list
        collections of each) and a reference genome, and produces a VCF and a
        metrics file per BAM/VCF combination.
        The @WRAPPER_VERSION@ / @PROFILE@ tokens and the param_input_type,
        param_reference_source and citations macros are defined in macros.xml.
    -->
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="1.76">biopython</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="0.15.4">pysam</requirement>
    </requirements>
    <!--
        Command template (Cheetah). Directives are line-oriented, so each one
        must sit on its own line.
    -->
    <command detect_errors="exit_code"><![CDATA[
#import os
#import re
#set input_type = $input_type_cond.input_type
## Working directories, relative to the job working directory. The two output
## directories are the ones scanned by <discover_datasets> in collection mode.
## NOTE(review): none of these directories is passed to the script on the
## command line, so the script presumably uses the same names as defaults -
## confirm against vsnp_add_zero_coverage.py.
#set input_bam_dir = 'input_bam_dir'
#set input_vcf_dir = 'input_vcf_dir'
#set output_vcf_dir = 'output_vcf_dir'
#set output_metrics_dir = 'output_metrics_dir'
mkdir -p $input_bam_dir &&
mkdir -p $input_vcf_dir &&
mkdir -p $output_vcf_dir &&
mkdir -p $output_metrics_dir &&
#if str($input_type) == "single":
    ## Single mode: link each dataset under the base name of its file on disk.
    #set bam_input = $input_type_cond.bam_input
    #set file_name = $bam_input.file_name
    #set file_name_base = $os.path.basename($file_name)
    ln -s '$file_name' '$input_bam_dir/$file_name_base' &&
    #set vcf_input = $input_type_cond.vcf_input
    #set file_name = $vcf_input.file_name
    #set file_name_base = $os.path.basename($file_name)
    ln -s '$file_name' '$input_vcf_dir/$file_name_base' &&
#else:
    ## Collection mode: link each element under its element identifier, with
    ## every character that is not whitespace, a word character or '-'
    ## replaced by '_'.
    #for $i in $input_type_cond.bam_input_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_bam_dir/$identifier' &&
    #end for
    #for $i in $input_type_cond.vcf_input_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_vcf_dir/$identifier' &&
    #end for
#end if
python '$__tool_directory__/vsnp_add_zero_coverage.py'
--processes \${GALAXY_SLOTS:-4}
#if str($reference_cond.reference_source) == "cached":
    --reference '$reference_cond.reference.fields.path'
#else:
    --reference '$reference_cond.reference'
#end if
## Explicit output paths are only passed in single mode; in collection mode
## the outputs are discovered from the output directories instead.
#if str($input_type) == "single":
    --output_metrics '$output_metrics'
    --output_vcf '$output_vcf'
#end if
]]></command>
    <inputs>
        <!-- Either one BAM + one VCF dataset, or two list collections. -->
        <conditional name="input_type_cond">
            <expand macro="param_input_type"/>
            <when value="single">
                <param name="bam_input" type="data" format="bam" label="BAM file">
                    <validator type="unspecified_build"/>
                </param>
                <param name="vcf_input" type="data" format="vcf" label="VCF file">
                    <validator type="unspecified_build"/>
                </param>
            </when>
            <when value="collection">
                <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
                    <validator type="unspecified_build"/>
                </param>
                <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
                    <validator type="unspecified_build"/>
                </param>
            </when>
        </conditional>
        <!-- Reference genome: an entry of the fasta_indexes data table, or a fasta dataset from the history. -->
        <conditional name="reference_cond">
            <expand macro="param_reference_source"/>
            <when value="cached">
                <param name="reference" type="select" label="Using reference genome">
                    <!--
                        The option list is intentionally left unfiltered: every
                        entry of the fasta_indexes data table is offered.
                        NOTE(review): the validator message below refers to the
                        build of the selected BAM file, although the options are
                        not filtered by build. Confirm the intended wording.
                    -->
                    <options from_data_table="fasta_indexes"/>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
                </param>
            </when>
            <when value="history">
                <param name="reference" type="data" format="fasta" label="Using reference genome">
                    <validator type="no_options" message="The current history does not include a fasta dataset"/>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single mode yields two datasets; collection mode yields two discovered list collections. -->
        <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}">
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
            <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf"/>
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
        <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}">
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
            <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular"/>
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Collections of inputs, reference taken from the history. -->
        <test>
            <param name="input_type" value="collection"/>
            <param name="bam_input_collection">
                <collection type="list">
                    <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
                    <element name="bam_input2.bam" value="bam_input.bam" dbkey="89"/>
                </collection>
            </param>
            <param name="vcf_input_collection">
                <collection type="list">
                    <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
                    <element name="vcf_input2.vcf" value="vcf_input.vcf" dbkey="89"/>
                </collection>
            </param>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <output_collection name="output_vcf_collection" type="list">
                <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
                <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            </output_collection>
            <output_collection name="output_metrics_collection" type="list">
                <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
                <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
            </output_collection>
        </test>
        <!-- Single inputs, reference taken from the history. -->
        <test>
            <!-- Select the branch explicitly instead of relying on the macro's default. -->
            <param name="input_type" value="single"/>
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
        </test>
        <!-- Single inputs, locally cached reference (no reference param: the select keeps its default option). -->
        <test>
            <!-- Select the branch explicitly instead of relying on the macro's default. -->
            <param name="input_type" value="single"/>
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="cached"/>
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
        </test>
    </tests>
    <help>
**What it does**

Accepts a combination of single BAM and associated VCF files (or associated collections of each) to
produce a VCF file for each combination whose positions with no coverage are represented as "N". These
outputs are restricted to SNPs and those regions along the reference with no coverage. A metrics file is
produced for each combination which provides the number of good SNPs, the average coverage and the genome
coverage percentage.

**Required Options**

* **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.

* **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
    </help>
    <expand macro="citations"/>
</tool>