<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.2" profile="@PROFILE@">
    <description></description>
    <!-- Shared definitions. The @WRAPPER_VERSION@ and @PROFILE@ tokens used on the tool
         tag, and the param_reference_source and citations macros expanded below, are
         expected to come from macros.xml (not shown here; confirm when editing). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Package versions required by this tool. -->
    <requirements>
        <requirement type="package" version="1.76">biopython</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="0.15.4">pysam</requirement>
    </requirements>
    <!-- Cheetah template for the job command line. The BAM and VCF inputs are
         symlinked into the working directory under names derived from their element
         identifiers, then passed to vsnp_add_zero_coverage.py together with the
         reference and the two output paths. -->
    <command detect_errors="exit_code"><![CDATA[
#import re

## The identifier for both of the following files is likely the same
## string, so we append a file extension to allow for both links.
## Every character that is not whitespace, a word character or a hyphen
## is replaced with an underscore before the name is used for the link.
#set bam_identifier = re.sub('[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
ln -s '${bam_input}' '${bam_identifier}' &&
#set vcf_identifier = re.sub('[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
ln -s '${vcf_input}' '${vcf_identifier}' &&

python '$__tool_directory__/vsnp_add_zero_coverage.py'
--bam_input '$bam_identifier'
--vcf_input '$vcf_identifier'
## A cached reference is a data table entry, so its path field is used;
## a history reference is a dataset and is passed directly.
#if str($reference_cond.reference_source) == 'cached'
--reference '$reference_cond.reference.fields.path'
#else
--reference '$reference_cond.reference'
#end if
--output_metrics '$output_metrics'
--output_vcf '$output_vcf'
]]></command>
    <inputs>
        <param name="bam_input" type="data" format="bam" label="BAM file"/>
        <param name="vcf_input" type="data" format="vcf" label="VCF file"/>
        <!-- The reference comes either from the fasta_indexes data table ("cached")
             or from a fasta dataset in the current history ("history"). -->
        <conditional name="reference_cond">
            <expand macro="param_reference_source"/>
            <when value="cached">
                <param name="reference" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes">
                        <!-- Only offer entries whose column 1 matches the dbkey of the selected BAM. -->
                        <filter type="data_meta" column="1" key="dbkey" ref="bam_input"/>
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference" type="data" format="fasta,fasta.gz" label="Using reference genome">
                    <validator type="no_options" message="The current history does not include a fasta dataset"/>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (filtered VCF)"/>
        <data name="output_metrics" format="tabular" label="${tool.name} on ${on_string} (metrics)"/>
    </outputs>
    <!-- Both tests run the same BAM/VCF pair (dbkey 89). The first takes the reference
         from the history; the second uses the cached reference.
         NOTE(review): the cached test presumably relies on a fasta_indexes entry for
         dbkey 89 in the test data table configuration; confirm it exists. -->
    <tests>
        <test expect_num_outputs="2">
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular"/>
        </test>
        <test expect_num_outputs="2">
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="cached"/>
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each
combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions
along the reference with no coverage.

A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
coverage percentage.

**Required Options**

 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
]]></help>
    <expand macro="citations"/>
</tool>