<!--
    Galaxy tool wrapper for vsnp_add_zero_coverage.py.

    Takes a BAM file and its associated VCF file (or a list collection of each)
    plus a reference genome, and produces a VCF output and a tabular metrics
    output per BAM/VCF combination.
-->
<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0">
    <description></description>
    <requirements>
        <requirement type="package" version="1.76">biopython</requirement>
        <requirement type="package" version="1.16.5">numpy</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="0.15.4">pysam</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
#import os
#import re
## Inputs are symlinked into fixed directories inside the job working directory.
## NOTE(review): these directory names are not passed to the script on the
## command line, so it presumably relies on the same hard-coded names - confirm
## against vsnp_add_zero_coverage.py before renaming any of them.
#set input_type = $input_type_cond.input_type
#set input_bam_dir = 'input_bam_dir'
#set input_vcf_dir = 'input_vcf_dir'
#set output_vcf_dir = 'output_vcf_dir'
#set output_metrics_dir = 'output_metrics_dir'
mkdir -p $input_bam_dir &&
mkdir -p $input_vcf_dir &&
mkdir -p $output_vcf_dir &&
mkdir -p $output_metrics_dir &&
#if str($input_type) == "single":
    ## Single mode: link each dataset under the base name of its path on disk.
    #set bam_input = $input_type_cond.bam_input
    #set file_name = $bam_input.file_name
    #set file_name_base = $os.path.basename($file_name)
    ln -s '$file_name' '$input_bam_dir/$file_name_base' &&
    #set vcf_input = $input_type_cond.vcf_input
    #set file_name = $vcf_input.file_name
    #set file_name_base = $os.path.basename($file_name)
    ln -s '$file_name' '$input_vcf_dir/$file_name_base' &&
#else:
    ## Collection mode: link each element under its sanitised element identifier.
    ## The regex keeps whitespace, word characters and hyphens, so the link
    ## target must be quoted; single quotes are safe because the same regex
    ## replaces any quote character in the identifier with an underscore.
    #for $i in $input_type_cond.bam_input_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_bam_dir/$identifier' &&
    #end for
    #for $i in $input_type_cond.vcf_input_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_vcf_dir/$identifier' &&
    #end for
#end if
python '$__tool_directory__/vsnp_add_zero_coverage.py'
--processes $processes
#if str($reference_cond.reference_source) == "cached":
    --reference '$reference_cond.reference.fields.path'
#else:
    --reference '$reference_cond.reference'
#end if
#if str($input_type) == "single":
    ## Explicit output paths are only passed in single mode; in collection mode
    ## the outputs are discovered from the output directories created above.
    --output_metrics '$output_metrics'
    --output_vcf '$output_vcf'
#end if
    ]]></command>
    <inputs>
        <!-- Either one BAM plus one VCF dataset, or a list collection of each. -->
        <conditional name="input_type_cond">
            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
                <option value="single" selected="true">Single files</option>
                <option value="collection">Collections of files</option>
            </param>
            <when value="single">
                <param name="bam_input" type="data" format="bam" label="BAM file">
                    <validator type="unspecified_build"/>
                </param>
                <param name="vcf_input" type="data" format="vcf" label="VCF file">
                    <validator type="unspecified_build"/>
                </param>
            </when>
            <when value="collection">
                <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
                    <validator type="unspecified_build"/>
                </param>
                <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
                    <validator type="unspecified_build"/>
                </param>
            </when>
        </conditional>
        <!-- Reference genome: an entry of the fasta_indexes data table or a fasta dataset from the history. -->
        <conditional name="reference_cond">
            <param name="reference_source" type="select" label="Choose the source for the reference genome">
                <option value="cached" selected="true">locally cached</option>
                <option value="history">from history</option>
            </param>
            <when value="cached">
                <param name="reference" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes"/>
                    <!-- No <filter> tag here! -->
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
                </param>
            </when>
            <when value="history">
                <param name="reference" type="data" format="fasta" label="Using reference genome">
                    <validator type="no_options" message="The current history does not include a fasta dataset"/>
                </param>
            </when>
        </conditional>
        <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
    </inputs>
    <outputs>
        <!-- Single mode yields plain datasets; collection mode yields list collections discovered from the output directories. -->
        <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}">
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
            <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" />
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
        <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}">
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
            <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" />
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Collection inputs with a reference taken from the history. -->
        <test>
            <param name="input_type" value="collection"/>
            <param name="bam_input_collection">
                <collection type="list">
                    <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
                    <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/>
                </collection>
            </param>
            <param name="vcf_input_collection">
                <collection type="list">
                    <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
                    <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/>
                </collection>
            </param>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <output_collection name="output_vcf_collection" type="list">
                <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
                <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            </output_collection>
            <output_collection name="output_metrics_collection" type="list">
                <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
                <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
            </output_collection>
        </test>
        <!-- Single-file inputs with a reference taken from the history. -->
        <test>
            <param name="input_type" value="single"/>
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <!-- Expected outputs are declared with <output file="..."/>; a <param> here would never be compared. -->
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
        </test>
    </tests>
    <help>
**What it does**

Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each
combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions
along the reference with no coverage.

A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
coverage percentage.

**Required Options**

* **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.
* **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
* **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
    </help>
    <citations>
        <citation type="bibtex">
            @misc{None,
            journal = {None},
            author = {1. Stuber T},
            title = {Manuscript in preparation},
            year = {None},
            url = {https://github.com/USDA-VS/vSNP},}
        </citation>
    </citations>
</tool>