comparison vsnp_get_snps.xml @ 3:14285a94fb13 draft

Uploaded
author greg
date Sun, 03 Jan 2021 16:06:33 +0000
parents 7471707d3fb4
children b53282eecec2
comparison
equal deleted inserted replaced
2:7471707d3fb4 3:14285a94fb13
1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="1.0.0"> 1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@WRAPPER_VERSION@.0" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
3 <requirements> 6 <requirements>
4 <requirement type="package" version="0.25.3">pandas</requirement> 7 <requirement type="package" version="0.25.3">pandas</requirement>
5 <requirement type="package" version="0.6.8">pyvcf</requirement> 8 <requirement type="package" version="0.6.8">pyvcf</requirement>
6 <requirement type="package" version="1.2.0">xlrd</requirement> 9 <requirement type="package" version="1.2.0">xlrd</requirement>
7 </requirements> 10 </requirements>
8 <command detect_errors="exit_code"><![CDATA[ 11 <command detect_errors="exit_code"><![CDATA[
9 #import os 12 #import re
13
10 #set input_vcf_dir = 'input_vcf_dir' 14 #set input_vcf_dir = 'input_vcf_dir'
11 #set input_zc_vcf_type = $input_zc_vcf_type_cond.input_zc_vcf_type
12 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' 15 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
13 #set output_json_snps_dir = 'output_json_snps_dir' 16 #set output_json_snps_dir = 'output_json_snps_dir'
14 #set output_snps_dir = 'output_snps_dir' 17 #set output_snps_dir = 'output_snps_dir'
18
15 mkdir -p $input_vcf_dir && 19 mkdir -p $input_vcf_dir &&
16 mkdir -p $output_json_avg_mq_dir && 20 mkdir -p $output_json_avg_mq_dir &&
17 mkdir -p $output_json_snps_dir && 21 mkdir -p $output_json_snps_dir &&
18 mkdir -p $output_snps_dir && 22 mkdir -p $output_snps_dir &&
19 #set reference = '?' 23
24 #set dbkey = '?'
20 #for $i in $input_vcf_collection: 25 #for $i in $input_vcf_collection:
21 #set reference = $i.metadata.dbkey 26 #if str($dbkey) == '?':
22 #set filename = $i.file_name 27 #set dbkey = $i.metadata.dbkey
23 #set name = $i.name 28 #else if str($dbkey) != $i.metadata.dbkey:
24 ln -s '$filename' '$input_vcf_dir/$name' && 29 >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique" &&
30 exit 1
31 #end if
32 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
33 ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' &&
25 #end for 34 #end for
26 #if str($input_zc_vcf_type) == "single": 35 #if str($dbkey) == '?':
27 #set input_zc_vcf = $input_zc_vcf_type_cond.input_zc_vcf 36 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
28 #set file_name_base = $os.path.basename($input_zc_vcf.file_name) 37 #end if
29 ln -s '$input_zc_vcf' '$input_vcf_dir/$file_name_base' && 38 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single":
39 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier))
40 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' &&
30 #else 41 #else
31 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: 42 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection:
32 #set filename = $i.file_name 43 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
33 #set name = $i.name 44 ln -s '${i}' '$input_vcf_dir/${zc_vcf_identifier}' &&
34 ln -s '$filename' '$input_vcf_dir/$name' &&
35 #end for 45 #end for
36 #end if 46 #end if
37 #if str($excel_grouper_cond.excel_grouper) == "yes": 47 #if str($input_excel_cond.input_excel_param) == 'yes':
38 #set excel_file = 'No genome specified for input VCF (database) file(s)' 48 #if str($input_excel_cond.excel_source_cond.excel_source) == 'cached':
39 #set excel_grouper_source = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_source 49 #set excel_file = 'No genome specified for input VCF (database) file(s)'
40 #if str($excel_grouper_source) == "cached":
41 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() 50 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields()
51 ## The value of excel_fields is a nested list that looks like this.
52 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...]
42 #for $i in $excel_fields: 53 #for $i in $excel_fields:
43 #if str($i[0]) == $reference: 54 #if str($i[0]) == $dbkey:
44 #set excel_file = $i[2] 55 #set excel_file = $i[2]
45 #break 56 #break
46 #end if 57 #end if
47 #end for 58 #end for
48 #else: 59 #else:
49 #set excel_file = $excel_grouper_cond.excel_grouper_source_cond.excel_grouper_file 60 #set excel_file = $input_excel_cond.excel_source_cond.input_excel
50 #end if 61 #end if
51 #end if 62 #end if
52 python '$__tool_directory__/vsnp_get_snps.py' 63 python '$__tool_directory__/vsnp_get_snps.py'
53 --processes $processes 64 --ac $ac
54 --reference '$reference' 65 #if str($input_excel_cond.input_excel_param) == 'yes':
55 #if str($excel_grouper_cond.excel_grouper) == "yes": 66 --input_excel '$excel_file'
56 --excel_grouper_file '$excel_file' 67 #end if
57 #end if 68 $all_isolates
58 #if str($all_isolates) == "Yes": 69 --input_vcf_dir '$input_vcf_dir'
59 --all_isolates '$all_isolates' 70 --min_mq $min_mq
60 #end if 71 --min_quality_score $min_quality_score
72 --output_json_avg_mq_dir '$output_json_avg_mq_dir'
73 --output_json_snps_dir '$output_json_snps_dir'
74 --output_snps_dir '$output_snps_dir'
61 --output_summary '$output_summary' 75 --output_summary '$output_summary'
76 --processes \${GALAXY_SLOTS:-8}
77 --quality_score_n_threshold $quality_score_n_threshold
78 --dbkey '$dbkey'
62 ]]></command> 79 ]]></command>
63 <inputs> 80 <inputs>
64 <conditional name="input_zc_vcf_type_cond"> 81 <conditional name="input_zc_vcf_type_cond">
65 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> 82 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed">
66 <option value="single" selected="true">A single zero coverage VCF file</option> 83 <option value="collection" selected="true">A collection of zero coverage VCF files</option>
67 <option value="collection">A collection of zero coverage VCF files</option> 84 <option value="single">A single zero coverage VCF file</option>
68 </param> 85 </param>
69 <when value="single"> 86 <when value="single">
70 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file"> 87 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file"/>
71 <validator type="unspecified_build"/>
72 </param>
73 </when> 88 </when>
74 <when value="collection"> 89 <when value="collection">
75 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"> 90 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/>
76 <validator type="unspecified_build"/>
77 </param>
78 </when> 91 </when>
79 </conditional> 92 </conditional>
80 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of VCF files against which to analyze the zero coverages VCF file(s)"> 93 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/>
81 <validator type="unspecified_build"/> 94 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/>
82 </param> 95 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/>
83 <conditional name="excel_grouper_cond"> 96 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/>
84 <param name="excel_grouper" type="select" label="Use Excel file for grouping and filtering?"> 97 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/>
98 <conditional name="input_excel_cond">
99 <param name="input_excel_param" type="select" label="Use Excel file for grouping and filtering?">
85 <option value="yes" selected="true">Yes</option> 100 <option value="yes" selected="true">Yes</option>
86 <option value="no">No</option> 101 <option value="no">No</option>
87 </param> 102 </param>
88 <when value="yes"> 103 <when value="yes">
89 <conditional name="excel_grouper_source_cond"> 104 <conditional name="excel_source_cond">
90 <param name="excel_grouper_source" type="select" label="Choose the source for the Excel file"> 105 <param name="excel_source" type="select" label="Choose the source for the Excel file">
91 <option value="cached">locally cached</option> 106 <option value="cached">locally cached</option>
92 <option value="history">from history</option> 107 <option value="history">from history</option>
93 </param> 108 </param>
94 <when value="cached"> 109 <when value="cached">
95 <param name="excel_grouper_file" type="select" label="Excel file" help="Selection will be overridden if it does not match the dbkeys associated with the collection of VCF files being analyzed"> 110 <param name="input_excel" type="select" label="Excel file">
96 <options from_data_table="vsnp_excel"/> 111 <options from_data_table="vsnp_excel">
97 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> 112 <filter type="data_meta" column="0" key="dbkey" ref="input_vcf_collection"/>
113 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
114 </options>
98 </param> 115 </param>
99 </when> 116 </when>
100 <when value="history"> 117 <when value="history">
101 <param name="excel_grouper_file" type="data" format="xlsx" label="Excel file"> 118 <param name="input_excel" type="data" format="xlsx" label="Excel file"/>
102 <validator type="no_options" message="The current history does not include an xlsx dataset that can be used for grouping and filtering"/>
103 </param>
104 </when> 119 </when>
105 </conditional> 120 </conditional>
106 </when> 121 </when>
107 <when value="no"/> 122 <when value="no"/>
108 </conditional> 123 </conditional>
109 <param name="all_isolates" type="select" display="radio" label="Create table with all isolates?"> 124 <param argument="all_isolates" type="boolean" truevalue="--all_isolates" falsevalue="" checked="false" label="Create a group containing all isolates?"/>
110 <option value="No" selected="true">No</option>
111 <option value="Yes">Yes</option>
112 </param>
113 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
114 </inputs> 125 </inputs>
115 <outputs> 126 <outputs>
116 <collection name="snps" type="list" label="${tool.name} (SNPs) on ${on_string}"> 127 <collection name="snps" type="list" label="${tool.name} on ${on_string} (SNPs)">
117 <discover_datasets pattern="__name__" directory="output_snps_dir" format="fasta"/> 128 <discover_datasets pattern="__name_and_ext__" directory="output_snps_dir"/>
118 </collection> 129 </collection>
119 <collection name="json_avg_mq" type="list" label="${tool.name} (average MQ) on ${on_string}"> 130 <collection name="json_avg_mq" type="list" label="${tool.name} on ${on_string} (average mq)">
120 <discover_datasets pattern="__name__" directory="output_json_avg_mq_dir" format="json"/> 131 <discover_datasets pattern="__name_and_ext__" directory="output_json_avg_mq_dir"/>
121 </collection> 132 </collection>
122 <collection name="json_snps" type="list" label="${tool.name} (SNPs as json) on ${on_string}"> 133 <collection name="json_snps" type="list" label="${tool.name} on ${on_string} (SNPs as json)">
123 <discover_datasets pattern="__name__" directory="output_json_snps_dir" format="json"/> 134 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/>
124 </collection> 135 </collection>
125 <data name="output_summary" format="html" label="${tool.name} (summary) on ${on_string}"/> 136 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/>
126 </outputs> 137 </outputs>
127 <tests> 138 <tests>
128 <test> 139 <!--
140 Unfortunately the test files cannot be gzipped since Galaxy changes the file names
141 to be something like 00-0121_WI_Cervid_99-A_vcf_gz, and the VCF Reader requires
142 gzipped files to have a .gz extension. The exception is
143 UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte
144 -->
145 <!-- A single vcf input, no excel file, all_isolates is False -->
146 <test expect_num_outputs="4">
147 <param name="input_zc_vcf_type" value="single"/>
129 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> 148 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
130 <param name="input_vcf_collection"> 149 <param name="input_vcf_collection">
131 <collection type="list"> 150 <collection type="list">
132 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> 151 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
133 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> 152 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
134 </collection> 153 </collection>
135 </param> 154 </param>
136 <param name="excel_grouper" value="no"/> 155 <param name="input_excel_param" value="no"/>
137 <output_collection name="snps" type="list"> 156 <output_collection name="snps" type="list" count="1">
138 <element name="all_vcf.fasta" file="all_vcf.fasta" ftype="fasta" compare="contains"/> 157 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
139 </output_collection> 158 </output_collection>
140 <output_collection name="json_avg_mq" type="list"> 159 <output_collection name="json_avg_mq" type="list" count="1">
141 <element name="all_vcf.json" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/> 160 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
142 </output_collection> 161 </output_collection>
143 <output_collection name="json_snps" type="list"> 162 <output_collection name="json_snps" type="list" count="1">
144 <element name="all_vcf.json" file="json_all_vcf.json" ftype="json" compare="contains"/> 163 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/>
145 </output_collection> 164 </output_collection>
146 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> 165 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
166 </test>
167 <!-- An input collection, no excel file, all_isolates is False -->
168 <test expect_num_outputs="4">
169 <param name="input_zc_vcf_type" value="collection"/>
170 <param name="input_zc_vcf_collection">
171 <collection type="list">
172 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
173 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
174 </collection>
175 </param>
176 <param name="input_vcf_collection">
177 <collection type="list">
178 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
179 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
180 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
181 </collection>
182 </param>
183 <param name="input_excel_param" value="no"/>
184 <output_collection name="snps" type="list" count="1">
185 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/>
186 </output_collection>
187 <output_collection name="json_avg_mq" type="list" count="1">
188 <element name="all_vcf" file="json_avg_mq_all_vcf2.json" ftype="json" compare="contains"/>
189 </output_collection>
190 <output_collection name="json_snps" type="list" count="1">
191 <element name="all_vcf" file="json_all_vcf2.json" ftype="json" compare="contains"/>
192 </output_collection>
193 <output name="output_summary" file="output_summary2.html" ftype="html" compare="contains"/>
194 </test>
195 <!-- An input collection, an excel file, all_isolates is False -->
196 <test expect_num_outputs="4">
197 <param name="input_zc_vcf_type" value="collection"/>
198 <param name="input_zc_vcf_collection">
199 <collection type="list">
200 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
201 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
202 </collection>
203 </param>
204 <param name="input_vcf_collection">
205 <collection type="list">
206 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
207 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
208 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
209 </collection>
210 </param>
211 <param name="input_excel_param" value="yes"/>
212 <param name="input_excel" value="89"/>
213 <output_collection name="snps" type="list" count="1">
214 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta"/>
215 </output_collection>
216 <output_collection name="json_avg_mq" type="list" count="1">
217 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
218 </output_collection>
219 <output_collection name="json_snps" type="list" count="1">
220 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
221 </output_collection>
222 <output name="output_summary" file="output_summary3.html" ftype="html" compare="contains"/>
223 </test>
224 <!-- An input collection, an excel file, all_isolates is True -->
225 <test expect_num_outputs="4">
226 <param name="input_zc_vcf_type" value="collection"/>
227 <param name="input_zc_vcf_collection">
228 <collection type="list">
229 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
230 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
231 </collection>
232 </param>
233 <param name="input_vcf_collection">
234 <collection type="list">
235 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
236 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
237 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
238 </collection>
239 </param>
240 <param name="input_excel_param" value="yes"/>
241 <param name="input_excel" value="89"/>
242 <param name="all_isolates" value="--all_isolates"/>
243 <output_collection name="snps" type="list" count="2">
244 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta"/>
245 <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta"/>
246 </output_collection>
247 <output_collection name="json_avg_mq" type="list" count="2">
248 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
249 <element name="all_vcf" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
250 </output_collection>
251 <output_collection name="json_snps" type="list" count="2">
252 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
253 <element name="all_vcf" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
254 </output_collection>
255 <output name="output_summary" file="output_summary4.html" ftype="html" compare="contains"/>
147 </test> 256 </test>
148 </tests> 257 </tests>
149 <help> 258 <help>
150 **What it does** 259 **What it does**
151 260
152 Accepts a zero-coverage VCF file (or a collection of them) produced by the **vSNP: add zero coverage** tool 261 Accepts a zero coverage VCF file produced by the **vSNP: add zero coverage** tool (or a collection of them) along with a collection
153 along with a collection of VCF files that have been aligned with the same reference. The inputs are analyzed 262 of zero coverage VCF files that have been aligned with the same reference and contain SNPs called between closely related isolate groups.
154 to discover quality parsimonious SNPs in the zero-coverage VCF file(s). An Excel spreadsheet containing 263 The tool produces fasta files containing SNP alignments, json files containing the SNP positions and additional json files containing
155 specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose whether to 264 the average map quality values.
156 select a locally cached Excel spreadsheet or one from their current history. 265
266 The SNP alignments produced by this tool are used to create phylogenetic trees, so larger input collections result in more populated
267 phylogenetic trees. Both of the json outputs are used by the **vSNP: build tables** tool to produce annotated SNP tables in the form
268 of Excel spreadsheets.
269
270 An Excel spreadsheet containing specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose a
271 locally cached Excel spreadsheet or one from their current history.
272
273 A SNP is added to a group if it has at least one position with a specified allele count value, a quality score greater than a specified
274 value, and a map quality greater than a specified value.
275
276 If the allele count equals the specified value (2) and the quality score for a SNP position is greater than the minimum quality score
277 value (150), the alternate allele is called.
278
279 However, if the allele count is 1, the position is called ambiguous. Deletions are called when the alternate allele is a gap. If the
280 quality score is less than or equal to the minimum quality score N value for alleles (150), the allele is marked "N".
157 281
158 **Required Options** 282 **Required Options**
159 283
160 * **Choose the category of the files to be analyzed** - select single file or a collection of files, then select the appropriate history item (single VCF item or dataset collection of VCF elements) based on the selected option. 284 * **Zero coverage VCF file(s)** - Select a single zero coverage VCF file or a collection of them, typically produced by the **vSNP: add zero coverage** tool, from the current history.
161 * **Collection of VCF files against which to analyze the zero coverages VCF file(s)** - select a dataset collection from the current history that is associated with the same reference as the selected zero-coverage VCF file(s). 285 * **Collection of zero coverage VCF files with SNPs found in closely related isolate groups** - Select a dataset collection of zero coverage vcf files from the current history.
162 286
163 **Additional Options** 287 **Additional Options**
164 288
289 * **Allele count threshold** - At least 1 position must have an allele count greater than this value for a SNP to be added to a group (2 is optimal).
290 * **Map quality threshold** - At least 1 position must have a higher MQ value for a SNP to be added to a group (56 is optimal).
291 * **Quality score threshold** - At least 1 position must have a higher quality score for a SNP to be added to a group (150 is optimal).
292 * **Minimum quality score N value for alleles** - If none of the above 3 requirements is met and the quality score is less than or equal to the minimum quality score N value for alleles, the allele is marked "N" (150 is optimal).
165 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history. 293 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history.
166 * **Job Resource Parameters** - an administrator for the Galaxy instance must configure this tool to display this option, so it may not be available. If it is, you can choose the number of processors to use for tool execution. 294 * **Create a group containing all isolates?** - select Yes to output an additional group containing all isolates.
167 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
168 </help> 295 </help>
169 <citations> 296 <expand macro="citations"/>
170 <citation type="bibtex">
171 @misc{None,
172 journal = {None},
173 author = {1. Stuber T},
174 title = {Manuscript in preparation},
175 year = {None},
176 url = {https://github.com/USDA-VS/vSNP},}
177 </citation>
178 </citations>
179 </tool> 297 </tool>
180 298