Mercurial > repos > greg > vsnp_get_snps
annotate vsnp_get_snps.xml @ 8:5e4595b9f63c draft
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
author | greg |
---|---|
date | Thu, 29 Jul 2021 12:50:01 +0000 |
parents | 2286f3a13e4d |
children | 0fe292b20b9d |
rev | line source |
---|---|
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
1 <tool id="vsnp_get_snps_broken" name="vSNP: get SNPs broken" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@"> |
0 | 2 <description></description> |
3 | 3 <macros> |
4 <import>macros.xml</import> | |
5 </macros> | |
0 | 6 <requirements> |
7 | 7 <requirement type="package" version="3.0.7">openpyxl</requirement> |
8 <requirement type="package" version="1.3.0">pandas</requirement> | |
0 | 9 <requirement type="package" version="0.6.8">pyvcf</requirement> |
7 | 10 <requirement type="package" version="2.0.1">xlrd</requirement> |
0 | 11 </requirements> |
12 <command detect_errors="exit_code"><![CDATA[ | |
3 | 13 #import re |
14 | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
15 #if str($input.metadata.dbkey) == '?': |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
16 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1 |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
17 #end if |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
18 |
0 | 19 #set input_vcf_dir = 'input_vcf_dir' |
20 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' | |
21 #set output_json_snps_dir = 'output_json_snps_dir' | |
22 #set output_snps_dir = 'output_snps_dir' | |
3 | 23 |
0 | 24 mkdir -p $input_vcf_dir && |
25 mkdir -p $output_json_avg_mq_dir && | |
26 mkdir -p $output_json_snps_dir && | |
27 mkdir -p $output_snps_dir && | |
3 | 28 |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
29 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
30 ln -s '${input}' '$input_vcf_dir/${input_identifier}' && |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
31 |
3 | 32 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": |
33 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) | |
34 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && | |
0 | 35 #else |
36 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: | |
3 | 37 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) |
38 ln -s '${i}' '$input_vcf_dir/${zc_vcf_identifier}' && | |
0 | 39 #end for |
40 #end if | |
3 | 41 #if str($input_excel_cond.input_excel_param) == 'yes': |
42 #if str($input_excel_cond.excel_source_cond.excel_source) == 'cached': | |
43 #set excel_file = 'No genome specified for input VCF (database) file(s)' | |
0 | 44 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() |
3 | 45 ## The value of excel_fields is a nested list that looks like this. |
46 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] | |
0 | 47 #for $i in $excel_fields: |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
48 #if str($i[0]) == $input.metadata.dbkey: |
0 | 49 #set excel_file = $i[2] |
50 #break | |
51 #end if | |
52 #end for | |
53 #else: | |
3 | 54 #set excel_file = $input_excel_cond.excel_source_cond.input_excel |
0 | 55 #end if |
56 #end if | |
57 python '$__tool_directory__/vsnp_get_snps.py' | |
3 | 58 --ac $ac |
59 #if str($input_excel_cond.input_excel_param) == 'yes': | |
60 --input_excel '$excel_file' | |
0 | 61 #end if |
3 | 62 $all_isolates |
63 --input_vcf_dir '$input_vcf_dir' | |
64 --min_mq $min_mq | |
65 --min_quality_score $min_quality_score | |
66 --output_json_avg_mq_dir '$output_json_avg_mq_dir' | |
67 --output_json_snps_dir '$output_json_snps_dir' | |
68 --output_snps_dir '$output_snps_dir' | |
0 | 69 --output_summary '$output_summary' |
3 | 70 --processes \${GALAXY_SLOTS:-8} |
71 --quality_score_n_threshold $quality_score_n_threshold | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
72 --dbkey '$input.metadata.dbkey' |
0 | 73 ]]></command> |
74 <inputs> | |
75 <conditional name="input_zc_vcf_type_cond"> | |
76 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> | |
3 | 77 <option value="collection" selected="true">A collection of zero coverage VCF files</option> |
78 <option value="single">A single zero coverage VCF file</option> | |
0 | 79 </param> |
80 <when value="single"> | |
3 | 81 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file"/> |
0 | 82 </when> |
83 <when value="collection"> | |
3 | 84 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> |
0 | 85 </when> |
86 </conditional> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
87 <param name="input" format="vcf" type="data" label="Zero coverage VCF file with SNPs found in closely related isolate groups"/> |
3 | 88 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> |
89 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> | |
90 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> | |
91 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> | |
92 <conditional name="input_excel_cond"> | |
93 <param name="input_excel_param" type="select" label="Use Excel file for grouping and filtering?"> | |
0 | 94 <option value="yes" selected="true">Yes</option> |
95 <option value="no">No</option> | |
96 </param> | |
97 <when value="yes"> | |
3 | 98 <conditional name="excel_source_cond"> |
99 <param name="excel_source" type="select" label="Choose the source for the Excel file"> | |
0 | 100 <option value="cached">locally cached</option> |
101 <option value="history">from history</option> | |
102 </param> | |
103 <when value="cached"> | |
3 | 104 <param name="input_excel" type="select" label="Excel file"> |
105 <options from_data_table="vsnp_excel"> | |
106 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> | |
107 </options> | |
0 | 108 </param> |
109 </when> | |
110 <when value="history"> | |
3 | 111 <param name="input_excel" type="data" format="xlsx" label="Excel file"/> |
0 | 112 </when> |
113 </conditional> | |
114 </when> | |
115 <when value="no"/> | |
116 </conditional> | |
3 | 117 <param argument="all_isolates" type="boolean" truevalue="--all_isolates" falsevalue="" checked="false" label="Create a group containing all isolates?"/> |
0 | 118 </inputs> |
119 <outputs> | |
3 | 120 <collection name="snps" type="list" label="${tool.name} on ${on_string} (SNPs)"> |
121 <discover_datasets pattern="__name_and_ext__" directory="output_snps_dir"/> | |
0 | 122 </collection> |
3 | 123 <collection name="json_avg_mq" type="list" label="${tool.name} on ${on_string} (average mq)"> |
124 <discover_datasets pattern="__name_and_ext__" directory="output_json_avg_mq_dir"/> | |
0 | 125 </collection> |
3 | 126 <collection name="json_snps" type="list" label="${tool.name} on ${on_string} (SNPs as json)"> |
127 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/> | |
0 | 128 </collection> |
3 | 129 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/> |
0 | 130 </outputs> |
131 <tests> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
132 <!-- No excel file, all_isolates is False --> |
3 | 133 <test expect_num_outputs="4"> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
134 <param name="input" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
135 <param name="input_zc_vcf_collection"> |
0 | 136 <collection type="list"> |
137 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> | |
138 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> | |
139 </collection> | |
140 </param> | |
3 | 141 <param name="input_excel_param" value="no"/> |
142 <output_collection name="snps" type="list" count="1"> | |
143 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/> | |
0 | 144 </output_collection> |
3 | 145 <output_collection name="json_avg_mq" type="list" count="1"> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
146 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> |
0 | 147 </output_collection> |
3 | 148 <output_collection name="json_snps" type="list" count="1"> |
149 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> | |
0 | 150 </output_collection> |
151 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> | |
152 </test> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
153 <!-- Excel file, all_isolates is False --> |
3 | 154 <test expect_num_outputs="4"> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
155 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> |
3 | 156 <param name="input_zc_vcf_collection"> |
157 <collection type="list"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
158 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
159 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
160 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> |
3 | 161 </collection> |
162 </param> | |
163 <param name="input_excel_param" value="yes"/> | |
164 <param name="input_excel" value="89"/> | |
165 <output_collection name="snps" type="list" count="1"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
166 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> |
3 | 167 </output_collection> |
168 <output_collection name="json_avg_mq" type="list" count="1"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
169 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> |
3 | 170 </output_collection> |
171 <output_collection name="json_snps" type="list" count="1"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
172 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> |
3 | 173 </output_collection> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
174 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> |
3 | 175 </test> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
176 <!-- Excel file, all_isolates is True --> |
3 | 177 <test expect_num_outputs="4"> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
178 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> |
3 | 179 <param name="input_zc_vcf_collection"> |
180 <collection type="list"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
181 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
182 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
183 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> |
3 | 184 </collection> |
185 </param> | |
186 <param name="input_excel_param" value="yes"/> | |
187 <param name="input_excel" value="89"/> | |
188 <param name="all_isolates" value="--all_isolates"/> | |
189 <output_collection name="snps" type="list" count="2"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
190 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
191 <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta" compare="contains"/> |
3 | 192 </output_collection> |
193 <output_collection name="json_avg_mq" type="list" count="2"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
194 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
195 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> |
3 | 196 </output_collection> |
197 <output_collection name="json_snps" type="list" count="2"> | |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
198 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> |
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
199 <element name="all_vcf" file="all_vcf_snps_json.json" ftype="json" compare="contains"/> |
3 | 200 </output_collection> |
8
5e4595b9f63c
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents:
7
diff
changeset
|
201 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> |
3 | 202 </test> |
0 | 203 </tests> |
204 <help> | |
205 **What it does** | |
206 | |
3 | 207 Accepts a zero coverage VCF file produced by the **vSNP: add zero coverage** tool (or a collection of them) along with a collection |
208 of zero coverage VCF files that have been aligned with the same reference and contain SNPs called between closely related isolate groups. | |
209 The tool produces fasta files containing SNP alignments, json files containing the SNP positions and additional json files containing | |
210 the average map quality values. | |
211 | |
212 The SNP alignments produced by this tool are used to create phylogenetic trees, so larger input collections result in more populated | |
213 phylogenetic trees. Both of the json outputs are used by the **vSNP: build tables** tool to produce annotated SNP tables in the form | |
214 of Excel spreadsheets. | |
215 | |
216 An Excel spreadsheet containing specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose a | |
217 locally cached Excel spreadsheet or one from their current history. | |
218 | |
219 A SNP is added to a group if it has at least one position with a specified allele count value, a quality score greater than a specified | |
220 value, and a map quality greater than a specified value. | |
221 | |
222 If the allele count equals the specified value (2) and the quality score for a SNP position is greater than the minimum quality score | |
223 value (150), the alternate allele is called. | |
224 | |
225 However, if the allele count is 1, the position is called ambiguous. Deletions are called when the alternate allele is a gap. If the | |
226 quality score is less than or equal to the minimum quality score N value for alleles (150), the allele is marked "N". | |
0 | 227 |
228 **Required Options** | |
229 | |
3 | 230 * **Zero coverage VCF file(s)** - Select a single or collection of zero coverage VCF files, typically produced by the **vSNP: add zero coverage** tool, from the current history. |
231 * **Collection of zero coverage VCF files with SNPs found in closely related isolate groups** - Select a dataset collection of zero coverage vcf files from the current history. | |
0 | 232 |
233 **Additional Options** | |
234 | |
3 | 235 * **Allele count threshold** - At least 1 position must have an allele count greater than this value for a SNP to be added to a group (2 is optimal). |
236 * **Map quality threshold** - At least 1 position must have a higher MQ value for a SNP to be added to a group (56 is optimal). | |
237 * **Quality score threshold** - At least 1 position must have a higher quality score for a SNP to be added to a group (150 is optimal). | |
238 * **Minimum quality score N value for alleles** - If none of the above 3 requirements is met and the quality score is less than or equal to the minimum quality score N value for alleles, the allele is marked "N" (150 is optimal). | |
0 | 239 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history. |
3 | 240 * **Create a group containing all isolates?** - select Yes to output an additional group containing all isolates. |
0 | 241 </help> |
3 | 242 <expand macro="citations"/> |
0 | 243 </tool> |
244 |