annotate vsnp_get_snps.xml @ 8:5e4595b9f63c draft

"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
author greg
date Thu, 29 Jul 2021 12:50:01 +0000
parents 2286f3a13e4d
children 0fe292b20b9d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
1 <tool id="vsnp_get_snps_broken" name="vSNP: get SNPs broken" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@">
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
2 <description></description>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
3 <macros>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
4 <import>macros.xml</import>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
5 </macros>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
6 <requirements>
7
2286f3a13e4d Uploaded
greg
parents: 6
diff changeset
7 <requirement type="package" version="3.0.7">openpyxl</requirement>
2286f3a13e4d Uploaded
greg
parents: 6
diff changeset
8 <requirement type="package" version="1.3.0">pandas</requirement>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
9 <requirement type="package" version="0.6.8">pyvcf</requirement>
7
2286f3a13e4d Uploaded
greg
parents: 6
diff changeset
10 <requirement type="package" version="2.0.1">xlrd</requirement>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
11 </requirements>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
12 <command detect_errors="exit_code"><![CDATA[
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
13 #import re
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
14
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
15 #if str($input.metadata.dbkey) == '?':
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
16 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
17 #end if
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
18
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
19 #set input_vcf_dir = 'input_vcf_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
20 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
21 #set output_json_snps_dir = 'output_json_snps_dir'
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
22 #set output_snps_dir = 'output_snps_dir'
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
23
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
24 mkdir -p $input_vcf_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
25 mkdir -p $output_json_avg_mq_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
26 mkdir -p $output_json_snps_dir &&
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
27 mkdir -p $output_snps_dir &&
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
28
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
29 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier))
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
30 ln -s '${input}' '$input_vcf_dir/${input_identifier}' &&
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
31
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
32 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single":
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
33 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf_type_cond.input_zc_vcf.element_identifier))
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
34 ln -s '${input_zc_vcf_type_cond.input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' &&
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
35 #else
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
36 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection:
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
37 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
38 ln -s '${i}' '$input_vcf_dir/${zc_vcf_identifier}' &&
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
39 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
40 #end if
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
41 #if str($input_excel_cond.input_excel_param) == 'yes':
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
42 #if str($input_excel_cond.excel_source_cond.excel_source) == 'cached':
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
43 #set excel_file = 'No genome specified for input VCF (database) file(s)'
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
44 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields()
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
45 ## The value of excel_fields is a nested list that looks like this.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
46 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...]
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
47 #for $i in $excel_fields:
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
48 #if str($i[0]) == $input.metadata.dbkey:
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
49 #set excel_file = $i[2]
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
50 #break
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
51 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
52 #end for
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
53 #else:
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
54 #set excel_file = $input_excel_cond.excel_source_cond.input_excel
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
55 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
56 #end if
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
57 python '$__tool_directory__/vsnp_get_snps.py'
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
58 --ac $ac
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
59 #if str($input_excel_cond.input_excel_param) == 'yes':
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
60 --input_excel '$excel_file'
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
61 #end if
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
62 $all_isolates
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
63 --input_vcf_dir '$input_vcf_dir'
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
64 --min_mq $min_mq
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
65 --min_quality_score $min_quality_score
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
66 --output_json_avg_mq_dir '$output_json_avg_mq_dir'
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
67 --output_json_snps_dir '$output_json_snps_dir'
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
68 --output_snps_dir '$output_snps_dir'
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
69 --output_summary '$output_summary'
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
70 --processes \${GALAXY_SLOTS:-8}
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
71 --quality_score_n_threshold $quality_score_n_threshold
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
72 --dbkey '$input.metadata.dbkey'
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
73 ]]></command>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
74 <inputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
75 <conditional name="input_zc_vcf_type_cond">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
76 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed">
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
77 <option value="collection" selected="true">A collection of zero coverage VCF files</option>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
78 <option value="single">A single zero coverage VCF file</option>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
79 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
80 <when value="single">
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
81 <param name="input_zc_vcf" type="data" format="vcf" label="Zero coverage VCF file"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
82 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
83 <when value="collection">
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
84 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
85 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
86 </conditional>
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
87 <param name="input" format="vcf" type="data" label="Zero coverage VCF file with SNPs found in closely related isolate groups"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
88 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
89 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
90 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
91 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
92 <conditional name="input_excel_cond">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
93 <param name="input_excel_param" type="select" label="Use Excel file for grouping and filtering?">
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
94 <option value="yes" selected="true">Yes</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
95 <option value="no">No</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
96 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
97 <when value="yes">
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
98 <conditional name="excel_source_cond">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
99 <param name="excel_source" type="select" label="Choose the source for the Excel file">
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
100 <option value="cached">locally cached</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
101 <option value="history">from history</option>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
102 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
103 <when value="cached">
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
104 <param name="input_excel" type="select" label="Excel file">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
105 <options from_data_table="vsnp_excel">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
106 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
107 </options>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
108 </param>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
109 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
110 <when value="history">
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
111 <param name="input_excel" type="data" format="xlsx" label="Excel file"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
112 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
113 </conditional>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
114 </when>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
115 <when value="no"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
116 </conditional>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
117 <param argument="all_isolates" type="boolean" truevalue="--all_isolates" falsevalue="" checked="false" label="Create a group containing all isolates?"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
118 </inputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
119 <outputs>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
120 <collection name="snps" type="list" label="${tool.name} on ${on_string} (SNPs)">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
121 <discover_datasets pattern="__name_and_ext__" directory="output_snps_dir"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
122 </collection>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
123 <collection name="json_avg_mq" type="list" label="${tool.name} on ${on_string} (average mq)">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
124 <discover_datasets pattern="__name_and_ext__" directory="output_json_avg_mq_dir"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
125 </collection>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
126 <collection name="json_snps" type="list" label="${tool.name} on ${on_string} (SNPs as json)">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
127 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
128 </collection>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
129 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
130 </outputs>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
131 <tests>
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
132 <!-- No excel file, all_isolates is False -->
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
133 <test expect_num_outputs="4">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
134 <param name="input" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
135 <param name="input_zc_vcf_collection">
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
136 <collection type="list">
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
137 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
138 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
139 </collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
140 </param>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
141 <param name="input_excel_param" value="no"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
142 <output_collection name="snps" type="list" count="1">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
143 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
144 </output_collection>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
145 <output_collection name="json_avg_mq" type="list" count="1">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
146 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
147 </output_collection>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
148 <output_collection name="json_snps" type="list" count="1">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
149 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
150 </output_collection>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
151 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
152 </test>
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
153 <!-- Excel file, all_isolates is False -->
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
154 <test expect_num_outputs="4">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
155 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
156 <param name="input_zc_vcf_collection">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
157 <collection type="list">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
158 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
159 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
160 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
161 </collection>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
162 </param>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
163 <param name="input_excel_param" value="yes"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
164 <param name="input_excel" value="89"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
165 <output_collection name="snps" type="list" count="1">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
166 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
167 </output_collection>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
168 <output_collection name="json_avg_mq" type="list" count="1">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
169 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
170 </output_collection>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
171 <output_collection name="json_snps" type="list" count="1">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
172 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
173 </output_collection>
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
174 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
175 </test>
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
176 <!-- Excel file, all_isolates is True -->
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
177 <test expect_num_outputs="4">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
178 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
179 <param name="input_zc_vcf_collection">
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
180 <collection type="list">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
181 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
182 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
183 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
184 </collection>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
185 </param>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
186 <param name="input_excel_param" value="yes"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
187 <param name="input_excel" value="89"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
188 <param name="all_isolates" value="--all_isolates"/>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
189 <output_collection name="snps" type="list" count="2">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
190 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
191 <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
192 </output_collection>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
193 <output_collection name="json_avg_mq" type="list" count="2">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
194 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
195 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
196 </output_collection>
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
197 <output_collection name="json_snps" type="list" count="2">
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
198 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/>
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
199 <element name="all_vcf" file="all_vcf_snps_json.json" ftype="json" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
200 </output_collection>
8
5e4595b9f63c "planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
greg
parents: 7
diff changeset
201 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
202 </test>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
203 </tests>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
204 <help>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
205 **What it does**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
206
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
207 Accepts a zero coverage VCF file produced by the **vSNP: add zero coverage** tool (or a collection of them) along with a collection
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
208 of zero coverage VCF files that have been aligned with the same reference and contain SNPs called between closely related isolate groups.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
209 The tool produces fasta files containing SNP alignments, json files containing the SNP positions and additional json files containing
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
210 the average map quality values.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
211
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
212 The SNP alignments produced by this tool are used to create phylogenetic trees, so larger input collections result in more populated
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
213 phylogenetic trees. Both of the json outputs are used by the **vSNP: build tables** tool to produce annotated SNP tables in the form
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
214 of Excel spreadsheets.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
215
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
216 An Excel spreadsheet containing specified SNPs can optionally be used to filter desired SNP positions by group. Users can choose a
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
217 locally cached Excel spreadsheet or one from their current history.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
218
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
219 A SNP is added to a group if it has at least one position with a specified allele count value, a quality score greater than a specified
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
220 value, and a map quality greater than a specified value.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
221
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
222 If the allele count equals the specified value (2) and the quality score for a SNP position is greater than the minimum quality score
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
223 value (150), the alternate allele is called.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
224
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
225 However, if the allele count is 1, the position is called ambiguous. Deletions are called when the alternate allele is a gap. If the
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
226 quality score is less than or equal to the minimum quality score N value for alleles (150), the allele is marked "N".
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
227
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
228 **Required Options**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
229
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
230 * **Zero coverage VCF file(s)** - Select a single or collection of zero coverage VCF files, typically produced by the **vSNP: add zero coverage** tool, from the current history.
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
231 * **Zero coverage VCF file with SNPs found in closely related isolate groups** - Select a zero coverage VCF file from the current history. The dbkey of this dataset must be set.
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
232
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
233 **Additional Options**
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
234
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
235 * **Allele count threshold** - At least 1 position must have an allele count greater than this value for a SNP to be added to a group (2 is optimal).
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
236 * **Map quality threshold** - At least 1 position must have a higher MQ value for a SNP to be added to a group (56 is optimal).
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
237 * **Quality score threshold** - At least 1 position must have a higher quality score for a SNP to be added to a group (150 is optimal).
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
238 * **Minimum quality score N value for alleles** - If none of the above 3 requirements is met and the quality score is less than or equal to the minimum quality score N value for alleles, the allele is marked "N" (150 is optimal).
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
239 * **Use Excel file for grouping and filtering?** - select Yes to filter desired SNP positions by group. A cached Excel spreadsheet provides the most widely used SNP positions for grouping, but a custom spreadsheet can be selected from the current history.
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
240 * **Create a group containing all isolates?** - select Yes to output an additional group containing all isolates.
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
241 </help>
3
14285a94fb13 Uploaded
greg
parents: 2
diff changeset
242 <expand macro="citations"/>
0
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
243 </tool>
ee4ef1fc23c6 Uploaded
greg
parents:
diff changeset
244