Mercurial > repos > greg > vsnp_get_snps
comparison vsnp_get_snps.xml @ 8:5e4595b9f63c draft
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 7423e5bb852a786195c095b9f663aac0ec9c8fd9"
author | greg |
---|---|
date | Thu, 29 Jul 2021 12:50:01 +0000 |
parents | 2286f3a13e4d |
children | 0fe292b20b9d |
comparison
equal
deleted
inserted
replaced
7:2286f3a13e4d | 8:5e4595b9f63c |
---|---|
1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@"> | 1 <tool id="vsnp_get_snps_broken" name="vSNP: get SNPs broken" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@"> |
2 <description></description> | 2 <description></description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
10 <requirement type="package" version="2.0.1">xlrd</requirement> | 10 <requirement type="package" version="2.0.1">xlrd</requirement> |
11 </requirements> | 11 </requirements> |
12 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
13 #import re | 13 #import re |
14 | 14 |
15 #if str($input.metadata.dbkey) == '?': | |
16 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1 | |
17 #end if | |
18 | |
15 #set input_vcf_dir = 'input_vcf_dir' | 19 #set input_vcf_dir = 'input_vcf_dir' |
16 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' | 20 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' |
17 #set output_json_snps_dir = 'output_json_snps_dir' | 21 #set output_json_snps_dir = 'output_json_snps_dir' |
18 #set output_snps_dir = 'output_snps_dir' | 22 #set output_snps_dir = 'output_snps_dir' |
19 | 23 |
20 mkdir -p $input_vcf_dir && | 24 mkdir -p $input_vcf_dir && |
21 mkdir -p $output_json_avg_mq_dir && | 25 mkdir -p $output_json_avg_mq_dir && |
22 mkdir -p $output_json_snps_dir && | 26 mkdir -p $output_json_snps_dir && |
23 mkdir -p $output_snps_dir && | 27 mkdir -p $output_snps_dir && |
24 | 28 |
25 #set dbkey = '?' | 29 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) |
26 #for $i in $input_vcf_collection: | 30 ln -s '${input}' '$input_vcf_dir/${input_identifier}' && |
27 #if str($dbkey) == '?': | 31 |
28 #set dbkey = $i.metadata.dbkey | |
29 #else if str($dbkey) != $i.metadata.dbkey: | |
30 >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique" && | |
31 exit 1 | |
32 #end if | |
33 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
34 ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' && | |
35 #end for | |
36 #if str($dbkey) == '?': | |
37 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1 | |
38 #end if | |
39 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": | 32 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": |
40 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) | 33 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) |
41 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && | 34 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && |
42 #else | 35 #else |
43 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: | 36 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: |
50 #set excel_file = 'No genome specified for input VCF (database) file(s)' | 43 #set excel_file = 'No genome specified for input VCF (database) file(s)' |
51 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() | 44 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() |
52 ## The value of excel_fields is a nested list that looks like this. | 45 ## The value of excel_fields is a nested list that looks like this. |
53 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] | 46 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] |
54 #for $i in $excel_fields: | 47 #for $i in $excel_fields: |
55 #if str($i[0]) == $dbkey: | 48 #if str($i[0]) == $input.metadata.dbkey: |
56 #set excel_file = $i[2] | 49 #set excel_file = $i[2] |
57 #break | 50 #break |
58 #end if | 51 #end if |
59 #end for | 52 #end for |
60 #else: | 53 #else: |
74 --output_json_snps_dir '$output_json_snps_dir' | 67 --output_json_snps_dir '$output_json_snps_dir' |
75 --output_snps_dir '$output_snps_dir' | 68 --output_snps_dir '$output_snps_dir' |
76 --output_summary '$output_summary' | 69 --output_summary '$output_summary' |
77 --processes \${GALAXY_SLOTS:-8} | 70 --processes \${GALAXY_SLOTS:-8} |
78 --quality_score_n_threshold $quality_score_n_threshold | 71 --quality_score_n_threshold $quality_score_n_threshold |
79 --dbkey '$dbkey' | 72 --dbkey '$input.metadata.dbkey' |
80 ]]></command> | 73 ]]></command> |
81 <inputs> | 74 <inputs> |
82 <conditional name="input_zc_vcf_type_cond"> | 75 <conditional name="input_zc_vcf_type_cond"> |
83 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> | 76 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> |
84 <option value="collection" selected="true">A collection of zero coverage VCF files</option> | 77 <option value="collection" selected="true">A collection of zero coverage VCF files</option> |
89 </when> | 82 </when> |
90 <when value="collection"> | 83 <when value="collection"> |
91 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> | 84 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> |
92 </when> | 85 </when> |
93 </conditional> | 86 </conditional> |
94 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/> | 87 <param name="input" format="vcf" type="data" label="Zero coverage VCF file with SNPs found in closely related isolate groups"/> |
95 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> | 88 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> |
96 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> | 89 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> |
97 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> | 90 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> |
98 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> | 91 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> |
99 <conditional name="input_excel_cond"> | 92 <conditional name="input_excel_cond"> |
108 <option value="history">from history</option> | 101 <option value="history">from history</option> |
109 </param> | 102 </param> |
110 <when value="cached"> | 103 <when value="cached"> |
111 <param name="input_excel" type="select" label="Excel file"> | 104 <param name="input_excel" type="select" label="Excel file"> |
112 <options from_data_table="vsnp_excel"> | 105 <options from_data_table="vsnp_excel"> |
113 <filter type="data_meta" column="0" key="dbkey" ref="input_vcf_collection"/> | |
114 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> | 106 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> |
115 </options> | 107 </options> |
116 </param> | 108 </param> |
117 </when> | 109 </when> |
118 <when value="history"> | 110 <when value="history"> |
135 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/> | 127 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/> |
136 </collection> | 128 </collection> |
137 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/> | 129 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/> |
138 </outputs> | 130 </outputs> |
139 <tests> | 131 <tests> |
140 <!-- | 132 <!-- No excel file, all_isolates is False --> |
141 Unfortunately the test files cannot be gzipped since Galaxy changes the file names | |
142 to be something like 00-0121_WI_Cervid_99-A_vcf_gz, and the VCF Reader requires | |
143 gzipped files to have a .gz extension. The exception is | |
144 UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte | |
145 --> | |
146 <!-- A single vcf input, no excel file, all_isolates is False --> | |
147 <test expect_num_outputs="4"> | 133 <test expect_num_outputs="4"> |
148 <param name="input_zc_vcf_type" value="single"/> | 134 <param name="input" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> |
149 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> | 135 <param name="input_zc_vcf_collection"> |
150 <param name="input_vcf_collection"> | |
151 <collection type="list"> | 136 <collection type="list"> |
152 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> | 137 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> |
153 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> | 138 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> |
154 </collection> | 139 </collection> |
155 </param> | 140 </param> |
156 <param name="input_excel_param" value="no"/> | 141 <param name="input_excel_param" value="no"/> |
157 <output_collection name="snps" type="list" count="1"> | 142 <output_collection name="snps" type="list" count="1"> |
158 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/> | 143 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/> |
159 </output_collection> | 144 </output_collection> |
160 <output_collection name="json_avg_mq" type="list" count="1"> | 145 <output_collection name="json_avg_mq" type="list" count="1"> |
161 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/> | 146 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> |
162 </output_collection> | 147 </output_collection> |
163 <output_collection name="json_snps" type="list" count="1"> | 148 <output_collection name="json_snps" type="list" count="1"> |
164 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> | 149 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> |
165 </output_collection> | 150 </output_collection> |
166 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> | 151 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> |
167 </test> | 152 </test> |
168 <!-- An input collection, no excel file, all_isolates is False --> | 153 <!-- Excel file, all_isolates is False --> |
169 <test expect_num_outputs="4"> | 154 <test expect_num_outputs="4"> |
170 <param name="input_zc_vcf_type" value="collection"/> | 155 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> |
171 <param name="input_zc_vcf_collection"> | 156 <param name="input_zc_vcf_collection"> |
172 <collection type="list"> | 157 <collection type="list"> |
173 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> | 158 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> |
174 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> | 159 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> |
175 </collection> | 160 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> |
176 </param> | |
177 <param name="input_vcf_collection"> | |
178 <collection type="list"> | |
179 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> | |
180 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> | |
181 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> | |
182 </collection> | |
183 </param> | |
184 <param name="input_excel_param" value="no"/> | |
185 <output_collection name="snps" type="list" count="1"> | |
186 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/> | |
187 </output_collection> | |
188 <output_collection name="json_avg_mq" type="list" count="1"> | |
189 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/> | |
190 </output_collection> | |
191 <output_collection name="json_snps" type="list" count="1"> | |
192 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> | |
193 </output_collection> | |
194 <output name="output_summary" file="output_summary2.html" ftype="html" compare="contains"/> | |
195 </test> | |
196 <!-- An input collection, an excel file, all_isolates is False --> | |
197 <test expect_num_outputs="4"> | |
198 <param name="input_zc_vcf_type" value="collection"/> | |
199 <param name="input_zc_vcf_collection"> | |
200 <collection type="list"> | |
201 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> | |
202 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> | |
203 </collection> | |
204 </param> | |
205 <param name="input_vcf_collection"> | |
206 <collection type="list"> | |
207 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> | |
208 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> | |
209 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> | |
210 </collection> | 161 </collection> |
211 </param> | 162 </param> |
212 <param name="input_excel_param" value="yes"/> | 163 <param name="input_excel_param" value="yes"/> |
213 <param name="input_excel" value="89"/> | 164 <param name="input_excel" value="89"/> |
214 <output_collection name="snps" type="list" count="1"> | 165 <output_collection name="snps" type="list" count="1"> |
215 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta"/> | 166 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> |
216 </output_collection> | 167 </output_collection> |
217 <output_collection name="json_avg_mq" type="list" count="1"> | 168 <output_collection name="json_avg_mq" type="list" count="1"> |
218 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/> | 169 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> |
219 </output_collection> | 170 </output_collection> |
220 <output_collection name="json_snps" type="list" count="1"> | 171 <output_collection name="json_snps" type="list" count="1"> |
221 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/> | 172 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> |
222 </output_collection> | 173 </output_collection> |
223 <output name="output_summary" file="output_summary3.html" ftype="html" compare="contains"/> | 174 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> |
224 </test> | 175 </test> |
225 <!-- An input collection, an excel file, all_isolates is True --> | 176 <!-- Excel file, all_isolates is True --> |
226 <test expect_num_outputs="4"> | 177 <test expect_num_outputs="4"> |
227 <param name="input_zc_vcf_type" value="collection"/> | 178 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> |
228 <param name="input_zc_vcf_collection"> | 179 <param name="input_zc_vcf_collection"> |
229 <collection type="list"> | 180 <collection type="list"> |
230 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> | 181 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> |
231 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> | 182 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> |
232 </collection> | 183 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> |
233 </param> | |
234 <param name="input_vcf_collection"> | |
235 <collection type="list"> | |
236 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> | |
237 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> | |
238 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> | |
239 </collection> | 184 </collection> |
240 </param> | 185 </param> |
241 <param name="input_excel_param" value="yes"/> | 186 <param name="input_excel_param" value="yes"/> |
242 <param name="input_excel" value="89"/> | 187 <param name="input_excel" value="89"/> |
243 <param name="all_isolates" value="--all_isolates"/> | 188 <param name="all_isolates" value="--all_isolates"/> |
244 <output_collection name="snps" type="list" count="2"> | 189 <output_collection name="snps" type="list" count="2"> |
245 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta"/> | 190 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> |
246 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/> | 191 <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta" compare="contains"/> |
247 </output_collection> | 192 </output_collection> |
248 <output_collection name="json_avg_mq" type="list" count="2"> | 193 <output_collection name="json_avg_mq" type="list" count="2"> |
249 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/> | 194 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> |
250 <element name="all_vcf" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/> | 195 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> |
251 </output_collection> | 196 </output_collection> |
252 <output_collection name="json_snps" type="list" count="2"> | 197 <output_collection name="json_snps" type="list" count="2"> |
253 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/> | 198 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> |
254 <element name="all_vcf" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/> | 199 <element name="all_vcf" file="all_vcf_snps_json.json" ftype="json" compare="contains"/> |
255 </output_collection> | 200 </output_collection> |
256 <output name="output_summary" file="output_summary4.html" ftype="html" compare="contains"/> | 201 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> |
257 </test> | 202 </test> |
258 </tests> | 203 </tests> |
259 <help> | 204 <help> |
260 **What it does** | 205 **What it does** |
261 | 206 |