comparison vsnp_get_snps.xml @ 1:9ac0b1d5560d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
author iuc
date Tue, 16 Nov 2021 20:11:30 +0000
parents ec6e02f4eab7
children ae7b1b97a2a0
comparison
equal deleted inserted replaced
0:ec6e02f4eab7 1:9ac0b1d5560d
1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@WRAPPER_VERSION@.0" profile="@PROFILE@"> 1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
7 <requirement type="package" version="3.0.9">openpyxl</requirement> 7 <expand macro="openpyxl_requirement"/>
8 <requirement type="package" version="1.3.4">pandas</requirement> 8 <expand macro="pandas_requirement"/>
9 <requirement type="package" version="0.6.8">pyvcf</requirement> 9 <expand macro="pyvcf_requirement"/>
10 <requirement type="package" version="2.0.1">xlrd</requirement> 10 <expand macro="xlrd_requirement"/>
11 </requirements> 11 </requirements>
12 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 #import re 13 #import re
14 14
15 #set input_vcf_dir = 'input_vcf_dir' 15 #set input_vcf_dir = 'input_vcf_dir'
20 mkdir -p $input_vcf_dir && 20 mkdir -p $input_vcf_dir &&
21 mkdir -p $output_json_avg_mq_dir && 21 mkdir -p $output_json_avg_mq_dir &&
22 mkdir -p $output_json_snps_dir && 22 mkdir -p $output_json_snps_dir &&
23 mkdir -p $output_snps_dir && 23 mkdir -p $output_snps_dir &&
24 24
25 #set dbkey = '?' 25 #if str($input_additional_zc_vcf_collection_cond.input_additional_zc_vcf_collection) =='yes':
26 #for $i in $input_vcf_collection: 26 #set dbkey = '?'
27 #for $i in $input_additional_zc_vcf_collection_cond.input_vcf_collection:
28 #if str($dbkey) == '?':
29 #set dbkey = $i.metadata.dbkey
30 #else if str($dbkey) != $i.metadata.dbkey:
31 >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique, check ${i.name}" && exit 1
32 #end if
33 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
34 ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' &&
35 #end for
27 #if str($dbkey) == '?': 36 #if str($dbkey) == '?':
28 #set dbkey = $i.metadata.dbkey 37 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
29 #else if str($dbkey) != $i.metadata.dbkey:
30 >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique" &&
31 exit 1
32 #end if 38 #end if
33 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
34 ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' &&
35 #end for
36 #if str($dbkey) == '?':
37 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
38 #end if 39 #end if
39 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": 40 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single":
40 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) 41 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier))
41 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && 42 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' &&
42 #else 43 #else
47 #end if 48 #end if
48 #if str($input_excel_cond.input_excel_param) == 'yes': 49 #if str($input_excel_cond.input_excel_param) == 'yes':
49 #if str($input_excel_cond.excel_source_cond.excel_source) == 'cached': 50 #if str($input_excel_cond.excel_source_cond.excel_source) == 'cached':
50 #set excel_file = 'No genome specified for input VCF (database) file(s)' 51 #set excel_file = 'No genome specified for input VCF (database) file(s)'
51 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() 52 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields()
52 ## The value of excel_fields is a nested list that looks like this. 53 ## The value of excel_fields is a nested list that looks like this.
53 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] 54 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...]
54 #for $i in $excel_fields: 55 #for $i in $excel_fields:
55 #if str($i[0]) == $dbkey: 56 #if str($i[0]) == $dbkey:
56 #set excel_file = $i[2] 57 #set excel_file = $i[2]
57 #break 58 #break
89 </when> 90 </when>
90 <when value="collection"> 91 <when value="collection">
91 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> 92 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/>
92 </when> 93 </when>
93 </conditional> 94 </conditional>
94 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/> 95 <conditional name="input_additional_zc_vcf_collection_cond">
96 <param name="input_additional_zc_vcf_collection" type="select" label="Include an additional collection of curated zero-coverage VCF files?">
97 <option value="no" selected="true">No</option>
98 <option value="yes">Yes</option>
99 </param>
100 <when value="no"/>
101 <when value="yes">
102 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/>
103 </when>
104 </conditional>
95 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> 105 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/>
96 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> 106 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/>
97 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> 107 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/>
98 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> 108 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/>
99 <conditional name="input_excel_cond"> 109 <conditional name="input_excel_cond">
108 <option value="history">from history</option> 118 <option value="history">from history</option>
109 </param> 119 </param>
110 <when value="cached"> 120 <when value="cached">
111 <param name="input_excel" type="select" label="Excel file"> 121 <param name="input_excel" type="select" label="Excel file">
112 <options from_data_table="vsnp_excel"> 122 <options from_data_table="vsnp_excel">
113 <filter type="data_meta" column="0" key="dbkey" ref="input_vcf_collection"/>
114 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> 123 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
115 </options> 124 </options>
116 </param> 125 </param>
117 </when> 126 </when>
118 <when value="history"> 127 <when value="history">
120 </when> 129 </when>
121 </conditional> 130 </conditional>
122 </when> 131 </when>
123 <when value="no"/> 132 <when value="no"/>
124 </conditional> 133 </conditional>
125 <param name="all_isolates" type="boolean" truevalue="--all_isolates" falsevalue="" checked="false" label="Create a group containing all isolates?"/> 134 <param name="all_isolates" type="boolean" truevalue="--all_isolates" falsevalue="" checked="true" label="Create a group containing all isolates?"/>
126 </inputs> 135 </inputs>
127 <outputs> 136 <outputs>
128 <collection name="snps" type="list" label="${tool.name} on ${on_string} (SNPs)"> 137 <collection name="snps" type="list" label="${tool.name} on ${on_string} (SNPs)">
129 <discover_datasets pattern="__name_and_ext__" directory="output_snps_dir"/> 138 <discover_datasets pattern="__name_and_ext__" directory="output_snps_dir"/>
130 </collection> 139 </collection>
145 --> 154 -->
146 <!-- A single vcf input, no excel file, all_isolates is False --> 155 <!-- A single vcf input, no excel file, all_isolates is False -->
147 <test expect_num_outputs="4"> 156 <test expect_num_outputs="4">
148 <param name="input_zc_vcf_type" value="single"/> 157 <param name="input_zc_vcf_type" value="single"/>
149 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> 158 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
159 <param name="input_additional_zc_vcf_collection" value="yes"/>
150 <param name="input_vcf_collection"> 160 <param name="input_vcf_collection">
151 <collection type="list"> 161 <collection type="list">
152 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> 162 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
153 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> 163 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
154 </collection> 164 </collection>
155 </param> 165 </param>
156 <param name="input_excel_param" value="no"/> 166 <param name="input_excel_param" value="no"/>
167 <param name="all_isolates" value=""/>
157 <output_collection name="snps" type="list" count="1"> 168 <output_collection name="snps" type="list" count="1">
158 <element name="all_vcf" ftype="fasta"> 169 <element name="all_vcf" ftype="fasta">
159 <assert_contents> 170 <assert_contents>
160 <has_size value="150"/> 171 <has_size value="150"/>
161 </assert_contents> 172 </assert_contents>
188 <collection type="list"> 199 <collection type="list">
189 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> 200 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
190 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> 201 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
191 </collection> 202 </collection>
192 </param> 203 </param>
204 <param name="input_additional_zc_vcf_collection" value="yes"/>
193 <param name="input_vcf_collection"> 205 <param name="input_vcf_collection">
194 <collection type="list"> 206 <collection type="list">
195 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> 207 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
196 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> 208 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
197 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> 209 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
198 </collection> 210 </collection>
199 </param> 211 </param>
200 <param name="input_excel_param" value="no"/> 212 <param name="input_excel_param" value="no"/>
213 <param name="all_isolates" value=""/>
201 <output_collection name="snps" type="list" count="1"> 214 <output_collection name="snps" type="list" count="1">
202 <element name="all_vcf" ftype="fasta"> 215 <element name="all_vcf" ftype="fasta">
203 <assert_contents> 216 <assert_contents>
204 <has_size value="5226"/> 217 <has_size value="5226"/>
205 </assert_contents> 218 </assert_contents>
232 <collection type="list"> 245 <collection type="list">
233 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> 246 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
234 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> 247 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
235 </collection> 248 </collection>
236 </param> 249 </param>
250 <param name="input_additional_zc_vcf_collection" value="yes"/>
237 <param name="input_vcf_collection"> 251 <param name="input_vcf_collection">
238 <collection type="list"> 252 <collection type="list">
239 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> 253 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
240 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> 254 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
241 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> 255 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
242 </collection> 256 </collection>
243 </param> 257 </param>
244 <param name="input_excel_param" value="yes"/> 258 <param name="input_excel_param" value="yes"/>
245 <param name="input_excel" value="89"/> 259 <param name="input_excel" value="89"/>
260 <param name="all_isolates" value=""/>
246 <output_collection name="snps" type="list" count="1"> 261 <output_collection name="snps" type="list" count="1">
247 <element name="Mbovis-17" ftype="fasta"> 262 <element name="Mbovis-17" ftype="fasta">
248 <assert_contents> 263 <assert_contents>
249 <has_size value="749"/> 264 <has_size value="749"/>
250 </assert_contents> 265 </assert_contents>
277 <collection type="list"> 292 <collection type="list">
278 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> 293 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
279 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> 294 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
280 </collection> 295 </collection>
281 </param> 296 </param>
297 <param name="input_additional_zc_vcf_collection" value="yes"/>
282 <param name="input_vcf_collection"> 298 <param name="input_vcf_collection">
283 <collection type="list"> 299 <collection type="list">
284 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> 300 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
285 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> 301 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
286 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> 302 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>