comparison vsnp_get_snps.xml @ 9:0fe292b20b9d draft

"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 3b7fef2d17fec96647345e89c774d4af417d23d7"
author greg
date Thu, 29 Jul 2021 13:16:03 +0000
parents 5e4595b9f63c
children be5875f29ea4
comparison
equal deleted inserted replaced
8:5e4595b9f63c 9:0fe292b20b9d
1 <tool id="vsnp_get_snps_broken" name="vSNP: get SNPs broken" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@"> 1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements> 6 <requirements>
10 <requirement type="package" version="2.0.1">xlrd</requirement> 10 <requirement type="package" version="2.0.1">xlrd</requirement>
11 </requirements> 11 </requirements>
12 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
13 #import re 13 #import re
14 14
15 #if str($input.metadata.dbkey) == '?':
16 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
17 #end if
18
19 #set input_vcf_dir = 'input_vcf_dir' 15 #set input_vcf_dir = 'input_vcf_dir'
20 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' 16 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
21 #set output_json_snps_dir = 'output_json_snps_dir' 17 #set output_json_snps_dir = 'output_json_snps_dir'
22 #set output_snps_dir = 'output_snps_dir' 18 #set output_snps_dir = 'output_snps_dir'
23 19
24 mkdir -p $input_vcf_dir && 20 mkdir -p $input_vcf_dir &&
25 mkdir -p $output_json_avg_mq_dir && 21 mkdir -p $output_json_avg_mq_dir &&
26 mkdir -p $output_json_snps_dir && 22 mkdir -p $output_json_snps_dir &&
27 mkdir -p $output_snps_dir && 23 mkdir -p $output_snps_dir &&
28 24
29 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) 25 #set dbkey = '?'
30 ln -s '${input}' '$input_vcf_dir/${input_identifier}' && 26 #for $i in $input_vcf_collection:
31 27 #if str($dbkey) == '?':
28 #set dbkey = $i.metadata.dbkey
29 #else if str($dbkey) != $i.metadata.dbkey:
30 >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique" &&
31 exit 1
32 #end if
33 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
34 ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' &&
35 #end for
36 #if str($dbkey) == '?':
37 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
38 #end if
32 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": 39 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single":
33 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) 40 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier))
34 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && 41 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' &&
35 #else 42 #else
36 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: 43 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection:
43 #set excel_file = 'No genome specified for input VCF (database) file(s)' 50 #set excel_file = 'No genome specified for input VCF (database) file(s)'
44 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() 51 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields()
45 ## The value of excel_fields is a nested list that looks like this. 52 ## The value of excel_fields is a nested list that looks like this.
46 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] 53 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...]
47 #for $i in $excel_fields: 54 #for $i in $excel_fields:
48 #if str($i[0]) == $input.metadata.dbkey: 55 #if str($i[0]) == $dbkey:
49 #set excel_file = $i[2] 56 #set excel_file = $i[2]
50 #break 57 #break
51 #end if 58 #end if
52 #end for 59 #end for
53 #else: 60 #else:
67 --output_json_snps_dir '$output_json_snps_dir' 74 --output_json_snps_dir '$output_json_snps_dir'
68 --output_snps_dir '$output_snps_dir' 75 --output_snps_dir '$output_snps_dir'
69 --output_summary '$output_summary' 76 --output_summary '$output_summary'
70 --processes \${GALAXY_SLOTS:-8} 77 --processes \${GALAXY_SLOTS:-8}
71 --quality_score_n_threshold $quality_score_n_threshold 78 --quality_score_n_threshold $quality_score_n_threshold
72 --dbkey '$input.metadata.dbkey' 79 --dbkey '$dbkey'
73 ]]></command> 80 ]]></command>
74 <inputs> 81 <inputs>
75 <conditional name="input_zc_vcf_type_cond"> 82 <conditional name="input_zc_vcf_type_cond">
76 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> 83 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed">
77 <option value="collection" selected="true">A collection of zero coverage VCF files</option> 84 <option value="collection" selected="true">A collection of zero coverage VCF files</option>
82 </when> 89 </when>
83 <when value="collection"> 90 <when value="collection">
84 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> 91 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/>
85 </when> 92 </when>
86 </conditional> 93 </conditional>
87 <param name="input" format="vcf" type="data" label="Zero coverage VCF file with SNPs found in closely related isolate groups"/> 94 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/>
88 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> 95 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/>
89 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> 96 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/>
90 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> 97 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/>
91 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> 98 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/>
92 <conditional name="input_excel_cond"> 99 <conditional name="input_excel_cond">
101 <option value="history">from history</option> 108 <option value="history">from history</option>
102 </param> 109 </param>
103 <when value="cached"> 110 <when value="cached">
104 <param name="input_excel" type="select" label="Excel file"> 111 <param name="input_excel" type="select" label="Excel file">
105 <options from_data_table="vsnp_excel"> 112 <options from_data_table="vsnp_excel">
113 <filter type="data_meta" column="0" key="dbkey" ref="input_vcf_collection"/>
106 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> 114 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
107 </options> 115 </options>
108 </param> 116 </param>
109 </when> 117 </when>
110 <when value="history"> 118 <when value="history">
127 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/> 135 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/>
128 </collection> 136 </collection>
129 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/> 137 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/>
130 </outputs> 138 </outputs>
131 <tests> 139 <tests>
132 <!-- No excel file, all_isolates is False --> 140 <!--
133 <test expect_num_outputs="4"> 141 Unfortunately the test files cannot be gzipped since Galaxy changes the file names
134 <param name="input" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> 142 to be something like 00-0121_WI_Cervid_99-A_vcf_gz, and the VCF Reader requires
135 <param name="input_zc_vcf_collection"> 143 gzipped files to have a .gz extension. The exception is
144 UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte
145 -->
146 <!-- A single vcf input, no excel file, all_isolates is False -->
147 <test expect_num_outputs="4">
148 <param name="input_zc_vcf_type" value="single"/>
149 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
150 <param name="input_vcf_collection">
136 <collection type="list"> 151 <collection type="list">
137 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> 152 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
138 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> 153 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
139 </collection> 154 </collection>
140 </param> 155 </param>
141 <param name="input_excel_param" value="no"/> 156 <param name="input_excel_param" value="no"/>
142 <output_collection name="snps" type="list" count="1"> 157 <output_collection name="snps" type="list" count="1">
143 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/> 158 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
144 </output_collection> 159 </output_collection>
145 <output_collection name="json_avg_mq" type="list" count="1"> 160 <output_collection name="json_avg_mq" type="list" count="1">
146 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> 161 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
147 </output_collection> 162 </output_collection>
148 <output_collection name="json_snps" type="list" count="1"> 163 <output_collection name="json_snps" type="list" count="1">
149 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> 164 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/>
150 </output_collection> 165 </output_collection>
151 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> 166 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
152 </test> 167 </test>
153 <!-- Excel file, all_isolates is False --> 168 <!-- An input collection, no excel file, all_isolates is False -->
154 <test expect_num_outputs="4"> 169 <test expect_num_outputs="4">
155 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> 170 <param name="input_zc_vcf_type" value="collection"/>
156 <param name="input_zc_vcf_collection"> 171 <param name="input_zc_vcf_collection">
157 <collection type="list"> 172 <collection type="list">
158 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> 173 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
159 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> 174 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
160 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> 175 </collection>
176 </param>
177 <param name="input_vcf_collection">
178 <collection type="list">
179 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
180 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
181 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
182 </collection>
183 </param>
184 <param name="input_excel_param" value="no"/>
185 <output_collection name="snps" type="list" count="1">
186 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/>
187 </output_collection>
188 <output_collection name="json_avg_mq" type="list" count="1">
189 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
190 </output_collection>
191 <output_collection name="json_snps" type="list" count="1">
192 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/>
193 </output_collection>
194 <output name="output_summary" file="output_summary2.html" ftype="html" compare="contains"/>
195 </test>
196 <!-- An input collection, an excel file, all_isolates is False -->
197 <test expect_num_outputs="4">
198 <param name="input_zc_vcf_type" value="collection"/>
199 <param name="input_zc_vcf_collection">
200 <collection type="list">
201 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
202 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
203 </collection>
204 </param>
205 <param name="input_vcf_collection">
206 <collection type="list">
207 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
208 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
209 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
161 </collection> 210 </collection>
162 </param> 211 </param>
163 <param name="input_excel_param" value="yes"/> 212 <param name="input_excel_param" value="yes"/>
164 <param name="input_excel" value="89"/> 213 <param name="input_excel" value="89"/>
165 <output_collection name="snps" type="list" count="1"> 214 <output_collection name="snps" type="list" count="1">
166 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> 215 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta" compare="contains"/>
167 </output_collection> 216 </output_collection>
168 <output_collection name="json_avg_mq" type="list" count="1"> 217 <output_collection name="json_avg_mq" type="list" count="1">
169 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> 218 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
170 </output_collection> 219 </output_collection>
171 <output_collection name="json_snps" type="list" count="1"> 220 <output_collection name="json_snps" type="list" count="1">
172 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> 221 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
173 </output_collection> 222 </output_collection>
174 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> 223 <output name="output_summary" file="output_summary3.html" ftype="html" compare="contains"/>
175 </test> 224 </test>
176 <!-- Excel file, all_isolates is True --> 225 <!-- An input collection, an excel file, all_isolates is True -->
177 <test expect_num_outputs="4"> 226 <test expect_num_outputs="4">
178 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> 227 <param name="input_zc_vcf_type" value="collection"/>
179 <param name="input_zc_vcf_collection"> 228 <param name="input_zc_vcf_collection">
180 <collection type="list"> 229 <collection type="list">
181 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> 230 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
182 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> 231 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
183 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> 232 </collection>
233 </param>
234 <param name="input_vcf_collection">
235 <collection type="list">
236 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
237 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
238 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
184 </collection> 239 </collection>
185 </param> 240 </param>
186 <param name="input_excel_param" value="yes"/> 241 <param name="input_excel_param" value="yes"/>
187 <param name="input_excel" value="89"/> 242 <param name="input_excel" value="89"/>
188 <param name="all_isolates" value="--all_isolates"/> 243 <param name="all_isolates" value="--all_isolates"/>
189 <output_collection name="snps" type="list" count="2"> 244 <output_collection name="snps" type="list" count="2">
190 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> 245 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta" compare="contains"/>
191 <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta" compare="contains"/> 246 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/>
192 </output_collection> 247 </output_collection>
193 <output_collection name="json_avg_mq" type="list" count="2"> 248 <output_collection name="json_avg_mq" type="list" count="2">
194 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> 249 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
195 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> 250 <element name="all_vcf" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
196 </output_collection> 251 </output_collection>
197 <output_collection name="json_snps" type="list" count="2"> 252 <output_collection name="json_snps" type="list" count="2">
198 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> 253 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
199 <element name="all_vcf" file="all_vcf_snps_json.json" ftype="json" compare="contains"/> 254 <element name="all_vcf" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
200 </output_collection> 255 </output_collection>
201 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> 256 <output name="output_summary" file="output_summary4.html" ftype="html" compare="contains"/>
202 </test> 257 </test>
203 </tests> 258 </tests>
204 <help> 259 <help>
205 **What it does** 260 **What it does**
206 261