Mercurial > repos > greg > vsnp_get_snps
comparison vsnp_get_snps.xml @ 9:0fe292b20b9d draft
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 3b7fef2d17fec96647345e89c774d4af417d23d7"
author | greg |
---|---|
date | Thu, 29 Jul 2021 13:16:03 +0000 |
parents | 5e4595b9f63c |
children | be5875f29ea4 |
comparison
equal
deleted
inserted
replaced
8:5e4595b9f63c | 9:0fe292b20b9d |
---|---|
1 <tool id="vsnp_get_snps_broken" name="vSNP: get SNPs broken" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@"> | 1 <tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@"> |
2 <description></description> | 2 <description></description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
10 <requirement type="package" version="2.0.1">xlrd</requirement> | 10 <requirement type="package" version="2.0.1">xlrd</requirement> |
11 </requirements> | 11 </requirements> |
12 <command detect_errors="exit_code"><![CDATA[ | 12 <command detect_errors="exit_code"><![CDATA[ |
13 #import re | 13 #import re |
14 | 14 |
15 #if str($input.metadata.dbkey) == '?': | |
16 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1 | |
17 #end if | |
18 | |
19 #set input_vcf_dir = 'input_vcf_dir' | 15 #set input_vcf_dir = 'input_vcf_dir' |
20 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' | 16 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir' |
21 #set output_json_snps_dir = 'output_json_snps_dir' | 17 #set output_json_snps_dir = 'output_json_snps_dir' |
22 #set output_snps_dir = 'output_snps_dir' | 18 #set output_snps_dir = 'output_snps_dir' |
23 | 19 |
24 mkdir -p $input_vcf_dir && | 20 mkdir -p $input_vcf_dir && |
25 mkdir -p $output_json_avg_mq_dir && | 21 mkdir -p $output_json_avg_mq_dir && |
26 mkdir -p $output_json_snps_dir && | 22 mkdir -p $output_json_snps_dir && |
27 mkdir -p $output_snps_dir && | 23 mkdir -p $output_snps_dir && |
28 | 24 |
29 #set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier)) | 25 #set dbkey = '?' |
30 ln -s '${input}' '$input_vcf_dir/${input_identifier}' && | 26 #for $i in $input_vcf_collection: |
31 | 27 #if str($dbkey) == '?': |
28 #set dbkey = $i.metadata.dbkey | |
29 #else if str($dbkey) != $i.metadata.dbkey: | |
30 >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique" && | |
31 exit 1 | |
32 #end if | |
33 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
34 ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' && | |
35 #end for | |
36 #if str($dbkey) == '?': | |
37 >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1 | |
38 #end if | |
32 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": | 39 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single": |
33 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) | 40 #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier)) |
34 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && | 41 ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' && |
35 #else | 42 #else |
36 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: | 43 #for $i in $input_zc_vcf_type_cond.input_zc_vcf_collection: |
43 #set excel_file = 'No genome specified for input VCF (database) file(s)' | 50 #set excel_file = 'No genome specified for input VCF (database) file(s)' |
44 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() | 51 #set excel_fields = $__app__.tool_data_tables['vsnp_excel'].get_fields() |
45 ## The value of excel_fields is a nested list that looks like this. | 52 ## The value of excel_fields is a nested list that looks like this. |
46 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] | 53 ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...] |
47 #for $i in $excel_fields: | 54 #for $i in $excel_fields: |
48 #if str($i[0]) == $input.metadata.dbkey: | 55 #if str($i[0]) == $dbkey: |
49 #set excel_file = $i[2] | 56 #set excel_file = $i[2] |
50 #break | 57 #break |
51 #end if | 58 #end if |
52 #end for | 59 #end for |
53 #else: | 60 #else: |
67 --output_json_snps_dir '$output_json_snps_dir' | 74 --output_json_snps_dir '$output_json_snps_dir' |
68 --output_snps_dir '$output_snps_dir' | 75 --output_snps_dir '$output_snps_dir' |
69 --output_summary '$output_summary' | 76 --output_summary '$output_summary' |
70 --processes \${GALAXY_SLOTS:-8} | 77 --processes \${GALAXY_SLOTS:-8} |
71 --quality_score_n_threshold $quality_score_n_threshold | 78 --quality_score_n_threshold $quality_score_n_threshold |
72 --dbkey '$input.metadata.dbkey' | 79 --dbkey '$dbkey' |
73 ]]></command> | 80 ]]></command> |
74 <inputs> | 81 <inputs> |
75 <conditional name="input_zc_vcf_type_cond"> | 82 <conditional name="input_zc_vcf_type_cond"> |
76 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> | 83 <param name="input_zc_vcf_type" type="select" label="Choose the category of the files to be analyzed"> |
77 <option value="collection" selected="true">A collection of zero coverage VCF files</option> | 84 <option value="collection" selected="true">A collection of zero coverage VCF files</option> |
82 </when> | 89 </when> |
83 <when value="collection"> | 90 <when value="collection"> |
84 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> | 91 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/> |
85 </when> | 92 </when> |
86 </conditional> | 93 </conditional> |
87 <param name="input" format="vcf" type="data" label="Zero coverage VCF file with SNPs found in closely related isolate groups"/> | 94 <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/> |
88 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> | 95 <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/> |
89 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> | 96 <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/> |
90 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> | 97 <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/> |
91 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> | 98 <param name="quality_score_n_threshold" type="integer" min="0" value="150" label="Minimum quality score N value for alleles" help="Alleles are marked as N for quality scores between this value and the minimum quality score value above"/> |
92 <conditional name="input_excel_cond"> | 99 <conditional name="input_excel_cond"> |
101 <option value="history">from history</option> | 108 <option value="history">from history</option> |
102 </param> | 109 </param> |
103 <when value="cached"> | 110 <when value="cached"> |
104 <param name="input_excel" type="select" label="Excel file"> | 111 <param name="input_excel" type="select" label="Excel file"> |
105 <options from_data_table="vsnp_excel"> | 112 <options from_data_table="vsnp_excel"> |
113 <filter type="data_meta" column="0" key="dbkey" ref="input_vcf_collection"/> | |
106 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> | 114 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/> |
107 </options> | 115 </options> |
108 </param> | 116 </param> |
109 </when> | 117 </when> |
110 <when value="history"> | 118 <when value="history"> |
127 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/> | 135 <discover_datasets pattern="__name_and_ext__" directory="output_json_snps_dir"/> |
128 </collection> | 136 </collection> |
129 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/> | 137 <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/> |
130 </outputs> | 138 </outputs> |
131 <tests> | 139 <tests> |
132 <!-- No excel file, all_isolates is False --> | 140 <!-- |
133 <test expect_num_outputs="4"> | 141 Unfortunately the test files cannot be gzipped since Galaxy changes the file names |
134 <param name="input" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> | 142 to be something like 00-0121_WI_Cervid_99-A_vcf_gz, and the VCF Reader requires |
135 <param name="input_zc_vcf_collection"> | 143 gzipped files to have a .gz extension. The exception is |
144 UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte | |
145 --> | |
146 <!-- A single vcf input, no excel file, all_isolates is False --> | |
147 <test expect_num_outputs="4"> | |
148 <param name="input_zc_vcf_type" value="single"/> | |
149 <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/> | |
150 <param name="input_vcf_collection"> | |
136 <collection type="list"> | 151 <collection type="list"> |
137 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> | 152 <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/> |
138 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> | 153 <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/> |
139 </collection> | 154 </collection> |
140 </param> | 155 </param> |
141 <param name="input_excel_param" value="no"/> | 156 <param name="input_excel_param" value="no"/> |
142 <output_collection name="snps" type="list" count="1"> | 157 <output_collection name="snps" type="list" count="1"> |
143 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/> | 158 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/> |
144 </output_collection> | 159 </output_collection> |
145 <output_collection name="json_avg_mq" type="list" count="1"> | 160 <output_collection name="json_avg_mq" type="list" count="1"> |
146 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> | 161 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/> |
147 </output_collection> | 162 </output_collection> |
148 <output_collection name="json_snps" type="list" count="1"> | 163 <output_collection name="json_snps" type="list" count="1"> |
149 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> | 164 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> |
150 </output_collection> | 165 </output_collection> |
151 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> | 166 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> |
152 </test> | 167 </test> |
153 <!-- Excel file, all_isolates is False --> | 168 <!-- An input collection, no excel file, all_isolates is False --> |
154 <test expect_num_outputs="4"> | 169 <test expect_num_outputs="4"> |
155 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> | 170 <param name="input_zc_vcf_type" value="collection"/> |
156 <param name="input_zc_vcf_collection"> | 171 <param name="input_zc_vcf_collection"> |
157 <collection type="list"> | 172 <collection type="list"> |
158 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> | 173 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> |
159 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> | 174 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> |
160 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> | 175 </collection> |
176 </param> | |
177 <param name="input_vcf_collection"> | |
178 <collection type="list"> | |
179 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> | |
180 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> | |
181 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> | |
182 </collection> | |
183 </param> | |
184 <param name="input_excel_param" value="no"/> | |
185 <output_collection name="snps" type="list" count="1"> | |
186 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/> | |
187 </output_collection> | |
188 <output_collection name="json_avg_mq" type="list" count="1"> | |
189 <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/> | |
190 </output_collection> | |
191 <output_collection name="json_snps" type="list" count="1"> | |
192 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/> | |
193 </output_collection> | |
194 <output name="output_summary" file="output_summary2.html" ftype="html" compare="contains"/> | |
195 </test> | |
196 <!-- An input collection, an excel file, all_isolates is False --> | |
197 <test expect_num_outputs="4"> | |
198 <param name="input_zc_vcf_type" value="collection"/> | |
199 <param name="input_zc_vcf_collection"> | |
200 <collection type="list"> | |
201 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> | |
202 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> | |
203 </collection> | |
204 </param> | |
205 <param name="input_vcf_collection"> | |
206 <collection type="list"> | |
207 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> | |
208 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> | |
209 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> | |
161 </collection> | 210 </collection> |
162 </param> | 211 </param> |
163 <param name="input_excel_param" value="yes"/> | 212 <param name="input_excel_param" value="yes"/> |
164 <param name="input_excel" value="89"/> | 213 <param name="input_excel" value="89"/> |
165 <output_collection name="snps" type="list" count="1"> | 214 <output_collection name="snps" type="list" count="1"> |
166 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> | 215 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta" compare="contains"/> |
167 </output_collection> | 216 </output_collection> |
168 <output_collection name="json_avg_mq" type="list" count="1"> | 217 <output_collection name="json_avg_mq" type="list" count="1"> |
169 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> | 218 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/> |
170 </output_collection> | 219 </output_collection> |
171 <output_collection name="json_snps" type="list" count="1"> | 220 <output_collection name="json_snps" type="list" count="1"> |
172 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> | 221 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/> |
173 </output_collection> | 222 </output_collection> |
174 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> | 223 <output name="output_summary" file="output_summary3.html" ftype="html" compare="contains"/> |
175 </test> | 224 </test> |
176 <!-- Excel file, all_isolates is True --> | 225 <!-- An input collection, an excel file, all_isolates is True --> |
177 <test expect_num_outputs="4"> | 226 <test expect_num_outputs="4"> |
178 <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/> | 227 <param name="input_zc_vcf_type" value="collection"/> |
179 <param name="input_zc_vcf_collection"> | 228 <param name="input_zc_vcf_collection"> |
180 <collection type="list"> | 229 <collection type="list"> |
181 <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/> | 230 <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/> |
182 <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/> | 231 <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/> |
183 <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/> | 232 </collection> |
233 </param> | |
234 <param name="input_vcf_collection"> | |
235 <collection type="list"> | |
236 <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/> | |
237 <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/> | |
238 <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/> | |
184 </collection> | 239 </collection> |
185 </param> | 240 </param> |
186 <param name="input_excel_param" value="yes"/> | 241 <param name="input_excel_param" value="yes"/> |
187 <param name="input_excel" value="89"/> | 242 <param name="input_excel" value="89"/> |
188 <param name="all_isolates" value="--all_isolates"/> | 243 <param name="all_isolates" value="--all_isolates"/> |
189 <output_collection name="snps" type="list" count="2"> | 244 <output_collection name="snps" type="list" count="2"> |
190 <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/> | 245 <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta" compare="contains"/> |
191 <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta" compare="contains"/> | 246 <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/> |
192 </output_collection> | 247 </output_collection> |
193 <output_collection name="json_avg_mq" type="list" count="2"> | 248 <output_collection name="json_avg_mq" type="list" count="2"> |
194 <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/> | 249 <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/> |
195 <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/> | 250 <element name="all_vcf" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/> |
196 </output_collection> | 251 </output_collection> |
197 <output_collection name="json_snps" type="list" count="2"> | 252 <output_collection name="json_snps" type="list" count="2"> |
198 <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/> | 253 <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/> |
199 <element name="all_vcf" file="all_vcf_snps_json.json" ftype="json" compare="contains"/> | 254 <element name="all_vcf" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/> |
200 </output_collection> | 255 </output_collection> |
201 <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/> | 256 <output name="output_summary" file="output_summary4.html" ftype="html" compare="contains"/> |
202 </test> | 257 </test> |
203 </tests> | 258 </tests> |
204 <help> | 259 <help> |
205 **What it does** | 260 **What it does** |
206 | 261 |