Mercurial > repos > greg > vsnp_get_snps

diff vsnp_get_snps.xml @ 9:0fe292b20b9d draft
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_get_snps commit 3b7fef2d17fec96647345e89c774d4af417d23d7"
author: greg
date: Thu, 29 Jul 2021 13:16:03 +0000
parents: 5e4595b9f63c
children: be5875f29ea4
--- a/vsnp_get_snps.xml	Thu Jul 29 12:50:01 2021 +0000
+++ b/vsnp_get_snps.xml	Thu Jul 29 13:16:03 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="vsnp_get_snps_broken" name="vSNP: get SNPs broken" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@">
+<tool id="vsnp_get_snps" name="vSNP: get SNPs" version="@WRAPPER_VERSION@.0+galaxy0" profile="@PROFILE@">
     <description></description>
     <macros>
         <import>macros.xml</import>
@@ -12,10 +12,6 @@
     <command detect_errors="exit_code"><![CDATA[
 #import re
 
-#if str($input.metadata.dbkey) == '?':
-    >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
-#end if
-
 #set input_vcf_dir = 'input_vcf_dir'
 #set output_json_avg_mq_dir = 'output_json_avg_mq_dir'
 #set output_json_snps_dir = 'output_json_snps_dir'
@@ -26,9 +22,20 @@
 mkdir -p $output_json_snps_dir &&
 mkdir -p $output_snps_dir &&
 
-#set input_identifier = re.sub('[^\s\w\-]', '_', str($input.element_identifier))
-ln -s '${input}' '$input_vcf_dir/${input_identifier}' &&
-
+#set dbkey = '?'
+#for $i in $input_vcf_collection:
+    #if str($dbkey) == '?':
+        #set dbkey = $i.metadata.dbkey
+    #else if str($dbkey) != $i.metadata.dbkey:
+        >&2 echo "The dbkeys associated with the zero coverage VCF files with SNPs found in closely related isolate groups are not unique" &&
+exit 1
+    #end if
+    #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
+    ln -s '${i}' '$input_vcf_dir/${vcf_identifier}' &&
+#end for
+#if str($dbkey) == '?':
+    >&2 echo "The dbkey must be set for the zero coverage VCF files with SNPs found in closely related isolate groups" && exit 1
+#end if
 #if str($input_zc_vcf_type_cond.input_zc_vcf_type) == "single":
     #set zc_vcf_identifier = re.sub('[^\s\w\-]', '_', str($input_zc_vcf.element_identifier))
     ln -s '${input_zc_vcf}' '$input_vcf_dir/${zc_vcf_identifier}' &&
@@ -45,7 +52,7 @@
 	## The value of excel_fields is a nested list that looks like this.
         ## [['AF2122', 'Mbovis_define_filter.xlsx', '~/tool-data/vsnp/AF2122/excel/Mbovis_define_filter.xlsx', 'Excel file for AF2122'],...]
         #for $i in $excel_fields:
-            #if str($i[0]) == $input.metadata.dbkey:
+            #if str($i[0]) == $dbkey:
                 #set excel_file = $i[2]
                 #break
             #end if
@@ -69,7 +76,7 @@
 --output_summary '$output_summary'
 --processes \${GALAXY_SLOTS:-8}
 --quality_score_n_threshold $quality_score_n_threshold
---dbkey '$input.metadata.dbkey'
+--dbkey '$dbkey'
 ]]></command>
     <inputs>
         <conditional name="input_zc_vcf_type_cond">
@@ -84,7 +91,7 @@
                 <param name="input_zc_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files"/>
             </when>
         </conditional>
-        <param name="input" format="vcf" type="data" label="Zero coverage VCF file with SNPs found in closely related isolate groups"/>
+        <param name="input_vcf_collection" format="vcf" type="data_collection" collection_type="list" label="Collection of zero coverage VCF files with SNPs found in closely related isolate groups"/>
         <param name="ac" type="integer" min="0" value="2" label="Allele count threshold" help="At least 1 position must have this value for a SNP to be added to a group"/>
         <param name="min_mq" type="integer" min="0" value="56" label="Map quality threshold" help="At least 1 position must have a higher MQ value for a SNP to be added to a group"/>
         <param name="min_quality_score" type="integer" min="0" value="150" label="Quality score threshold" help="At least 1 position must have a higher quality score for a SNP to be added to a group"/>
@@ -103,6 +110,7 @@
                     <when value="cached">
                         <param name="input_excel" type="select" label="Excel file">
                             <options from_data_table="vsnp_excel">
+                                <filter type="data_meta" column="0" key="dbkey" ref="input_vcf_collection"/>
                                 <validator type="no_options" message="No built-in Excel grouping and filtering datasets are available"/>
                             </options>
                         </param>
@@ -129,10 +137,17 @@
         <data name="output_summary" format="html" label="${tool.name} on ${on_string} (summary)"/>
     </outputs>
     <tests>
-        <!-- No excel file, all_isolates is False -->
+        <!--
+            Unfortunately the test files cannot be gzipped, because Galaxy changes the file names
+            to something like 00-0121_WI_Cervid_99-A_vcf_gz, and the VCF Reader requires
+            gzipped files to have a .gz extension.  The resulting exception is
+            UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte
+        -->
+        <!-- A single vcf input, no excel file, all_isolates is False -->
         <test expect_num_outputs="4">
-            <param name="input" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
-            <param name="input_zc_vcf_collection">
+            <param name="input_zc_vcf_type" value="single"/>
+            <param name="input_zc_vcf" value="input_zc_vcf.vcf" ftype="vcf" dbkey="89"/>
+            <param name="input_vcf_collection">
                 <collection type="list">
                     <element name="SRR8073662_zc.vcf" value="SRR8073662_zc.vcf" dbkey="89"/>
                     <element name="SRR1792272_zc.vcf" value="SRR1792272_zc.vcf" dbkey="89"/>
@@ -143,62 +158,102 @@
                 <element name="all_vcf" file="all_vcf.fasta" ftype="fasta" compare="contains"/>
             </output_collection>
             <output_collection name="json_avg_mq" type="list" count="1">
-                <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/>
+                <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
             </output_collection>
             <output_collection name="json_snps" type="list" count="1">
                 <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/>
             </output_collection>
             <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
         </test>
-        <!-- Excel file, all_isolates is False -->
+        <!-- An input collection, no excel file, all_isolates is False -->
         <test expect_num_outputs="4">
-            <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/>
+            <param name="input_zc_vcf_type" value="collection"/>
             <param name="input_zc_vcf_collection">
                 <collection type="list">
-                    <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/>
-                    <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/>
-                    <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/>
+                    <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
+                    <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="input_vcf_collection">
+                <collection type="list">
+                    <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
+                    <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
+                    <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="input_excel_param" value="no"/>
+            <output_collection name="snps" type="list" count="1">
+                <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/>
+            </output_collection>
+            <output_collection name="json_avg_mq" type="list" count="1">
+                <element name="all_vcf" file="json_avg_mq_all_vcf.json" ftype="json" compare="contains"/>
+            </output_collection>
+            <output_collection name="json_snps" type="list" count="1">
+                <element name="all_vcf" file="json_all_vcf.json" ftype="json" compare="contains"/>
+            </output_collection>
+            <output name="output_summary" file="output_summary2.html" ftype="html" compare="contains"/>
+        </test>
+        <!-- An input collection, an excel file, all_isolates is False -->
+        <test expect_num_outputs="4">
+            <param name="input_zc_vcf_type" value="collection"/>
+            <param name="input_zc_vcf_collection">
+                <collection type="list">
+                    <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
+                    <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="input_vcf_collection">
+                <collection type="list">
+                    <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
+                    <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
+                    <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
                 </collection>
             </param>
             <param name="input_excel_param" value="yes"/>
             <param name="input_excel" value="89"/>
             <output_collection name="snps" type="list" count="1">
-                <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/>
+                <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta" compare="contains"/>
             </output_collection>
             <output_collection name="json_avg_mq" type="list" count="1">
-                <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/>
+                <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
             </output_collection>
             <output_collection name="json_snps" type="list" count="1">
-                <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/>
+                <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
             </output_collection>
-            <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
+            <output name="output_summary" file="output_summary3.html" ftype="html" compare="contains"/>
         </test>
-        <!-- Excel file, all_isolates is True -->
+        <!-- An input collection, an excel file, all_isolates is True -->
         <test expect_num_outputs="4">
-            <param name="input" value="02_0585_COA_TX_Fed.vcf" ftype="vcf" dbkey="89"/>
+            <param name="input_zc_vcf_type" value="collection"/>
             <param name="input_zc_vcf_collection">
                 <collection type="list">
-                    <element name="L02-19-1908_zc.vcf" value="L02-19-1908_zc.vcf" dbkey="89"/>
-                    <element name="L03-13-2334_zc.vcf" value="L03-13-2334_zc.vcf" dbkey="89"/>
-                    <element name="L03-16-0255_zc.vcf" value="L03-16-0255_zc.vcf" dbkey="89"/>
+                    <element name="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" value="BCG_Pasteur_Unknown_FR_SRR8886989.vcf" dbkey="89"/>
+                    <element name="BCG_Tokyo_Unknown_JP_DRR029468.vcf" value="BCG_Tokyo_Unknown_JP_DRR029468.vcf" dbkey="89"/>
+                </collection>
+            </param>
+            <param name="input_vcf_collection">
+                <collection type="list">
+                    <element name="01_1787_FL_Zoo_Jaguar.vcf" value="01_1787_FL_Zoo_Jaguar.vcf" dbkey="89"/>
+                    <element name="02_5877_MEX_TX_Fed.vcf" value="02_5877_MEX_TX_Fed.vcf" dbkey="89"/>
+                    <element name="02_0585_COA_TX_Fed.vcf" value="02_0585_COA_TX_Fed.vcf" dbkey="89"/>
                 </collection>
             </param>
             <param name="input_excel_param" value="yes"/>
             <param name="input_excel" value="89"/>
             <param name="all_isolates" value="--all_isolates"/>
             <output_collection name="snps" type="list" count="2">
-                <element name="Mbovis-TB" file="Mbovis-TB.fasta" ftype="fasta" compare="contains"/>
-                <element name="all_vcf" file="all_vcf3.fasta" ftype="fasta" compare="contains"/>
+                <element name="Mbovis-17" file="Mbovis-17_snps.fasta" ftype="fasta" compare="contains"/>
+                <element name="all_vcf" file="all_vcf2.fasta" ftype="fasta" compare="contains"/>
             </output_collection>
             <output_collection name="json_avg_mq" type="list" count="2">
-                <element name="Mbovis-TB" file="avg_mq.json" ftype="json" compare="contains"/>
-                <element name="all_vcf" file="avg_mq.json" ftype="json" compare="contains"/>
+                <element name="Mbovis-17" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
+                <element name="all_vcf" file="Mbovis-17_avg_mq_json.json" ftype="json" compare="contains"/>
             </output_collection>
             <output_collection name="json_snps" type="list" count="2">
-                <element name="Mbovis-TB" file="Mbovis-TB_json.json" ftype="json" compare="contains"/>
-                <element name="all_vcf" file="all_vcf_snps_json.json" ftype="json" compare="contains"/>
+                <element name="Mbovis-17" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
+                <element name="all_vcf" file="Mbovis-17_snps_json.json" ftype="json" compare="contains"/>
             </output_collection>
-            <output name="output_summary" file="output_summary.html" ftype="html" compare="contains"/>
+            <output name="output_summary" file="output_summary4.html" ftype="html" compare="contains"/>
         </test>
     </tests>
     <help>
author	greg
date	Thu, 29 Jul 2021 13:16:03 +0000
parents	5e4595b9f63c
children	be5875f29ea4