Mercurial > repos > greg > vsnp_add_zero_coverage

diff vsnp_add_zero_coverage.xml @ 2:01312f8a6ca9 draft
Uploaded
author: greg
date: Sun, 03 Jan 2021 16:29:00 +0000
parents: 3cb0bf7e1b2d
children: bb6cc994707d
--- a/vsnp_add_zero_coverage.xml	Tue Apr 21 09:51:00 2020 -0400
+++ b/vsnp_add_zero_coverage.xml	Sun Jan 03 16:29:00 2021 +0000
@@ -1,147 +1,72 @@
-<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0">
+<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.2" profile="@PROFILE@">
     <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <requirements>
         <requirement type="package" version="1.76">biopython</requirement>
-        <requirement type="package" version="1.16.5">numpy</requirement>
         <requirement type="package" version="0.25.3">pandas</requirement>
         <requirement type="package" version="0.15.4">pysam</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-#import os
 #import re
-#set input_type = $input_type_cond.input_type
-#set input_bam_dir = 'input_bam_dir'
-#set input_vcf_dir = 'input_vcf_dir'
-#set output_vcf_dir = 'output_vcf_dir'
-#set output_metrics_dir = 'output_metrics_dir'
-mkdir -p $input_bam_dir &&
-mkdir -p $input_vcf_dir &&
-mkdir -p $output_vcf_dir &&
-mkdir -p $output_metrics_dir &&
-#if str($input_type) == "single":
-    #set bam_input = $input_type_cond.bam_input
-    #set file_name = $bam_input.file_name
-    #set file_name_base = $os.path.basename($file_name)
-    ln -s $file_name $input_bam_dir/$file_name_base &&
-    #set vcf_input = $input_type_cond.vcf_input
-    #set file_name = $vcf_input.file_name
-    #set file_name_base = $os.path.basename($file_name)
-    ln -s $file_name $input_vcf_dir/$file_name_base &&
-#else:
-    #for $i in $input_type_cond.bam_input_collection:
-        #set filename = $i.file_name
-        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
-        ln -s $filename $input_bam_dir/$identifier &&
-    #end for
-    #for $i in $input_type_cond.vcf_input_collection:
-        #set filename = $i.file_name
-        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
-        ln -s $filename $input_vcf_dir/$identifier &&
-    #end for
-#end if
+
+## The identifier for both of the following files is likely the same
+## string, so we append a file extension to allow for both links.
+#set bam_identifier = re.sub('[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
+ln -s '${bam_input}' '${bam_identifier}' &&
+#set vcf_identifier = re.sub('[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
+ln -s '${vcf_input}' '${vcf_identifier}' &&
+
 python '$__tool_directory__/vsnp_add_zero_coverage.py'
---processes $processes
-#if str($reference_cond.reference_source) == "cached"
+--bam_input '$bam_identifier'
+--vcf_input '$vcf_identifier'
+#if str($reference_cond.reference_source) == 'cached'
     --reference '$reference_cond.reference.fields.path'
 #else:
     --reference '$reference_cond.reference'
 #end if
-#if str($input_type) == "single":
-    --output_metrics '$output_metrics'
-    --output_vcf '$output_vcf'
-#end if
+--output_metrics '$output_metrics'
+--output_vcf '$output_vcf'
 ]]></command>
     <inputs>
-        <conditional name="input_type_cond">
-            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
-                <option value="single" selected="true">Single files</option>
-                <option value="collection">Collections of files</option>
-            </param>
-            <when value="single">
-                <param name="bam_input" type="data" format="bam" label="BAM file">
-                    <validator type="unspecified_build"/>
-                </param>
-                <param name="vcf_input" type="data" format="vcf" label="VCF file">
-                    <validator type="unspecified_build"/>
-                </param>
-            </when>
-            <when value="collection">
-                <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
-                    <validator type="unspecified_build"/>
-                </param>
-                <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
-                    <validator type="unspecified_build"/>
-                </param>
-            </when>
-        </conditional>
+        <param name="bam_input" type="data" format="bam" label="BAM file"/>
+        <param name="vcf_input" type="data" format="vcf" label="VCF file"/>
         <conditional name="reference_cond">
-            <param name="reference_source" type="select" label="Choose the source for the reference genome">
-                <option value="cached" selected="true">locally cached</option>
-                <option value="history">from history</option>
-            </param>
+            <expand macro="param_reference_source"/>
             <when value="cached">
                 <param name="reference" type="select" label="Using reference genome">
-                    <options from_data_table="fasta_indexes"/>
-                    <!-- No <filter> tag here! -->
-                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" column="1" key="dbkey" ref="bam_input"/>
+                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
+                    </options>
                 </param>
             </when>
             <when value="history">
-                <param name="reference" type="data" format="fasta" label="Using reference genome">
+                <param name="reference" type="data" format="fasta,fasta.gz" label="Using reference genome">
                     <validator type="no_options" message="The current history does not include a fasta dataset"/>
                 </param>
             </when>
         </conditional>
-        <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
     </inputs>
     <outputs>
-        <data name="output_vcf" format="vcf"  label="${tool.name} (filtered VCF) on ${on_string}">
-            <filter>input_type_cond['input_type'] == 'single'</filter>
-        </data>
-        <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
-            <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" />
-            <filter>input_type_cond['input_type'] == 'collection'</filter>
-        </collection>
-        <data name="output_metrics" format="tabular"  label="${tool.name} (metrics) on ${on_string}">
-            <filter>input_type_cond['input_type'] == 'single'</filter>
-        </data>
-        <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
-            <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" />
-            <filter>input_type_cond['input_type'] == 'collection'</filter>
-        </collection>
+        <data name="output_vcf" format="vcf"  label="${tool.name} on ${on_string} (filtered VCF)"/>
+        <data name="output_metrics" format="tabular"  label="${tool.name} on ${on_string} (metrics)"/>
     </outputs>
     <tests>
-        <test>
-            <param name="input_type" value="collection"/>
-            <param name="bam_input_collection">
-                <collection type="list">
-                    <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
-                    <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/>
-                </collection>
-            </param>
-            <param name="vcf_input_collection">
-                <collection type="list">
-                    <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
-                    <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/>
-                </collection>
-            </param>
-            <param name="reference_source" value="history"/>
-            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
-            <output_collection name="output_vcf_collection" type="list">
-                <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
-                <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
-            </output_collection>
-            <output_collection name="output_metrics_collection" type="list">
-                <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
-                <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
-            </output_collection>
-        </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
             <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
             <param name="reference_source" value="history"/>
             <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
-            <param name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
+            <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
+            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular"/>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
+            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
+            <param name="reference_source" value="cached"/>
+            <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
             <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
         </test>
     </tests>
@@ -157,19 +82,8 @@
 
 **Required Options**
 
- * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.
  * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
- * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
     </help>
-    <citations>
-        <citation type="bibtex">
-            @misc{None,
-            journal = {None},
-            author = {1. Stuber T},
-            title = {Manuscript in preparation},
-            year = {None},
-            url = {https://github.com/USDA-VS/vSNP},}
-        </citation>
-    </citations>
+    <expand macro="citations" />
 </tool>
author	greg
date	Sun, 03 Jan 2021 16:29:00 +0000
parents	3cb0bf7e1b2d
children	bb6cc994707d