diff vsnp_statistics.xml @ 8:1becb6606626 draft

Uploaded
author greg
date Mon, 02 Aug 2021 13:34:09 +0000
parents de2af65c4633
children ce1f889b3340
line wrap: on
line diff
--- a/vsnp_statistics.xml	Thu Jul 22 18:05:22 2021 +0000
+++ b/vsnp_statistics.xml	Mon Aug 02 13:34:09 2021 +0000
@@ -4,122 +4,68 @@
         <import>macros.xml</import>
     </macros>
     <requirements>
-        <requirement type="package" version="1.79">biopython</requirement>
-        <requirement type="package" version="1.21.1">numpy</requirement>
-        <requirement type="package" version="3.0.7">openpyxl</requirement>
-        <requirement type="package" version="1.3.0">pandas</requirement>
-        <requirement type="package" version="2.0.1">xlrd</requirement>
+        <expand macro="biopython_requirement"/>
+        <expand macro="numpy_requirement"/>
+        <expand macro="openpyxl_requirement"/>
+        <expand macro="pandas_requirement"/>
+        <expand macro="xlrd_requirement"/>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
 #import re
-#set input_idxstats_dir = 'input_idxstats'
-#set input_metrics_dir = 'input_metrics'
-#set input_reads_dir = 'input_reads'
-mkdir -p $input_idxstats_dir &&
-mkdir -p $input_metrics_dir &&
-mkdir -p $input_reads_dir &&
 
-#if $input_type_cond.input_type  == 'single_files':
-    #set read1 = $input_type_cond.read_type_cond.read1
+#if $input_type_cond.input_type in ["single", "pair"]:
+    #set read1 = $input_type_cond.read1
     #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
     ln -s '${read1}' '${read1_identifier}' &&
-    #if $input_type_cond.read_type_cond.read_type == 'pair':
-        #set read2 = $input_type_cond.read_type_cond.read2
+    #if $input_type_cond.input_type == "pair":
+        #set read2 = $input_type_cond.read2
         #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
         ln -s '${read2}' '${read2_identifier}' &&
     #else:
-        #set read2 = None
+        #set read2 = None 
     #end if
 #else:
-    #if $input_type_cond.collection_type_cond.collection_type == 'single':
-        #for $i in $input_type_cond.collection_type_cond.reads_collection:
-            #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
-            ln -s '${i.file_name}' '$input_reads_dir/${identifier}' &&
-        #end for
-    #else:
-        #set read1 = $input_type_cond.collection_type_cond.reads_collection['forward']
-        #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name))
-        ln -s '${read1}' '$input_reads_dir/${read1_identifier}' &&
-        #set read2 = $input_type_cond.collection_type_cond.reads_collection['reverse']
-        #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name))
-        ln -s '${read2}' '$input_reads_dir/${read2_identifier}' &&
-    #end if
-    #for $i in $input_type_cond.samtools_idxstats:
-        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
-        ln -s '${i.file_name}' '$input_idxstats_dir/${identifier}' &&
-    #end for
-    #for $i in $input_type_cond.vsnp_azc:
-        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
-        ln -s '${i.file_name}' '$input_metrics_dir/${identifier}' &&
-    #end for
+    #set read1 = $input_type_cond.reads_collection['forward']
+    #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name))
+    ln -s '${read1}' '${read1_identifier}' &&
+    #set read2 = $input_type_cond.reads_collection['reverse']
+    #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name))
+    ln -s '${read2}' '${read2_identifier}' &&
 #end if
 
 python '$__tool_directory__/vsnp_statistics.py'
-#if $input_type_cond.input_type == 'single_files':
-    --dbkey '$input_type_cond.samtools_idxstats.metadata.dbkey'
-    #if $input_type_cond.read_type_cond.read1.is_of_type('fastqsanger.gz'):
-        --gzipped
-    #end if
-    --read1 '${read1_identifier}'
-    #if $input_type_cond.read_type_cond.read_type == 'pair':
-      --read2 '${read2_identifier}'
-    #end if
-    --samtools_idxstats '$input_type_cond.samtools_idxstats'
-    --vsnp_azc '$input_type_cond.vsnp_azc'
-#else:
-    --dbkey '$input_type_cond.samtools_idxstats[0].metadata.dbkey'
-    #if $input_type_cond.collection_type_cond.reads_collection[0].is_of_type('fastqsanger.gz'):
-        --gzipped
-    #end if
-    #if $input_type_cond.collection_type_cond.collection_type == 'paired':
-        --list_paired
-    #end if
-    --input_idxstats_dir '$input_idxstats_dir'
-    --input_metrics_dir '$input_metrics_dir'
-    --input_reads_dir '$input_reads_dir'
+--read1 '${read1_identifier}'
+#if $read2 is not None
+  --read2 '${read2_identifier}'
 #end if
+#if $read1.is_of_type('fastqsanger.gz'):
+    --gzipped
+#end if
+--dbkey '$samtools_idxstats.metadata.dbkey'
+--samtools_idxstats '$samtools_idxstats'
+--vsnp_azc_metrics '$vsnp_azc_metrics'
 --output '$output'
 ]]></command>
     <inputs>
         <conditional name="input_type_cond">
             <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
-                <option value="single_files" selected="true">Single files</option>
-                <option value="collections">Collections of files</option>
+                <option value="single" selected="true">Single files</option>
+                <option value="paired">Paired reads</option>
+                <option value="pair">Paired reads in separate data sets</option>
             </param>
-            <when value="single_files">
-                <conditional name="read_type_cond">
-                    <param name="read_type" type="select" label="Choose the read type">
-                        <option value="single" selected="true">Single reads</option>
-                         <option value="pair">Paired reads</option>
-                    </param>
-                     <when value="single">
-                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
-                    </when>
-                    <when value="pair">
-                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
-                        <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
-                    </when>
-                </conditional>
-                <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
-                <param name="vsnp_azc" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
+            <when value="single">
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
             </when>
-            <when value="collections">
-                <conditional name="collection_type_cond">
-                    <param name="collection_type" type="select" label="Collections of single reads or paired reads?">
-                        <option value="single" selected="true">Single reads</option>
-                        <option value="paired">Paired reads in separate datasets</option>
-                    </param>
-                    <when value="single">
-                        <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
-                    </when>
-                    <when value="paired">
-                        <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
-                    </when>
-                </conditional>
-                <param name="samtools_idxstats" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
-                <param name="vsnp_azc" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP: add zero coverage metrics files"/>
+            <when value="paired">
+                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+            </when>
+            <when value="pair">
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+                <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
             </when>
         </conditional>
+        <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
+        <param name="vsnp_azc_metrics" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
     </inputs>
     <outputs>
         <data name="output" format="tabular"/>
@@ -127,67 +73,32 @@
     <tests>
         <!-- A single fastq file -->
         <test expect_num_outputs="1">
-            <param name="input_type" value="single_files"/>
-            <param name="read_type" value="single"/>
+            <param name="input_type" value="single"/>
             <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
             <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/>
-            <param name="vsnp_azc" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/>
+            <param name="vsnp_azc_metrics" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/>
             <output name="output" file="vsnp_statistics1.tabular" ftype="tabular"/>
         </test>
         <!-- A set of paired fastq files -->
         <test expect_num_outputs="1">
-            <param name="input_type" value="single_files"/>
-            <param name="read_type" value="pair"/>
+            <param name="input_type" value="pair"/>
             <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
             <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
             <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/>
-            <param name="vsnp_azc" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/>
+            <param name="vsnp_azc_metrics" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/>
             <output name="output" file="vsnp_statistics2.tabular" ftype="tabular"/>
         </test>
-        <!-- A collection of SE fastq files -->
+        <!-- A collection of paired fastq files -->
         <test expect_num_outputs="1">
-            <param name="input_type" value="collections"/>
-            <param name="read_type" value="single"/>
-            <param name="reads_collection">
-                <collection type="list">
-                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="Mcap_Deer_DE_SRR650221.fastq.gz" dbkey="89"/>
-                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="13-1941-6_S4_L001_R1_600000.fastq.gz" dbkey="89"/>
-                </collection>
-            </param>
-            <param name="samtools_idxstats">
-                <collection type="list">
-                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="samtools_idxstats3.tabular" dbkey="89"/>
-                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="samtools_idxstats4.tabular" dbkey="89"/>
-                </collection>
-            </param>
-            <param name="vsnp_azc">
-                <collection type="list">
-                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="add_zc_metrics3.tabular" dbkey="89"/>
-                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="add_zc_metrics4.tabular" dbkey="89"/>
-                </collection>
-            </param>
-            <output name="output" file="vsnp_statistics3.tabular" ftype="tabular"/>
-        </test>
-        <!-- A collection of PE fastq files -->
-        <test expect_num_outputs="1">
-            <param name="input_type" value="collections"/>
-            <param name="collection_type" value="paired"/>
+            <param name="input_type" value="paired"/>
             <param name="reads_collection">
                 <collection type="paired">
                     <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
                     <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
                 </collection>
             </param>
-            <param name="samtools_idxstats">
-                <collection type="list">
-                    <element name="13-1941-6_S4_L001_R1_600000.fastq" value="samtools_idxstats5.tabular" dbkey="89"/>
-                </collection>
-            </param>
-            <param name="vsnp_azc">
-                <collection type="list">
-                    <element name="13-1941-6_S4_L001_R1_600000.fastq" value="add_zc_metrics5.tabular" dbkey="89"/>
-                </collection>
-            </param>
+            <param name="samtools_idxstats" value="samtools_idxstats5.tabular" ftype="tabular" dbkey="89"/>
+            <param name="vsnp_azc_metrics" value="add_zc_metrics5.tabular" ftype="tabular" dbkey="89"/>
             <output name="output" file="vsnp_statistics4.tabular" ftype="tabular"/>
         </test>
     </tests>
@@ -195,8 +106,8 @@
 **What it does**
 
 Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them
-to produce an Excel spreadsheet containing statistics for each sample.  The samples can be single or paired reads, and all associated inputs
-can be either single files or collections of files.  The output statistics include reference, file size, mean read length, mean read quality,
+to produce an Excel spreadsheet containing statistics for each sample.  The samples can be a single read, a single set of paired reads in
+separate datasets or  collection of paired reads.  The output statistics include reference, file size, mean read length, mean read quality,
 reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth
 of coverage and good SNP count.
     </help>