comparison vsnp_statistics.xml @ 4:2d6c6b01319e draft

Uploaded
author greg
date Sun, 03 Jan 2021 15:47:28 +0000
parents 7fe0cbb8c894
children d0fbdeaaa488
comparison
equal deleted inserted replaced
3:321a8259e3f9 4:2d6c6b01319e
1 <tool id="vsnp_statistics" name="vSNP: statistics" version="1.0.0"> 1 <tool id="vsnp_statistics" name="vSNP: statistics" version="@WRAPPER_VERSION@.1" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
3 <requirements> 6 <requirements>
4 <requirement type="package" version="1.16.5">numpy</requirement> 7 <requirement type="package" version="1.16.5">numpy</requirement>
5 <requirement type="package" version="0.25.3">pandas</requirement> 8 <requirement type="package" version="0.25.3">pandas</requirement>
6 <requirement type="package" version="1.2.0">xlrd</requirement> 9 <requirement type="package" version="1.2.0">xlrd</requirement>
7 <requirement type="package" version="1.2.8">xlsxwriter</requirement> 10 <requirement type="package" version="1.2.8">xlsxwriter</requirement>
8 </requirements> 11 </requirements>
9 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
10 #import os
11 #import re 13 #import re
12 #set gzipped = 'false'
13 #set input_type = $input_type_cond.input_type
14 #set input_idxstats_dir = 'input_idxstats' 14 #set input_idxstats_dir = 'input_idxstats'
15 #set input_metrics_dir = 'input_metrics' 15 #set input_metrics_dir = 'input_metrics'
16 #set input_reads_dir = 'input_reads' 16 #set input_reads_dir = 'input_reads'
17 mkdir -p $input_idxstats_dir && 17 mkdir -p $input_idxstats_dir &&
18 mkdir -p $input_metrics_dir && 18 mkdir -p $input_metrics_dir &&
19 mkdir -p $input_reads_dir && 19 mkdir -p $input_reads_dir &&
20 #if str($input_type) == "single": 20
21 #set read_type_cond = $input_type_cond.read_type_cond 21 #if $input_type_cond.input_type == 'single_files':
22 #set read1 = $read_type_cond.read1 22 #set read1 = $input_type_cond.read_type_cond.read1
23 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) 23 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
24 #if str($read_type_cond.read_type) == "single": 24 ln -s '${read1}' '${read1_identifier}' &&
25 ln -s '${read1}' '${read1_identifier}' && 25 #if $input_type_cond.read_type_cond.read_type == 'pair':
26 #if $read1.is_of_type('fastqsanger.gz'): 26 #set read2 = $input_type_cond.read_type_cond.read2
27 #set gzipped = 'true' 27 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
28 #end if 28 ln -s '${read2}' '${read2_identifier}' &&
29 #else: 29 #else:
30 #set read2 = $read_type_cond.read2 30 #set read2 = None
31 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) 31 #end if
32 ln -s '${read1}' '${read1_identifier}' &&
33 ln -s '${read2}' '${read2_identifier}' &&
34 #if $read1.is_of_type('fastqsanger.gz') and $read2.is_of_type('fastqsanger.gz'):
35 #set gzipped = 'true'
36 #end if
37 #end if
38 #set dbkey = $input_type_cond.vsnp_azc.metadata.dbkey
39 #else: 32 #else:
40 #for $i in $input_type_cond.reads_collection: 33 #if $input_type_cond.collection_type_cond.collection_type == 'single':
41 #if $i.is_of_type('fastqsanger.gz'): 34 #for $i in $input_type_cond.collection_type_cond.reads_collection:
42 #set gzipped = 'true' 35 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
43 #end if 36 ln -s '${i.file_name}' '$input_reads_dir/${identifier}' &&
44 #set filename = $i.file_name 37 #end for
38 #else:
39 #set read1 = $input_type_cond.collection_type_cond.reads_collection['forward']
40 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name))
41 ln -s '${read1}' '$input_reads_dir/${read1_identifier}' &&
42 #set read2 = $input_type_cond.collection_type_cond.reads_collection['reverse']
43 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name))
44 ln -s '${read2}' '$input_reads_dir/${read2_identifier}' &&
45 #end if
46 #for $i in $input_type_cond.samtools_idxstats:
45 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) 47 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
46 ln -s '$filename' '$input_reads_dir/$identifier' && 48 ln -s '${i.file_name}' '$input_idxstats_dir/${identifier}' &&
47 #end for 49 #end for
48 #for $i in $input_type_cond.samtools_idxstats_collection: 50 #for $i in $input_type_cond.vsnp_azc:
49 #set filename = $i.file_name
50 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) 51 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
51 ln -s '$filename' '$input_idxstats_dir/$identifier' && 52 ln -s '${i.file_name}' '$input_metrics_dir/${identifier}' &&
52 #end for
53 #for $i in $input_type_cond.azc_metrics_collection:
54 #set dbkey = $i.metadata.dbkey
55 #set filename = $i.file_name
56 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
57 ln -s '$filename' '$input_metrics_dir/$identifier' &&
58 #end for 53 #end for
59 #end if 54 #end if
55
60 python '$__tool_directory__/vsnp_statistics.py' 56 python '$__tool_directory__/vsnp_statistics.py'
61 --dbkey '$dbkey' 57 #if $input_type_cond.input_type == 'single_files':
62 --gzipped '$gzipped' 58 --dbkey '$input_type_cond.samtools_idxstats.metadata.dbkey'
63 #if str($input_type) == "single": 59 #if $input_type_cond.read_type_cond.read1.is_of_type('fastqsanger.gz'):
64 #if str($read_type_cond.read_type) == "single": 60 --gzipped
65 --read1 '${read1_identifier}' 61 #end if
66 #else: 62 --read1 '${read1_identifier}'
67 --read1 '${read1_identifier}' 63 #if $input_type_cond.read_type_cond.read_type == 'pair':
68 --read2 '${read2_identifier}' 64 --read2 '${read2_identifier}'
69 #end if 65 #end if
70 --samtools_idxstats '$samtools_idxstats' 66 --samtools_idxstats '$input_type_cond.samtools_idxstats'
71 --vsnp_azc '$vsnp_azc' 67 --vsnp_azc '$input_type_cond.vsnp_azc'
68 #else:
69 --dbkey '$input_type_cond.samtools_idxstats[0].metadata.dbkey'
70 #if $input_type_cond.collection_type_cond.reads_collection[0].is_of_type('fastqsanger.gz'):
71 --gzipped
72 #end if
73 #if $input_type_cond.collection_type_cond.collection_type == 'paired':
74 --list_paired
75 #end if
76 --input_idxstats_dir '$input_idxstats_dir'
77 --input_metrics_dir '$input_metrics_dir'
78 --input_reads_dir '$input_reads_dir'
72 #end if 79 #end if
73 --output '$output' 80 --output '$output'
74 ]]></command> 81 ]]></command>
75 <inputs> 82 <inputs>
76 <conditional name="input_type_cond"> 83 <conditional name="input_type_cond">
77 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> 84 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
78 <option value="single" selected="true">Single files</option> 85 <option value="single_files" selected="true">Single files</option>
79 <option value="collection">Collections of files</option> 86 <option value="collections">Collections of files</option>
80 </param> 87 </param>
81 <when value="single"> 88 <when value="single_files">
82 <conditional name="read_type_cond"> 89 <conditional name="read_type_cond">
83 <param name="read_type" type="select" label="Choose the read type"> 90 <param name="read_type" type="select" label="Choose the read type">
84 <option value="paired" selected="true">Paired</option> 91 <option value="single" selected="true">Single reads</option>
85 <option value="single">Single</option> 92 <option value="pair">Paired reads</option>
86 </param> 93 </param>
87 <when value="paired"> 94 <when value="single">
95 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
96 </when>
97 <when value="pair">
88 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> 98 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
89 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> 99 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
90 </when> 100 </when>
101 </conditional>
102 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
103 <param name="vsnp_azc" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
104 </when>
105 <when value="collections">
106 <conditional name="collection_type_cond">
107 <param name="collection_type" type="select" label="Collections of single reads or paired reads?">
108 <option value="single" selected="true">Single reads</option>
109 <option value="paired">Paired reads in separate datasets</option>
110 </param>
91 <when value="single"> 111 <when value="single">
92 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> 112 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
113 </when>
114 <when value="paired">
115 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
93 </when> 116 </when>
94 </conditional> 117 </conditional>
95 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"> 118 <param name="samtools_idxstats" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
96 <validator type="unspecified_build"/> 119 <param name="vsnp_azc" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP: add zero coverage metrics files"/>
97 </param>
98 <param name="vsnp_azc" type="data" format="tabular" label="vSNP zero coverage metrics file">
99 <validator type="unspecified_build"/>
100 </param>
101 </when>
102 <when value="collection">
103 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
104 <param name="samtools_idxstats_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
105 <param name="azc_metrics_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP zero-coverage metrics files"/>
106 </when> 120 </when>
107 </conditional> 121 </conditional>
108 </inputs> 122 </inputs>
109 <outputs> 123 <outputs>
110 <data name="output" format="xlsx"/> 124 <data name="output" format="xlsx"/>
111 </outputs> 125 </outputs>
112 <tests> 126 <tests>
113 <test> 127 <!-- A single fastq file -->
128 <test expect_num_outputs="1">
129 <param name="input_type" value="single_files"/>
130 <param name="read_type" value="single"/>
131 <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
132 <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/>
133 <param name="vsnp_azc" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/>
134 <output name="output" file="vsnp_statistics1.xlsx" ftype="xlsx" compare="sim_size"/>
135 </test>
136 <!-- A set of paired fastq files -->
137 <test expect_num_outputs="1">
138 <param name="input_type" value="single_files"/>
139 <param name="read_type" value="pair"/>
114 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> 140 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
115 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> 141 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
116 <param name="samtools_idxstats" value="samtools_idxstats.tabular" ftype="tabular" dbkey="89"/> 142 <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/>
117 <param name="vsnp_azc" value="add_zc_metrics.tabular" ftype="tabular" dbkey="89"/> 143 <param name="vsnp_azc" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/>
118 <output name="output" file="vsnp_statistics.xlsx" ftype="xlsx" compare="sim_size"/> 144 <output name="output" file="vsnp_statistics2.xlsx" ftype="xlsx" compare="sim_size"/>
145 </test>
146 <!-- A collection of SE fastq files -->
147 <test expect_num_outputs="1">
148 <param name="input_type" value="collections"/>
149 <param name="read_type" value="single"/>
150 <param name="reads_collection">
151 <collection type="list">
152 <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="Mcap_Deer_DE_SRR650221.fastq.gz" dbkey="89"/>
153 <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="13-1941-6_S4_L001_R1_600000.fastq.gz" dbkey="89"/>
154 </collection>
155 </param>
156 <param name="samtools_idxstats">
157 <collection type="list">
158 <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="samtools_idxstats3.tabular" dbkey="89"/>
159 <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="samtools_idxstats4.tabular" dbkey="89"/>
160 </collection>
161 </param>
162 <param name="vsnp_azc">
163 <collection type="list">
164 <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="add_zc_metrics3.tabular" dbkey="89"/>
165 <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="add_zc_metrics4.tabular" dbkey="89"/>
166 </collection>
167 </param>
168 <output name="output" file="vsnp_statistics3.xlsx" ftype="xlsx" compare="sim_size"/>
169 </test>
170 <!-- A collection of PE fastq files -->
171 <test expect_num_outputs="1">
172 <param name="input_type" value="collections"/>
173 <param name="collection_type" value="paired"/>
174 <param name="reads_collection">
175 <collection type="paired">
176 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
177 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
178 </collection>
179 </param>
180 <param name="samtools_idxstats">
181 <collection type="list">
182 <element name="13-1941-6_S4_L001_R1_600000.fastq" value="samtools_idxstats5.tabular" dbkey="89"/>
183 </collection>
184 </param>
185 <param name="vsnp_azc">
186 <collection type="list">
187 <element name="13-1941-6_S4_L001_R1_600000.fastq" value="add_zc_metrics5.tabular" dbkey="89"/>
188 </collection>
189 </param>
190 <output name="output" file="vsnp_statistics4.xlsx" ftype="xlsx" compare="sim_size"/>
119 </test> 191 </test>
120 </tests> 192 </tests>
121 <help> 193 <help>
122 **What it does** 194 **What it does**
123 195
124 Accepts a single fastqsanger sample, a set of paired read samples, or a collections of samples along with associated 196 Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them
125 SAMtools idxstats and vSNP zero coverage metrics files and extracts information from them to produce an Excel 197 to produce an Excel spreadsheet containing statistics for each sample. The samples can be single or paired reads, and all associated inputs
126 spreadsheet containing statistics for each sample. Statistics include reference, file size, mean read length, mean 198 can be either single files or collections of files. The output statistics include reference, file size, mean read length, mean read quality,
127 read quality, reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, 199 reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth
128 reference with coverage, average depth of coverage and good SNP count. 200 of coverage and good SNP count.
129
130 **Required options**
131
132 * **Choose the type for files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads and associated idxstats and vSNP zero coverage metrics files) based on the selected option..
133 </help> 201 </help>
134 <citations> 202 <expand macro="citations"/>
135 <citation type="bibtex">
136 @misc{None,
137 journal = {None},
138 author = {1. Stuber T},
139 title = {Manuscript in preparation},
140 year = {None},
141 url = {https://github.com/USDA-VS/vSNP},}
142 </citation>
143 </citations>
144 </tool> 203 </tool>
145 204