<!--
    Galaxy tool wrapper for vsnp_statistics.py.

    Takes fastq reads plus the matching SAMtools idxstats and
    "vSNP: add zero coverage" metrics files, either as individual datasets or
    as collections, and writes a single Excel (xlsx) output.
-->
<tool id="vsnp_statistics" name="vSNP: statistics" version="@WRAPPER_VERSION@.1" profile="@PROFILE@">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="1.16.5">numpy</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="1.2.0">xlrd</requirement>
        <requirement type="package" version="1.2.8">xlsxwriter</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## The re module is used below to sanitize dataset identifiers before they are used as link names.
#import re
## Directories that receive symlinks to collection inputs. They are created in
## both modes, although only the collections branch links files into them.
#set input_idxstats_dir = 'input_idxstats'
#set input_metrics_dir = 'input_metrics'
#set input_reads_dir = 'input_reads'
mkdir -p $input_idxstats_dir &&
mkdir -p $input_metrics_dir &&
mkdir -p $input_reads_dir &&

## Stage the inputs. Every character that is not whitespace, a word character
## or a hyphen is replaced with an underscore in the link name.
#if $input_type_cond.input_type == 'single_files':
    ## Single datasets: the reads are linked into the working directory itself.
    #set read1 = $input_type_cond.read_type_cond.read1
    #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
    ln -s '${read1}' '${read1_identifier}' &&
    #if $input_type_cond.read_type_cond.read_type == 'pair':
        #set read2 = $input_type_cond.read_type_cond.read2
        #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
        ln -s '${read2}' '${read2_identifier}' &&
    #else:
        #set read2 = None
    #end if
#else:
    #if $input_type_cond.collection_type_cond.collection_type == 'single':
        ## List collection: one link per element, named after the element identifier.
        #for $i in $input_type_cond.collection_type_cond.reads_collection:
            #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
            ln -s '${i.file_name}' '$input_reads_dir/${identifier}' &&
        #end for
    #else:
        ## Paired collection: the forward and reverse elements are linked under
        ## their dataset names (.name) rather than their element identifiers.
        #set read1 = $input_type_cond.collection_type_cond.reads_collection['forward']
        #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name))
        ln -s '${read1}' '$input_reads_dir/${read1_identifier}' &&
        #set read2 = $input_type_cond.collection_type_cond.reads_collection['reverse']
        #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name))
        ln -s '${read2}' '$input_reads_dir/${read2_identifier}' &&
    #end if
    ## The idxstats and metrics collections are staged the same way for both collection types.
    #for $i in $input_type_cond.samtools_idxstats:
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '${i.file_name}' '$input_idxstats_dir/${identifier}' &&
    #end for
    #for $i in $input_type_cond.vsnp_azc:
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '${i.file_name}' '$input_metrics_dir/${identifier}' &&
    #end for
#end if

## Build the script invocation. Single-file mode passes the individual paths,
## collection mode passes the three staging directories.
python '$__tool_directory__/vsnp_statistics.py'
#if $input_type_cond.input_type == 'single_files':
    --dbkey '$input_type_cond.samtools_idxstats.metadata.dbkey'
    #if $input_type_cond.read_type_cond.read1.is_of_type('fastqsanger.gz'):
        --gzipped
    #end if
    --read1 '${read1_identifier}'
    #if $input_type_cond.read_type_cond.read_type == 'pair':
        --read2 '${read2_identifier}'
    #end if
    --samtools_idxstats '$input_type_cond.samtools_idxstats'
    --vsnp_azc '$input_type_cond.vsnp_azc'
#else:
    ## The dbkey and the gzip check are both taken from the first element of the respective collection.
    --dbkey '$input_type_cond.samtools_idxstats[0].metadata.dbkey'
    #if $input_type_cond.collection_type_cond.reads_collection[0].is_of_type('fastqsanger.gz'):
        --gzipped
    #end if
    #if $input_type_cond.collection_type_cond.collection_type == 'paired':
        --list_paired
    #end if
    --input_idxstats_dir '$input_idxstats_dir'
    --input_metrics_dir '$input_metrics_dir'
    --input_reads_dir '$input_reads_dir'
#end if
--output '$output'
]]></command>
    <inputs>
        <!-- Top-level switch: individual datasets or dataset collections. -->
        <conditional name="input_type_cond">
            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
                <option value="single_files" selected="true">Single files</option>
                <option value="collections">Collections of files</option>
            </param>
            <when value="single_files">
                <conditional name="read_type_cond">
                    <param name="read_type" type="select" label="Choose the read type">
                        <option value="single" selected="true">Single reads</option>
                        <option value="pair">Paired reads</option>
                    </param>
                    <when value="single">
                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
                    </when>
                    <when value="pair">
                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
                        <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
                    </when>
                </conditional>
                <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
                <param name="vsnp_azc" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
            </when>
            <when value="collections">
                <conditional name="collection_type_cond">
                    <param name="collection_type" type="select" label="Collections of single reads or paired reads?">
                        <option value="single" selected="true">Single reads</option>
                        <option value="paired">Paired reads in separate datasets</option>
                    </param>
                    <when value="single">
                        <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
                    </when>
                    <when value="paired">
                        <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
                    </when>
                </conditional>
                <param name="samtools_idxstats" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
                <param name="vsnp_azc" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP: add zero coverage metrics files"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output" format="xlsx"/>
    </outputs>
    <tests>
        <!-- A single fastq file -->
        <test expect_num_outputs="1">
            <param name="input_type" value="single_files"/>
            <param name="read_type" value="single"/>
            <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
            <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/>
            <param name="vsnp_azc" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/>
            <output name="output" file="vsnp_statistics1.xlsx" ftype="xlsx" compare="sim_size"/>
        </test>
        <!-- A set of paired fastq files -->
        <test expect_num_outputs="1">
            <param name="input_type" value="single_files"/>
            <param name="read_type" value="pair"/>
            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
            <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/>
            <param name="vsnp_azc" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/>
            <output name="output" file="vsnp_statistics2.xlsx" ftype="xlsx" compare="sim_size"/>
        </test>
        <!-- A collection of SE fastq files -->
        <test expect_num_outputs="1">
            <param name="input_type" value="collections"/>
            <!-- The select inside the collections branch is collection_type, not read_type. -->
            <param name="collection_type" value="single"/>
            <param name="reads_collection">
                <collection type="list">
                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="Mcap_Deer_DE_SRR650221.fastq.gz" dbkey="89"/>
                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="13-1941-6_S4_L001_R1_600000.fastq.gz" dbkey="89"/>
                </collection>
            </param>
            <param name="samtools_idxstats">
                <collection type="list">
                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="samtools_idxstats3.tabular" dbkey="89"/>
                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="samtools_idxstats4.tabular" dbkey="89"/>
                </collection>
            </param>
            <param name="vsnp_azc">
                <collection type="list">
                    <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="add_zc_metrics3.tabular" dbkey="89"/>
                    <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="add_zc_metrics4.tabular" dbkey="89"/>
                </collection>
            </param>
            <output name="output" file="vsnp_statistics3.xlsx" ftype="xlsx" compare="sim_size"/>
        </test>
        <!-- A collection of PE fastq files -->
        <test expect_num_outputs="1">
            <param name="input_type" value="collections"/>
            <param name="collection_type" value="paired"/>
            <param name="reads_collection">
                <collection type="paired">
                    <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
                    <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
                </collection>
            </param>
            <param name="samtools_idxstats">
                <collection type="list">
                    <element name="13-1941-6_S4_L001_R1_600000.fastq" value="samtools_idxstats5.tabular" dbkey="89"/>
                </collection>
            </param>
            <param name="vsnp_azc">
                <collection type="list">
                    <element name="13-1941-6_S4_L001_R1_600000.fastq" value="add_zc_metrics5.tabular" dbkey="89"/>
                </collection>
            </param>
            <output name="output" file="vsnp_statistics4.xlsx" ftype="xlsx" compare="sim_size"/>
        </test>
    </tests>
    <help>
**What it does**

Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them
to produce an Excel spreadsheet containing statistics for each sample. The samples can be single or paired reads, and all associated inputs
can be either single files or collections of files. The output statistics include reference, file size, mean read length, mean read quality,
reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth
of coverage and good SNP count.
    </help>
    <expand macro="citations"/>
</tool>