<tool id="vsnp_statistics" name="vSNP: statistics" version="1.0.0">
    <description></description>
    <!-- Software packages (with pinned versions) the tool depends on. -->
    <requirements>
        <requirement type="package" version="0.5.1">humanize</requirement>
        <requirement type="package" version="1.16.5">numpy</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="1.2.0">xlrd</requirement>
        <requirement type="package" version="1.2.8">xlsxwriter</requirement>
    </requirements>
    <!--
        The command has two phases: stage the selected inputs as symlinks with
        sanitized names, then invoke vsnp_statistics.py. A non-zero exit code
        marks the job as failed (detect_errors="exit_code").
    -->
    <command detect_errors="exit_code"><![CDATA[
#import re
## gzipped stays 'false' unless the selected reads are of type fastqsanger.gz.
#set gzipped = 'false'
#set input_type = $input_type_cond.input_type
#set input_idxstats_dir = 'input_idxstats'
#set input_metrics_dir = 'input_metrics'
#set input_reads_dir = 'input_reads'
mkdir -p $input_idxstats_dir &&
mkdir -p $input_metrics_dir &&
mkdir -p $input_reads_dir &&
#if str($input_type) == "single":
    #set read_type_cond = $input_type_cond.read_type_cond
    #set read1 = $read_type_cond.read1
    ## Every character that is not whitespace, a word character or "-" becomes "_".
    #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
    ## read1 is linked into the working directory for both single and paired reads.
    ln -s '${read1}' '${read1_identifier}' &&
    #if str($read_type_cond.read_type) == "single":
        #if $read1.is_of_type('fastqsanger.gz'):
            #set gzipped = 'true'
        #end if
    #else:
        #set read2 = $read_type_cond.read2
        #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
        ln -s '${read2}' '${read2_identifier}' &&
        ## Paired reads are flagged as gzipped only when both mates are compressed.
        #if $read1.is_of_type('fastqsanger.gz') and $read2.is_of_type('fastqsanger.gz'):
            #set gzipped = 'true'
        #end if
    #end if
    #set dbkey = $input_type_cond.vsnp_azc.metadata.dbkey
#else:
    ## Collection mode: link every element into its per-type directory.
    ## NOTE(review): no directory is passed on the command line below, so
    ## vsnp_statistics.py presumably looks in these directories by default - confirm.
    #for $i in $input_type_cond.reads_collection:
        ## A single gzipped element is enough to flag the whole collection as gzipped.
        #if $i.is_of_type('fastqsanger.gz'):
            #set gzipped = 'true'
        #end if
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_reads_dir/$identifier' &&
    #end for
    #for $i in $input_type_cond.samtools_idxstats_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_idxstats_dir/$identifier' &&
    #end for
    #for $i in $input_type_cond.azc_metrics_collection:
        ## dbkey is reassigned on every iteration, so the last element's value is used.
        #set dbkey = $i.metadata.dbkey
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_metrics_dir/$identifier' &&
    #end for
#end if
python '$__tool_directory__/vsnp_statistics.py'
--dbkey '$dbkey'
--gzipped '$gzipped'
#if str($input_type) == "single":
    --read1 '${read1_identifier}'
    #if str($read_type_cond.read_type) != "single":
        --read2 '${read2_identifier}'
    #end if
    ## Parameters nested in a conditional are referenced through the conditional's name.
    --samtools_idxstats '$input_type_cond.samtools_idxstats'
    --vsnp_azc '$input_type_cond.vsnp_azc'
#end if
--output '$output'
]]></command>
    <inputs>
        <!-- Outer switch: individual history datasets versus list collections. -->
        <conditional name="input_type_cond">
            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
                <option value="single" selected="true">Single files</option>
                <option value="collection">Collections of files</option>
            </param>
            <when value="single">
                <!-- Inner switch: paired reads need read1 and read2, single reads only read1. -->
                <conditional name="read_type_cond">
                    <param name="read_type" type="select" label="Choose the read type">
                        <option value="paired" selected="true">Paired</option>
                        <option value="single">Single</option>
                    </param>
                    <when value="paired">
                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
                        <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
                    </when>
                    <when value="single">
                        <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
                    </when>
                </conditional>
                <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file">
                    <validator type="unspecified_build"/>
                </param>
                <!-- The dbkey handed to the script in single-file mode is read from this dataset's metadata. -->
                <param name="vsnp_azc" type="data" format="tabular" label="vSNP zero coverage metrics file">
                    <validator type="unspecified_build"/>
                </param>
            </when>
            <when value="collection">
                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
                <param name="samtools_idxstats_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
                <param name="azc_metrics_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP zero-coverage metrics files"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output" format="xlsx"/>
    </outputs>
    <tests>
        <!-- Single-file mode with the default (paired) read type; the output is compared with sim_size. -->
        <test>
            <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
            <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
            <param name="samtools_idxstats" value="samtools_idxstats.tabular" ftype="tabular" dbkey="89"/>
            <param name="vsnp_azc" value="add_zc_metrics.tabular" ftype="tabular" dbkey="89"/>
            <output name="output" file="vsnp_statistics.xlsx" ftype="xlsx" compare="sim_size"/>
        </test>
    </tests>
    <help>
**What it does**

Accepts a single fastqsanger read, a set of paired reads, or a collection of reads along with associated SAMtools
idxstats and vSNP zero coverage metrics files and extracts information from the files to produce an Excel (xlsx) statistics
dataset that includes total reads, mean read length and quality, reads passing Q30, mapped and unmapped reads, depth
of coverage, good SNP count and more.

**Required options**

 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads and associated idxstats and vSNP zero coverage metrics files) based on the selected option.
    </help>
    <citations>
        <citation type="bibtex">
            @misc{None,
            journal = {None},
            author = {1. Stuber T},
            title = {Manuscript in preparation},
            year = {None},
            url = {https://github.com/USDA-VS/vSNP},}
        </citation>
    </citations>
</tool>