comparison vsnp_statistics.xml @ 0:c21d338dbdc4 draft

Uploaded
author greg
date Tue, 21 Apr 2020 10:19:53 -0400
parents
children 14e29f7d59ca
comparison
equal deleted inserted replaced
-1:000000000000 0:c21d338dbdc4
1 <tool id="vsnp_statistics" name="vSNP: statistics" version="1.0.0">
2 <description></description>
3 <requirements> <!-- Pinned package dependencies for this tool; presumably imported by vsnp_statistics.py, which is not visible here (TODO confirm). -->
4 <requirement type="package" version="0.5.1">humanize</requirement>
5 <requirement type="package" version="1.16.5">numpy</requirement>
6 <requirement type="package" version="0.25.3">pandas</requirement>
7 </requirements>
8 <!-- Stages the inputs as symlinks, then runs vsnp_statistics.py. Element identifiers are sanitized with a regex that keeps whitespace, so every staged path is single-quoted. Single-file mode passes the read, idxstats and metrics paths explicitly; collection mode links everything into the input_* directories and passes only the shared options. --> <command detect_errors="exit_code"><![CDATA[
9 #import os
10 #import re
11 #set gzipped = 'false'
12 #set input_type = $input_type_cond.input_type
13 #set input_idxstats_dir = 'input_idxstats'
14 #set input_metrics_dir = 'input_metrics'
15 #set input_reads_dir = 'input_reads'
16 #set output_dir = 'output'
17 mkdir -p $input_idxstats_dir &&
18 mkdir -p $input_metrics_dir &&
19 mkdir -p $input_reads_dir &&
20 mkdir -p $output_dir &&
21 #if str($input_type) == "single":
22 #set read_type_cond = $input_type_cond.read_type_cond
23 #set read1 = $read_type_cond.read1
24 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
25 #if str($read_type_cond.read_type) == "single":
26 ln -s '${read1}' '${read1_identifier}' &&
27 #if $read1.is_of_type('fastqsanger.gz'):
28 #set gzipped = 'true'
29 #end if
30 #else:
31 #set read2 = $read_type_cond.read2
32 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
33 ln -s '${read1}' '${read1_identifier}' &&
34 ln -s '${read2}' '${read2_identifier}' &&
35 #if $read1.is_of_type('fastqsanger.gz') and $read2.is_of_type('fastqsanger.gz'):
36 #set gzipped = 'true'
37 #end if
38 #end if
39 #set dbkey = $input_type_cond.vsnp_azc.metadata.dbkey
40 #else:
41 #for $i in $input_type_cond.reads_collection:
42 #if $i.is_of_type('fastqsanger.gz'):
43 #set gzipped = 'true'
44 #end if
45 #set filename = $i.file_name
46 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
47 ln -s '${filename}' '${input_reads_dir}/${identifier}' &&
48 #end for
49 #for $i in $input_type_cond.samtools_idxstats_collection:
50 #set filename = $i.file_name
51 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
52 ln -s '${filename}' '${input_idxstats_dir}/${identifier}' &&
53 #end for
54 #for $i in $input_type_cond.azc_metrics_collection:
55 #set dbkey = $i.metadata.dbkey
56 #set filename = $i.file_name
57 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
58 ln -s '${filename}' '${input_metrics_dir}/${identifier}' &&
59 #end for
60 #end if
61 python '$__tool_directory__/vsnp_statistics.py'
62 --processes $processes
63 --dbkey '$dbkey'
64 --gzipped '$gzipped'
65 #if str($input_type) == "single":
66 #if str($read_type_cond.read_type) == "single":
67 --read1 '${read1_identifier}'
68 #else:
69 --read1 '${read1_identifier}'
70 --read2 '${read2_identifier}'
71 #end if
72 --samtools_idxstats '$input_type_cond.samtools_idxstats'
73 --vsnp_azc '$input_type_cond.vsnp_azc'
74 --output '$output'
75 #end if
76 ]]></command>
77 <inputs> <!-- Parameters read by the command template: an input-category switch with its nested datasets, plus a process count. -->
78 <conditional name="input_type_cond"> <!-- The command template branches on input_type: individual datasets versus list collections. -->
79 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
80 <option value="single" selected="true">Single files</option>
81 <option value="collection">Collections of files</option>
82 </param>
83 <when value="single">
84 <conditional name="read_type_cond"> <!-- Paired is the default; the single case offers read1 only. -->
85 <param name="read_type" type="select" label="Choose the read type">
86 <option value="paired" selected="true">Paired</option>
87 <option value="single">Single</option>
88 </param>
89 <when value="paired">
90 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
91 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
92 </when>
93 <when value="single">
94 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
95 </when>
96 </conditional>
97 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file">
98 <validator type="unspecified_build"/>
99 </param>
100 <param name="vsnp_azc" type="data" format="tabular" label="vSNP zero coverage metrics file"> <!-- The command template takes the dbkey from this dataset's metadata in single-file mode. -->
101 <validator type="unspecified_build"/>
102 </param>
103 </when>
104 <when value="collection"> <!-- Three list collections; the command template symlinks each element under its sanitized element identifier. -->
105 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
106 <param name="samtools_idxstats_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
107 <param name="azc_metrics_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP zero-coverage metrics files"/> <!-- NOTE(review): the dbkey is read from these elements, but unlike the single-file inputs there is no unspecified_build validator here; confirm that is intended. -->
108 </when>
109 </conditional>
110 <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/> <!-- Passed unchanged to the script's processes option. -->
111 </inputs>
112 <outputs> <!-- Exactly one of the two outputs is kept, selected by the input_type value. -->
113 <data name="output" format="tabular"> <!-- Single-file mode: the command template passes this dataset's path as the script's output option. -->
114 <filter>input_type_cond['input_type'] == 'single'</filter>
115 </data>
116 <collection name="output_collection" type="list"> <!-- Collection mode: elements are discovered in the "output" directory that the command template creates. -->
117 <discover_datasets pattern="__name__" directory="output" format="tabular" />
118 <filter>input_type_cond['input_type'] == 'collection'</filter>
119 </collection>
120 </outputs>
121 <tests> <!-- Only the default path is exercised (single files with paired reads); there is no test for single-read or collection mode. -->
122 <test> <!-- input_type and read_type are not set here, so their selected defaults (single, paired) apply. -->
123 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
124 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
125 <param name="samtools_idxstats" value="samtools_idxstats.tabular" ftype="tabular" dbkey="89"/>
126 <param name="vsnp_azc" value="add_zc_metrics.tabular" ftype="tabular" dbkey="89"/>
127 <output name="output" file="vsnp_statistics.tabular" ftype="tabular" compare="contains"/>
128 </test>
129 </tests>
130 <help>
131 **What it does**
132
133 Accepts a single fastqsanger read, a set of paired reads, or a collection of reads along with associated SAMtools
134 idxstats and vSNP zero coverage metrics files and extracts information from the files to produce a tabular statistics
135 dataset that includes total reads, mean read length and quality, reads passing Q30, mapped and unmapped reads, depth
136 of coverage, good SNP count and more.
137
138 **Required options**
139
140 * **Choose the category of the files to be analyzed** - Select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads and associated idxstats and vSNP zero coverage metrics files) based on the selected option.
141 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
142 </help>
143 <citations> <!-- BibTeX entry for the vSNP project. Placeholder "None" values are replaced with usable fields; there is no publication year yet, so none is given. -->
144 <citation type="bibtex">
145 @misc{vsnp,
146 howpublished = {GitHub repository},
147 author = {Stuber, T.},
148 title = {vSNP},
149 note = {Manuscript in preparation},
150 url = {https://github.com/USDA-VS/vSNP},}
151 </citation>
152 </citations>
153 </tool>
154