annotate vsnp_statistics.xml @ 1:14e29f7d59ca draft

Uploaded
author greg
date Wed, 29 Apr 2020 16:56:10 -0400
parents c21d338dbdc4
children 7fe0cbb8c894
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
1 <tool id="vsnp_statistics" name="vSNP: statistics" version="1.0.0">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
2 <description></description>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
3 <requirements>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="0.5.1">humanize</requirement>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="1.16.5">numpy</requirement>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
6 <requirement type="package" version="0.25.3">pandas</requirement>
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
7 <requirement type="package" version="1.2.0">xlrd</requirement>
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
8 <requirement type="package" version="1.2.8">xlsxwriter</requirement>
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
9 </requirements>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
11 #import os
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
12 #import re
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
13 #set gzipped = 'false'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
14 #set input_type = $input_type_cond.input_type
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
15 #set input_idxstats_dir = 'input_idxstats'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
16 #set input_metrics_dir = 'input_metrics'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
17 #set input_reads_dir = 'input_reads'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
18 mkdir -p $input_idxstats_dir &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
19 mkdir -p $input_metrics_dir &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
20 mkdir -p $input_reads_dir &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
21 #if str($input_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
22 #set read_type_cond = $input_type_cond.read_type_cond
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
23 #set read1 = $read_type_cond.read1
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
24 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
25 #if str($read_type_cond.read_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
26 ln -s '${read1}' '${read1_identifier}' &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
27 #if $read1.is_of_type('fastqsanger.gz'):
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
28 #set gzipped = 'true'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
29 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
30 #else:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
31 #set read2 = $read_type_cond.read2
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
32 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
33 ln -s '${read1}' '${read1_identifier}' &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
34 ln -s '${read2}' '${read2_identifier}' &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
35 #if $read1.is_of_type('fastqsanger.gz') and $read2.is_of_type('fastqsanger.gz'):
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
36 #set gzipped = 'true'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
37 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
38 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
39 #set dbkey = $input_type_cond.vsnp_azc.metadata.dbkey
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
40 #else:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
41 #for $i in $input_type_cond.reads_collection:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
42 #if $i.is_of_type('fastqsanger.gz'):
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
43 #set gzipped = 'true'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
44 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
45 #set filename = $i.file_name
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
46 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
47 ln -s '$filename' '$input_reads_dir/$identifier' &&
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
48 #end for
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
49 #for $i in $input_type_cond.samtools_idxstats_collection:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
50 #set filename = $i.file_name
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
51 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
52 ln -s '$filename' '$input_idxstats_dir/$identifier' &&
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
53 #end for
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
54 #for $i in $input_type_cond.azc_metrics_collection:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
55 #set dbkey = $i.metadata.dbkey
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
56 #set filename = $i.file_name
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
57 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
58 ln -s '$filename' '$input_metrics_dir/$identifier' &&
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
59 #end for
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
60 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
61 python '$__tool_directory__/vsnp_statistics.py'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
62 --dbkey '$dbkey'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
63 --gzipped '$gzipped'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
64 #if str($input_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
65 #if str($read_type_cond.read_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
66 --read1 '${read1_identifier}'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
67 #else:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
68 --read1 '${read1_identifier}'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
69 --read2 '${read2_identifier}'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
70 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
71 --samtools_idxstats '$samtools_idxstats'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
72 --vsnp_azc '$vsnp_azc'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
73 #end if
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
74 --output '$output'
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
75 ]]></command>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
76 <inputs>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
77 <conditional name="input_type_cond">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
78 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
79 <option value="single" selected="true">Single files</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
80 <option value="collection">Collections of files</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
81 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
82 <when value="single">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
83 <conditional name="read_type_cond">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
84 <param name="read_type" type="select" label="Choose the read type">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
85 <option value="paired" selected="true">Paired</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
86 <option value="single">Single</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
87 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
88 <when value="paired">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
89 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
90 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
91 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
92 <when value="single">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
93 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
94 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
95 </conditional>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
96 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
97 <validator type="unspecified_build"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
98 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
99 <param name="vsnp_azc" type="data" format="tabular" label="vSNP zero coverage metrics file">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
100 <validator type="unspecified_build"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
101 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
102 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
103 <when value="collection">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
104 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
105 <param name="samtools_idxstats_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
106 <param name="azc_metrics_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP zero-coverage metrics files"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
107 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
108 </conditional>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
109 </inputs>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
110 <outputs>
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
111 <data name="output" format="xlsx"/>
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
112 </outputs>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
113 <tests>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
114 <test>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
115 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
116 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
117 <param name="samtools_idxstats" value="samtools_idxstats.tabular" ftype="tabular" dbkey="89"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
118 <param name="vsnp_azc" value="add_zc_metrics.tabular" ftype="tabular" dbkey="89"/>
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
119 <output name="output" file="vsnp_statistics.xlsx" ftype="xlsx" compare="sim_size"/>
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
120 </test>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
121 </tests>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
122 <help>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
123 **What it does**
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
124
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
125 Accepts a single fastqsanger read, a set of paired reads, or a collection of reads along with associated SAMtools
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
126 idxstats and vSNP zero coverage metrics files and extracts information from the files to produce an Excel (xlsx) statistics
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
127 dataset that includes total reads, mean read length and quality, reads passing Q30, mapped and unmapped reads, depth
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
128 of coverage, good SNP count and more.
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
129
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
130 **Required options**
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
131
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
132 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads and associated idxstats and vSNP zero coverage metrics files) based on the selected option.
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
133 </help>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
134 <citations>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
135 <citation type="bibtex">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
136 @misc{None,
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
137 journal = {None},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
138 author = {1. Stuber T},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
139 title = {Manuscript in preparation},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
140 year = {None},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
141 url = {https://github.com/USDA-VS/vSNP},}
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
142 </citation>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
143 </citations>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
144 </tool>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
145