annotate vsnp_statistics.xml @ 2:7fe0cbb8c894 draft

Uploaded
author greg
date Thu, 30 Apr 2020 10:26:20 -0400
parents 14e29f7d59ca
children 2d6c6b01319e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
1 <tool id="vsnp_statistics" name="vSNP: statistics" version="1.0.0">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
2 <description></description>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
3 <requirements>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="1.16.5">numpy</requirement>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
5 <requirement type="package" version="0.25.3">pandas</requirement>
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
6 <requirement type="package" version="1.2.0">xlrd</requirement>
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
7 <requirement type="package" version="1.2.8">xlsxwriter</requirement>
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
8 </requirements>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
9 <command detect_errors="exit_code"><![CDATA[
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
10 #import os
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
11 #import re
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
12 #set gzipped = 'false'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
13 #set input_type = $input_type_cond.input_type
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
14 #set input_idxstats_dir = 'input_idxstats'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
15 #set input_metrics_dir = 'input_metrics'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
16 #set input_reads_dir = 'input_reads'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
17 mkdir -p $input_idxstats_dir &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
18 mkdir -p $input_metrics_dir &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
19 mkdir -p $input_reads_dir &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
20 #if str($input_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
21 #set read_type_cond = $input_type_cond.read_type_cond
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
22 #set read1 = $read_type_cond.read1
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
23 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
24 #if str($read_type_cond.read_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
25 ln -s '${read1}' '${read1_identifier}' &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
26 #if $read1.is_of_type('fastqsanger.gz'):
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
27 #set gzipped = 'true'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
28 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
29 #else:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
30 #set read2 = $read_type_cond.read2
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
31 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
32 ln -s '${read1}' '${read1_identifier}' &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
33 ln -s '${read2}' '${read2_identifier}' &&
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
34 #if $read1.is_of_type('fastqsanger.gz') and $read2.is_of_type('fastqsanger.gz'):
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
35 #set gzipped = 'true'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
36 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
37 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
38 #set dbkey = $input_type_cond.vsnp_azc.metadata.dbkey
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
39 #else:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
40 #for $i in $input_type_cond.reads_collection:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
41 #if $i.is_of_type('fastqsanger.gz'):
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
42 #set gzipped = 'true'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
43 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
44 #set filename = $i.file_name
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
45 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
46 ln -s '$filename' '$input_reads_dir/$identifier' &&
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
47 #end for
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
48 #for $i in $input_type_cond.samtools_idxstats_collection:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
49 #set filename = $i.file_name
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
50 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
51 ln -s '$filename' '$input_idxstats_dir/$identifier' &&
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
52 #end for
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
53 #for $i in $input_type_cond.azc_metrics_collection:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
54 #set dbkey = $i.metadata.dbkey
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
55 #set filename = $i.file_name
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
56 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
57 ln -s '$filename' '$input_metrics_dir/$identifier' &&
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
58 #end for
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
59 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
60 python '$__tool_directory__/vsnp_statistics.py'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
61 --dbkey '$dbkey'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
62 --gzipped '$gzipped'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
63 #if str($input_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
64 #if str($read_type_cond.read_type) == "single":
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
65 --read1 '${read1_identifier}'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
66 #else:
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
67 --read1 '${read1_identifier}'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
68 --read2 '${read2_identifier}'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
69 #end if
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
70 --samtools_idxstats '$samtools_idxstats'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
71 --vsnp_azc '$vsnp_azc'
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
72 #end if
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
73 --output '$output'
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
74 ]]></command>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
75 <inputs>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
76 <conditional name="input_type_cond">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
77 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
78 <option value="single" selected="true">Single files</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
79 <option value="collection">Collections of files</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
80 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
81 <when value="single">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
82 <conditional name="read_type_cond">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
83 <param name="read_type" type="select" label="Choose the read type">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
84 <option value="paired" selected="true">Paired</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
85 <option value="single">Single</option>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
86 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
87 <when value="paired">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
88 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
89 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
90 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
91 <when value="single">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
92 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
93 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
94 </conditional>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
95 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
96 <validator type="unspecified_build"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
97 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
98 <param name="vsnp_azc" type="data" format="tabular" label="vSNP zero coverage metrics file">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
99 <validator type="unspecified_build"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
100 </param>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
101 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
102 <when value="collection">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
103 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
104 <param name="samtools_idxstats_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
105 <param name="azc_metrics_collection" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP zero-coverage metrics files"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
106 </when>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
107 </conditional>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
108 </inputs>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
109 <outputs>
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
110 <data name="output" format="xlsx"/>
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
111 </outputs>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
112 <tests>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
113 <test>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
114 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
115 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
116 <param name="samtools_idxstats" value="samtools_idxstats.tabular" ftype="tabular" dbkey="89"/>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
117 <param name="vsnp_azc" value="add_zc_metrics.tabular" ftype="tabular" dbkey="89"/>
1
14e29f7d59ca Uploaded
greg
parents: 0
diff changeset
118 <output name="output" file="vsnp_statistics.xlsx" ftype="xlsx" compare="sim_size"/>
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
119 </test>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
120 </tests>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
121 <help>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
122 **What it does**
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
123
2
7fe0cbb8c894 Uploaded
greg
parents: 1
diff changeset
124 Accepts a single fastqsanger sample, a set of paired read samples, or a collection of samples along with associated
7fe0cbb8c894 Uploaded
greg
parents: 1
diff changeset
125 SAMtools idxstats and vSNP zero coverage metrics files and extracts information from them to produce an Excel
7fe0cbb8c894 Uploaded
greg
parents: 1
diff changeset
126 spreadsheet containing statistics for each sample. Statistics include reference, file size, mean read length, mean
7fe0cbb8c894 Uploaded
greg
parents: 1
diff changeset
127 read quality, reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total,
7fe0cbb8c894 Uploaded
greg
parents: 1
diff changeset
128 reference with coverage, average depth of coverage and good SNP count.
0
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
129
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
130 **Required options**
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
131
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
132 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads and associated idxstats and vSNP zero coverage metrics files) based on the selected option.
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
133 </help>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
134 <citations>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
135 <citation type="bibtex">
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
136 @misc{None,
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
137 journal = {None},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
138 author = {1. Stuber T},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
139 title = {Manuscript in preparation},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
140 year = {None},
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
141 url = {https://github.com/USDA-VS/vSNP},}
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
142 </citation>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
143 </citations>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
144 </tool>
c21d338dbdc4 Uploaded
greg
parents:
diff changeset
145