Mercurial > repos > greg > vsnp_statistics
comparison vsnp_statistics.xml @ 8:1becb6606626 draft
Uploaded
author | greg |
---|---|
date | Mon, 02 Aug 2021 13:34:09 +0000 |
parents | de2af65c4633 |
children | ce1f889b3340 |
comparison
equal
deleted
inserted
replaced
7:de2af65c4633 | 8:1becb6606626 |
---|---|
2 <description></description> | 2 <description></description> |
3 <macros> | 3 <macros> |
4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
5 </macros> | 5 </macros> |
6 <requirements> | 6 <requirements> |
7 <requirement type="package" version="1.79">biopython</requirement> | 7 <expand macro="biopython_requirement"/> |
8 <requirement type="package" version="1.21.1">numpy</requirement> | 8 <expand macro="numpy_requirement"/> |
9 <requirement type="package" version="3.0.7">openpyxl</requirement> | 9 <expand macro="openpyxl_requirement"/> |
10 <requirement type="package" version="1.3.0">pandas</requirement> | 10 <expand macro="pandas_requirement"/> |
11 <requirement type="package" version="2.0.1">xlrd</requirement> | 11 <expand macro="xlrd_requirement"/> |
12 </requirements> | 12 </requirements> |
13 <command detect_errors="exit_code"><![CDATA[ | 13 <command detect_errors="exit_code"><![CDATA[ |
14 #import re | 14 #import re |
15 #set input_idxstats_dir = 'input_idxstats' | |
16 #set input_metrics_dir = 'input_metrics' | |
17 #set input_reads_dir = 'input_reads' | |
18 mkdir -p $input_idxstats_dir && | |
19 mkdir -p $input_metrics_dir && | |
20 mkdir -p $input_reads_dir && | |
21 | 15 |
22 #if $input_type_cond.input_type == 'single_files': | 16 #if $input_type_cond.input_type in ["single", "pair"]: |
23 #set read1 = $input_type_cond.read_type_cond.read1 | 17 #set read1 = $input_type_cond.read1 |
24 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) | 18 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) |
25 ln -s '${read1}' '${read1_identifier}' && | 19 ln -s '${read1}' '${read1_identifier}' && |
26 #if $input_type_cond.read_type_cond.read_type == 'pair': | 20 #if $input_type_cond.input_type == "pair": |
27 #set read2 = $input_type_cond.read_type_cond.read2 | 21 #set read2 = $input_type_cond.read2 |
28 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) | 22 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) |
29 ln -s '${read2}' '${read2_identifier}' && | 23 ln -s '${read2}' '${read2_identifier}' && |
30 #else: | 24 #else: |
31 #set read2 = None | 25 #set read2 = None |
32 #end if | 26 #end if |
33 #else: | 27 #else: |
34 #if $input_type_cond.collection_type_cond.collection_type == 'single': | 28 #set read1 = $input_type_cond.reads_collection['forward'] |
35 #for $i in $input_type_cond.collection_type_cond.reads_collection: | 29 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name)) |
36 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | 30 ln -s '${read1}' '${read1_identifier}' && |
37 ln -s '${i.file_name}' '$input_reads_dir/${identifier}' && | 31 #set read2 = $input_type_cond.reads_collection['reverse'] |
38 #end for | 32 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name)) |
39 #else: | 33 ln -s '${read2}' '${read2_identifier}' && |
40 #set read1 = $input_type_cond.collection_type_cond.reads_collection['forward'] | |
41 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.name)) | |
42 ln -s '${read1}' '$input_reads_dir/${read1_identifier}' && | |
43 #set read2 = $input_type_cond.collection_type_cond.reads_collection['reverse'] | |
44 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.name)) | |
45 ln -s '${read2}' '$input_reads_dir/${read2_identifier}' && | |
46 #end if | |
47 #for $i in $input_type_cond.samtools_idxstats: | |
48 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
49 ln -s '${i.file_name}' '$input_idxstats_dir/${identifier}' && | |
50 #end for | |
51 #for $i in $input_type_cond.vsnp_azc: | |
52 #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier)) | |
53 ln -s '${i.file_name}' '$input_metrics_dir/${identifier}' && | |
54 #end for | |
55 #end if | 34 #end if |
56 | 35 |
57 python '$__tool_directory__/vsnp_statistics.py' | 36 python '$__tool_directory__/vsnp_statistics.py' |
58 #if $input_type_cond.input_type == 'single_files': | 37 --read1 '${read1_identifier}' |
59 --dbkey '$input_type_cond.samtools_idxstats.metadata.dbkey' | 38 #if $read2 is not None |
60 #if $input_type_cond.read_type_cond.read1.is_of_type('fastqsanger.gz'): | 39 --read2 '${read2_identifier}' |
61 --gzipped | |
62 #end if | |
63 --read1 '${read1_identifier}' | |
64 #if $input_type_cond.read_type_cond.read_type == 'pair': | |
65 --read2 '${read2_identifier}' | |
66 #end if | |
67 --samtools_idxstats '$input_type_cond.samtools_idxstats' | |
68 --vsnp_azc '$input_type_cond.vsnp_azc' | |
69 #else: | |
70 --dbkey '$input_type_cond.samtools_idxstats[0].metadata.dbkey' | |
71 #if $input_type_cond.collection_type_cond.reads_collection[0].is_of_type('fastqsanger.gz'): | |
72 --gzipped | |
73 #end if | |
74 #if $input_type_cond.collection_type_cond.collection_type == 'paired': | |
75 --list_paired | |
76 #end if | |
77 --input_idxstats_dir '$input_idxstats_dir' | |
78 --input_metrics_dir '$input_metrics_dir' | |
79 --input_reads_dir '$input_reads_dir' | |
80 #end if | 40 #end if |
41 #if $read1.is_of_type('fastqsanger.gz'): | |
42 --gzipped | |
43 #end if | |
44 --dbkey '$samtools_idxstats.metadata.dbkey' | |
45 --samtools_idxstats '$samtools_idxstats' | |
46 --vsnp_azc_metrics '$vsnp_azc_metrics' | |
81 --output '$output' | 47 --output '$output' |
82 ]]></command> | 48 ]]></command> |
83 <inputs> | 49 <inputs> |
84 <conditional name="input_type_cond"> | 50 <conditional name="input_type_cond"> |
85 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> | 51 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> |
86 <option value="single_files" selected="true">Single files</option> | 52 <option value="single" selected="true">Single files</option> |
87 <option value="collections">Collections of files</option> | 53 <option value="paired">Paired reads</option> |
54 <option value="pair">Paired reads in separate data sets</option> | |
88 </param> | 55 </param> |
89 <when value="single_files"> | 56 <when value="single"> |
90 <conditional name="read_type_cond"> | 57 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> |
91 <param name="read_type" type="select" label="Choose the read type"> | |
92 <option value="single" selected="true">Single reads</option> | |
93 <option value="pair">Paired reads</option> | |
94 </param> | |
95 <when value="single"> | |
96 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
97 </when> | |
98 <when value="pair"> | |
99 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
100 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> | |
101 </when> | |
102 </conditional> | |
103 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/> | |
104 <param name="vsnp_azc" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/> | |
105 </when> | 58 </when> |
106 <when value="collections"> | 59 <when value="paired"> |
107 <conditional name="collection_type_cond"> | 60 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> |
108 <param name="collection_type" type="select" label="Collections of single reads or paired reads?"> | 61 </when> |
109 <option value="single" selected="true">Single reads</option> | 62 <when value="pair"> |
110 <option value="paired">Paired reads in separate datasets</option> | 63 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> |
111 </param> | 64 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> |
112 <when value="single"> | |
113 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of fastqsanger files"/> | |
114 </when> | |
115 <when value="paired"> | |
116 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> | |
117 </when> | |
118 </conditional> | |
119 <param name="samtools_idxstats" type="data_collection" format="tabular" collection_type="list" label="Collection of samtools idxstats files"/> | |
120 <param name="vsnp_azc" type="data_collection" format="tabular" collection_type="list" label="Collection of vSNP: add zero coverage metrics files"/> | |
121 </when> | 65 </when> |
122 </conditional> | 66 </conditional> |
67 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/> | |
68 <param name="vsnp_azc_metrics" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/> | |
123 </inputs> | 69 </inputs> |
124 <outputs> | 70 <outputs> |
125 <data name="output" format="tabular"/> | 71 <data name="output" format="tabular"/> |
126 </outputs> | 72 </outputs> |
127 <tests> | 73 <tests> |
128 <!-- A single fastq file --> | 74 <!-- A single fastq file --> |
129 <test expect_num_outputs="1"> | 75 <test expect_num_outputs="1"> |
130 <param name="input_type" value="single_files"/> | 76 <param name="input_type" value="single"/> |
131 <param name="read_type" value="single"/> | |
132 <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> | 77 <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> |
133 <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/> | 78 <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/> |
134 <param name="vsnp_azc" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/> | 79 <param name="vsnp_azc_metrics" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/> |
135 <output name="output" file="vsnp_statistics1.tabular" ftype="tabular"/> | 80 <output name="output" file="vsnp_statistics1.tabular" ftype="tabular"/> |
136 </test> | 81 </test> |
137 <!-- A set of paired fastq files --> | 82 <!-- A set of paired fastq files --> |
138 <test expect_num_outputs="1"> | 83 <test expect_num_outputs="1"> |
139 <param name="input_type" value="single_files"/> | 84 <param name="input_type" value="pair"/> |
140 <param name="read_type" value="pair"/> | |
141 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> | 85 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> |
142 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> | 86 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> |
143 <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/> | 87 <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/> |
144 <param name="vsnp_azc" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/> | 88 <param name="vsnp_azc_metrics" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/> |
145 <output name="output" file="vsnp_statistics2.tabular" ftype="tabular"/> | 89 <output name="output" file="vsnp_statistics2.tabular" ftype="tabular"/> |
146 </test> | 90 </test> |
147 <!-- A collection of SE fastq files --> | 91 <!-- A collection of paired fastq files --> |
148 <test expect_num_outputs="1"> | 92 <test expect_num_outputs="1"> |
149 <param name="input_type" value="collections"/> | 93 <param name="input_type" value="paired"/> |
150 <param name="read_type" value="single"/> | |
151 <param name="reads_collection"> | |
152 <collection type="list"> | |
153 <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="Mcap_Deer_DE_SRR650221.fastq.gz" dbkey="89"/> | |
154 <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="13-1941-6_S4_L001_R1_600000.fastq.gz" dbkey="89"/> | |
155 </collection> | |
156 </param> | |
157 <param name="samtools_idxstats"> | |
158 <collection type="list"> | |
159 <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="samtools_idxstats3.tabular" dbkey="89"/> | |
160 <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="samtools_idxstats4.tabular" dbkey="89"/> | |
161 </collection> | |
162 </param> | |
163 <param name="vsnp_azc"> | |
164 <collection type="list"> | |
165 <element name="13-1941-6_S4_L001_R1_600000.fastq.gz" value="add_zc_metrics3.tabular" dbkey="89"/> | |
166 <element name="Mcap_Deer_DE_SRR650221.fastq.gz" value="add_zc_metrics4.tabular" dbkey="89"/> | |
167 </collection> | |
168 </param> | |
169 <output name="output" file="vsnp_statistics3.tabular" ftype="tabular"/> | |
170 </test> | |
171 <!-- A collection of PE fastq files --> | |
172 <test expect_num_outputs="1"> | |
173 <param name="input_type" value="collections"/> | |
174 <param name="collection_type" value="paired"/> | |
175 <param name="reads_collection"> | 94 <param name="reads_collection"> |
176 <collection type="paired"> | 95 <collection type="paired"> |
177 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/> | 96 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/> |
178 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/> | 97 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/> |
179 </collection> | 98 </collection> |
180 </param> | 99 </param> |
181 <param name="samtools_idxstats"> | 100 <param name="samtools_idxstats" value="samtools_idxstats5.tabular" ftype="tabular" dbkey="89"/> |
182 <collection type="list"> | 101 <param name="vsnp_azc_metrics" value="add_zc_metrics5.tabular" ftype="tabular" dbkey="89"/> |
183 <element name="13-1941-6_S4_L001_R1_600000.fastq" value="samtools_idxstats5.tabular" dbkey="89"/> | |
184 </collection> | |
185 </param> | |
186 <param name="vsnp_azc"> | |
187 <collection type="list"> | |
188 <element name="13-1941-6_S4_L001_R1_600000.fastq" value="add_zc_metrics5.tabular" dbkey="89"/> | |
189 </collection> | |
190 </param> | |
191 <output name="output" file="vsnp_statistics4.tabular" ftype="tabular"/> | 102 <output name="output" file="vsnp_statistics4.tabular" ftype="tabular"/> |
192 </test> | 103 </test> |
193 </tests> | 104 </tests> |
194 <help> | 105 <help> |
195 **What it does** | 106 **What it does** |
196 | 107 |
197 Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them | 108 Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them |
198 to produce an Excel spreadsheet containing statistics for each sample. The samples can be single or paired reads, and all associated inputs | 109 to produce an Excel spreadsheet containing statistics for each sample. The samples can be a single read, a single set of paired reads in |
199 can be either single files or collections of files. The output statistics include reference, file size, mean read length, mean read quality, | 110 separate datasets or a collection of paired reads. The output statistics include reference, file size, mean read length, mean read quality, |
200 reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth | 111 reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth |
201 of coverage and good SNP count. | 112 of coverage and good SNP count. |
202 </help> | 113 </help> |
203 <expand macro="citations"/> | 114 <expand macro="citations"/> |
204 </tool> | 115 </tool> |