comparison vsnp_statistics.xml @ 4:a2f69b1598e0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author iuc
date Fri, 10 Jun 2022 06:09:36 +0000
parents b960f47c57a1
children
comparison
equal deleted inserted replaced
3:bc9c5eadd0d8 4:a2f69b1598e0
1 <tool id="vsnp_statistics" name="vSNP: statistics" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> 1 <tool id="vsnp_statistics" name="vSNP: statistics" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description></description> 2 <description></description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <requirements>
7 <expand macro="biopython_requirement"/>
8 <expand macro="numpy_requirement"/>
9 <expand macro="openpyxl_requirement"/>
10 <expand macro="pandas_requirement"/>
11 <expand macro="xlrd_requirement"/>
12 </requirements>
13 <command detect_errors="exit_code"><![CDATA[ 6 <command detect_errors="exit_code"><![CDATA[
14 #import re 7 #import re
15 8
16 #if $input_type_cond.input_type in ["single", "pair"]: 9 #if $input_type_cond.input_type in ["single", "pair"]:
17 #set read1 = $input_type_cond.read1 10 #set read1 = $input_type_cond.read1
18 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) 11 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
19 ln -s '${read1}' '${read1_identifier}' && 12 ln -s '${read1}' '${read1_identifier}' &&
13 #set read1_seqkit_stats = $input_type_cond.read1_seqkit_stats
14 #set read1_seqkit_fx2tab = $input_type_cond.read1_seqkit_fx2tab
20 #if $input_type_cond.input_type == "pair": 15 #if $input_type_cond.input_type == "pair":
21 #set read2 = $input_type_cond.read2 16 #set read2 = $input_type_cond.read2
22 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) 17 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier))
23 ln -s '${read2}' '${read2_identifier}' && 18 ln -s '${read2}' '${read2_identifier}' &&
19 #set read2_seqkit_stats = $input_type_cond.read2_seqkit_stats
20 #set read2_seqkit_fx2tab = $input_type_cond.read2_seqkit_fx2tab
24 #end if 21 #end if
25 #else: 22 #else:
26 #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.reads_collection.element_identifier)) 23 #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.reads_collection.element_identifier))
27 #set read1 = $input_type_cond.reads_collection.forward 24 #set read1 = $input_type_cond.reads_collection.forward
28 #set read1_identifier = $identifier + '_R1' 25 #set read1_identifier = $identifier + '_R1'
29 ln -s '${read1}' '${read1_identifier}' && 26 ln -s '${read1}' '${read1_identifier}' &&
30 #set read2 = $input_type_cond.reads_collection.reverse 27 #set read2 = $input_type_cond.reads_collection.reverse
31 #set read2_identifier = $identifier + '_R2' 28 #set read2_identifier = $identifier + '_R2'
32 ln -s '${read2}' '${read2_identifier}' && 29 ln -s '${read2}' '${read2_identifier}' &&
30 #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.seqkit_stats_collection.element_identifier))
31 #set read1_seqkit_stats = $input_type_cond.seqkit_stats_collection.forward
32 #set read2_seqkit_stats = $input_type_cond.seqkit_stats_collection.reverse
33 #set identifier = re.sub('[^\s\w\-]', '_', str($input_type_cond.seqkit_fx2tab_collection.element_identifier))
34 #set read1_seqkit_fx2tab = $input_type_cond.seqkit_fx2tab_collection.forward
35 #set read2_seqkit_fx2tab = $input_type_cond.seqkit_fx2tab_collection.reverse
33 #end if 36 #end if
34 37
35 python '$__tool_directory__/vsnp_statistics.py' 38 python '$__tool_directory__/vsnp_statistics.py'
36 --read1 '${read1_identifier}' 39 --read1 '${read1_identifier}'
37 #if $input_type_cond.input_type in ["pair", "paired"]: 40 --read1_seqkit_stats '$read1_seqkit_stats'
38 --read2 '${read2_identifier}' 41 --read1_seqkit_fx2tab '$read1_seqkit_fx2tab'
42 #if $input_type_cond.input_type in ['pair', 'paired']:
43 --read2 '${read2_identifier}'
44 --read2_seqkit_stats '$read2_seqkit_stats'
45 --read2_seqkit_fx2tab '$read2_seqkit_fx2tab'
39 #end if 46 #end if
40 #if $read1.is_of_type('fastqsanger.gz'):
41 --gzipped
42 #end if
43 --dbkey '$samtools_idxstats.metadata.dbkey'
44 --samtools_idxstats '$samtools_idxstats'
45 --vsnp_azc_metrics '$vsnp_azc_metrics'
46 --output '$output' 47 --output '$output'
47 ]]></command> 48 ]]></command>
48 <inputs> 49 <inputs>
49 <conditional name="input_type_cond"> 50 <conditional name="input_type_cond">
50 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> 51 <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
51 <option value="single" selected="true">Single files</option> 52 <option value="single" selected="true">Single files</option>
52 <option value="paired">Paired reads</option> 53 <option value="paired">Paired reads</option>
53 <option value="pair">Paired reads in separate data sets</option> 54 <option value="pair">Paired reads in separate data sets</option>
54 </param> 55 </param>
55 <when value="single"> 56 <when value="single">
56 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> 57 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Fastq file"/>
58 <param name="read1_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected Fastq file"/>
59 <param name="read1_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected Fastq file"/>
57 </when> 60 </when>
58 <when value="paired"> 61 <when value="paired">
59 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> 62 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
63 <param name="seqkit_stats_collection" type="data_collection" format="tabular" collection_type="paired" label="Collection of paired SeqKit statistics files"/>
64 <param name="seqkit_fx2tab_collection" type="data_collection" format="tabular" collection_type="paired" label="Collection of paired SeqKit fx2tab files"/>
60 </when> 65 </when>
61 <when value="pair"> 66 <when value="pair">
62 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> 67 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Forward read fastq file"/>
63 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> 68 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Reverse read fastq file"/>
69 <param name="read1_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected forward read"/>
70 <param name="read2_seqkit_stats" type="data" format="tabular" label="SeqKit statistics file for selected reverse read"/>
71 <param name="read1_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected forward read"/>
72 <param name="read2_seqkit_fx2tab" type="data" format="tabular" label="SeqKit fx2tab file for selected reverse read"/>
64 </when> 73 </when>
65 </conditional> 74 </conditional>
66 <param name="samtools_idxstats" type="data" format="tabular" label="Samtools idxstats file"/>
67 <param name="vsnp_azc_metrics" type="data" format="tabular" label="vSNP: add zero coverage metrics file"/>
68 </inputs> 75 </inputs>
69 <outputs> 76 <outputs>
70 <data name="output" format="tabular"/> 77 <data name="output" format="tabular"/>
71 </outputs> 78 </outputs>
72 <tests> 79 <tests>
73 <!-- A single fastq file --> 80 <!-- A single fastq file -->
74 <test expect_num_outputs="1"> 81 <test expect_num_outputs="1">
75 <param name="input_type" value="single"/> 82 <param name="input_type" value="single"/>
76 <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> 83 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
77 <param name="samtools_idxstats" value="samtools_idxstats1.tabular" ftype="tabular" dbkey="89"/> 84 <param name="read1_seqkit_stats" value="r1_seqkit_stats1.tabular" ftype="tabular"/>
78 <param name="vsnp_azc_metrics" value="add_zc_metrics1.tabular" ftype="tabular" dbkey="89"/> 85 <param name="read1_seqkit_fx2tab" value="r1_seqkit_fx2tab1.tabular" ftype="tabular"/>
79 <output name="output" ftype="tabular"> 86 <output name="output" file="statistics_output1.tabular" ftype="tabular"/>
80 <assert_contents>
81 <has_size value="332"/>
82 </assert_contents>
83 </output>
84 </test> 87 </test>
85 <!-- A set of paired fastq files --> 88 <!-- A set of paired fastq files -->
86 <test expect_num_outputs="1"> 89 <test expect_num_outputs="1">
87 <param name="input_type" value="pair"/> 90 <param name="input_type" value="pair"/>
88 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> 91 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
89 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz" dbkey="89"/> 92 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
90 <param name="samtools_idxstats" value="samtools_idxstats2.tabular" ftype="tabular" dbkey="89"/> 93 <param name="read1_seqkit_stats" value="r1_seqkit_stats2.tabular" ftype="tabular"/>
91 <param name="vsnp_azc_metrics" value="add_zc_metrics2.tabular" ftype="tabular" dbkey="89"/> 94 <param name="read2_seqkit_stats" value="r2_seqkit_stats2.tabular" ftype="tabular"/>
92 <output name="output" ftype="tabular"> 95 <param name="read1_seqkit_fx2tab" value="r1_seqkit_fx2tab2.tabular" ftype="tabular"/>
93 <assert_contents> 96 <param name="read2_seqkit_fx2tab" value="r2_seqkit_fx2tab2.tabular" ftype="tabular"/>
94 <has_size value="500"/> 97 <output name="output" file="statistics_output2.tabular" ftype="tabular"/>
95 </assert_contents>
96 </output>
97 </test> 98 </test>
98 <!-- A collection of paired fastq files --> 99 <!-- A collection of paired fastq files -->
99 <test expect_num_outputs="1"> 100 <test expect_num_outputs="1">
100 <param name="input_type" value="paired"/> 101 <param name="input_type" value="paired"/>
101 <param name="reads_collection"> 102 <param name="reads_collection">
102 <collection type="paired"> 103 <collection type="paired">
103 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/> 104 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
104 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/> 105 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
105 </collection> 106 </collection>
106 </param> 107 </param>
107 <param name="samtools_idxstats" value="samtools_idxstats5.tabular" ftype="tabular" dbkey="89"/> 108 <param name="seqkit_stats_collection">
108 <param name="vsnp_azc_metrics" value="add_zc_metrics5.tabular" ftype="tabular" dbkey="89"/> 109 <collection type="paired">
109 <output name="output" ftype="tabular"> 110 <element name="forward" value="r1_seqkit_stats2.tabular" ftype="tabular"/>
110 <assert_contents> 111 <element name="reverse" value="r2_seqkit_stats2.tabular" ftype="tabular"/>
111 <has_size value="466"/> 112 </collection>
112 </assert_contents> 113 </param>
113 </output> 114 <param name="seqkit_fx2tab" value="seqkit_fx2tab3.tabular" ftype="tabular"/>
115 <param name="seqkit_fx2tab_collection">
116 <collection type="paired">
117 <element name="forward" value="r1_seqkit_fx2tab2.tabular" ftype="tabular"/>
118 <element name="reverse" value="r2_seqkit_fx2tab2.tabular" ftype="tabular"/>
119 </collection>
120 </param>
121 <output name="output" file="statistics_output3.tabular" ftype="tabular"/>
114 </test> 122 </test>
115 </tests> 123 </tests>
116 <help> 124 <help>
117 **What it does** 125 **What it does**
118 126
119 Accepts associated fastq files, SAMtools idxstats files and **vSNP: add zero coverage** metrics files and extracts information from them 127 Accepts fastq samples and SeqKit stats and fx2tab files produced from the samples and extracts information from them to produce a tabular
120 to produce a tabular file containing statistics for each sample. The samples can be a single read, a single set of paired reads in 128 file containing statistics for each sample. The samples can be a single read, a single set of paired reads in separate datasets or a
121 separate datasets or a collection of paired reads. The output statistics include reference, file size, mean read length, mean read quality, 129 collection of paired reads. The output statistics include file size, read count, sum / avg / max read length, Q1, Q2, Q3, sum gap, N50,
122 reads passing Q30, total reads, all mapped reads, unmapped reads, unmapped reads percentage of total, reference with coverage, average depth 130 reads passing Q20 / Q30, and average read quality.
123 of coverage and good SNP count.
124 </help> 131 </help>
125 <expand macro="citations"/> 132 <expand macro="citations"/>
126 </tool> 133 </tool>
127 134