Mercurial > repos > iuc > vsnp_determine_ref_from_data
comparison vsnp_determine_ref_from_data.xml @ 0:12f2b14549f6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
author | iuc |
---|---|
date | Wed, 02 Dec 2020 09:11:24 +0000 |
parents | |
children | b03e88e7bb1d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:12f2b14549f6 |
---|---|
1 <tool id="vsnp_determine_ref_from_data" name="vSNP: determine reference" version="1.0.0"> | |
2 <description>from input data</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <requirements> | |
7 <requirement type="package" version="1.76">biopython</requirement> | |
8 <requirement type="package" version="5.3">pyyaml</requirement> | |
9 </requirements> | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 #import re | |
12 ## Dataset identifiers are sanitized before use as symlink names: every character other than whitespace, a word character or '-' becomes '_'. | |
13 #set input_type = $input_type_cond.input_type | |
14 ## Link the reads into the working directory: read1 only for "single", read1 and read2 for "pair", forward and reverse elements for a paired collection. | |
15 #if $input_type in ["single", "pair"]: | |
16 #set read1 = $input_type_cond.read1 | |
17 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) | |
18 ln -s '${read1}' '${read1_identifier}' && | |
19 #if $input_type == "pair": | |
20 #set read2 = $input_type_cond.read2 | |
21 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) | |
22 ln -s '${read2}' '${read2_identifier}' && | |
23 #else: | |
24 #set read2 = None | |
25 #end if | |
26 #else: | |
27 #set read1 = $input_type_cond.reads_collection['forward'] | |
28 #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) | |
29 ln -s '${read1}' '${read1_identifier}' && | |
30 #set read2 = $input_type_cond.reads_collection['reverse'] | |
31 #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) | |
32 ln -s '${read2}' '${read2_identifier}' && | |
33 #end if | |
34 ## Run the script: read2 is passed only when it was set above, the gzipped flag only when read1 is fastqsanger.gz, and columns 0 and 2 of every vsnp_dnaprints data table row are passed as dnaprint_fields. | |
35 python '$__tool_directory__/vsnp_determine_ref_from_data.py' | |
36 --read1 '${read1_identifier}' | |
37 #if $read2 is not None: | |
38 --read2 '${read2_identifier}' | |
39 #end if | |
40 --output_dbkey '$output_dbkey' | |
41 --output_metrics '$output_metrics' | |
42 #if $read1.is_of_type('fastqsanger.gz'): | |
43 --gzipped | |
44 #end if | |
45 #set $dnaprint_fields = $__app__.tool_data_tables['vsnp_dnaprints'].get_fields() | |
46 #for $i in $dnaprint_fields: | |
47 --dnaprint_fields '${i[0]}' '${i[2]}' | |
48 #end for | |
49 ]]></command> | |
50 <inputs> | |
51 <conditional name="input_type_cond"> <!-- input_type decides which read parameters are shown; the command template branches on the same value --> | |
52 <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> | |
53 <option value="single" selected="true">Single files</option> | |
54 <option value="paired">Paired reads</option> | |
55 <option value="pair">Paired reads in separate data sets</option> | |
56 </param> | |
57 <when value="single"> <!-- one fastq dataset; the command template sets read2 to None for this case --> | |
58 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
59 </when> | |
60 <when value="paired"> <!-- one paired collection; its forward and reverse elements become read1 and read2 in the command template --> | |
61 <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> | |
62 </when> | |
63 <when value="pair"> <!-- two separately selected datasets used as read1 and read2 --> | |
64 <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> | |
65 <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> | |
66 </when> | |
67 </conditional> | |
68 </inputs> | |
69 <outputs> <!-- two text datasets; their paths are handed to the script as the output_dbkey and output_metrics arguments in the command template --> | |
70 <data name="output_dbkey" format="txt" label="${tool.name} (dbkey) on ${on_string}"/> | |
71 <data name="output_metrics" format="txt" label="${tool.name} (metrics) on ${on_string}"/> | |
72 </outputs> | |
73 <tests> | |
74 <!-- input_type "single": one gzipped fastqsanger dataset --> | |
75 <test expect_num_outputs="2"> | |
76 <param name="input_type" value="single"/> | |
77 <param name="read1" value="Mcap_Deer_DE_SRR650221.fastq.gz" ftype="fastqsanger.gz"/> | |
78 <output name="output_dbkey" file="output_dbkey.txt" ftype="txt"/> | |
79 <output name="output_metrics" file="output_metrics.txt" ftype="txt"/> | |
80 </test> | |
81 <!-- input_type "pair": forward and reverse reads supplied as two separate gzipped datasets --> | |
82 <test expect_num_outputs="2"> | |
83 <param name="input_type" value="pair"/> | |
84 <param name="read1" value="forward.fastq.gz" ftype="fastqsanger.gz"/> | |
85 <param name="read2" value="reverse.fastq.gz" ftype="fastqsanger.gz"/> | |
86 <output name="output_dbkey" file="paired_dbkey.txt" ftype="txt"/> | |
87 <output name="output_metrics" file="paired_metrics.txt" ftype="txt"/> | |
88 </test> | |
89 <!-- input_type "paired": the same reads supplied as one paired collection; compared against the same expected files as the "pair" test --> | |
90 <test expect_num_outputs="2"> | |
91 <param name="input_type" value="paired"/> | |
92 <param name="reads_collection"> | |
93 <collection type="paired"> | |
94 <element name="forward" value="forward.fastq.gz" ftype="fastqsanger.gz"/> | |
95 <element name="reverse" value="reverse.fastq.gz" ftype="fastqsanger.gz"/> | |
96 </collection> | |
97 </param> | |
98 <output name="output_dbkey" file="paired_dbkey.txt" ftype="txt"/> | |
99 <output name="output_metrics" file="paired_metrics.txt" ftype="txt"/> | |
100 </test> | |
101 </tests> | |
102 <help> | |
103 **What it does** | |
104 | |
105 Accepts a single fastqsanger read, a pair of separately selected reads, or a paired collection of reads (bacterial samples) and | |
106 inspects the data to discover the best reference genome for aligning the reads. | |
107 | |
108 The information needed to discover the best reference is maintained by the USDA in this repository_. | |
109 | |
110 .. _repository: https://github.com/USDA-VS/vSNP_reference_options | |
111 | |
112 References are currently limited to TB complex, paraTB, and Brucella, but information for additional references will be added. The information for each | |
113 reference is a string consisting of zeros and ones, compiled by USDA researchers, which we call a "DNA print". These strings | |
114 are maintained in yaml files for use in Galaxy, and are installed via the **vSNP DNAprints data manager** tool. | |
115 | |
116 This tool creates an in-memory dictionary of these DNA print strings for matching with a string generated by inspecting the | |
117 input sample data. During inspection, this tool accrues sequence counts for supported species, ultimately generating a string | |
118 consisting of zeros and ones based on the counts (i.e., a DNA print). This string is then compared to the strings contained | |
119 in the in-memory dictionary of DNA prints to find a match. | |
120 | |
121 The strings in the in-memory dictionary are each associated with a Galaxy "dbkey" (i.e., genome build), so when a match is found, | |
122 the associated "dbkey" is passed to a mapper (e.g., **Map with BWA-MEM**), typically within a workflow via an expression tool, | |
123 to align the reads to the associated reference. | |
124 | |
125 This tool produces 2 text files, a "dbkey" file that contains the dbkey string and a "metrics" file that provides information | |
126 about the sequence counts that were discovered in the input sample data that produced the "DNA print" string. | |
127 | |
128 This tool is important for samples containing bacterial species because many of the samples have a "mixed bag" of species, | |
129 and discovering the primary species is critical. DNA print matching is currently supported for the following genomes. | |
130 | |
131 * Mycobacterium bovis AF2122/97 | |
132 * Brucella abortus bv. 1 str. 9-941 | |
133 * Brucella abortus strain BER | |
134 * Brucella canis ATCC 23365 | |
135 * Brucella ceti TE10759-12 | |
136 * Brucella melitensis bv. 1 str. 16M | |
137 * Brucella melitensis bv. 3 str. Ether | |
138 * Brucella melitensis BwIM_SOM_36b | |
139 * Brucella melitensis ATCC 23457 | |
140 * Brucella ovis ATCC 25840 | |
141 * Brucella suis 1330 | |
142 * Mycobacterium tuberculosis H37Rv | |
143 * Mycobacterium avium subsp. paratuberculosis strain Telford | |
144 * Mycobacterium avium subsp. paratuberculosis K-10 | |
145 * Brucella suis ATCC 23445 | |
146 * Brucella suis bv. 3 str. 686 | |
147 | |
148 **Required Options** | |
149 | |
150 * **Choose the category of the files to be analyzed** - select "Single files", "Paired reads" (a paired collection), or "Paired reads in separate data sets", then select the appropriate history items (a single fastqsanger read, a paired collection of fastqsanger reads, or two separate fastqsanger reads) based on the selected option. | |
151 </help> | |
152 <expand macro="citations"/> | |
153 </tool> | |
154 |