comparison sortmerna.xml @ 0:a8ac09e937f3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 04cfb5475292e4fd1f7c0ca86d8d0d5e5f886c3d-dirty
author rnateam
date Mon, 03 Aug 2015 08:18:26 -0400
parents
children b482293b2987
comparison
equal deleted inserted replaced
-1:000000000000 0:a8ac09e937f3
1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="1.9.0">
2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
3 <requirements>
4 <requirement type="package" version="1.9">sortmerna</requirement>
5 </requirements>
6 <!-- Any output (stdout or stderr) matching one of these patterns marks the job as failed; each description is reported as written, with no variable substitution. --> <stdio>
7 <regex match="This program builds a Burst trie on an input rRNA database"
8 source="both"
9 level="fatal"
10 description="Buildtrie program failed to execute." />
11 <regex match="The database name"
12 source="both"
13 level="fatal"
14 description="A selected database has not been preprocessed using buildtrie before using SortMeRNA." />
15 </stdio>
16 <version_command>
17 <![CDATA[
18 sortmerna --version 2>&1 | grep 'SortMeRNA version'
19 ]]>
20 </version_command>
21 <!-- Runs sortmerna.py through the Python interpreter. The double-quoted string after the 'sortmerna' option collects the strand choice, the read-family option with the input reads, the pairing flag (paired reads only), the accept/other output names, the log option and the '-a' value taken from GALAXY_SLOTS (default 4). After the quoted string come either the 'buildtrie' option followed by each history dataset, or, for cached databases, the column-2 entries of the data table rows whose column 0 matches each selected value. NOTE(review): how sortmerna.py consumes these arguments is not visible in this file. --> <command interpreter="python">
22 <![CDATA[
23 sortmerna.py
24 --sortmerna "
25 $strand_search
26 #if str( $read_family.read_family_selector ) == 'other':
27 --I $input_reads -r $read_family.ratio_parameter
28 #else:
29 $read_family.read_family_selector $input_reads
30 #end if
31
32 #if str( $sequencing_type.sequencing_type_selector ) == 'paired':
33 $sequencing_type.paired_type
34 #end if
35
36 #if $outputs_selected:
37 #if 'accept' in $outputs_selected.value:
38 --accept accept_file
39 #end if
40 #if 'other' in $outputs_selected.value:
41 --other other_file
42 #end if
43 #end if
44
45 $log
46 -a \${GALAXY_SLOTS:-4}
47 "
48 #if str( $databases_type.databases_selector ) == 'history':
49 --buildtrie
50 #for $db in $databases_type.input_databases
51 $db.database_name
52 #end for
53 #else:
54 ## databases path is not directly accessible, must match by hand with LOC file contents
55 ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y]
56 for y in $databases_type.input_databases.value])}
57 #end if
58 ]]>
59 </command>
60 <inputs>
61 <!-- Read-family choice: the selected option value is itself the flag placed on the command line in front of the input reads; choosing 'other' instead exposes a custom ratio that the command template passes with -r. --> <conditional name="read_family">
62 <param name="read_family_selector" type="select" label="Sequencing technology of querying sequences (reads)"
63 help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput.">
64 <option value="--I">Illumina Solexa</option>
65 <option value="--454">454 Roche</option>
66 <option value="other">Other</option>
67 </param>
68 <when value="other">
69 <param name="ratio_parameter" type="float" value="1" min="0" max="1"
70 label="Ratio parameter (the number of hits on the read / read length)"
71 help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads.
72 For other read types, if the sequencing technology produces high quality reads with a low substitution error rate
73 (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27].
74 If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent),
75 then the ratio parameter can be set to r=[0.13,0.17] (-r)."/>
76 </when>
77 </conditional>
78 <param name="input_reads" type="data" format="fasta,fastq" label="Querying sequences (reads)" help=""/>
79 <!-- Pairing choice: only the 'paired' case adds a parameter, and its selected option value is inserted as-is into the command line by the command template. --> <conditional name="sequencing_type">
80 <param name="sequencing_type_selector" type="select" label="Sequencing type">
81 <option value="not_paired">Reads are not paired</option>
82 <option value="paired">Reads are paired</option>
83 </param>
84 <when value="paired">
85 <param name="paired_type" type="select" display="radio" label="If one read of a pair is accepted and the other not, output both reads"
86 help="SortMeRNA does not use the pairing information for filtering RNA,
87 however if one read of a pair is accepted and the other is not,
88 the resulting output may break apart the pair into two separate files.
89 The purpose of 'Reads are paired' option is to preserve the pairing of the reads.">
90 <option value="--paired-in">to accepted file (--paired-in)</option>
91 <option value="--paired-out">to rejected file (--paired-out)</option>
92 </param>
93 </when>
94 </conditional>
95
96 <param name="strand_search" type="select" display="radio" label="Which strands to search">
97 <option value="">Search both strands</option>
98 <option value="-F">Search only the forward strand (-F)</option>
99 <option value="-R">Search only the reverse-complementary strand (-R)</option>
100 </param>
101
102 <!-- Database source: 'cached' lists the entries of the rRNA_databases data table as checkboxes; 'history' takes one or more FASTA datasets (repeat with min 1), which the command template passes after the 'buildtrie' option. --> <conditional name="databases_type">
103 <param name="databases_selector" type="select" label="Databases to query"
104 help="Public rRNA databases provided with SortMeRNA have been indexed.
105 On the contrary, personal databases must be indexed each time SortMeRNA is launched.
106 Please be patient, this may take some time depending on the size of the given database.">
107 <option value="cached" selected="true">Public ribosomal databases</option>
108 <option value="history">Databases from your history</option>
109 </param>
110 <when value="cached">
111 <param name="input_databases" label="rRNA database" type="select" display="checkboxes" multiple="true">
112 <options from_data_table="rRNA_databases" />
113 <validator type="no_options" message="Select at least one database"/>
114 </param>
115 </when>
116 <when value="history">
117 <repeat name="input_databases" title="Database" min="1">
118 <param name="database_name" type="data" format="fasta" label="rRNA database"
119 help="Your database will be indexed first, which may take up to several minutes."/>
120 </repeat>
121 </when>
122 </conditional>
123
124 <!-- Outputs -->
125 <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options">
126 <option value="accept" selected="true">Reads matching to at least one database</option>
127 <option value="other">Reads not found in any database</option>
128 </param>
129 <param name="log" type="boolean" checked="false" truevalue="--log log_file" falsevalue="" label="Statistics file"
130 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
131 </param>
132
133 </inputs>
134 <outputs>
135 <data name="output_accept" format_source="input_reads" from_work_dir="accept_file.dat"
136 label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})">
137 <filter>outputs_selected and 'accept' in outputs_selected</filter>
138 </data>
139 <data name="output_other" format_source="input_reads" from_work_dir="other_file.dat"
140 label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})">
141 <filter>outputs_selected and 'other' in outputs_selected</filter>
142 </data>
143 <data name="output_log" format="txt" from_work_dir="log_file.log" label="${tool.name} statistics (txt)">
144 <filter>log</filter>
145 </data>
146 </outputs>
147 <!-- One functional test: unpaired Illumina reads searched on both strands against two cached Rfam databases; the accepted and rejected read files are compared with reference outputs. --> <tests>
148 <test>
149 <param name="read_family_selector" value="--I" />
150 <param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
151 <param name="sequencing_type_selector" value="not_paired" />
152 <param name="strand_search" value="" />
153 <param name="databases_selector" value="cached" />
154 <param name="input_databases" value="rfam-5.8s,rfam-5s" />
155 <param name="outputs_selected" value="accept,other" />
156 <param name="log" value="" />
158 <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
159 <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
160 </test>
161 </tests>
162 <help>
163 <![CDATA[
164 **What it does**
165
166 SortMeRNA_ is software designed to rapidly filter ribosomal RNA fragments
167 from metatranscriptomic data produced by next-generation sequencers.
168 It is capable of handling large RNA databases and sorting out all fragments
169 matching to the database with high accuracy and specificity.
170
171 .. _SortMeRNA: http://bioinfo.lifl.fr/RNA/sortmerna/
172
173
174 **Input**
175
176 The input is one file of reads in FASTA or FASTQ format and any number of rRNA databases to search against.
177 If the user has two forward-reverse paired-sequencing reads files, they may use
178 the script "merge_paired_reads.sh" to interleave the reads into one file, preserving their order.
179
180 If the sequencing type for the reads is paired-ended, the user has two options under
181 "Sequencing type" to filter the reads and preserve their order in the file.
182 For a further example of each option, please refer to Section 4.2.3 in the `SortMeRNA User Manual`_.
183
184 .. _sortmerna user manual: http://bioinfo.lifl.fr/RNA/sortmerna/code/SortMeRNA-user-manual-v1.7.pdf
185
186
187 **Output**
188
189 The output will follow the same format (FASTA or FASTQ) as the reads. Optionally, a statistics file for the rRNA content of reads, as well as rRNA subunit distribution, can be generated.
190
191
192 **rRNA databases**
193
194 SortMeRNA is distributed with 8 representative rRNA databases, which were
195 all constructed from the SILVA SSU,LSU (version 111) and the RFAM 5/5.8S
196 (version 11.0) databases using the tool UCLUST.
197
198 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
199 | Representative database | id % | average id% | # seq (clustered) | Origin | # seq (original) |
200 +==========================+======+=============+===================+========================+===================+
201 | SILVA 16S bacteria | 85 | 91.6 | 8174 | SILVA SSU Ref NR v.111 | 244077 |
202 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
203 | SILVA 16S archaea | 95 | 96.7 | 3845 | SILVA SSU Ref NR v.111 | 10919 |
204 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
205 | SILVA 18S eukarya | 95 | 96.7 | 4512 | SILVA SSU Ref NR v.111 | 31862 |
206 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
207 | SILVA 23S bacteria | 98 | 99.4 | 3055 | SILVA LSU Ref v.111 | 19580 |
208 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
209 | SILVA 23S archaea | 98 | 99.5 | 164 | SILVA LSU Ref v.111 | 405 |
210 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
211 | SILVA 28S eukarya | 98 | 99.1 | 4578 | SILVA LSU Ref v.111 | 9321 |
212 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
213 | Rfam 5S archaea/bacteria | 98 | 99.2 | 59513 | RFAM | 116760 |
214 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
215 | Rfam 5.8S eukarya | 98 | 98.9 | 13034 | RFAM | 225185 |
216 +--------------------------+------+-------------+-------------------+------------------------+-------------------+
217
218 id %: members of the cluster must have at least 'id %' identity with the representative sequence
219
220 average id %: average identity of a cluster member to the representative sequence
221
222 The user may also choose to use their own rRNA databases.
223
224 .. class:: warningmark
225
226 Note that your personal databases are indexed each time, and that
227 this may take some time depending on the size of the given database.
228 ]]>
229 </help>
230
231 <citations>
232 <citation type="doi">10.1093/bioinformatics/bts611</citation>
233 <citation type="doi">10.1093/nar/gks1219</citation>
234 <citation type="doi">10.1093/nar/gks1005</citation>
235 <citation type="doi">10.1093/bioinformatics/btq461</citation>
236 <citation type="doi">10.1038/nbt.2198</citation>
237 </citations>
238 </tool>