comparison sortmerna.xml @ 2:3699b6b771e0 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 9fcf62e1e259381613e48a0ff28c27bd4fe82707
author rnateam
date Tue, 29 Mar 2016 07:01:13 -0400
parents b482293b2987
children 59252ca85c74
comparison
equal deleted inserted replaced
1:b482293b2987 2:3699b6b771e0
1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.0.0"> 1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.1b.0">
2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> 2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
3 <requirements> 3 <requirements>
4 <requirement type='package' version="2.0">sortmerna</requirement> 4 <requirement type="package" version="2.1b">sortmerna</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <regex match="This program builds a Burst trie on an input rRNA database" 7 <regex match="This program builds a Burst trie on an input rRNA database"
8 source="both" 8 source="both"
9 level="fatal" 9 level="fatal"
10 description="Buildtrie program failed to execute." /> 10 description="Buildtrie program failed to execute." />
11 <regex match="The database name" 11 <regex match="The database name"
12 source="both" 12 source="both"
13 level="fatal" 13 level="fatal"
14 description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." /> 14 description="The database ${databases} has not been preprocessed using buildtrie before using SortMeRNA." />
15 <regex match="ERROR"
16 source="both"
17 level="fatal"
18 description="ERROR" />
15 </stdio> 19 </stdio>
16 <version_command> 20 <version_command>
17 <![CDATA[ 21 <![CDATA[
18 sortmerna --version 2>&1|grep 'SortMeRNA version' 22 sortmerna --version 2>&1|grep 'SortMeRNA version'
19 ]]> 23 ]]>
20 </version_command> 24 </version_command>
21 <command> 25 <command>
22 <![CDATA[ 26 <![CDATA[
23 #set $ref = '' 27 #set $ref = ''
24 #set $sep='' 28 #set $sep=''
25 #if str( $databases_type.databases_selector ) == 'history': 29 #if str( $databases_type.databases_selector ) == 'history'
26 #for $db in $databases_type.database_name 30 #for $db in $databases_type.database_name
27 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0] 31 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0]
28 #set $sep = ':' 32 #set $sep = ':'
29 #end for 33 #end for
30 indexdb_rna --ref $ref 34 #else if str( $databases_type.databases_selector ) == 'cached_to_index'
31 && 35 ## databases path is not directly accessible, must match by hand with LOC file contents
36 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
37 #for $db in $databases_type.input_databases.value
38 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] + '-reindexed'
39 #set $sep = ':'
40 #end for
32 #else: 41 #else:
33 ## databases path is not directly accessible, must match by hand with LOC file contents 42 ## databases path is not directly accessible, must match by hand with LOC file contents
34 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data]) 43 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
35 #for $db in $databases_type.input_databases.value 44 #for $db in $databases_type.input_databases.value
36 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0] 45 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0]
37 #set $sep = ':' 46 #set $sep = ':'
38 #end for 47 #end for
39 #end if 48 #end if
40 sortmerna --ref $ref --reads $input_reads --aligned aligned 49
41 #if str( $sequencing_type.sequencing_type_selector ) == 'paired' 50 #if str( $databases_type.databases_selector ) != 'cached':
42 $sequencing_type.paired_type 51 indexdb_rna
52 --ref $ref
53 -L $databases_type.seed_length
54 --max_pos $databases_type.max_pos
55 &&
43 #end if 56 #end if
44 $strand_search 57
45 $aligned_fastx.aligned_fastx_selector 58 sortmerna
46 #if $aligned_fastx.aligned_fastx_selector == '--fastx' 59 --ref $ref
47 #if $aligned_fastx.other 60 --reads $input_reads
48 --other other_file 61 --aligned aligned
62
63 #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
64 $sequencing_type.paired_type
65 #end if
66
67 $strand_search
68 $aligned_fastx.aligned_fastx_selector
69 #if $aligned_fastx.aligned_fastx_selector == '--fastx'
70 #if $aligned_fastx.other
71 --other other_file
72 #end if
73 #end if
74 $aligned_sam.aligned_sam_selector
75 #if $aligned_sam.aligned_sam_selector == '--sam'
76 $aligned_sam.sq
77 #end if
78 $aligned_blast
79
80 $log
81
82 #if $report.report_type == 'best'
83 #if $report.report_best.report_best_type == '1'
84 --best 1
85 --min_lis $report.report_best.report_best_min_lis
86 #else
87 --best $report.report_best.report_best_value
88 --min_lis $report.report_best.report_best_min_lis
89 #end if
90 #else
91 #if $report.report_num_alignments.report_num_alignments_type == 'other_value'
92 --num_alignments $report.report_num_alignments.report_num_alignments_value
93 #else
94 --num_alignments $report.report_num_alignments.report_num_alignments_type
95 #end if
49 #end if 96 #end if
50 #end if 97
51 $aligned_sam.aligned_sam_selector 98 -e $e_value
52 #if $aligned_sam.aligned_sam_selector == '--sam' 99 --match $match
53 $aligned_sam.sq 100 --mismatch $mismatch
54 #end if 101 --gap_open $gap_open
55 $aligned_blast 102 --gap_ext $gap_ext
56 $log 103 -N $ambiguous_letter
57 -a \${GALAXY_SLOTS:-1} 104 -a \${GALAXY_SLOTS:-1}
58 ]]> 105 ]]>
59 </command> 106 </command>
60 <inputs> 107 <inputs>
61 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/> 108 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/>
62 <conditional name="sequencing_type"> 109 <conditional name="sequencing_type">
63 <param name="sequencing_type_selector" type="select" label="Sequencing type"> 110 <param name="sequencing_type_selector" type="select" label="Sequencing type">
64 <option value="not_paired">Reads are not paired</option> 111 <option value="not_paired">Reads are not paired</option>
65 <option value="paired">Reads are paired</option> 112 <option value="paired">Reads are paired</option>
66 </param> 113 </param>
114 <when value="not_paired" />
67 <when value="paired"> 115 <when value="paired">
68 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not"> 116 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not">
69 <option value="">leave the reads split between aligned and rejected files</option> 117 <option value="">leave the reads split between aligned and rejected files</option>
70 <option value="--paired-in">output both reads to aligned file (--paired-in)</option> 118 <option value="--paired-in">output both reads to aligned file (--paired-in)</option>
71 <option value="--paired-out">output both reads to rejected file (--paired-out)</option> 119 <option value="--paired-out">output both reads to rejected file (--paired-out)</option>
72 </param> 120 </param>
73 </when> 121 </when>
74 </conditional> 122 </conditional>
75 123
76 <param name="strand_search" type="select" label="Which strands to search" display="radio"> 124 <param name="strand_search" type="select" label="Which strands to search">
77 <option value="">Search both strands</option> 125 <option value="">Search both strands</option>
78 <option value="-F">Search only the forward strand (-F)</option> 126 <option value="-F">Search only the forward strand (-F)</option>
79 <option value="-R">Search only the reverse-complementary strand (-R)</option> 127 <option value="-R">Search only the reverse-complementary strand (-R)</option>
80 </param> 128 </param>
81 129
82 <conditional name="databases_type"> 130 <conditional name="databases_type">
83 <param name="databases_selector" type="select" label="Databases to query" 131 <param name="databases_selector" type="select" label="Databases to query"
84 help="Public rRNA databases provided with SortMeRNA have been indexed. 132 help="Public rRNA databases provided with SortMeRNA have been indexed.
85 On the contrary, personal databases must be indexed each time SortMeRNA is launched. 133 On the contrary, personal databases must be indexed each time SortMeRNA is launched.
86 Please be patient, this may take some time depending on the size of the given database."> 134 Please be patient, this may take some time depending on the size of the given database.">
87 <option value="cached" selected="true">Public ribosomal databases</option> 135 <option value="cached" selected="true">Public pre-indexed ribosomal databases</option>
136 <option value="cached_to_index">Public ribosomal databases to index with non-default parameters</option>
88 <option value="history">Databases from your history</option> 137 <option value="history">Databases from your history</option>
89 </param> 138 </param>
90 <when value="cached"> 139 <when value="cached">
91 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true"> 140 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
92 <options from_data_table="rRNA_databases" /> 141 <options from_data_table="rRNA_databases" />
93 <validator type="no_options" message="Select at least one database"/> 142 <validator type="no_options" message="Select at least one database"/>
94 </param> 143 </param>
95 </when> 144 </when>
145 <when value="cached_to_index">
146 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
147 <options from_data_table="rRNA_databases" />
148 <validator type="no_options" message="Select at least one database"/>
149 </param>
150 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/>
151 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/>
152 </when>
96 <when value="history"> 153 <when value="history">
97 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases" 154 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases"
98 help="Your databases will be indexed first, which may take up to several minutes."/> 155 help="Your databases will be indexed first, which may take up to several minutes."/>
156 <param name="seed_length" type="integer" min="0" max="100" value="18" label="Seed length for database indexing" help="(-L)"/>
157 <param name="max_pos" type="integer" min="0" max="100000" value="10000" label="Maximum number of positions to store for each k-mer for database indexing" help="With 0, all positions are stored (--max_pos)"/>
99 </when> 158 </when>
100 </conditional> 159 </conditional>
101 160
102 <!-- Outputs --> 161 <!-- Outputs -->
103 <conditional name="aligned_fastx"> 162 <conditional name="aligned_fastx">
104 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format"> 163 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format?">
105 <option value="--fastx">Yes (--fastx)</option> 164 <option value="--fastx">Yes (--fastx)</option>
106 <option value="">No</option> 165 <option value="">No</option>
107 </param> 166 </param>
108 <when value="--fastx"> 167 <when value="--fastx">
109 <param name="other" type="boolean" label="Include rejected reads file" help="(--other)" /> 168 <param name="other" type="boolean" label="Include rejected reads file?" help="(--other)" />
110 </when> 169 </when>
111 <when value="" /> 170 <when value="" />
112 </conditional> 171 </conditional>
113 <conditional name="aligned_sam"> 172 <conditional name="aligned_sam">
114 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format"> 173 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format?">
115 <option value="--sam">Yes (--sam)</option> 174 <option value="--sam">Yes (--sam)</option>
116 <option value="">No</option> 175 <option value="">No</option>
117 </param> 176 </param>
118 <when value="--sam"> 177 <when value="--sam">
119 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" /> 178 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" />
128 <option value="" selected="true">No</option> 187 <option value="" selected="true">No</option>
129 </param> 188 </param>
130 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file" 189 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file"
131 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)"> 190 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
132 </param> 191 </param>
192 <conditional name="report">
193 <param name="report_type" type="select" label="Parameters for filtering and read mapping" help="">
194 <option value="best" selected="true">Report best alignments per read reaching E-value</option>
195 <option value="num_alignments">Report first alignments per read reaching E-value</option>
196 </param>
197 <when value="best">
198 <conditional name="report_best">
199 <param name="report_best_type" type="select" label="Number of searched alignments" help="Only the best alignment is reported (--best)">
200 <option value="1" selected="true">Only one high-candidate reference sequence is searched for alignments (fast; the high-candidate sequences are determined heuristically using a LIS of seed matches)</option>
201 <option value="other_value">A custom number of reference sequences are searched for alignments (speed decrease for high value)</option>
202 </param>
203 <when value="1">
204 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignment needs to be searched" help="The alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence; it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/>
205 </when>
206 <when value="other_value">
207 <param name="report_best_value" type="integer" min="2" max="100" value="2" label="Number of alignments to be made" help="Only the best one is reported. The computation speed decreases with high values."/>
208 <param name="report_best_min_lis" type="integer" min="0" max="100" value="2" label="Number of longest LIS an alignment needs to be searched" help="The alignments having the first INT longest LIS. LIS stands for Longest Increasing Subsequence; it is computed using seeds' positions to expand hits into longer matches prior to Smith-Waterman alignment. (--min_lis)"/>
209 </when>
210 </conditional>
211 </when>
212 <when value="num_alignments">
213 <conditional name="report_num_alignments">
214 <param name="report_num_alignments_type" type="select" label="Number of output alignments" help="(--num_alignments)">
215 <option value="0">All alignments reaching the E-value threshold are reported (very slow, this option is not suggested for high similarity rRNA databases)</option>
216 <option value="1" selected="true">The first alignment passing the E-value threshold is reported (very fast, best choice if only filtering is needed)</option>
217 <option value="other_value">A custom number of alignments are made and reported (speed decrease for high value)</option>
218 </param>
219 <when value="0" />
220 <when value="1" />
221 <when value="other_value">
222 <param name="report_num_alignments_value" type="integer" min="0" max="100" value="1" label="Number of alignments to be made and reported" help=""/>
223 </when>
224 </conditional>
225 </when>
226 </conditional>
227
228 <param name="e_value" type="float" min="0" max="10" value="1" label="E-value threshold" help="(-e)"/>
229 <param name="match" type="integer" min="0" max="10" value="2" label="SW score for a match" help="(--match)"/>
230 <param name="mismatch" type="integer" min="-10" max="0" value="-3" label="SW penalty for a mismatch" help="(--mismatch)"/>
231 <param name="gap_open" type="integer" min="0" max="10" value="5" label="SW penalty for introducing a gap" help="(--gap_open)"/>
232 <param name="gap_ext" type="integer" min="0" max="10" value="2" label="SW penalty for extending a gap" help="(--gap_ext)"/>
233 <param name="ambiguous_letter" type="integer" min="-10" max="0" value="-3" label="SW penalty for ambiguous letters (N's)" help="(-N)"/>
133 </inputs> 234 </inputs>
134 <outputs> 235 <outputs>
135 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat" 236 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat"
136 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})"> 237 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})">
137 <filter>aligned_fastx['aligned_fastx_selector']</filter> 238 <filter>aligned_fastx['aligned_fastx_selector']</filter>
243 344
244 The user may also choose to use their own rRNA databases. 345 The user may also choose to use their own rRNA databases.
245 346
246 .. class:: warningmark 347 .. class:: warningmark
247 348
248 Note that your personal databases are indexed each time, and that 349 Note that your personal databases are indexed each time. The public ribosomal
249 this may take some time depending on the size of the given database. 350 databases are indexed when added, but they can be re-indexed with non-default indexing
351 parameters. The indexing may take some time depending on the size of the given database.
352
250 ]]> 353 ]]>
251 </help> 354 </help>
252 355
253 <citations> 356 <citations>
254 <citation type="doi">10.1093/bioinformatics/bts611</citation> 357 <citation type="doi">10.1093/bioinformatics/bts611</citation>