comparison sortmerna.xml @ 1:b482293b2987 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sortmerna commit 844d980a7de5f199487ca9591420a23df63c5246-dirty
author iuc
date Wed, 05 Aug 2015 02:50:43 -0400
parents a8ac09e937f3
children 3699b6b771e0
comparison
equal deleted inserted replaced
0:a8ac09e937f3 1:b482293b2987
1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="1.9.0"> 1 <tool id="bg_sortmerna" name="Filter with SortMeRNA" version="2.0.0">
2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description> 2 <description>Fast and accurate filtering of ribosomal RNAs in metatranscriptomic data</description>
3 <requirements> 3 <requirements>
4 <requirement type='package' version="1.9">sortmerna</requirement> 4 <requirement type='package' version="2.0">sortmerna</requirement>
5 </requirements> 5 </requirements>
6 <stdio> 6 <stdio>
7 <regex match="This program builds a Burst trie on an input rRNA database" 7 <regex match="This program builds a Burst trie on an input rRNA database"
8 source="both" 8 source="both"
9 level="fatal" 9 level="fatal"
16 <version_command> 16 <version_command>
17 <![CDATA[ 17 <![CDATA[
18 sortmerna --version 2>&1|grep 'SortMeRNA version' 18 sortmerna --version 2>&1|grep 'SortMeRNA version'
19 ]]> 19 ]]>
20 </version_command> 20 </version_command>
21 <command interpreter="python"> 21 <command>
22 <![CDATA[ 22 <![CDATA[
23 sortmerna.py 23 #set $ref = ''
24 --sortmerna " 24 #set $sep=''
25 $strand_search 25 #if str( $databases_type.databases_selector ) == 'history':
26 #if str( $read_family.read_family_selector ) == 'other': 26 #for $db in $databases_type.database_name
27 --I $input_reads -r $read_family.ratio_parameter 27 #set $ref += $sep + str($db) + ',' + $os.path.splitext($os.path.basename(str($db)))[0]
28 #else: 28 #set $sep = ':'
29 $read_family.read_family_selector $input_reads 29 #end for
30 indexdb_rna --ref $ref
31 &&
32 #else:
33 ## databases path is not directly accessible, must match by hand with LOC file contents
34 #set $data_table = dict([(_[0], _[2]) for _ in $databases_type.input_databases.input.options.tool_data_table.data])
35 #for $db in $databases_type.input_databases.value
36 #set $ref += $sep + $data_table[$db] + ',' + $os.path.splitext($data_table[$db])[0]
37 #set $sep = ':'
38 #end for
39 #end if
40 sortmerna --ref $ref --reads $input_reads --aligned aligned
41 #if str( $sequencing_type.sequencing_type_selector ) == 'paired'
42 $sequencing_type.paired_type
43 #end if
44 $strand_search
45 $aligned_fastx.aligned_fastx_selector
46 #if $aligned_fastx.aligned_fastx_selector == '--fastx'
47 #if $aligned_fastx.other
48 --other other_file
30 #end if 49 #end if
31 50 #end if
32 #if str( $sequencing_type.sequencing_type_selector ) == 'paired': 51 $aligned_sam.aligned_sam_selector
33 $sequencing_type.paired_type 52 #if $aligned_sam.aligned_sam_selector == '--sam'
34 #end if 53 $aligned_sam.sq
35 54 #end if
36 #if $outputs_selected: 55 $aligned_blast
37 #if 'accept' in $outputs_selected.value: 56 $log
38 --accept accept_file 57 -a \${GALAXY_SLOTS:-1}
39 #end if
40 #if 'other' in $outputs_selected.value:
41 --other other_file
42 #end if
43 #end if
44
45 $log
46 -a \${GALAXY_SLOTS:-4}
47 "
48 #if str( $databases_type.databases_selector ) == 'history':
49 --buildtrie
50 #for $db in $databases_type.input_databases
51 $db.database_name
52 #end for
53 #else:
54 ## databases path is not directly accessible, must match by hand with LOC file contents
55 ${' '.join([dict([(x[0], x[2]) for x in $databases_type.input_databases.input.options.tool_data_table.data])[y]
56 for y in $databases_type.input_databases.value])}
57 #end if
58 ]]> 58 ]]>
59 </command> 59 </command>
60 <inputs> 60 <inputs>
61 <conditional name="read_family"> 61 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences" help="In FASTA or FASTQ format (--reads)"/>
62 <param name="read_family_selector" type="select" format="text" label="Sequencing technology of querying sequences (reads)" 62 <conditional name="sequencing_type">
63 help="The Illumina platform is more common for large scale metatranscriptomic projects requiring a high throughput."> 63 <param name="sequencing_type_selector" type="select" label="Sequencing type">
64 <option value="--I">Illumina Solexa</option> 64 <option value="not_paired">Reads are not paired</option>
65 <option value="--454">454 Roche</option> 65 <option value="paired">Reads are paired</option>
66 <option value="other">Other</option> 66 </param>
67 <when value="paired">
68 <param name="paired_type" type="select" display="radio" label="If one of the paired-end reads aligns and the other one does not">
69 <option value="">leave the reads split between aligned and rejected files</option>
70 <option value="--paired-in">output both reads to aligned file (--paired-in)</option>
71 <option value="--paired-out">output both reads to rejected file (--paired-out)</option>
72 </param>
73 </when>
74 </conditional>
75
76 <param name="strand_search" type="select" label="Which strands to search" display="radio">
77 <option value="">Search both strands</option>
78 <option value="-F">Search only the forward strand (-F)</option>
79 <option value="-R">Search only the reverse-complementary strand (-R)</option>
67 </param> 80 </param>
68 <when value="other"> 81
69 <param name="ratio_parameter" type="float" value="1" min="0" max="1" 82 <conditional name="databases_type">
70 label="Ratio parameter (the number of hits on the read / read length)" 83 <param name="databases_selector" type="select" label="Databases to query"
71 help="The ratio parameter for SortMeRNA has been set to r=0.25 for Illumina Solexa reads and to r=0.15 for 454 Roche reads. 84 help="Public rRNA databases provided with SortMeRNA have been indexed.
72 For other read types, if the sequencing technology produces high quality reads with a low substitution error rate 85 On the contrary, personal databases must be indexed each time SortMeRNA is launched.
73 (0.1 substitutions per 100 bases, such as Illumina), then the ratio parameter can be set to r=[0.23,0.27]. 86 Please be patient, this may take some time depending on the size of the given database.">
74 If the sequencing technology has a high indel error rate (1-2 indels per 100 bases, such as 454 or Ion Torrent), 87 <option value="cached" selected="true">Public ribosomal databases</option>
75 then the ratio parameter can be set to r=[0.13,0.17] (-r)."/> 88 <option value="history">Databases from your history</option>
76 </when> 89 </param>
77 </conditional> 90 <when value="cached">
78 <param format="fasta,fastq" name="input_reads" type="data" label="Querying sequences (reads)" help=""/> 91 <param name="input_databases" label="rRNA databases" type="select" display="checkboxes" multiple="true">
79 <conditional name="sequencing_type"> 92 <options from_data_table="rRNA_databases" />
80 <param name="sequencing_type_selector" type="select" label="Sequencing type"> 93 <validator type="no_options" message="Select at least one database"/>
81 <option value="not_paired">Reads are not paired</option> 94 </param>
82 <option value="paired">Reads are paired</option> 95 </when>
96 <when value="history">
97 <param name="database_name" type="data" format="fasta" multiple="true" label="rRNA databases"
98 help="Your databases will be indexed first, which may take up to several minutes."/>
99 </when>
100 </conditional>
101
102 <!-- Outputs -->
103 <conditional name="aligned_fastx">
104 <param name="aligned_fastx_selector" type="select" label="Include aligned reads in FASTA/FASTQ format">
105 <option value="--fastx">Yes (--fastx)</option>
106 <option value="">No</option>
107 </param>
108 <when value="--fastx">
109 <param name="other" type="boolean" label="Include rejected reads file" help="(--other)" />
110 </when>
111 <when value="" />
112 </conditional>
113 <conditional name="aligned_sam">
114 <param name="aligned_sam_selector" type="select" label="Include alignments in SAM format">
115 <option value="--sam">Yes (--sam)</option>
116 <option value="">No</option>
117 </param>
118 <when value="--sam">
119 <param name="sq" type="boolean" truevalue="--SQ" falsevalue="" label="Add SQ tags to the SAM file" help="(--SQ)" />
120 </when>
121 <when value="" />
122 </conditional>
123 <param name="aligned_blast" type="select" label="Include alignments in BLAST-like format">
124 <option value="--blast 0">pairwise (--blast 0)</option>
125 <option value="--blast 1">tabular BLAST -m 8 format (--blast 1)</option>
126 <option value="--blast 2">tabular + column for CIGAR (--blast 2)</option>
127 <option value="--blast 3">tabular + columns for CIGAR and query coverage (--blast 3)</option>
128 <option value="" selected="true">No</option>
83 </param> 129 </param>
84 <when value="paired"> 130 <param name="log" type="boolean" checked="False" truevalue="--log" falsevalue="" label="Generate statistics file"
85 <param name="paired_type" type="select" display="radio" label="If one read of a pair is accepted and the other not, output both reads" 131 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
86 help="SortMeRNA does not use the pairing information for filtering RNA,
87 however if one read of a pair is accepted and the other is not,
88 the resulting output may break apart the pair into two separate files.
89 The purpose of 'Reads are paired' option is to preserve the pairing of the reads.">
90 <option value="--paired-in">to accepted file (--paired-in)</option>
91 <option value="--paired-out">to rejected file (--paired-out)</option>
92 </param>
93 </when>
94 </conditional>
95
96 <param name="strand_search" type="select" label="Which strands to search" display="radio">
97 <option value="">Search both strands</option>
98 <option value="-F">Search only the forward strand (-F)</option>
99 <option value="-R">Search only the reverse-complementary strand (-R)</option>
100 </param>
101
102 <conditional name="databases_type">
103 <param name="databases_selector" type="select" label="Databases to query"
104 help="Public rRNA databases provided with SortMeRNA have been indexed.
105 On the contrary, personal databases must be indexed each time SortMeRNA is launched.
106 Please be patient, this may take some time depending on the size of the given database.">
107 <option value="cached" selected="true">Public ribosomal databases</option>
108 <option value="history">Databases from your history</option>
109 </param> 132 </param>
110 <when value="cached">
111 <param name="input_databases" label="rRNA database" type="select" display="checkboxes" multiple="true">
112 <options from_data_table="rRNA_databases" />
113 <validator type="no_options" message="Select at least one database"/>
114 </param>
115 </when>
116 <when value="history">
117 <repeat name="input_databases" title="Database" min="1">
118 <param name="database_name" type="data" format="fasta" label="rRNA database"
119 help="Your database will be indexed first, which may take up to several minutes."/>
120 </repeat>
121 </when>
122 </conditional>
123
124 <!-- Outputs -->
125 <param name="outputs_selected" type="select" display="checkboxes" multiple="true" label="Output options">
126 <option value="accept" selected="True">Reads matching to at least one database</option>
127 <option value="other">Reads not found in any database</option>
128 </param>
129 <param name="log" type="boolean" checked="False" truevalue="--log log_file" falsevalue="" label="Statistics file"
130 help="Generates statistics for the rRNA content of reads, as well as rRNA subunit distribution. (--log)">
131 </param>
132
133 </inputs> 133 </inputs>
134 <outputs> 134 <outputs>
135 <data format_source="input_reads" name="output_accept" from_work_dir="accept_file.dat" 135 <data format_source="input_reads" name="output_fastx" from_work_dir="aligned.dat"
136 label="Matching reads on ${on_string} (${input_reads.datatype.file_ext})"> 136 label="Aligned reads on ${on_string} (${input_reads.datatype.file_ext})">
137 <filter>outputs_selected and 'accept' in outputs_selected</filter> 137 <filter>aligned_fastx['aligned_fastx_selector']</filter>
138 </data> 138 </data>
139 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat" 139 <data format_source="input_reads" name="output_other" from_work_dir="other_file.dat"
140 label="Reads not found on ${on_string} (${input_reads.datatype.file_ext})"> 140 label="Rejected reads on ${on_string} (${input_reads.datatype.file_ext})">
141 <filter>outputs_selected and 'other' in outputs_selected</filter> 141 <filter>aligned_fastx['aligned_fastx_selector'] and aligned_fastx['other']</filter>
142 </data> 142 </data>
143 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="log_file.log"> 143 <data format="sam" name="output_sam" from_work_dir="aligned.sam"
144 label="Alignments on ${on_string} (SAM)">
145 <filter>aligned_sam['aligned_sam_selector']</filter>
146 </data>
147 <data format="tabular" name="output_blast" from_work_dir="aligned.blast"
148 label="Alignments on ${on_string} (BLAST)">
149 <filter>aligned_blast</filter>
150 <change_format>
151 <when input="aligned_blast" value="--blast 0" format="txt" />
152 </change_format>
153 </data>
154 <data format="txt" name="output_log" label="${tool.name} statistics (txt)" from_work_dir="aligned.log">
144 <filter>log</filter> 155 <filter>log</filter>
145 </data> 156 </data>
146 </outputs> 157 </outputs>
147 <tests> 158 <tests>
148 <test> 159 <test>
149 <param name="read_family_selector" value="I" /> 160 <param name="input_reads" value="read_small.fastq" />
150 <param name="input_reads" value="sortmerna_wrapper_in1.fastq" />
151 <param name="sequencing_type_selector" value="not_paired" /> 161 <param name="sequencing_type_selector" value="not_paired" />
152 <param name="strand_search" value="" /> 162 <param name="strand_search" value="" />
153 <param name="databases_selector" value="cached" /> 163 <param name="databases_selector" value="history" />
154 <param name="input_databases" value="rfam-5.8s,rfam-5s" /> 164 <param name="database_name" value="ref_small.fasta" />
155 <param name="outputs_selected" value="accept,other" /> 165 <param name="other" value="True" />
156 <param name="log" value="" /> 166 <param name="log" value="" />
157 <param name="options_type_selector" value="less" /> 167 <output name="output_fastx" file="sortmerna_wrapper_accept1.fastq" />
158 <output name="output_accept" file="sortmerna_wrapper_accept1.fastq" />
159 <output name="output_other" file="sortmerna_wrapper_other1.fastq" /> 168 <output name="output_other" file="sortmerna_wrapper_other1.fastq" />
169 <output name="output_sam" file="sortmerna_wrapper_sam1.sam" lines_diff="2" />
170 </test>
171 <test>
172 <param name="input_reads" value="read_small.fasta" />
173 <param name="sequencing_type_selector" value="not_paired" />
174 <param name="strand_search" value="" />
175 <param name="databases_selector" value="history" />
176 <param name="database_name" value="ref_small.fasta" />
177 <param name="other" value="True" />
178 <param name="log" value="" />
179 <output name="output_fastx" file="sortmerna_wrapper_accept2.fasta" />
180 <output name="output_other" file="sortmerna_wrapper_other2.fasta" />
181 <output name="output_sam" file="sortmerna_wrapper_sam2.sam" lines_diff="2" />
160 </test> 182 </test>
161 </tests> 183 </tests>
162 <help> 184 <help>
163 <![CDATA[ 185 <![CDATA[
164 **What it does** 186 **What it does**