comparison blast2rma.xml @ 0:fa3c3a64c993 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/megan commit 2a49a6cdc1b4d37ab30eb85b8c658ccf9f5a0644"
author iuc
date Wed, 24 Nov 2021 21:52:14 +0000
parents
children 2f8d3924bb3b
comparison
equal deleted inserted replaced
-1:000000000000 0:fa3c3a64c993
1 <tool id="megan_blast2rma" name="MEGAN: Generate RMA files" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>from BLAST output</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[
9 #import re
10
11 #if str($input_type_cond.input_type) in ['single', 'pair']:
12 #set read1 = $input_type_cond.read1
13 #set blast1 = $input_type_cond.blast1
14 #else:
15 ## Processing paired reads is tricky if we're
16 ## downstream from MALT. MALT doesn’t have a
17 ## paired-read mode, so it won’t attempt to analyze
18 ## reads in pairs. To do paired read processing,
19 ## set MALT to generate SAM files and then import the
20 ## SAM files into MEGAN, specifying paired read mode
21 ## there. If you have multiple SAM files for the same
22 ## sample, then import them all at the same time to
23 ## create one unified rma6 file.
24
25 #set read1 = $input_type_cond.reads_collection['forward']
26 #set blast1 = $input_type_cond.blast1
27 #end if
28
29 #if $read1.is_of_type('fasta', 'fasta.gz'):
30 #set read_ext = '.fasta'
31 #else:
32 #set read_ext = '.fastq'
33 #end if
34 #if $read1.ext.endswith('.gz'):
35 #set read_ext = $read_ext + '.gz'
36 #end if
37
38 #if $blast1.is_of_type('daa'):
39 #set blast_format = 'DAA'
40 #else if $blast1.is_of_type('txt'):
41 #set blast_format = 'BlastText'
42 #else if $blast1.is_of_type('blastxml'):
43 #set blast_format = 'BlastXML'
44 #else if $blast1.is_of_type('tabular'):
45 #set blast_format = 'BlastTab'
46 #else if $blast1.is_of_type('sam'):
47 #set blast_format = 'SAM'
48 #end if
49 #set blast_ext = '.' + $blast_format
50 #if $blast1.ext.endswith('.gz'):
51 #set blast_ext = $blast_ext + '.gz'
52 #end if
53
54 #set read1_identifier = 'read1' + $read_ext
55 ln -s '${read1}' '${read1_identifier}' &&
56
57 #set blast1_identifier = 'blast1' + $blast_ext
58 ln -s '${blast1}' '${blast1_identifier}' &&
59
60 #if str($input_type_cond.input_type) in ['pair', 'paired']:
61 #if str($input_type_cond.input_type) == 'pair':
62 #set read2 = $input_type_cond.read2
63 #set blast2 = $input_type_cond.blast2
64 #else if str($input_type_cond.input_type) == 'paired':
65 #set read2 = $input_type_cond.reads_collection['reverse']
66 #set blast2 = $input_type_cond.blast2
67 #end if
68 #set read2_identifier = 'read2' + $read_ext
69 ln -s '${read2}' '${read2_identifier}' &&
70 #set blast2_identifier = 'blast2' + $blast_ext
71 ln -s '${blast2}' '${blast2_identifier}' &&
72 #end if
73
74 blast2rma
75 #if str($input_type_cond.input_type) == 'single':
76 --in '${blast1_identifier}'
77 --reads '${read1_identifier}'
78 --out '${rma6_output}'
79 #else if str($input_type_cond.input_type) == 'pair':
80 --in '${blast1_identifier}' '${blast2_identifier}'
81 --reads '${read1_identifier}' '${read2_identifier}'
82 --paired
83 --pairedSuffixLength $input_type_cond.pairedSuffixLength
84 --out '${rma6_output}'
85 #else if str($input_type_cond.input_type) == 'paired':
86 --in '${blast1_identifier}' '${blast2_identifier}'
87 --reads '${read1_identifier}' '${read2_identifier}'
88 --paired
89 --pairedSuffixLength $input_type_cond.pairedSuffixLength
90 ## Strangely, megan requires an output
91 ## directory when processing paired reads
92 ## even though it produces a single file.
93 ## We'll accommodate this by prepending ./
94 ## to a temporary output file and then move
95 ## it later.
96 --out './tmp.rma6'
97 #end if
98 --format '${blast_format}'
99 --blastMode '${blastMode}'
100 --threads \${GALAXY_SLOTS:-8}
101 --useCompression false
102 $advanced_options.longReads
103 --maxMatchesPerRead '$advanced_options.maxMatchesPerRead'
104 $advanced_options.classify
105 --minScore $advanced_options.minScore
106 --maxExpected $advanced_options.maxExpected
107 --minPercentIdentity $advanced_options.minPercentIdentity
108 --topPercent $advanced_options.topPercent
109 --minSupportPercent $advanced_options.minSupportPercent
110 --minSupport $advanced_options.minSupport
111 --minPercentReadCover $advanced_options.minPercentReadCover
112 --minPercentReferenceCover $advanced_options.minPercentReferenceCover
113 --minReadLength $advanced_options.minReadLength
114 --lcaAlgorithm '$advanced_options.lcaAlgorithm'
115 --lcaCoveragePercent $advanced_options.lcaCoveragePercent
116 --readAssignmentMode '$advanced_options.readAssignmentMode'
117 #if str($advanced_options.con_file_cond.conFile) == 'yes':
118 --conFile '$advanced_options.con_file_cond.con_file'
119 #end if
120 #if str($input_type_cond.input_type) == 'paired':
121 && mv './tmp.rma6' '$rma6_output'
122 #end if
123 ]]></command>
124 <inputs>
125 <expand macro="input_type_cond"/>
126 <param argument="--blastMode" type="select" label="Blast mode">
127 <expand macro="blast_mode_options"/>
128 </param>
129 <section name="advanced_options" title="Advanced options" expanded="false">
130 <param argument="--longReads" type="boolean" truevalue="--longReads" falsevalue="" checked="false" label="Parse and analyse input reads as long reads?"/>
131 <param argument="--maxMatchesPerRead" type="integer" value="100" label="Maximum matches per read"/>
132 <param argument="--classify" type="boolean" truevalue="--classify" falsevalue="" checked="true" label="Run classification algorithm?"/>
133 <expand macro="common_blast_params"/>
134 <param argument="--minSupportPercent" type="float" value="0.05" min="0.0" max="100.0" label="Minimum support as percent of assigned reads" help="0 value ignores"/>
135 <param argument="--minSupport" type="integer" value="0" label="Minimum support" help="0 value ignores"/>
136 <param argument="--minPercentReadCover" type="float" value="0.0" min="0.0" max="100.0" label="Minimum percent of read length to be covered by alignments"/>
137 <param argument="--minPercentReferenceCover" type="float" value="0.0" min="0.0" max="100.0" label="Minimum percent of reference length to be covered by alignments"/>
138 <param argument="--minReadLength" type="integer" value="0" label="Minimum read length"/>
139 <param argument="--lcaAlgorithm" type="select" label="Select the LCA algorithm to use for taxonomic assignment">
140 <option value="naive" selected="true">naive</option>
141 <option value="weighted">weighted</option>
142 <option value="longReads">longReads</option>
143 </param>
144 <param argument="--lcaCoveragePercent" type="float" value="100.0" min="0.0" max="100.0" label="Percent for the LCA to cover"/>
145 <param argument="--readAssignmentMode" type="select" label="Select the read assignment mode">
146 <option value="alignedBases" selected="true">alignedBases</option>
147 <option value="readCount">readCount</option>
148 </param>
149 <conditional name="con_file_cond">
150 <param argument="--conFile" type="select" label="Process a file of contaminant taxa" help="One id or name per line">
151 <option value="no" selected="true">No</option>
152 <option value="yes">Yes</option>
153 </param>
154 <when value="no"/>
155 <when value="yes"> <!-- the dataset parameter is named con_file so it does not collide with the conFile selector above -->
156 <param name="con_file" argument="--conFile" type="data" format="txt" label="File of contaminant taxa"/>
157 </when>
158 </conditional>
159 </section>
160 </inputs>
161 <outputs>
162 <data name="rma6_output" format="rma6"/>
163 </outputs>
164 <tests>
165 <!-- Single dataset input -->
166 <test expect_num_outputs="1">
167 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
168 <param name="blast1" value="blast_R1.txt" ftype="txt"/>
169 <param name="blastMode" value="BlastN"/>
170 <output name="rma6_output" ftype="rma6">
171 <assert_contents>
172 <has_size value="19596"/>
173 </assert_contents>
174 </output>
175 </test>
176 <!-- Single dataset input, contaminants file. NOTE(review): re-verify the expected size below now that the contaminants dataset is actually passed to blast2rma -->
177 <test expect_num_outputs="1">
178 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
179 <param name="blast1" value="blast_R1.txt" ftype="txt"/>
180 <param name="blastMode" value="BlastN"/>
181 <param name="conFile" value="yes"/>
182 <param name="con_file" value="contaminants.txt" ftype="txt"/>
183 <output name="rma6_output" ftype="rma6">
184 <assert_contents>
185 <has_size value="19596"/>
186 </assert_contents>
187 </output>
188 </test>
189 <!-- Dataset pair input -->
190 <test expect_num_outputs="1">
191 <param name="input_type" value="pair"/>
192 <param name="read1" value="13-1941-6_S4_L001_R1_600000.fastq.gz" ftype="fastqsanger.gz"/>
193 <param name="read2" value="13-1941-6_S4_L001_R2_600000.fastq.gz" ftype="fastqsanger.gz"/>
194 <param name="blast1" value="blast_R1.txt" ftype="txt"/>
195 <param name="blast2" value="blast_R2.txt" ftype="txt"/>
196 <param name="blastMode" value="BlastN"/>
197 <output name="rma6_output" ftype="rma6">
198 <assert_contents>
199 <has_size value="39887"/>
200 </assert_contents>
201 </output>
202 </test>
203 <!-- List of dataset pairs input -->
204 <test expect_num_outputs="1">
205 <param name="input_type" value="paired"/>
206 <param name="reads_collection">
207 <collection type="paired">
208 <element name="forward" value="13-1941-6_S4_L001_R1_600000.fastq.gz"/>
209 <element name="reverse" value="13-1941-6_S4_L001_R2_600000.fastq.gz"/>
210 </collection>
211 </param>
212 <param name="blast1" value="blast_R1.txt" ftype="txt"/>
213 <param name="blast2" value="blast_R2.txt" ftype="txt"/>
214 <param name="blastMode" value="BlastN"/>
215 <output name="rma6_output" ftype="rma6">
216 <assert_contents>
217 <has_size value="39806"/>
218 </assert_contents>
219 </output>
220 </test>
221 </tests>
222 <help>
223 **What it does**
224
225 Computes MEGAN RMA files from BLAST (or similar) files. Inputs consist of reads in fasta or fastqsanger format (gzip compression
226 is supported) and associated Blast files. Each read file should have been used previously as the Blast input to produce the
227 associated Blast file for this tool.
228
229 This wrapper supports the following formats for the input Blast file. The SAM, Tabular and Text formats can be produced by
230 the Galaxy MALT Analyzer tool. When these formats are used, this tool will apply the corresponding SAM, BlastTab and BlastText format options
231 required by MEGAN.
232
233 * **DIAMOND Alignment Archive (DAA)** - a binary alignment format produced by the DIAMOND sequence aligner
234 * **BlastXML** - XML output from Blast
235 * **Sequence Alignment/Map (SAM)** - a tab-delimited text format consisting of a header section, which is optional, and an alignment section
236 * **Tabular** - information presented in the form of a table with rows and columns
237 * **Text** - plain text format
238
239 This tool outputs a Read-Match Archive (RMA) file. MEGAN uses an update of the original RMA file format known as RMA6. This update
240 requires less disk space for files.
241 </help>
242 <citations>
243 <citation type="doi">10.1101/050559</citation>
244 </citations>
245 </tool>
246