comparison tools/mira4_9/mirabait/mira4_9_mirabait.xml @ 0:c9269b5803d8 draft default tip

planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_9 commit 9a6640a7b7f516d028a9852f7bbf39083e50188f
author peterjc
date Wed, 07 Oct 2015 10:31:49 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c9269b5803d8
1 <tool id="mira4_9_mirabait" name="MIRA v4.9 mirabait" version="0.0.1">
2 <description>Filter reads using kmer matches</description>
3 <requirements>
4 <requirement type="binary">mirabait</requirement>
5 <requirement type="package" version="4.9.5">MIRA</requirement>
6 </requirements>
7 <stdio>
8 <!-- Assume anything other than zero is an error -->
9 <exit_code range="1:" />
10 <exit_code range=":-1" />
11 </stdio>
12 <version_command interpreter="python">mira_check_version.py ${MIRA4_9}mirabait</version_command>
13 <command interpreter="python">./mira_check_version.py \${MIRA4_9}mirabait 4.9 &amp;&amp;
14 ##The version check above must succeed (mirabait v4.9 on the path) before we actually run it
15 ##-----------------------------------------------------------------------
16 \${MIRA4_9}mirabait -k "$kmer_length" -n "$min_occurence" -b "$bait_file"
17 ##-----------------------------------------------------------------------
18 ##Must now map Galaxy datatypes to MIRA file types...
19 ##exploiting the polymorphic naming of the input read parameter!
20 #if $reads.filename.ext.startswith("fastq")
21 ##MIRA doesn't like fastqsanger etc, just plain old fastq
22 -f fastq -t fastq
23 #elif $reads.filename.ext == "mira"
24 ##We're calling *.maf the "mira" format in Galaxy (name space collision)
25 -f maf -t maf
26 #else
27 ##MIRA is happy with fasta as a name, so pass the Galaxy extension through unchanged
28 -f "$reads.filename.ext" -t "$reads.filename.ext"
29 #end if
30 ##-----------------------------------------------------------------------
31 #if str($output_choice_cond.output_choice)=="both"
32 -o "$output_pos" -O "$output_neg"
33 #elif str($output_choice_cond.output_choice)=="pos"
34 -o "$output_pos"
35 #elif str($output_choice_cond.output_choice)=="neg"
36 -i -O "$output_neg"
37 #end if
38 ##-----------------------------------------------------------------------
39 ##Do we need to ignore the reverse strand?
40 #if str($strand_choice) == "fwd"
41 -r
42 #end if
43 ##-----------------------------------------------------------------------
44 ##Default is to mark k-mers with upper case; -c is passed when the user asked to keep the input case
45 #if str($output_case) == "original"
46 -c
47 #end if
48 ##-----------------------------------------------------------------------
49 #if str($reads.type) == "paired"
50 #if $reads.filename.ext != $reads.filename2.ext
51 ##TODO: Is there a better way to signal an error to Galaxy here? For now print a message and use false to force a non-zero exit
52 ; echo "ERROR: Paired read datatype mis-match!" ; false
53 #end if
54 -p "$reads.filename" "$reads.filename2"
55 #elif str($reads.type) == "interleaved"
56 -P "$reads.filename"
57 #elif str($reads.type) == "none"
58 "$reads.filename"
59 #end if
60 </command>
61 <inputs>
62 <!-- TODO: mirabait now allows multiple input files, and can do multiple outputs - or merge into one? -->
63 <!-- TODO: define a new Galaxy datatype for the bait hash file? -->
64 <param name="bait_file" type="data" format="fasta,fastq,mira" required="true" label="Bait file (what to look for)" />
65 <conditional name="reads">
66 <param name="type" type="select" label="Are these paired reads?">
67 <option value="paired">Paired reads (as two files)</option>
68 <option value="interleaved">Paired reads (as one interleaved file)</option>
69 <option value="none">Unpaired reads (single or orphan reads as one file)</option>
70 </param>
71 <when value="paired">
72 <param name="filename" type="data" format="fastq,fasta" required="true" label="Read file one"/>
73 <param name="filename2" type="data" format="fastq,fasta" required="true" label="Read file two"/>
74 </when>
75 <when value="interleaved">
76 <param name="filename" type="data" format="fasta,fastq" required="true" label="Interleaved paired reads to search" />
77 </when>
78 <when value="none">
79 <param name="filename" type="data" format="fasta,fastq,mira" required="true" label="Reads to search" />
80 </when>
81 </conditional>
82 <conditional name="output_choice_cond">
83 <param name="output_choice" type="select" label="Output positive matches, negative matches, or both?">
84 <option value="both">Both positive matches and negative matches, as two files</option>
85 <option value="pos" selected="true">Just positive matches, as a single file</option>
86 <option value="neg">Just negative matches, as a single file</option>
87 </param>
88 <!-- It seems we need these dummy entries here; compare this to indels/indel_sam2interval.xml -->
89 <when value="both" />
90 <when value="pos" />
91 <when value="neg" />
92 </conditional>
93 <param name="output_case" type="select" label="How to use sequence case in output?">
94 <option value="original">Preserve case from input</option>
95 <option value="bait">Mark k-mer matches in upper case</option>
96 </param>
97 <param name="strand_choice" type="select" label="Check for matches on both strands?">
98 <option value="both">Check both strands</option>
99 <option value="fwd">Just forward strand</option>
100 </param>
101 <param name="kmer_length" type="integer" value="31" min="1" max="256"
102 label="k-mer length" help="Maximum 256" />
103 <!-- The parameter name keeps the historical spelling "min_occurence" because the command template and the tests reference it; only the visible label is corrected --> <param name="min_occurence" type="integer" value="1" min="1"
104 label="Minimum k-mer occurrence"
105 help="How many k-mer matches do you want per read? Minimum one" />
106 </inputs>
107 <outputs>
108 <data name="output_pos" format_source="filename" metadata_source="filename"
109 label="$reads.filename.name #if str($reads.type)=='paired' then 'and $reads.filename2.name' else ''# matching $bait_file.name">
110 <filter>output_choice_cond["output_choice"] != "neg"</filter>
111 </data>
112 <data name="output_neg" format_source="filename" metadata_source="filename"
113 label="$reads.filename.name #if str($reads.type)=='paired' then 'and $reads.filename2.name' else ''# not matching $bait_file.name">
114 <filter>output_choice_cond["output_choice"] != "pos"</filter>
115 </data>
116 </outputs>
117 <tests>
118 <test>
119 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
120 <param name="reads|type" value="none" />
121 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
122 <param name="output_choice" value="pos" />
123 <param name="output_case" value="original" />
124 <output name="output_pos" file="tvc_mini_bait_pos.fastq" ftype="fastqsanger" />
125 </test>
126 <test>
127 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
128 <param name="reads|type" value="none" />
129 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
130 <param name="output_case" value="bait" />
131 <output name="output_pos" file="tvc_mini_bait_pos_case.fastq" ftype="fastqsanger" />
132 </test>
133 <!-- NOTE(review): this test is identical to the preceding one (same inputs, same expected output); possibly a copy/paste leftover, confirm whether a different scenario was intended --> <test>
134 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
135 <param name="reads|type" value="none" />
136 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
137 <param name="output_case" value="bait" />
138 <output name="output_pos" file="tvc_mini_bait_pos_case.fastq" ftype="fastqsanger" />
139 </test>
140 <test>
141 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
142 <param name="reads|type" value="none" />
143 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
144 <param name="output_case" value="bait" />
145 <param name="kmer_length" value="32" />
146 <param name="min_occurence" value="50" />
147 <output name="output_pos" file="tvc_mini_bait_strict_case.fastq" ftype="fastqsanger" />
148 </test>
149 <test>
150 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
151 <param name="reads|type" value="none" />
152 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
153 <param name="output_choice" value="neg" />
154 <param name="output_case" value="original" />
155 <output name="output_neg" file="tvc_mini_bait_neg.fastq" ftype="fastqsanger" />
156 </test>
157 <test>
158 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
159 <param name="reads|type" value="none" />
160 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
161 <param name="output_choice" value="neg" />
162 <param name="output_case" value="bait" />
163 <output name="output_neg" file="tvc_mini_bait_neg_case.fastq" ftype="fastqsanger" />
164 </test>
165 <test>
166 <param name="bait_file" value="tvc_bait.fasta" ftype="fasta" />
167 <param name="reads|type" value="none" />
168 <param name="reads|filename" value="tvc_mini.fastq" ftype="fastqsanger" />
169 <param name="output_choice" value="both" />
170 <param name="output_case" value="original" />
171 <output name="output_pos" file="tvc_mini_bait_pos.fastq" ftype="fastqsanger" />
172 <output name="output_neg" file="tvc_mini_bait_neg.fastq" ftype="fastqsanger" />
173 </test>
174 </tests>
175 <help>
176 **What it does**
177
178 Runs the ``mirabait`` utility from MIRA v4.9 to filter your input reads
179 according to whether or not they contain perfect kmer matches to your
180 bait file. By default this looks for 31-mers (kmers or *k*-mers where
181 the fragment length *k* is 31), and only requires a single matching kmer.
182
183 The ``mirabait`` utility is useful in many applications and pipelines
184 outside of using the main MIRA tool for assembly or mapping.
185
186 .. class:: warningmark
187
188 Note ``mirabait`` cannot be used on protein (amino acid) sequences.
189
190 **Example Usage**
191
192 To remove over-abundant entries like rRNA sequences, run ``mirabait`` with
193 known rRNA sequences as the bait and select the *negative* matches.
194
195 To do targeted assembly by fishing out reads belonging to a gene and just
196 assemble these, run ``mirabait`` with the gene of interest as the bait and
197 select the *positive* matches.
198
199 To iteratively reconstruct mitochondria you could start by fishing out reads
200 matching any known mitochondrial sequence, assemble those, and repeat.
201
202
203 **Notes on paired reads**
204
205 .. class:: warningmark
206
207 Unlike ``mirabait`` from MIRA v4.0, this version is aware of paired reads
208 and will preserve the pairing (if either the forward or the reverse read
209 has enough *k*-mer matches, the pair is accepted).
210
211
212 **Citation**
213
214 If you use this Galaxy tool in work leading to a scientific publication please
215 cite the following papers:
216
217 Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
218 Galaxy tools and workflows for sequence analysis with applications
219 in molecular plant pathology. PeerJ 1:e167
220 http://dx.doi.org/10.7717/peerj.167
221
222 Bastien Chevreux, Thomas Wetter and Sándor Suhai (1999).
223 Genome Sequence Assembly Using Trace Signals and Additional Sequence Information.
224 Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB) 99, pp. 45-56.
225 http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html
226
227 This wrapper is available to install into other Galaxy Instances via the Galaxy
228 Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/mira_assembler_4_9
229 </help>
230 <!-- Citations: a DOI for the Galaxy wrapper paper and a BibTeX entry for the MIRA paper, matching the help text above --> <citations>
231 <citation type="doi">10.7717/peerj.167</citation>
232 <citation type="bibtex">@ARTICLE{Chevreux1999-mira3,
233 author = {B. Chevreux and T. Wetter and S. Suhai},
234 year = {1999},
235 title = {Genome Sequence Assembly Using Trace Signals and Additional Sequence Information},
236 journal = {Computer Science and Biology: Proceedings of the German Conference on Bioinformatics (GCB)},
237 volume = {99},
238 pages = {45-56},
239 url = {http://www.bioinfo.de/isb/gcb99/talks/chevreux/main.html}
240 }</citation>
241 </citations>
242 </tool>