diff berokka.xml @ 0:9a1626faa05c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/berokka commit 387f04ffbf5205aaaa7b46e9e3d518edb62a538f
author iuc
date Mon, 25 Mar 2019 12:55:16 -0400
parents
children f91f6054fca7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/berokka.xml	Mon Mar 25 12:55:16 2019 -0400
@@ -0,0 +1,107 @@
+<tool id="berokka" name="Berokka" version="0.2">
+    <description>Trim, circularise, orient and filter long read bacterial genome assemblies</description>
+    <requirements>
+        <requirement type="package" version="0.2">berokka</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        berokka 
+            --outdir default '${input_file}'
+
+	    #if $filter_fasta:
+                --filter '${filter_fasta}'
+            #end if
+
+            --readlen '${read_length}'
+
+            --fuzz '${fuzz}'
+
+            #unless $anno:
+                --noanno
+            #end unless
+
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data"  format="fasta" label="Input (FASTA)" help="Should be completed long-read assemblies in FASTA format, such as those from CANU or HGAP"/>
+        <param name="filter_fasta" optional="true" type="data" format="fasta" label="Filter (FASTA)" help="Give a fasta to use as a filter."/>
+        <param name="read_length" type="integer" value="60000" min="28"  label="Read Length" help="Approximate max read length (default '60000')"/>
+        <param name="fuzz" type="integer" value="5" label="Fuzz" help="Accept local alignment within X bp of global (default '5')"/>	
+        <param name="anno" type="boolean" checked="true" label="Annotation" help="Annotate Trimmed FASTA"/>
+    </inputs>
+    <outputs>
+        <data name="trimmed" format="fasta" from_work_dir="default/02.trimmed.fa" label="${tool.name} on ${on_string}: Trimmed"/>
+        <data name="results" format="tabular" from_work_dir="default/03.results.tab" label="${tool.name} on ${on_string}: Results"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="berokka_test1.fasta"/>
+            <param name="read_length" value="60000"/>
+            <param name="fuzz" value="5"/>
+            <param name="anno" value="true"/>
+            <output name="trimmed" file="trimmed_1" ftype="fasta"/>
+            <output name="results" file="results_1" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="input_file" value="berokka_test1.fasta"/>
+            <param name="filter_select" value="true"/>
+            <param name="filter_fasta" value="berokka_test1.fasta"/>
+            <param name="read_length" value="60000"/>
+            <param name="fuzz" value="5"/>
+            <param name="anno" value="true"/>
+            <output name="trimmed" file="trimmed_2" ftype="fasta"/>
+            <output name="results" file="results_2" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="input_file" value="berokka_test1.fasta"/>
+            <param name="read_length" value="100"/>
+            <param name="fuzz" value="50"/>
+            <param name="anno" value="false"/>
+            <output name="trimmed" file="trimmed_3" ftype="fasta"/>
+            <output name="results" file="results_3" ftype="tabular"/>
+        </test>
+   </tests>
+   <help><![CDATA[
+**Summary**
+
+Trim, circularise, orient & filter long read bacterial genome assemblies
+There is already a good piece of software to trim/circularise and orient genome assemblies called Circlator. Please try that first!
+
+You should only try Berokka if:
+
+1. You only have the contig files and do not have the corrected reads anymore
+2. Your contigs are simple cases with clear overhang and could be done manually with BLAST
+3. Circlator fails on your data even after troubleshooting
+
+NOTE: orientation to dnaA or rep genes is not yet implemented.
+
+**Input**
+
+Input should be completed long-read assemblies in FASTA format, such as those from CANU or HGAP.
+
+**Output**
+
+1. trimmed: The (possibly) trimmed sequences (FASTA)
+
+2. results: Summary of results (TSV)
+
+**Options**
+
+* `Filter <FASTA>` allows you to remove contigs which match 50% of sequences in this file. Berokka comes with the standard Pacbio control sequence. You can provide your own FASTA file using this option.
+
+* `Read Length <LENGTH>` can be used for datasets that won't seem to circularise. It affects the length of the match it attempts to make using BLAST.
+
+* `Fuzz` can be used to accept local alignment within X bp of global (default '5')
+
+* `Annotation` can be set to "No" to ensure that the FASTA descriptions are not altered between the input and output FASTA files.
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@UNPUBLISHED{Seemann2016,
+    author = {Seemann, Torsten},
+    title = {Berokka: Faster Trim, circularise and orient long read bacterial genome assemblies},
+    year = {2016},
+    url = {https://github.com/tseemann/berokka},
+}
+        </citation>
+    </citations>
+</tool>