<!--
view berokka.xml @ 0:9a1626faa05c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/berokka commit 387f04ffbf5205aaaa7b46e9e3d518edb62a538f
author iuc
date Mon, 25 Mar 2019 12:55:16 -0400
parents
children f91f6054fca7
line wrap: on
line source
-->

<tool id="berokka" name="Berokka" version="0.2">
    <description>Trim, circularise, orient and filter long read bacterial genome assemblies</description>
    <!-- Package dependency named after the berokka executable called in <command>; pinned to 0.2, the same value as the tool's own version attribute. -->
    <requirements>
        <requirement type="package" version="0.2">berokka</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Run berokka on the selected assembly. Everything is written to the
        ## "default" directory, which is where <outputs> collects its files from.
        berokka
            --outdir default
            '${input_file}'

            ## The filter dataset is optional; pass the flag only when one was chosen.
            #if $filter_fasta
                --filter '${filter_fasta}'
            #end if

            --readlen '${read_length}'
            --fuzz '${fuzz}'

            ## --noanno is emitted only when the Annotation checkbox is unticked.
            #if not $anno
                --noanno
            #end if
    ]]></command>
    <inputs>
        <!-- Assembly to process; given to berokka as its positional argument. -->
        <param name="input_file"
               type="data"
               format="fasta"
               label="Input (FASTA)"
               help="Should be completed long-read assemblies in FASTA format, such as those from CANU or HGAP"/>
        <!-- Optional dataset; forwarded through the filter option only when selected. -->
        <param name="filter_fasta"
               type="data"
               format="fasta"
               optional="true"
               label="Filter (FASTA)"
               help="Give a fasta to use as a filter."/>
        <!-- Forwarded through the readlen option. -->
        <param name="read_length"
               type="integer"
               value="60000"
               min="28"
               label="Read Length"
               help="Approximate max read length (default '60000')"/>
        <!-- Forwarded through the fuzz option. -->
        <param name="fuzz"
               type="integer"
               value="5"
               label="Fuzz"
               help="Accept local alignment within X bp of global (default '5')"/>
        <!-- Ticked by default; unticking adds the noanno flag to the command line. -->
        <param name="anno"
               type="boolean"
               checked="true"
               label="Annotation"
               help="Annotate Trimmed FASTA"/>
    </inputs>
    <outputs>
        <!-- Both datasets are picked up from the "default" directory that the command passes as the output directory. -->
        <data name="trimmed"
              format="fasta"
              from_work_dir="default/02.trimmed.fa"
              label="${tool.name} on ${on_string}: Trimmed"/>
        <data name="results"
              format="tabular"
              from_work_dir="default/03.results.tab"
              label="${tool.name} on ${on_string}: Results"/>
    </outputs>
    <tests>
        <!-- Test 1: default settings, no filter dataset, annotation on. -->
        <test>
            <param name="input_file" value="berokka_test1.fasta"/>
            <param name="read_length" value="60000"/>
            <param name="fuzz" value="5"/>
            <param name="anno" value="true"/>
            <output name="trimmed" file="trimmed_1" ftype="fasta"/>
            <output name="results" file="results_1" ftype="tabular"/>
        </test>
        <!-- Test 2: same input, also supplied as the optional filter dataset.
             Only parameters declared in <inputs> are set here. -->
        <test>
            <param name="input_file" value="berokka_test1.fasta"/>
            <param name="filter_fasta" value="berokka_test1.fasta"/>
            <param name="read_length" value="60000"/>
            <param name="fuzz" value="5"/>
            <param name="anno" value="true"/>
            <output name="trimmed" file="trimmed_2" ftype="fasta"/>
            <output name="results" file="results_2" ftype="tabular"/>
        </test>
        <!-- Test 3: non-default read length and fuzz, annotation switched off. -->
        <test>
            <param name="input_file" value="berokka_test1.fasta"/>
            <param name="read_length" value="100"/>
            <param name="fuzz" value="50"/>
            <param name="anno" value="false"/>
            <output name="trimmed" file="trimmed_3" ftype="fasta"/>
            <output name="results" file="results_3" ftype="tabular"/>
        </test>
    </tests>
   <help><![CDATA[
**Summary**

Trim, circularise, orient & filter long read bacterial genome assemblies.

There is already a good piece of software to trim/circularise and orient genome assemblies called Circlator. Please try that first!

You should only try Berokka if:

1. You only have the contig files and do not have the corrected reads anymore
2. Your contigs are simple cases with clear overhang and could be done manually with BLAST
3. Circlator fails on your data even after troubleshooting

NOTE: orientation to dnaA or rep genes is not yet implemented.

**Input**

Input should be completed long-read assemblies in FASTA format, such as those from CANU or HGAP.

**Output**

1. trimmed: The (possibly) trimmed sequences (FASTA)

2. results: Summary of results (TSV)

**Options**

* `Filter <FASTA>` allows you to remove contigs which match 50% of sequences in this file. Berokka comes with the standard PacBio control sequence. You can provide your own FASTA file using this option.

* `Read Length <LENGTH>` can be used for datasets that won't seem to circularise. It affects the length of the match it attempts to make using BLAST.

* `Fuzz` can be used to accept local alignment within X bp of global (default '5')

* `Annotation` can be set to "No" to ensure that the FASTA descriptions are not altered between the input and output FASTA files.

    ]]></help>
    <citations>
        <!-- BibTeX entry of type UNPUBLISHED; it cites the project through its GitHub repository URL. -->
        <citation type="bibtex">
@UNPUBLISHED{Seemann2016,
    author = {Seemann, Torsten},
    title = {Berokka: Faster Trim, circularise and orient long read bacterial genome assemblies},
    year = {2016},
    url = {https://github.com/tseemann/berokka},
}
        </citation>
    </citations>
</tool>