view binning_refiner.xml @ 1:1c8d0916f97f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binning_refiner/ commit 05bd671a4765eb97583bac21729b26bec808adcd"
author iuc
date Wed, 27 Apr 2022 18:46:46 +0000
parents f350d182f786
children 1b18b36bad5e
line wrap: on
line source

<tool id="bin_refiner" name="Binning refiner" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>refines metagenome bins</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
#import re

## Binning refiner prepends the -p param value
## (which we set to be the string 'refined' in
## the command line) to a hard-coded string (i.e.,
## '_Binning_refiner_outputs') to set the base
## output directory.

## Binning_refiner requires 2 levels of directories
## for the input bins.
mkdir -p input_bin_dir/bins &&

## File names already staged in input_bin_dir/bins.  Two selected datasets
## can end up with the same sanitized identifier and extension; without this
## guard the second one would silently overwrite the first (gunzip redirect)
## or abort the job (ln -s onto an existing path).
#set $staged_names = []
#for $f in $input_bins
    ## Replace every character other than word characters, whitespace and '-'
    ## with '_' so the name cannot break out of the single quotes used below.
    #set $identifier = re.sub('[^\s\w\-]', '_', str($f.element_identifier))
    #set $file_name = $identifier + '.' + $f.ext
    ## Only colliding names get a numeric suffix; unique names are untouched.
    #set $duplicate = 1
    #while $file_name in $staged_names
        #set $file_name = $identifier + '_' + str($duplicate) + '.' + $f.ext
        #set $duplicate += 1
    #end while
    #silent $staged_names.append($file_name)
    ## Binning_refiner doesn't handle gzipped files.
    #if $f.ext.endswith(".gz")
        gunzip -c '${f}' > 'input_bin_dir/bins/${file_name}' &&
    #else
        ln -s '${f}' 'input_bin_dir/bins/${file_name}' &&
    #end if
#end for

Binning_refiner
-i input_bin_dir
-p 'refined'
-m '$m'
## Move the two summary tables to the paths Galaxy assigned to the outputs;
## the refined bins themselves are collected via discover_datasets.
&& mv 'refined_Binning_refiner_outputs/refined_contigs.txt' '$output_refined_contigs'
&& mv 'refined_Binning_refiner_outputs/refined_sources_and_length.txt' '$output_sources_and_length'
    ]]></command>
    <inputs>
        <!-- Bins to refine; plain and gzipped fasta datasets are both accepted. -->
        <param name="input_bins" format="fasta,fasta.gz" type="data" multiple="true" label="Binned fasta files"/>
        <!-- Passed to Binning_refiner as -m. A negative size is meaningless, so it is rejected in the tool form. -->
        <param argument="-m" type="integer" min="0" value="512" label="Minimum size (Kbp) of refined bin" help="Bins smaller than this will be eliminated"/>
    </inputs>
    <outputs>
        <!-- One list element per fasta file found in the refined bins directory; the part of the file name before ".fasta" becomes the element identifier. -->
        <collection name="output_refined_bins" type="list" label="${tool.name} on ${on_string} (refined bins)">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fasta" format="fasta" directory="refined_Binning_refiner_outputs/refined_refined_bins"/>
        </collection>
        <!-- The two tabular summaries are moved into place by the command section. -->
        <data name="output_refined_contigs" format="tabular" label="${tool.name} on ${on_string} (refined contigs)"/>
        <data name="output_sources_and_length" format="tabular" label="${tool.name} on ${on_string} (sources and length)"/>
    </outputs>
    <tests>
        <!-- Ten gzipped bins (seven MetaBAT, three CONCOCT) refined with a 256 Kbp minimum bin size. -->
        <test expect_num_outputs="3">
            <param name="input_bins"
                   ftype="fasta.gz"
                   value="MetaBAT_17.fa.gz,MetaBAT_18.fa.gz,MetaBAT_19.fa.gz,MetaBAT_20.fa.gz,MetaBAT_21.fa.gz,MetaBAT_22.fa.gz,MetaBAT_23.fa.gz,Concoct_1.fa.gz,Concoct_3.fa.gz,Concoct_8.fa.gz"/>
            <param name="m" value="256"/>
            <!-- Each refined bin is checked for line count, one expected scaffold header and exact byte size. -->
            <output_collection name="output_refined_bins" type="list" count="8">
                <element ftype="fasta" name="refined_1">
                    <assert_contents>
                        <has_n_lines n="21765"/>
                        <has_text text=">scaffold_2064"/>
                        <has_size value="1320640"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_2">
                    <assert_contents>
                        <has_n_lines n="15461"/>
                        <has_text text=">scaffold_1301"/>
                        <has_size value="941488"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_3">
                    <assert_contents>
                        <has_n_lines n="14495"/>
                        <has_text text=">scaffold_262"/>
                        <has_size value="883642"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_4">
                    <assert_contents>
                        <has_n_lines n="12484"/>
                        <has_text text=">scaffold_923"/>
                        <has_size value="758509"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_5">
                    <assert_contents>
                        <has_n_lines n="11849"/>
                        <has_text text=">scaffold_232"/>
                        <has_size value="722197"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_6">
                    <assert_contents>
                        <has_n_lines n="10460"/>
                        <has_text text=">scaffold_259"/>
                        <has_size value="637342"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_7">
                    <assert_contents>
                        <has_n_lines n="9219"/>
                        <has_text text=">scaffold_1510"/>
                        <has_size value="560996"/>
                    </assert_contents>
                </element>
                <element ftype="fasta" name="refined_8">
                    <assert_contents>
                        <has_n_lines n="4554"/>
                        <has_text text=">scaffold_955"/>
                        <has_size value="276224"/>
                    </assert_contents>
                </element>
            </output_collection>
            <!-- The two tabular summaries are compared against stored expected files. -->
            <output name="output_refined_contigs" ftype="tabular" file="output_refined_contigs.tabular"/>
            <output name="output_sources_and_length" ftype="tabular" file="output_sources_and_length.tabular"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Reconciles the outputs of different binning programs with the aim of improving the quality of genome bins,
especially with respect to contamination levels.

The tool accepts one or more fasta datasets (i.e., bins) that were produced by metagenome binning tools
(CONCOCT, MaxBin2, MetaBAT2 and others).

All refined bins larger than the specified "Minimum size (Kbp) of refined bin" will be output as a dataset
collection of fasta files.  Additional outputs include a tabular dataset containing the id of the contigs
in each refined bin (refined contigs) and another tabular dataset containing the size of each refined bin
and the origin of its contigs (sources and length).

**More information**

https://github.com/songweizhi/Binning_refiner

    ]]></help>
    <expand macro="citations"/>
</tool>