<!--
view drep_dereplicate.xml @ 6:cafcc13990ee draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/drep commit 583548ea40e0f811de8dd7334164a1c54610a80f
author iuc
date Sun, 19 May 2024 16:16:18 +0000
parents a0713a10c9f6
children
line wrap: on
line source
-->

<tool id="drep_dereplicate" name="dRep dereplicate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description of the tool. -->
    <description>De-replicate a list of genomes</description>
    <!-- macros.xml is the only macro import, so every <expand macro="..."/> in this file
         resolves against it. The @TOKEN@ placeholders are presumably defined there as well
         (that file is not visible here; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools" />
    <!-- Shared "requirements" macro, given a nested checkm-genome requirement pinned to 1.2.2.
         NOTE(review): this assumes the macro splices nested children into its own body
         (e.g. through a yield element); confirm in macros.xml. -->
    <expand macro="requirements">
        <requirement type="package" version="1.2.2">checkm-genome</requirement>
    </expand>
    <!--
        Command template.

        Runs "dRep dereplicate" with "outdir" as its work directory. The @TOKEN@
        placeholders are substituted before execution (presumably from macros.xml;
        confirm there) and supply genome preparation plus the option groups.
        The debug flag is always passed.

        If the command list fails, the parenthesised handler after "||":
          1. saves the failing exit status in rc,
          2. lists every regular file under outdir,
          3. prints the CheckM log and the dRep logger log,
          4. exits with the saved status, so the original failure code is kept
             (detect_errors="exit_code" is set on this element).

        The file listing uses "find ... -exec ls -ltr {} +" instead of passing the
        output of find to ls through backticks: the backtick form word-splits paths
        that contain whitespace and, when find prints nothing, makes ls list the
        current working directory instead of nothing.

        Dollar signs are backslash-escaped so the template engine leaves them for the shell.
    -->
    <command detect_errors="exit_code"><![CDATA[
@PREPARE_GENOMES@
dRep dereplicate outdir
@GENOMES@
@FILTER_OPTIONS@
@QUALITY_ASSESSMENT_OPTIONS@
@COMPARISON_CLUSTERING_OPTIONS@
@SCORING_OPTIONS@
@WARNING_OPTIONS@
--debug
|| (rc=\$?;
    find outdir -type f -exec ls -ltr {} +;
    cat outdir/data/checkM/checkM_outdir/checkm.log;
    cat outdir/log/logger.log;
    exit \$rc)
    ]]></command>
    <!-- Tool parameters. Nothing is declared inline: each group is expanded from a macro
         (imported through macros.xml). The groups appear to parallel the @TOKEN@ option
         placeholders used in the command template, judging by their names; confirm the
         exact mapping in macros.xml. -->
    <inputs>
        <expand macro="genomes"/>
        <expand macro="filtering_options"/>
        <expand macro="quality_assessment_options"/>
        <expand macro="comparison_clustering_options"/>
        <expand macro="scoring_options"/>
        <expand macro="warning_options"/>
        <expand macro="select_drep_outputs"/>
    </inputs>
    <!-- Outputs.
         dereplicated_genomes: a list collection discovered from outdir/dereplicated_genomes;
         each discovered file becomes one element with the fasta extension, named by the
         "__designation__" discovery pattern.
         All remaining outputs are expanded from the "drep_outputs" macro. -->
    <outputs>
        <collection name="dereplicated_genomes" type="list" label="dereplicated_genomes">
            <discover_datasets pattern="__designation__" directory="outdir/dereplicated_genomes" ext="fasta"/>
        </collection>
        <expand macro="drep_outputs" />
    </outputs>
    <!-- Functional tests. Every case is assembled from test macros, expects 8 outputs, and
         passes a has_text check for "dRep dereplicate finished" to the "test_log_output"
         macro (presumably asserted against the tool log; confirm in macros.xml). -->
    <tests>
        <!-- Test 1: "test_string_inputs" genomes with the default option macros. -->
        <test expect_num_outputs="8">
            <expand macro="test_string_inputs"/>
            <expand macro="test_default_filtering_options"/>
            <expand macro="test_default_quality_assessment_options"/>
            <expand macro="test_default_comparison_clustering_options"/>
            <expand macro="test_default_scoring_options"/>
            <expand macro="test_default_warning_options"/>
            <expand macro="test_default_select_drep_outputs"/>
            <expand macro="test_log_output">
                <has_text text="dRep dereplicate finished" />
            </expand>
        </test>
        <!-- Test 2: "test_integer_inputs" genomes; scoring uses the
             "test_extra_weight_table_scoring_options" macro instead of the default one. -->
        <test expect_num_outputs="8">
            <expand macro="test_integer_inputs"/>
            <expand macro="test_default_filtering_options"/>
            <expand macro="test_default_quality_assessment_options"/>
            <expand macro="test_default_comparison_clustering_options"/>
            <expand macro="test_extra_weight_table_scoring_options"/>
            <expand macro="test_default_warning_options"/>
            <expand macro="test_default_select_drep_outputs"/>
            <expand macro="test_log_output">
                <has_text text="dRep dereplicate finished" />
            </expand>
        </test>
        <!-- Test 3: NOTE(review): expands exactly the same macros as Test 1, so as written
             it repeats that case. Confirm whether a different input or option macro was
             intended here, or whether the test can be dropped. -->
        <test expect_num_outputs="8">
            <expand macro="test_string_inputs"/>
            <expand macro="test_default_filtering_options"/>
            <expand macro="test_default_quality_assessment_options"/>
            <expand macro="test_default_comparison_clustering_options"/>
            <expand macro="test_default_scoring_options"/>
            <expand macro="test_default_warning_options"/>
            <expand macro="test_default_select_drep_outputs"/>
            <expand macro="test_log_output">
                <has_text text="dRep dereplicate finished" />
            </expand>
        </test>
    </tests>
    <help><![CDATA[
**dRep dereplicate**

`dRep <https://drep.readthedocs.io/en/latest/overview.html>`_ performs rapid pair-wise comparison of genome sets.

`De-replication <https://drep.readthedocs.io/en/latest/overview.html#genome-de-replication>`_ is the process of identifying sets of genomes that are the “same” in a list of genomes, and removing all but the “best” genome from each redundant set. How similar genomes need to be to be considered “same”, how to determine which genome is “best”, and other important decisions are discussed in `Choosing parameters <https://drep.readthedocs.io/en/latest/choosing_parameters.html>`_. Detailed options for each module are described at: https://drep.readthedocs.io/en/latest/module_descriptions.html

A common use for genome de-replication is the case of individual assembly of metagenomic data. If metagenomic samples are collected in a series, a common way to assemble the short reads is with a “co-assembly”. That is, combining the reads from all samples and assembling them together. The problem with this is that assembling similar strains together can severely fragment assemblies, precluding recovery of a good genome bin. An alternative option is to assemble each sample separately, and then “de-replicate” the bins from each assembly to make a final genome set.

The steps to this process are:

  - Assemble each sample separately using your favorite assembler. You can also perform a co-assembly to catch low-abundance microbes
  - Bin each assembly (and co-assembly) separately using your favorite binner
  - Pull the bins from all assemblies together and run **dRep** on them
  - Perform downstream analysis on the de-replicated genome list



**INPUTS**

  - Genome sets in fasta format.


**OUTPUTS**

  - `Figures <https://drep.readthedocs.io/en/latest/example_output.html#figures>`_ that show the relationships among the input genomes.
  - `Warnings <https://drep.readthedocs.io/en/latest/example_output.html#warnings>`_ report two things: de-replicated genome similarity and secondary clusters that were almost different.
  - A Dataset collection of the “best” genome of each secondary cluster.
  - `Tables from intermediate steps <https://drep.readthedocs.io/en/latest/advanced_use.html>`_

    * Chdb.csv # CheckM results for Bdb
    * Widb.csv # Winning genomes' CheckM information


    ]]></help>
    <!-- Citation entries come from the shared "citations" macro rather than being listed in this file. -->
    <expand macro="citations" />
</tool>