view dante_ltr_search.xml @ 22:d5508ce50c49 draft

planemo upload commit 398baa926d7864efeb2c349c52827fd6988e60e6-dirty
author petr-novak
date Tue, 28 Nov 2023 14:32:03 +0000
parents d1c3b29b1478
children
line wrap: on
line source

<tool id="dante_ltr_search" name="DANTE_LTR retrotransposon identification" version="@TOOL_VERSION@" python_template_version="3.5">
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Job command line (Cheetah template, executed by the shell).
        Steps, chained with && so the first failing step aborts the job:
          1. print the PATH of the job environment to stdout (diagnostic aid);
          2. run dante_ltr on the DANTE GFF3 and the reference sequence, writing
             results under the prefix "output";
          3. move the GFF3 annotation and the statistics table to their Galaxy datasets;
          4. copy the HTML summary and its plot directory, when dante_ltr produced them.
        Every path substituted by Galaxy is single-quoted so that the shell treats it
        as one word, matching the quoting already used for the two input datasets.
    -->
    <command detect_errors="exit_code"><![CDATA[
        echo \$PATH
        &&
        dante_ltr --gff3 '$dante'
          --reference_sequence '$reference' -M $max_missing --output output --cpu \${GALAXY_SLOTS:-1}
        &&
        mv output.gff3 '$te_ltr_gff'
        &&
        mv output_statistics.csv '$statistics'
        &&
        ## The HTML summary is copied only when present, so its absence does not fail the job.
        if [ -f output_summary.html ]; then cp output_summary.html '$html_report'; fi
        &&
        mkdir -p '${html_report.extra_files_path}'
        &&
        ## The plot directory is placed next to the report as extra files of the HTML dataset.
        if [ -d output_summary_plots ]; then cp -r output_summary_plots '${html_report.extra_files_path}/'; fi
    ]]></command>
    <inputs>
        <!-- DANTE domain annotation; handed to dante_ltr as its GFF3 input. -->
        <param name="dante"
               type="data"
               format="gff3"
               label="GFF3 output from DANTE pipeline - full output"/>
        <!-- Sequence file handed to dante_ltr as the reference sequence. -->
        <param name="reference"
               type="data"
               format="fasta"
               label="Reference sequence matching DANTE output"/>
        <!-- Integer in the range 0 to 3 (default 1); forwarded to dante_ltr through its -M option. -->
        <param name="max_missing"
               type="integer"
               min="0"
               max="3"
               value="1"
               label="Maximum number of missing protein domains to tolerate in full length retrotransposon"/>
    </inputs>
    <outputs>
        <!--
            Datasets filled in by the command block: the GFF3 annotation, the statistics
            table and the HTML report. Each label is kept on a single line: a line break
            inside an attribute value is normalised to a space by the XML parser, which
            would leave a run of indentation spaces in the middle of the dataset name.
        -->
        <data name="te_ltr_gff" format="gff3" label="LTR retrotransposons annotation (GFF3) based on DANTE annotation $dante.hid and reference $reference.hid" />
        <data name="statistics" format="tabular" label="LTR retrotransposons detection summary based on $dante.hid and reference $reference.hid" />
        <data name="html_report" format="html" label="LTR retrotransposons detection on $dante.hid and $reference.hid report" />
    </outputs>
    <help><![CDATA[
        This tool uses the output of the DANTE annotation pipeline to identify full-length LTR
        transposable elements. The output is in GFF3 format and includes annotation of the
        5' and 3' Long Terminal Repeats, Target Site Duplication (TSD) and primer binding site (PBS).

        All identified elements contain a set of protein domains as defined in
        REXdb_. Based on the results of the detection of structural features,
        elements fall into five categories:

        - elements with domains, 5'LTR, 3'LTR, TSD and PBS - rank DLTP
        - elements with domains, 5'LTR, 3'LTR and PBS (TSD was not found) - rank DLP
        - elements with domains, 5'LTR, 3'LTR and TSD (PBS was not found) - rank DLT
        - elements with protein domains, 5'LTR and 3'LTR (PBS and TSD were not found) - rank DL
        - elements as clusters of protein domains with the same classification, no LTRs - rank D

        .. _REXdb: https://doi.org/10.1186/s13100-018-0144-1

        The principles of detection of LTR retrotransposons are described in the github_ repository of this tool.

        .. _github: https://github.com/kavonrtep/dante_ltr


    ]]></help>
</tool>