view TEfinder.xml @ 0:838fb3a1678f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/te_finder/ commit d86db11ee07ccc379667797b9124185ddfde1948
author iuc
date Mon, 08 Aug 2022 19:41:18 +0000
parents
children 2edb80d68a1b
line wrap: on
line source

<tool id="te_finder" name="TEfinder" version="1.0.1" profile="21.05">
    <description>Transposable element insertions finder</description>

    <!-- Software packages Galaxy must provide in the job environment, each pinned to an exact version.
         NOTE(review): presumably these are the external programs the bundled TEfinder script calls; confirm against the script. -->
    <requirements>
        <requirement type="package" version="1.15.1">samtools</requirement>
        <requirement type="package" version="2.30.0">bedtools</requirement>
        <requirement type="package" version="2.27.4">picard</requirement>
        <requirement type="package" version="3.4">grep</requirement>
        <requirement type="package" version="1.07.1">bc</requirement>
    </requirements>

    <!--
        Builds the TEfinder command line (Cheetah template).
        The script bundled in the tool directory is run with the four required input datasets,
        the two outputs that are always written (discordant-read BAM and BED), the thread count,
        and the three numeric advanced options. The GTF output path and the "-out" flag are only
        appended when the GTF output format is selected.
        GALAXY_SLOTS is wrapped in double quotes so the shell expands it at run time (single
        quotes would pass the literal text to TEfinder); the backslash stops Cheetah from
        treating it as a template variable.
    -->
    <command>
        <![CDATA[
            '$__tool_directory__/TEfinder' -fa '$required_inputs.FastaFile'
            -alignment '$required_inputs.alignmentFile'
            -gtf '$required_inputs.TransposonsInGenome'
            -te '$required_inputs.TransposonsToSearch'
            -bamo '$discordantreads'
            -bedo '$bteinsertion'
            -threads "\${GALAXY_SLOTS:-1}"
            -fis $advanced_options.FragmentInsertSize
            -md $advanced_options.MaxDistanceForMerge
            -k $advanced_options.MaxTSDLength
            #if str($advanced_options.OutFormat) == "gtf":
            -gtfo '$gteinsertion'
            -out $advanced_options.OutFormat
            #end if
        ]]>
    </command>

    <inputs>
        <!-- Required input datasets; the command template reads them as $required_inputs.NAME. -->
        <section name="required_inputs" title="Required Inputs" expanded="True">
            <param name="FastaFile" type="data" format="fasta" label="Select reference genome FASTA index (FA/FASTA file)" />
            <param name="alignmentFile" type="data" format="bam" label="Select sample reads aligned to reference genome (BAM/SAM file)" />
            <param name="TransposonsInGenome" type="data" format="gtf" label="Select reference genome TE annotation (GFF/GTF file)" />
            <!-- "txt" is Galaxy's plain-text datatype extension (the tests upload this file as ftype="txt"). -->
            <param name="TransposonsToSearch" type="data" format="txt" label="Select TE names" help="Single column text file" />
        </section>
        <!-- Optional tuning parameters; collapsed by default. Each maps to the TEfinder flag named in "argument". -->
        <section name="advanced_options" title="Advanced Options" expanded="False">
            <param name="FragmentInsertSize" argument="-fis" type="integer" min="0" value="400" label="Short-read sequencing fragment insert size" />
            <param name="MaxDistanceForMerge" argument="-md" type="integer" min="0" value="150" label="Maximum distance between reads for bedtools merge" />
            <param name="MaxTSDLength" argument="-k" type="integer" min="0" value="20" label="Maximum TE target site duplication (TSD) length" />
            <!-- Selecting "gtf" adds the GTF output dataset; the BED output is produced in both cases. -->
            <param name="OutFormat" argument="-out" type="select" display="radio" label="Select output format as BED [GTF]" help="See help below for more details">
                <option value="bed" selected="True">Default format is BED</option>
                <option value="gtf">Other available format is GTF (-out gtf)</option>
            </param>
        </section>
    </inputs>

    <outputs>
        <!-- BED table of TE insertions; always created (the command passes it to -bedo). -->
        <data format="bed" name="bteinsertion" label="${tool.name} on ${on_string}: BED" />
        <!-- GTF table of TE insertions; only created when the GTF output format is selected. -->
        <data format="gtf" name="gteinsertion" label="${tool.name} on ${on_string}: GTF">
            <filter>advanced_options['OutFormat'] == 'gtf'</filter>
        </data>
        <!-- BAM of discordant reads; always created (the command passes it to -bamo).
             Labelled like its siblings so the three datasets are distinguishable in the history. -->
        <data format="bam" name="discordantreads" label="${tool.name} on ${on_string}: discordant reads BAM" />
    </outputs>

    <tests>
        <!-- Default (BED) run: expects the BED table and the discordant-read BAM, nothing else.
             Equivalent CLI: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -->
        <test expect_num_outputs="2">
            <section name="required_inputs">
                <param name="FastaFile" value="reference.fa" ftype="fasta"/>
                <param name="alignmentFile" value="sample.bam" ftype="bam"/>
                <param name="TransposonsInGenome" value="TEs.gtf" ftype="gtf"/>
                <param name="TransposonsToSearch" value="List_of_TEs.txt" ftype="txt"/>
            </section>
            <section name="advanced_options">
                <param name="OutFormat" value="bed"/>
            </section>
            <output name="bteinsertion" file="TEinsertions.bed" ftype="bed"/>
            <output name="discordantreads" file="DiscordantReads.bam" ftype="bam"/>
        </test>

        <!-- GTF run: same inputs, the GTF table is expected in addition to the BED and BAM outputs.
             Equivalent CLI: TEfinder -alignment sample.bam -fa reference.fa -gtf TEs.gtf -te List_of_TEs.txt -out gtf -->
        <test expect_num_outputs="3">
            <section name="required_inputs">
                <param name="FastaFile" value="reference.fa" ftype="fasta"/>
                <param name="alignmentFile" value="sample.bam" ftype="bam"/>
                <param name="TransposonsInGenome" value="TEs.gtf" ftype="gtf"/>
                <param name="TransposonsToSearch" value="List_of_TEs.txt" ftype="txt"/>
            </section>
            <section name="advanced_options">
                <param name="OutFormat" value="gtf"/>
            </section>
            <output name="bteinsertion" file="TEinsertions.bed" ftype="bed"/>
            <output name="gteinsertion" file="TEinsertions.gtf" ftype="gtf"/>
            <output name="discordantreads" file="DiscordantReads.bam" ftype="bam"/>
        </test>
    </tests>

    <help>
        <![CDATA[
        A bioinformatics tool for detecting novel transposable element insertions

        Authors: Vista Sohrab & Dilay Hazal Ayhan

        TEfinder uses discordant reads to detect novel transposable element insertion events in paired-end sample sequencing data.

        **Output files**::

            TEinsertions.bed contains identified TE insertion events in sample (in the final column, FILTER attribute with "PASS" refers to high confidence insertion events while instances labeled as "in_repeat", "weak_evidence", "strand_bias" or a combination of these three labels indicate less confident insertion events)
            
            TEinsertions.gtf is provided with the same information as the BED file if using -out gtf
            
            DiscordantReads.bam contains all discordant reads that have been identified based on the TEs of interest that have been submitted to TEfinder

        **Note**::

            Modifying the maximum TSD length (-k) could be useful if there is an unexpected number of insertion events identified with the default parameter. The optimal maximum TSD length can vary across datasets.
            Modifying the fragment insert size (-fis) based on the sequencing library preparation can be useful.

        ]]>
    </help>

    <!-- Reference shown to users for citing the tool.
         NOTE(review): presumably the Zenodo record of the TEfinder software release; confirm the DOI resolves to it. -->
    <citations>
        <citation type="doi">10.5281/zenodo.4479946</citation>
    </citations>

</tool>