<!--
view regtools_junctions_extract.xml @ 0:01ed8e112f2a draft

planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
author yating-l
date Wed, 12 Apr 2017 17:44:58 -0400
parents
children 06d9062f5430
line wrap: on
line source
-->

<tool id="regtools_junctions_extract" name="regtools junctions extract" version="0.1.0">
    <description>Extract splice junctions from an RNA-Seq BAM file</description>
    <requirements>
        <!-- Runtime dependency: the regtools package, pinned to version 0.3.0 -->
        <requirement type="package" version="0.3.0">regtools</requirement>
    </requirements>
    <stdio>
        <!-- Open-ended range "1:" matches every exit code of 1 or greater,
             so any non-zero exit status of the command is reported as an error -->
        <exit_code range="1:" />
    </stdio>
    <command><![CDATA[
        ## regtools expects an indexed BAM file
        ## Use symlinks to link the BAM file and its index to the working directory
        ## See: https://biostar.usegalaxy.org/p/10128/

        ln -sf '${input1}' infile.bam &&
        ln -sf '${input1.metadata.bam_index}' infile.bam.bai &&

        ## Extract the junctions. The anchor length (-a) and region (-r) flags are
        ## only emitted when the advanced options are displayed; -r is further
        ## skipped when the region field is left empty.
        ## All dataset paths and free-text values are single-quoted so that the
        ## shell never splits or expands them.
        regtools junctions extract
          -i $min_intron_length
          -I $max_intron_length
          -o '$output1'
          #if str($advanced_options.advanced_options_selector) == "on":
            -a $advanced_options.min_anchor_length

            #if str($advanced_options.region):
                -r '$advanced_options.region'
            #end if
          #end if
          infile.bam  &&

        ## Run the validator.py script shipped in the tool directory on the
        ## regtools output; it writes the second output dataset (fixed_output).
        python '$__tool_directory__/validator.py' '$output1' '$fixed_output'
    ]]></command>
    <inputs>
        <!-- Alignment dataset; the command links both the BAM file and its
             bam_index metadata file into the working directory -->
        <param name="input1" type="data" format="bam" label="BAM file">
            <help>RNA-Seq read alignments in BAM format from which splice junctions are extracted</help>
        </param>

        <!-- Intron length thresholds, passed to regtools as -i and -I -->
        <param name="min_intron_length" type="integer" label="Minimum intron length" value="20" min="1">
            <help>Only considers spliced RNA-Seq reads with this minimum intron length (-i)</help>
        </param>
        <param name="max_intron_length" type="integer" label="Maximum intron length" value="500000" min="1">
            <help>Only considers spliced RNA-Seq reads with this maximum intron length (-I)</help>
        </param>

        <!-- Optional settings; -a and -r are only added to the command line
             when the selector is set to "on" -->
        <conditional name="advanced_options">
            <param name="advanced_options_selector" type="select" label="Advanced options">
                <option value="off" selected="true">Hide advanced options</option>
                <option value="on">Display advanced options</option>
            </param>
            <when value="on">
                <param name="min_anchor_length" type="integer" label="Minimum anchor length" value="8" min="1">
                    <help>Only report splice junctions with this minimum anchor length on both sides of the junction (-a)</help>
                </param>
                <!-- Left empty, no -r flag is passed; otherwise the value must
                     match chrom:start-end -->
                <param name="region" type="text" optional="true" label="Only extract junctions in this region" value="">
                    <help>Only report splice junctions within this region (format = chrom:start-end) (-r)</help>
                    <validator type="regex"
                        message="Region should be in the following format: chrom:start-end">^([^:]+):(\d+)-(\d+)$</validator>
                </param>
            </when>

            <when value="off" />
        </conditional>
    </inputs>
    <outputs>
        <!-- BED file written directly by regtools through its -o option -->
        <data name="output1" format="bed" label="${tool.name} on ${on_string}: original output"/>
        <!-- BED file written by validator.py, which receives output1 as its first argument
             (NOTE(review): the exact corrections it applies are not visible in this file) -->
        <data name="fixed_output" format="bed" label="${tool.name} on ${on_string}: fixed_output for HAC workflow"/>
    </outputs>
    <tests>
        <!-- Case 1: every parameter left at its default value -->
        <test>
            <param name="input1" value="Dbia3_adult_males_shallow.bam" />
            <output name="output1" file="Dbia3_adult_males_junctions_i20_I500000.bed" />
            <output name="fixed_output" file="Dbia3_adult_males_junctions_i20_I500000_fixed.bed" />
        </test>

        <!-- Case 2: non-default minimum and maximum intron lengths -->
        <test>
            <param name="input1" value="Dbia3_adult_males_shallow.bam" />
            <param name="min_intron_length" value="50" />
            <param name="max_intron_length" value="50000" />
            <output name="output1" file="Dbia3_adult_males_junctions_i50_I50000.bed" />
            <output name="fixed_output" file="Dbia3_adult_males_junctions_i50_I50000_fixed.bed" />
        </test>

        <!-- Case 3: advanced options enabled with a custom anchor length -->
        <test>
            <param name="input1" value="Dbia3_adult_males_shallow.bam" />
            <param name="advanced_options_selector" value="on" />
            <param name="min_anchor_length" value="10" />
            <output name="output1" file="Dbia3_adult_males_junctions_i20_I500000_a10.bed" />
            <output name="fixed_output" file="Dbia3_adult_males_junctions_i20_I500000_a10_fixed.bed" />
        </test>

        <!-- Case 4: advanced options enabled and extraction limited to one region -->
        <test>
            <param name="input1" value="Dbia3_adult_males_shallow.bam" />
            <param name="advanced_options_selector" value="on" />
            <param name="region" value="contig2:10000-30000" />
            <output name="output1" file="Dbia3_adult_males_junctions_i20_I500000_rcontig2_10-30kb.bed" />
            <output name="fixed_output" file="Dbia3_adult_males_junctions_i20_I500000_rcontig2_10-30kb_fixed.bed" />
        </test>

        <!-- Case 5: input whose regtools output contains an invalid strand -->
        <test>
            <param name="input1" value="Dbia3.bam" />
            <output name="output1" file="Dbia3_i20_I500000.bed" />
            <output name="fixed_output" file="Dbia3_i20_I500000_fixed.bed" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

The `regtools junctions extract <https://regtools.readthedocs.io/en/latest/commands/junctions-extract/>`_
tool creates a list of exon-exon junctions from spliced RNA-Seq reads within a BAM
alignment file. The format of the output BED file is similar to the ``junctions.bed``
file produced by TopHat2.

The extent of each BED feature corresponds to the maximum overhang of each splice
junction. The score of each BED feature corresponds to the number of spliced RNA-Seq
reads that support each junction.

--------

.. class:: warningmark

By default, the minimum intron size is **20 bp** and the maximum intron size is
**500,000 bp** so that they conform to the default intron size settings for
`HISAT2 <https://ccb.jhu.edu/software/hisat2/manual.shtml#options>`_. Note that the
default minimum intron size for the command-line version of
``regtools junctions extract`` is 70 bp.

    ]]></help>
    <citations>
        <!-- BibTeX entry citing the regtools GitHub repository -->
        <citation type="bibtex">
@misc{githubregtools,
  author = {Griffith Lab},
  year = {2016},
  title = {regtools},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/griffithlab/regtools},
}</citation>
    </citations>
</tool>