<!-- Mercurial > repos > iuc > stringtie -->

<tool id="stringtie" name="StringTie" version="1.0.4">
    <description>transcript assembly and quantification</description>
    <!-- Tool dependency: the stringtie package, pinned to version 1.0.4. -->
    <requirements>
        <requirement type="package" version="1.0.4">stringtie</requirement>
    </requirements>
    <command><![CDATA[
## Assemble transcripts from the input BAM into the output GTF,
## running with the number of threads given by GALAXY_SLOTS (1 when it is unset).
stringtie "$input_bam"
-o "$output_gtf"
-p "\${GALAXY_SLOTS:-1}"
#if str($guide.use_guide) == 'yes':
    ## Guided run: -G is the reference annotation, -C receives the covered reference transcripts,
    ## and input_estimation expands to -e or to nothing.
    -C "$coverage" -G "$guide.guide_gff" $guide.input_estimation
    #if str($guide.output_ballgown) == '-b':
        ## Ballgown tables are written to the current working directory.
        ## The path is quoted like every other path on this command line, so a
        ## directory name containing whitespace is still passed as one argument.
        $guide.output_ballgown "`pwd`"
    #end if
#end if
#if str($option_set.options) == 'advanced':
    ## Advanced mode passes every tunable explicitly; the two booleans expand to their flag or to nothing.
    -l "$option_set.name_prefix"
    -f "$option_set.fraction"
    -m "$option_set.min_tlen"
    -a "$option_set.min_anchor_len"
    -j "$option_set.min_anchor_cov"
    -c "$option_set.min_bundle_cov"
    -s "$option_set.maxcov"
    -g "$option_set.bdist"
    -M "$option_set.bundle_fraction" $option_set.sensitive $option_set.disable_trimming
#end if
]]>
</command>
    <!-- User-facing parameters: the BAM to assemble, an optional guide annotation, and optional advanced tunables. -->
    <inputs>
        <param format="bam" label="Mapped reads to assemble transcripts from" name="input_bam" type="data" />
        <!-- Guide-annotation options only exist in the "yes" branch; the "no" branch defines no parameters. -->
        <conditional name="guide">
            <param label="Use GFF file to guide assembly" name="use_guide" type="select">
                <option value="yes">Use GFF</option>
                <option selected="True" value="no">Do not use GFF</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param format="gtf,gff3" help="-G" label="Reference annotation to use for guiding the assembly process" name="guide_gff" type="data" />
                <param falsevalue="" help="-e" label="Perform abundance estimation only of input transcripts" name="input_estimation" truevalue="-e" type="boolean" />
                <param falsevalue="" help="-b" label="Output additional files for use in Ballgown" name="output_ballgown" truevalue="-b" type="boolean" />
            </when>
        </conditional>
        <!-- With "default" no tunables are passed to stringtie; with "advanced" every value below is passed explicitly. -->
        <conditional name="option_set">
            <param label="Options" name="options" type="select">
                <option selected="True" value="default">Use defaults</option>
                <option value="advanced">Specify advanced options</option>
            </param>
            <when value="default" />
            <when value="advanced">
                <param falsevalue="" help="-t" label="Disable trimming of predicted transcripts based on coverage" name="disable_trimming" truevalue="-t" type="boolean" />
                <param falsevalue="" help="-S" label="Increase sensitivity" name="sensitive" truevalue="-S" type="boolean" />
                <param help="-l" label="Name prefix for output transcripts" name="name_prefix" type="text" value="STRG" />
                <!-- NOTE(review): pre-filled with 0.15 although the help section lists 0.1 as the StringTie default; confirm which is intended. -->
                <param help="-f" label="Minimum isoform fraction" max="1.0" min="0.0" name="fraction" type="float" value="0.15" />
                <param help="-m" label="Minimum assembled transcript length" name="min_tlen" type="integer" value="200" />
                <param help="-a" label="Minimum anchor length for junctions" name="min_anchor_len" type="integer" value="10" />
                <param help="-j" label="Minimum junction coverage" name="min_anchor_cov" type="integer" value="1" />
                <!--
                    Typed as float because the help section lists a fractional StringTie default (2.5),
                    which an integer field cannot express. The pre-filled value is left at 2.
                    NOTE(review): confirm whether the pre-filled value should follow the 2.5 default.
                -->
                <param help="-c" label="Minimum bundle reads per bp coverage to consider for assembly" name="min_bundle_cov" type="float" value="2" />
                <param help="-s" label="Coverage saturation threshold" name="maxcov" type="integer" value="1000000" />
                <param help="-g" label="Gap between read mappings triggering a new bundle" name="bdist" type="integer" value="50" />
                <!-- A fraction, so it is bounded to 0..1 in the same way as the isoform fraction above. -->
                <param help="-M" label="Fraction of bundle allowed to be covered by multi-hit reads" max="1.0" min="0.0" name="bundle_fraction" type="float" value="0.95" />
            </when>
        </conditional>
    </inputs>
    <!-- Datasets produced by a run; each filter decides whether its dataset is created for the chosen parameters. -->
    <outputs>
        <data format="gtf" label="${tool.name} on ${on_string}: Assembled transcripts" name="output_gtf" />
        <!-- Only created when a guide annotation is used. -->
        <data format="gff3" label="${tool.name} on ${on_string}: Coverage" name="coverage">
            <filter>guide['use_guide'] == "yes"</filter>
        </data>
        <!--
            Ballgown tables, picked up from the job working directory by file name.
            output_ballgown is only defined in the "yes" branch of the guide conditional, so every
            filter checks use_guide first; the short-circuiting "and" keeps the key lookup from being
            evaluated (and failing) when no guide annotation is used.
        -->
        <data format="tabular" from_work_dir="e_data.ctab" label="${tool.name} on ${on_string}: exon-level expression measurements" name="exon_expression">
            <filter>guide['use_guide'] == "yes" and guide['output_ballgown']</filter>
        </data>
        <data format="tabular" from_work_dir="i_data.ctab" label="${tool.name} on ${on_string}: intron-level expression measurements" name="intron_expression">
            <filter>guide['use_guide'] == "yes" and guide['output_ballgown']</filter>
        </data>
        <data format="tabular" from_work_dir="t_data.ctab" label="${tool.name} on ${on_string}: transcript-level expression measurements" name="transcript_expression">
            <filter>guide['use_guide'] == "yes" and guide['output_ballgown']</filter>
        </data>
        <data format="tabular" from_work_dir="e2t.ctab" label="${tool.name} on ${on_string}: exon to transcript mapping" name="exon_transcript_mapping">
            <filter>guide['use_guide'] == "yes" and guide['output_ballgown']</filter>
        </data>
        <data format="tabular" from_work_dir="i2t.ctab" label="${tool.name} on ${on_string}: intron to transcript mapping" name="intron_transcript_mapping">
            <filter>guide['use_guide'] == "yes" and guide['output_ballgown']</filter>
        </data>
    </outputs>
    <!-- Functional tests: one per combination of guide usage and option set, plus a Ballgown run. -->
    <tests>
        <!-- 1: no guide annotation, default options. -->
        <test>
            <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
            <param name="use_guide" value="no" />
            <param name="options" value="default" />
            <output name="output_gtf" ftype="gtf" file="stringtie_out1.gtf" lines_diff="2" />
        </test>
        <!-- 2: no guide annotation, advanced options with a custom isoform fraction. -->
        <test>
            <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
            <param name="use_guide" value="no" />
            <param name="options" value="advanced" />
            <param name="fraction" value="0.17" />
            <output name="output_gtf" ftype="gtf" file="stringtie_out2.gtf" lines_diff="2" />
        </test>
        <!-- 3: guide annotation, default options. -->
        <test>
            <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
            <param name="use_guide" value="yes" />
            <param name="guide_gff" value="stringtie_in.gtf" />
            <param name="options" value="default" />
            <output name="output_gtf" ftype="gtf" file="stringtie_out3.gtf" lines_diff="2" />
        </test>
        <!-- 4: guide annotation, advanced options with a custom isoform fraction. -->
        <test>
            <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
            <param name="use_guide" value="yes" />
            <param name="guide_gff" value="stringtie_in.gtf" />
            <param name="options" value="advanced" />
            <param name="fraction" value="0.17" />
            <output name="output_gtf" ftype="gtf" file="stringtie_out4.gtf" lines_diff="2" />
        </test>
        <!-- 5: guide annotation with Ballgown output; checks all five tables, the GTF and the coverage file. -->
        <test>
            <param name="input_bam" ftype="bam" value="stringtie_in1.bam" />
            <param name="use_guide" value="yes" />
            <param name="output_ballgown" value="yes" />
            <param name="guide_gff" value="stringtie_in.gtf" />
            <param name="options" value="default" />
            <output name="exon_expression" ftype="tabular" file="ballgown/e_data.ctab" />
            <output name="intron_expression" ftype="tabular" file="ballgown/i_data.ctab" />
            <output name="transcript_expression" ftype="tabular" file="ballgown/t_data.ctab" />
            <output name="exon_transcript_mapping" ftype="tabular" file="ballgown/e2t.ctab" />
            <output name="intron_transcript_mapping" ftype="tabular" file="ballgown/i2t.ctab" />
            <output name="output_gtf" ftype="gtf" file="stringtie_out5.gtf" lines_diff="2" />
            <output name="coverage" ftype="gff3" file="stringtie_out_coverage.gtf" />
        </test>
    </tests>
    <help>
<![CDATA[

**What it does**

StringTie_ is a fast and highly efficient assembler of RNA-Seq alignments into potential transcripts. It uses a novel network flow algorithm as well as an optional *de novo* assembly step to assemble and quantitate full-length transcripts representing multiple splice variants for each gene locus. Its input can include not only the alignments of raw reads used by other transcript assemblers, but also alignments of longer sequences that have been assembled from those reads. To identify differentially expressed genes between experiments, StringTie's output can be processed either by the Cuffdiff or Ballgown programs.

.. _StringTie: http://ccb.jhu.edu/software/stringtie/

------

StringTie has the following options::

 -G reference annotation to use for guiding the assembly process (GTF/GFF3)
 -l name prefix for output transcripts (default: STRG)
 -f minimum isoform fraction (default: 0.1)
 -m minimum assembled transcript length to report (default 200bp)
 -o output path/file name for the assembled transcripts GTF (default: stdout)
 -a minimum anchor length for junctions (default: 10)
 -j minimum junction coverage (default: 1)
 -t disable trimming of predicted transcripts based on coverage
    (default: coverage trimming is enabled)
 -c minimum reads per bp coverage to consider for transcript assembly (default: 2.5)
 -s coverage saturation threshold; further read alignments will be
    ignored in a region where a local coverage depth of <maxcov>
    is reached (default: 1,000,000);
 -v verbose (log bundle processing details)
 -g gap between read mappings triggering a new bundle (default: 50)
 -C output file with reference transcripts that are covered by reads
 -M fraction of bundle allowed to be covered by multi-hit reads (default: 0.95)
 -p number of threads (CPUs) to use (default: 1)
 -B enable output of Ballgown table files which will be created in the
    same directory as the output GTF (requires -G, -o recommended)
 -b enable output of Ballgown table files but these files will be
    created under the directory path given as <dir_path>
 -e only estimates the abundance of given reference transcripts (requires -G)
 ]]>
    </help>
    <citations>
        <citation type="doi">doi:10.1038/nbt.3122</citation>
    </citations>
</tool>
<!--
author	iuc
date	Wed, 12 Aug 2015 07:07:40 -0400
parents	520e0988ec1c
children	f504b3b7e49d
-->