<!-- Source repository: Mercurial > repos > iuc > rgrnastar -->

<tool id="rna_star" name="RNA STAR" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
    <description>Gapped-read mapper for RNA-seq data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam"/>
    <xrefs>
        <xref type="bio.tools">star</xref>
    </xrefs>
    <expand macro="requirements"/>
    <expand macro="stdio" />

<!--
    important quote (https://groups.google.com/forum/#!topic/rna-star/q4zGzlPgwXY):
    Hi Gary,

    if you generate the genome with GTF file, and do not specify the value for  - -sjdbOverhang, it will be set to the default 100.
    If you want to be able to set arbitrary value of  - -sjdbOverhang on the fly, you have to generate the genome without annotations (GTF) - then you supply both the  - -sjdbOverhang and GTF file at the mapping step.

    Cheers
    Alex
-->
    <!--
    Command template (Cheetah). It assembles one STAR call and then rewrites
    STAR's BAM output with samtools. In order of appearance:

      1. Read input: the forward/reverse elements of a paired collection, or
         input1 (plus input2 when sPaired is "paired"). The FASTQ_GZ_OPTION
         token is added when the first read dataset is of type fastq.gz or
         fastqsanger.gz.
      2. Two-pass mode: the selected twopassMode value is written verbatim.
         The values "Basic - -twopass1readsN" and
         "None - -sjdbFileChrStartEnd" therefore also introduce the option
         whose arguments follow (twopass_read_subset, or the datasets
         iterated from sj_precalculated).
      3. Quantification mode; quantTranscriptomeBan is only written when the
         quantMode value contains "TranscriptomeSAM".
      4. SAM attributes: when "XS" is selected it is removed from the tag list
         and "outSAMstrandField intronMotif" is written instead; when "HI" is
         selected the HI_offset value is passed as outSAMattrIHstart.
      5. Basic filters: "exclude_unmapped" is translated to
         "outSAMunmapped None" (otherwise "Within"); the
         RemoveNoncanonicalUnannotated entry is dropped when
         RemoveNoncanonical is also selected; all remaining selected values
         are echoed unchanged.
      6. Algorithmic settings: fixed presets for "star_fusion" and "arriba",
         user-supplied values for "full"; otherwise only chimSegmentMin (and
         chimMultimapNmax for the "Junctions" output) when chimeric reporting
         is enabled.
      7. samtools view rewrites Aligned.sortedByCoord.out.bam to
         $mapped_reads and, when quantMode contains "TranscriptomeSAM",
         Aligned.toTranscriptome.out.bam to $transcriptome_mapped_reads.

    The tokens TEMPINDEX, REFGENOMEHANDLING, FASTQ_GZ_OPTION, LIMITS, OUTWIG
    and OUTWIGOUTPUTS are not defined in this file (presumably they come from
    the imported macros.xml).

    NOTE(review): in the two-pass block the inline comment describes a cached
    index without built-in gene model, while the guard tests
    geneSource == 'history' together with GTFselect == 'with-gtf'. Confirm
    which case is intended.
    NOTE(review): the inline comment "transcriptome BM" near the second
    samtools call presumably means "BAM".
    -->
    <command><![CDATA[
    @TEMPINDEX@
    STAR
    @REFGENOMEHANDLING@

        --readFilesIn
        #if str($singlePaired.sPaired) == 'paired_collection':
            '$singlePaired.input.forward' '$singlePaired.input.reverse'

            #if $singlePaired.input.forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
                @FASTQ_GZ_OPTION@
            #end if
        #else
            '$singlePaired.input1'
            #if str($singlePaired.sPaired) == 'paired':
                '$singlePaired.input2'
            #end if

            #if $singlePaired.input1.is_of_type('fastq.gz', 'fastqsanger.gz'):
                @FASTQ_GZ_OPTION@
            #end if
        #end if

        --outSAMtype BAM SortedByCoordinate

        ## Two pass mode
        --twopassMode ${twopass.twopassMode} ${twopass.twopass_read_subset}
        #for $sj_input in $twopass.sj_precalculated:
            '$sj_input'
        #end for
        #if str($twopass.twopassMode) != 'None':
            #if str($refGenomeSource.GTFconditional.GTFselect) == 'with-gtf':
                ## need to check first if its a cached index or from history
                ## if it's cached then the sjdbGTFfile and sjdbOverhang params are not provided
                #if str($refGenomeSource.geneSource) == 'history':
                    #if not $refGenomeSource.GTFconditional.sjdbGTFfile:
                       ## case of cached index without built-in gene model,
                       ## when user does not supply the optional gtf, but
                       ## specifies the splice junction overhang
                       --sjdbOverhang $refGenomeSource.GTFconditional.sjdbOverhang
                    #end if
                #end if
            #end if
        #end if

        --quantMode ${refGenomeSource.GTFconditional.quantmode_output.quantMode}
        #if 'TranscriptomeSAM' in str($refGenomeSource.GTFconditional.quantmode_output.quantMode):
            --quantTranscriptomeBan ${refGenomeSource.GTFconditional.quantmode_output.quantTranscriptomeBan}
        #end if

        ## Output format parameters

        ## Read tags
        #set read_tags = str($oformat.outSAMattributes).split(',')
        #if 'XS' in str($oformat.outSAMattributes):
            ## STAR writes XS tag when --outSAMstrandField intronMotif is used
            $read_tags.remove('XS')
            --outSAMstrandField intronMotif
        #end if
        #if 'HI' in str($oformat.outSAMattributes):
            --outSAMattrIHstart ${oformat.HI_offset}
        #end if
        #set $tag_names = ' '.join($read_tags)
        --outSAMattributes $tag_names

        ## Read FLAG
        --outSAMprimaryFlag ${oformat.outSAMprimaryFlag}

        ## Read MAPQ
        --outSAMmapqUnique ${oformat.outSAMmapqUnique}

        ## Output filter parameters

        ## Basic Filters
        #if str($filter.basic_filters) != 'None':
            #set $filter_options = str($filter.basic_filters).split(',')
        #else:
            #set filter_options = []
        #end if
        #if 'exclude_unmapped' in $filter_options:
            $filter_options.remove('exclude_unmapped')
            --outSAMunmapped None
        #else:
            --outSAMunmapped Within
        #end if
        #if '--outFilterIntronMotifs RemoveNoncanonical' in $filter_options:
            ## RemoveNoncanonical excludes a superset of the reads excluded
            ## with RemoveNoncanonicalUnannotated
            #if '--outFilterIntronMotifs RemoveNoncanonicalUnannotated' in $filter_options:
                $filter_options.remove('--outFilterIntronMotifs RemoveNoncanonicalUnannotated')
            #end if
        #end if
        #echo ' '.join($filter_options)

        ## Other Filters
        #if str( $filter.output_params2.output_select2 ) == 'yes':
            --outFilterType $filter.output_params2.outFilterType
            --outFilterMultimapScoreRange $filter.output_params2.outFilterMultimapScoreRange
            --outFilterMultimapNmax $filter.output_params2.outFilterMultimapNmax
            --outFilterMismatchNmax $filter.output_params2.outFilterMismatchNmax
            --outFilterMismatchNoverLmax $filter.output_params2.outFilterMismatchNoverLmax
            --outFilterMismatchNoverReadLmax $filter.output_params2.outFilterMismatchNoverReadLmax
            --outFilterScoreMin $filter.output_params2.outFilterScoreMin
            --outFilterScoreMinOverLread $filter.output_params2.outFilterScoreMinOverLread
            --outFilterMatchNmin $filter.output_params2.outFilterMatchNmin
            --outFilterMatchNminOverLread $filter.output_params2.outFilterMatchNminOverLread
            --outSAMmultNmax $filter.output_params2.outSAMmultNmax
            --outSAMtlen $filter.output_params2.outSAMtlen
        #end if

        ## Other parameters
        #if str( $algo.params.settingsType ) == 'star_fusion':
            ## Preset parameters for STAR-Fusion
            --chimSegmentMin 12
            --chimJunctionOverhangMin 12
            --alignSJDBoverhangMin 10
            --alignMatesGapMax 100000
            --alignIntronMax 100000
            --chimSegmentReadGapMax 3
            --alignSJstitchMismatchNmax 5 -1 5 5
            --peOverlapNbasesMin 12
            --peOverlapMMp 0.1
            --chimMultimapScoreRange 10
            --chimMultimapNmax 10
            --chimNonchimScoreDropMin 10

        #elif str( $algo.params.settingsType ) == 'arriba':
            ## Preset parameters for Arriba
            --peOverlapNbasesMin 10
            --alignSplicedMateMapLminOverLmate 0.5
            --alignSJstitchMismatchNmax 5 -1 5 5
            --chimSegmentMin 10
            --chimJunctionOverhangMin 10
            --chimScoreDropMax 30
            --chimScoreJunctionNonGTAG 0
            --chimScoreSeparation 1
            --chimSegmentReadGapMax 3
            --chimMultimapNmax 50

        #elif str( $algo.params.settingsType ) == 'full':
            ## Extended parameter options

            ## Seed parameter options
            --seedSearchStartLmax ${algo.params.seed.seedSearchStartLmax}
            --seedSearchStartLmaxOverLread ${algo.params.seed.seedSearchStartLmaxOverLread}
            --seedSearchLmax ${algo.params.seed.seedSearchLmax}
            --seedMultimapNmax ${algo.params.seed.seedMultimapNmax}
            --seedPerReadNmax ${algo.params.seed.seedPerReadNmax}
            --seedPerWindowNmax ${algo.params.seed.seedPerWindowNmax}
            --seedNoneLociPerWindow ${algo.params.seed.seedNoneLociPerWindow}

            ## Alignment parameter options
            --alignIntronMin ${algo.params.align.alignIntronMin}
            --alignIntronMax ${algo.params.align.alignIntronMax}
            --alignMatesGapMax ${algo.params.align.alignMatesGapMax}
            --alignSJoverhangMin ${algo.params.align.alignSJoverhangMin}
            --alignSJstitchMismatchNmax ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax1} ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax2} ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax3} ${algo.params.align.alignSJstitchMismatchNmax.alignSJstitchMismatchNmax4}
            --alignSJDBoverhangMin ${algo.params.align.alignSJDBoverhangMin}
            --alignSplicedMateMapLmin ${algo.params.align.alignSplicedMateMapLmin}
            --alignSplicedMateMapLminOverLmate ${algo.params.align.alignSplicedMateMapLminOverLmate}
            --alignWindowsPerReadNmax ${algo.params.align.alignWindowsPerReadNmax}
            --alignTranscriptsPerWindowNmax ${algo.params.align.alignTranscriptsPerWindowNmax}
            --alignTranscriptsPerReadNmax ${algo.params.align.alignTranscriptsPerReadNmax}
            --alignEndsType ${algo.params.align.alignEndsType}
            --peOverlapNbasesMin ${algo.params.align.peOverlapNbasesMin}
            --peOverlapMMp ${algo.params.align.peOverlapMMp}
            ## Chimeric alignment parameter options
            #if str($chimOutType):
                --chimSegmentMin ${algo.params.chim_settings.chimSegmentMin}
                --chimScoreMin ${algo.params.chim_settings.chimScoreMin}
                --chimScoreDropMax $algo.params.chim_settings.chimScoreDropMax
                --chimScoreSeparation $algo.params.chim_settings.chimScoreSeparation
                --chimScoreJunctionNonGTAG $algo.params.chim_settings.chimScoreJunctionNonGTAG
                --chimSegmentReadGapMax $algo.params.chim_settings.chimSegmentReadGapMax
                --chimFilter $algo.params.chim_settings.chimFilter
                --chimJunctionOverhangMin $algo.params.chim_settings.chimJunctionOverhangMin
                --chimMainSegmentMultNmax $algo.params.chim_settings.chimMainSegmentMultNmax
                #if str($chimOutType) == 'Junctions':
                    --chimMultimapNmax $algo.params.chim_settings.chimMultimapNmax
                #else:
                    --chimMultimapNmax 0
                #end if
                --chimMultimapScoreRange $algo.params.chim_settings.chimMultimapScoreRange
            #end if

            ## Limits
                @LIMITS@
        #else:
            ## Go with STAR's default algorithmic settings,
            ## but we need to provide a reasonable default
            ## (taken from STAR-Fusion)
            ## for --chimSegmentMin in case the user enabled chimeric
            ## alignments (the STAR default is 0, which disables chimeric
            ## alignments). For consistency, also set
            ## --chimMultimapNmax to 1 when chimeric alignments are reported
            ## in Junctions format only.
            #if str($chimOutType):
                --chimSegmentMin 12
                #if str($chimOutType) == 'Junctions':
                    --chimMultimapNmax 1
                #end if
            #end if
        #end if

        --outBAMsortingThreadN \${GALAXY_SLOTS:-4}
        --outBAMsortingBinsN $perf.outBAMsortingBinsN
        --winAnchorMultimapNmax $perf.winAnchorMultimapNmax
        --limitBAMsortRAM \$((\${GALAXY_MEMORY_MB:-0}*1000000))

        ## Handle chimeric options and output
        #if str($chimOutType):
            --chimOutType $chimOutType
            #if 'Junctions' in str($chimOutType):
                --chimOutJunctionFormat 1
            #end if
        #end if

        ##outWig:
        @OUTWIG@
        &&
        ## recompress BAM output for smaller file size
        samtools view -b -o '$mapped_reads' Aligned.sortedByCoord.out.bam
        #if 'TranscriptomeSAM' in str($refGenomeSource.GTFconditional.quantmode_output.quantMode):
            ## same recompression for optional transcriptome BM
            &&
            samtools view -b -o '$transcriptome_mapped_reads' Aligned.toTranscriptome.out.bam
        #end if
        ##outWig:
        @OUTWIGOUTPUTS@
    ]]></command>

    <inputs>
        <!-- Read input: a single dataset, two individual mate datasets, or one
             paired collection. The command template reads input1/input2 or
             input.forward/input.reverse depending on the sPaired value. -->
        <conditional name="singlePaired">
            <param name="sPaired"
                   type="select"
                   label="Single-end or paired-end reads">
                <option value="single" selected="true">Single-end</option>
                <option value="paired">Paired-end (as individual datasets)</option>
                <option value="paired_collection">Paired-end (as collection)</option>
            </param>

            <!-- One dataset with all reads. -->
            <when value="single">
                <param name="input1"
                       type="data"
                       format="fastq,fasta,fastq.gz,fastqsanger.gz"
                       label="RNA-Seq FASTQ/FASTA file"/>
            </when>

            <!-- Two datasets: forward mates in input1, reverse mates in input2. -->
            <when value="paired">
                <param name="input1"
                       type="data"
                       format="fastq,fasta,fastq.gz,fastqsanger.gz"
                       label="RNA-Seq FASTQ/FASTA file, forward reads"/>
                <param name="input2"
                       type="data"
                       format="fastq,fasta,fastq.gz,fastqsanger.gz"
                       label="RNA-Seq FASTQ/FASTA file, reverse reads"/>
            </when>

            <!-- One collection of type "paired" holding both mates. -->
            <when value="paired_collection">
                <param name="input"
                       type="data_collection"
                       collection_type="paired"
                       format="fastq,fasta,fastq.gz,fastqsanger.gz"
                       label="RNA-Seq FASTQ/FASTA paired reads"/>
            </when>
        </conditional>

        <!-- Genome source: either a built-in index or a reference genome from
             the history for which a temporary index is created. Each branch
             has its own GTFconditional that selects how (and whether) a gene
             model is supplied; the quantification options offered depend on
             that choice (quantMode vs. quantModeNoGTF macro). -->
        <conditional name="refGenomeSource">
            <param name="geneSource" type="select" label="Custom or built-in reference genome" help="Built-ins were indexed using default options">
                <option value="indexed" selected="true">Use a built-in index</option>
                <option value="history">Use reference genome from history and create temporary index</option>
            </param>
            <when value="indexed">
                <conditional name="GTFconditional">
                    <param name="GTFselect" type="select"
                           label="Reference genome with or without an annotation"
                           help="Select the '... with builtin gene-model' option to select from the list of available indexes that were built with splice junction information. Select the '... without builtin gene-model' option to select from the list of available indexes without annotated splice junctions, and, optionally, provide your own splice-junction annotations.">
                        <option value="without-gtf-with-gtf" selected="true">use genome reference without builtin gene-model but provide a gtf</option>
                        <option value="without-gtf">use genome reference without builtin gene-model and do not provide a gtf</option>
                        <option value="with-gtf">use genome reference with builtin gene-model</option>
                    </param>
                    <!-- Index already contains a gene model. -->
                    <when value="with-gtf">
                        <expand macro="index_selection" with_gene_model="1" />
                        <expand macro="quantMode" />
                    </when>
                    <!-- Index without gene model; splice-junction options come from the SJDBOPTIONS macro. -->
                    <when value="without-gtf-with-gtf">
                        <expand macro="index_selection" with_gene_model="0" />
                        <expand macro="SJDBOPTIONS"/>
                        <expand macro="quantMode" />
                    </when>
                    <!-- Index without gene model and no annotation supplied. -->
                    <when value="without-gtf">
                        <expand macro="index_selection" with_gene_model="0" />
                        <expand macro="quantModeNoGTF" />
                    </when>
                </conditional>
            </when>
            <when value="history">
                <expand macro="ref_selection" />
                <conditional name="GTFconditional">
                    <param name="GTFselect" type="select"
                           label="Build index with or without known splice junctions annotation"
                           help="To build an index with known splice junctions annotated, you will have to provide a GTF or GFF3 dataset that describes the gene models (the location of genes, transcripts and exons) known for the reference genome.">
                        <option value="without-gtf">build index without gene-model</option>
                        <option value="with-gtf">build index with gene-model</option>
                    </param>
                    <when value="with-gtf">
                        <expand macro="SJDBOPTIONS"/>
                        <expand macro="quantMode" />
                    </when>
                    <when value="without-gtf">
                        <expand macro="quantModeNoGTF" />
                    </when>
                </conditional>
            </when>
        </conditional>

        <!-- Two-pass mapping. The option values are written verbatim after the
             twopassMode option by the command template, which is why two of
             them carry a second option name. Every branch defines both
             twopass_read_subset and sj_precalculated (as empty hidden
             parameters where unused) so the template can reference them
             unconditionally. -->
        <conditional name="twopass">
            <param argument="--twopassMode"
                   type="select"
                   label="Use 2-pass mapping for more sensitive novel splice junction discovery"
                   help="For a study with multiple samples, multisample 2-pass mapping is the most sensitive approach. It involves two separate runs of STAR for each sample, where, in the second run of each sample, the splice junctions found in any sample in the first runs are treated as additional known junctions. If you plan to use the mapping results as input for STAR-Fusion it is recommended that you use at least single-sample 2-pass mapping of all reads.">
                <option value="None">No</option>
                <option value="Basic">Yes, perform single-sample 2-pass mapping of all reads</option>
                <option value="Basic --twopass1readsN">Yes, but base novel splice junction detection in the first pass on a subset of all reads (faster, but less sensitive than single-sample 2-pass mode)</option>
                <option value="None --sjdbFileChrStartEnd">Yes, I want to use multi-sample 2-pass mapping and I have obtained splice junctions datasets of all samples through previous 1-pass runs of STAR.</option>
            </param>

            <!-- Single pass: nothing to configure. -->
            <when value="None">
                <param name="twopass_read_subset" type="hidden" value=""/>
                <param name="sj_precalculated" type="hidden" value=""/>
            </when>

            <!-- Single-sample two-pass on all reads: nothing to configure. -->
            <when value="Basic">
                <param name="twopass_read_subset" type="hidden" value=""/>
                <param name="sj_precalculated" type="hidden" value=""/>
            </when>

            <!-- Single-sample two-pass with a read subset in the first pass. -->
            <when value="Basic --twopass1readsN">
                <param name="sj_precalculated" type="hidden" value=""/>
                <param argument="--twopass1readsN"
                       name="twopass_read_subset"
                       type="integer"
                       min="1"
                       value="50000"
                       label="Number of reads to map in the first pass"/>
            </when>

            <!-- Multi-sample two-pass: junction datasets from earlier runs. -->
            <when value="None --sjdbFileChrStartEnd">
                <param name="twopass_read_subset" type="hidden" value=""/>
                <param name="sj_precalculated"
                       type="data"
                       multiple="true"
                       format="interval"
                       label="Pregenerated splice junctions datasets of your samples"/>
            </when>
        </conditional>
        <!-- Chimeric alignment reporting. An empty value disables chimeric
             output. The command template passes the user's chimMultimapNmax
             only when the value is exactly "Junctions"; for every WithinBAM
             variant it writes chimMultimapNmax 0 (extended settings) or leaves
             the option unset, so the help text refers to all WithinBAM
             options. -->
        <param argument="--chimOutType" type="select"
        label="Report chimeric alignments?"
        help="Choose if and how chimeric alignments should be reported. STAR-Fusion users should select the 'Junctions' option and use the resulting tabular dataset as input to STAR-Fusion. Everyone else: note that selecting any of the 'WithinBAM' options disables the --chimMultimapNmax setting in the algorithmic parameters section below (the tool will only consider uniquely mapped reads in the search for chimeric alignments). If you disable the reporting of chimeric alignments here, then all chimeric alignment settings in the algorithmic parameters section below will be ignored.">
            <option value="">Don't report chimeric alignments</option>
            <option value="Junctions">As separate tabular "Junctions" output (Junctions)</option>
            <option value="WithinBAM">Within the BAM output (together with regular alignments; WithinBAM)</option>
            <option value="WithinBAM HardClip">Within the BAM output (together with regular alignments; WithinBAM HardClip) hard-clipping in the CIGAR for supplemental chimeric alignments</option>
            <option value="WithinBAM SoftClip">Within the BAM output (together with regular alignments; WithinBAM SoftClip) soft-clipping in the CIGAR for supplemental chimeric alignments</option>
        </param>

        <!-- BAM output format: which SAM tags to write, the HI tag offset, the
             (fixed) primary-flag policy and the MAPQ value for unique mappers
             (outSAMmapqUnique macro). -->
        <section name="oformat" title="BAM output format specification" expanded="true">
            <!-- The help text is kept on one physical line: a line break inside
                 an attribute value is turned into a space by the XML parser, so
                 the parsed text is the same as before. -->
            <param argument="--outSAMattributes" type="select" display="checkboxes" multiple="true" optional="true"
            label="Read alignment tags to include in the BAM output"
            help="Note on using the XS tag: If the XS tag is used, STAR will filter out alignments with undefined strand (i.e., those containing only non-canonical unannotated junctions). Using this tag is recommended if you plan to use the STAR results with STAR-Fusion. In addition, it is required for compatibility with Cufflinks if your sequences come from an unstranded library preparation.">
                <expand macro="common_SAM_attributes"/>
                <option value="MC">MC (CIGAR string for mate/next segment)</option>
                <option value="XS">XS (strand flag, see parameter help below) </option>
                <option value="ch" selected="true">ch (used to indicate chimeric alignments)</option> <!--This is not the default in STAR-->
            </param>
            <!-- Passed to STAR as the start value of the HI tag. -->
            <param argument="--outSAMattrIHstart" name="HI_offset" type="select" display="radio"
            label="HI tag values should be">
                <option value="1" selected="true">one-based</option>
                <option value="0">zero-based</option>
            </param>
            <!-- Using - -outSAMprimaryFlag AllBestScore would cause a
            violation of the SAM/BAM spec, which says:
            "For each read/contig in a SAM file, it is required that one and
            only one line associated with the read satisfies
            ‘FLAG & 0x900 == 0’.
            This line is called the primary line of the read."

            Thus, this parameter has been removed from the tool interface:
            <param argument="- -outSAMprimaryFlag" type="boolean"
            truevalue="AllBestScore" falsevalue="OneBestScore" checked="false"
            label="Would you like all alignments with the best score labeled
            primary?"/> -->
            <param name="outSAMprimaryFlag" type="hidden" value="OneBestScore" />
            <expand macro="outSAMmapqUnique"/>
        </section>
        <!-- Output filters. The values of basic_filters other than
             "exclude_unmapped" are complete STAR options (name plus value) and
             are echoed unchanged by the command template. The thresholds in
             output_params2 are described with STAR's inclusive semantics:
             maxima are "no more than", minima are "at least". -->
        <section name="filter" title="Output filter criteria" expanded="true">
            <param name="basic_filters" type="select" display="checkboxes" multiple="true" optional="true"
            label="Exclude the following records from the BAM output">
                <option value="exclude_unmapped">Unmapped reads</option>
                <option value="--outFilterIntronStrands RemoveInconsistentStrands">Alignments that have junctions with inconsistent strands</option>
                <option value="--outFilterIntronMotifs RemoveNoncanonicalUnannotated">Alignments across unannotated non-canonical junctions</option>
                <option value="--outFilterIntronMotifs RemoveNoncanonical">All alignments across non-canonical junctions (recommended for compatibility with Cufflinks)</option>
            </param>
            <!-- Additional output parameter settings. -->
            <conditional name="output_params2">
                <param name="output_select2" type="select" label="Would you like to set additional output filters?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="yes">
                    <param argument="--outFilterType" type="boolean" truevalue="BySJout" falsevalue="Normal" checked="false" label="Would you like to keep only reads that contain junctions that passed filtering?"/>
                    <param argument="--outFilterMultimapScoreRange" type="integer" value="1" min="0" label="Score range below the maximum score for multimapping alignments"/>
                    <param argument="--outFilterMultimapNmax" type="integer" value="10" min="1" label="Maximum number of loci a read is allowed to map to" help="Alignments of a read are output only if the read maps to no more loci than this value; reads mapping to more loci will have no alignments output"/>
                    <param argument="--outFilterMismatchNmax" type="integer" value="10" min="0" label="Maximum number of mismatches per alignment" help="Alignments with more mismatches than this value will not be output"/>
                    <param argument="--outFilterMismatchNoverLmax" type="float" value="0.3" min="0" max="1" label="Maximum ratio of mismatches to mapped length" help="Alignments with a mismatch ratio greater than this value will not be output"/>
                    <param argument="--outFilterMismatchNoverReadLmax" type="float" value="1" min="0" max="1" label="Maximum ratio of mismatches to read length" help="Alignments with a mismatch ratio greater than this value will not be output"/>
                    <param argument="--outFilterScoreMin" type="integer" value="0" min="0" label="Minimum alignment score" help="Alignments must have a score of at least this value to be output"/>
                    <param argument="--outFilterScoreMinOverLread" type="float" value="0.66" min="0" max="1" label="Minimum alignment score, normalized to read length" help="Alignments must have a (normalized) score of at least this value to be output"/>
                    <param argument="--outFilterMatchNmin" type="integer" value="0" min="0" label="Minimum number of matched bases" help="Alignments must have at least this number of matched bases to be output"/>
                    <param argument="--outFilterMatchNminOverLread" type="float" value="0.66" min="0" max="1" label="Minimum number of matched bases, normalized to read length" help="Alignments must have a (normalized) number of matched bases of at least this value to be output"/>
                    <param argument="--outSAMmultNmax" type="integer" value="-1" min="-1" label="Maximum number of multimapping alignments to output for a read" help="A value of -1 (the default) results in all alignments (up to --outFilterMultimapNmax) being output" />
                    <param argument="--outSAMtlen" type="select" label="Calculation method for TLEN">
                        <option value="1" selected="true">leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate</option>
                        <option value="2">leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends</option>
                    </param>
                </when>
                <when value="no"/>
            </conditional>
        </section>

        <!-- Algorithmic settings. -->
        <section name="algo" title="Algorithmic settings" expanded="true">
            <conditional name="params">
                <!-- Selects which group of algorithmic options the command
                     template writes: none, a fixed preset, or the extended
                     list defined in the "full" branch. -->
                <param name="settingsType"
                       type="select"
                       label="Configure seed, alignment and limits options">
                    <option value="default" selected="true">Use Defaults</option>
                    <option value="star_fusion">Use parameters suggested for STAR-Fusion</option>
                    <option value="arriba">Use parameters suggested for Arriba</option>
                    <option value="full">Extended parameter list</option>
                </param>

                <!-- These branches have no inputs of their own; the STAR-Fusion
                     and Arriba presets are hard-coded in the command section. -->
                <when value="default"/>
                <when value="star_fusion"/>
                <when value="arriba"/>

                <when value="full">
                    <!-- Seed search options; each value is written to the STAR
                         option named by its "argument" attribute. -->
                    <section name="seed" title="Seed parameters" expanded="false">
                        <param argument="--seedSearchStartLmax"
                               type="integer"
                               value="50"
                               min="1"
                               label="Search start point through the read"/>
                        <param argument="--seedSearchStartLmaxOverLread"
                               type="float"
                               value="1.0"
                               min="0"
                               label="Search start point through the read, normalized to read length"/>
                        <param argument="--seedSearchLmax"
                               type="integer"
                               value="0"
                               min="0"
                               label="Maximum length of seeds"
                               help="Default of 0 indicates no maximum length"/>
                        <param argument="--seedMultimapNmax"
                               type="integer"
                               value="10000"
                               min="1"
                               label="Maximum number of mappings to use a piece in stitching"/>
                        <param argument="--seedPerReadNmax"
                               type="integer"
                               value="1000"
                               min="1"
                               label="Maximum number of seeds per read"/>
                        <param argument="--seedPerWindowNmax"
                               type="integer"
                               value="50"
                               min="1"
                               label="Maximum number of seeds per window"/>
                        <param argument="--seedNoneLociPerWindow"
                               type="integer"
                               value="10"
                               min="1"
                               label="Maximum number of one seed loci per window"/>
                    </section>

                    <!-- Alignment options; each value is written to the STAR
                         option named by its "argument" attribute. The four
                         alignSJstitchMismatchNmax values are emitted together,
                         in the order 1 to 4, as one option. -->
                    <section name="align" title="Alignment parameters" expanded="false">
                        <!-- Intron size and mate gap limits. -->
                        <param argument="--alignIntronMin"
                               type="integer"
                               value="21"
                               min="0"
                               label="Minimum intron size"/>
                        <param argument="--alignIntronMax"
                               type="integer"
                               value="0"
                               min="0"
                               label="Maximum intron size"/>
                        <param argument="--alignMatesGapMax"
                               type="integer"
                               value="0"
                               min="0"
                               label="Maximum gap between two mates"/>

                        <!-- Splice junction overhang and stitching. -->
                        <param argument="--alignSJoverhangMin"
                               type="integer"
                               value="5"
                               min="1"
                               label="Minimum overhang for spliced alignments"/>
                        <section name="alignSJstitchMismatchNmax"
                                 title="Maximum number of mismatches for stitching of the splice junctions (-1: no limit)"
                                 expanded="true">
                            <param argument="--alignSJstitchMismatchNmax"
                                   name="alignSJstitchMismatchNmax1"
                                   type="integer"
                                   value="0"
                                   min="-1"
                                   label="Non-canonical motifs"/>
                            <param argument="--alignSJstitchMismatchNmax"
                                   name="alignSJstitchMismatchNmax2"
                                   type="integer"
                                   value="-1"
                                   min="-1"
                                   label="GT/AG and CT/AC motif"/>
                            <param argument="--alignSJstitchMismatchNmax"
                                   name="alignSJstitchMismatchNmax3"
                                   type="integer"
                                   value="0"
                                   min="-1"
                                   label="GC/AG and CT/GC motif"/>
                            <param argument="--alignSJstitchMismatchNmax"
                                   name="alignSJstitchMismatchNmax4"
                                   type="integer"
                                   value="0"
                                   min="-1"
                                   label="AT/AC and GT/AT motif"/>
                        </section>
                        <param argument="--alignSJDBoverhangMin"
                               type="integer"
                               value="3"
                               min="1"
                               label="Minimum overhang for annotated spliced alignments"/>

                        <!-- Spliced mate length requirements. -->
                        <param argument="--alignSplicedMateMapLmin"
                               type="integer"
                               value="0"
                               min="0"
                               label="Minimum mapped length for a read mate that is spliced"/>
                        <param argument="--alignSplicedMateMapLminOverLmate"
                               type="float"
                               value="0.66"
                               min="0"
                               label="Minimum mapped length for a read mate that is spliced, normalized to mate length"/>

                        <!-- Search-space limits. -->
                        <param argument="--alignWindowsPerReadNmax"
                               type="integer"
                               value="10000"
                               min="1"
                               label="Maximum number of windows per read"/>
                        <param argument="--alignTranscriptsPerWindowNmax"
                               type="integer"
                               value="100"
                               min="1"
                               label="Maximum number of transcripts per window"/>
                        <param argument="--alignTranscriptsPerReadNmax"
                               type="integer"
                               value="10000"
                               min="1"
                               label="Maximum number of different alignments per read to consider"/>

                        <!-- Read end handling; no option is marked selected. -->
                        <param argument="--alignEndsType"
                               type="select"
                               label="type of read ends alignment">
                            <option value="Local">standard local alignment with soft-clipping allowed</option>
                            <option value="EndToEnd">force end-to-end read alignment, do not soft-clip</option>
                            <option value="Extend5pOfRead1">fully extend only the 5p of the read1, all other ends: local alignment</option>
                            <option value="Extend5pOfReads12">fully extend only the 5p of the both read1 and read2, all other ends: local alignment</option>
                        </param>

                        <!-- Merging of overlapping paired-end mates. -->
                        <param argument="--peOverlapNbasesMin"
                               type="integer"
                               value="0"
                               min="0"
                               label="minimum number of overlap bases to trigger mates merging and realignment"/>
                        <param argument="--peOverlapMMp"
                               type="float"
                               value="0.01"
                               min="0"
                               max="1"
                               label="maximum proportion of mismatched bases in the overlap area"/>
                    </section>
                    <section name="chim_settings" title="Chimeric alignment parameters" expanded="false">
                        <!-- Segment length and scoring thresholds for chimeric detection -->
                        <param argument="--chimSegmentMin" type="integer" min="1" value="12"
                               label="Minimum length of chimeric segment"
                               help="For small numbers this will cause large number of chimeric alignments. A value of 12 is commonly used."/>
                        <param argument="--chimScoreMin" type="integer" min="0" value="0"
                               label="Minimum total (summed) score of chimeric segments"/>
                        <param argument="--chimScoreDropMax" type="integer" min="0" value="20"
                               label="Maximum difference of chimeric score from read length"/>
                        <param argument="--chimScoreSeparation" type="integer" min="0" value="10"
                               label="Minimum difference between the best chimeric score and the next one"/>
                        <!-- No min is declared here, so negative penalties such as the default are accepted -->
                        <param argument="--chimScoreJunctionNonGTAG" type="integer" value="-1"
                               label="Penalty for a non-GT/AG chimeric junction"/>
                        <!-- Junction geometry -->
                        <param argument="--chimJunctionOverhangMin" type="integer" min="0" value="20"
                               label="Minimum overhang for a chimeric junction"/>
                        <param argument="--chimSegmentReadGapMax" type="integer" min="0" value="0"
                               label="Maximum gap in the read sequence between chimeric segments"/>
                        <!-- Checkbox mapped onto the two textual values banGenomicN / None -->
                        <param argument="--chimFilter" type="boolean" truevalue="banGenomicN" falsevalue="None" checked="true"
                               label="Discard chimeric alignments with Ns in the genome sequence around the chimeric junction"/>
                        <!-- Multi-mapping behaviour of chimeric segments -->
                        <param argument="--chimMainSegmentMultNmax" type="integer" min="1" value="10"
                               label="Maximum number of multi-alignments for the main chimeric segment."
                               help="A value of 1 prohibits multimapping main segments"/>
                        <param argument="--chimMultimapNmax" type="integer" min="1" value="1"
                               label="Maximum number of chimeric multi-alignments"
                               help="The default value of 1 only considers unique alignments. If you chose to report chimeric alignments alongside regular ones in the BAM output, this setting is ignored and only uniquely mapping chimeric reads get reported. "/>
                        <param argument="--chimMultimapScoreRange" type="integer" min="0" value="1"
                               label="Score range for multi-mapping chimeras"
                               help="The threshold below the best chimeric score that a multimapping chimera must have to be output. This is ignored unless --chimMultimapNmax is above 1"/>
                    </section>
                    <expand macro="limits" />
                </when>
            </conditional>
        </section>
        <section name="perf" title="Performance tweaks / Troubleshooting" expanded="false">
            <!-- Two tuning knobs: sorting bins (memory during BAM sorting) and anchor multimap limit (runtime) -->
            <param argument="--outBAMsortingBinsN" type="integer" value="50" min="1"
                   label="Number of genome bins for coordinate-sorting"
                   help="Higher values result in lower RAM requirements during the sorting step. The default value is 50. Tweak this if you are facing memory-related errors."/>
            <!-- The form does not allow values below the default of 50 (min equals value) -->
            <param argument="--winAnchorMultimapNmax" type="integer" value="50" min="50"
                   label="Maximum number of loci anchors are allowed to map to"
                   help="Higher value can increase the runtime significantly. This value should be set greater or equal to --outFilterMultimapNmax"/>
        </section>
        <expand macro="outWig"/>
    </inputs>

    <outputs>
        <!-- Final STAR summary log, collected from Log.final.out -->
        <data name="output_log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="Log.final.out">
            <expand macro="dbKeyActions"/>
        </data>
        <!-- Chimeric junction table; only created when chimOutType contains 'Junctions' -->
        <data name="chimeric_junctions" format="interval" label="${tool.name} on ${on_string}: chimeric junctions" from_work_dir="Chimeric.out.junction">
            <filter>('Junctions' in chimOutType)</filter>
            <expand macro="dbKeyActions"/>
        </data>
        <!-- Splice junction table, collected from SJ.out.tab -->
        <data name="splice_junctions" format="interval" label="${tool.name} on ${on_string}: splice junctions.bed" from_work_dir="SJ.out.tab">
            <expand macro="dbKeyActions"/>
        </data>
        <!-- Genome alignments; no from_work_dir is declared for this dataset -->
        <data name="mapped_reads" format="bam" label="${tool.name} on ${on_string}: mapped.bam">
            <expand macro="dbKeyActions"/>
        </data>
        <!-- Transcriptome alignments; only created when quantMode includes TranscriptomeSAM -->
        <data name="transcriptome_mapped_reads" format="unsorted.bam" label="${tool.name} on ${on_string}: transcriptome-mapped.bam">
            <filter>'TranscriptomeSAM' in refGenomeSource['GTFconditional']['quantmode_output']['quantMode']</filter>
            <expand macro="dbKeyActions"/>
        </data>
        <!-- Per-gene read counts; only created when quantMode includes GeneCounts -->
        <data name="reads_per_gene" format="tabular" label="${tool.name} on ${on_string}: reads per gene" from_work_dir="ReadsPerGene.out.tab">
            <filter>'GeneCounts' in refGenomeSource['GTFconditional']['quantmode_output']['quantMode']</filter>
            <expand macro="outCountActions"/>
        </data>
        <!-- Signal track outputs are contributed by the outWigOutputs macro -->
        <expand macro="outWigOutputs"/>
    </outputs>

    <tests>
        <test expect_num_outputs="3">
            <!-- single-end reads, gzipped reference FASTA from the history, default settings -->
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa.gz"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <!-- log is regex-matched, BAM is compared by size within delta -->
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test with cached genome index -->
        <test expect_num_outputs="3">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- cached index entry 001, selected through the with-gtf branch -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="indexed"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf"/>
                    <param name="genomeDir" value="001"/>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <!-- same expected files as the history-genome default test -->
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test gtf file and GeneCounts mode -->
        <test expect_num_outputs="4">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- history genome built together with test1.gtf; GeneCounts adds the fourth output -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf"/>
                    <param name="sjdbOverhang" value="75"/>
                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>
                    <conditional name="quantmode_output">
                        <param name="quantMode" value="GeneCounts"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634"/>
            <!-- also checks the column_names metadata of the count table -->
            <output name="reads_per_gene" file="tophat_test_reads_per_gene.txt">
                <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand"/>
            </output>
        </test>
        <!-- Test if an alternative feature can be provided as a splicing unit.
         If the splice feature set is not set correctly,
        "Fatal INPUT FILE error, no exon lines in the GTF file" is expected -->
        <test expect_num_outputs="4">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- no_exon.gtf is paired with the custom feature name "fakexon" -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf"/>
                    <param name="sjdbOverhang" value="75"/>
                    <param name="sjdbGTFfile" value="no_exon.gtf" ftype="gtf"/>
                    <param name="sjdbGTFfeatureExon" value="fakexon"/>
                    <conditional name="quantmode_output">
                        <param name="quantMode" value="GeneCounts"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <!-- this test has its own expected BAM fixture -->
            <output name="mapped_reads" file="rnastar_test_mapped_reads_fakexon.bam" compare="sim_size" delta="634"/>
            <output name="reads_per_gene" file="tophat_test_reads_per_gene.txt">
                <metadata name="column_names" value="GeneID,Counts_unstrand,Counts_firstStrand,Counts_secondStrand"/>
            </output>
        </test>
        <!-- test gtf file and TranscriptomeSAM mode -->
        <test expect_num_outputs="4">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- TranscriptomeSAM adds the transcriptome_mapped_reads output -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf"/>
                    <param name="sjdbOverhang" value="75"/>
                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>
                    <conditional name="quantmode_output">
                        <param name="quantMode" value="TranscriptomeSAM"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634"/>
            <output name="transcriptome_mapped_reads" file="rnastar_test_transcriptome_mapped_reads.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test a cached index without GTF annotation plus a separately supplied GTF file, with both TranscriptomeSAM and GeneCounts modes -->
        <test expect_num_outputs="5">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- cached index entry 000 combined with test1.gtf; both quantification modes requested -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="indexed"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="without-gtf-with-gtf"/>
                    <param name="genomeDir" value="000"/>
                    <param name="sjdbOverhang" value="75"/>
                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>
                    <conditional name="quantmode_output">
                        <param name="quantMode" value="TranscriptomeSAM GeneCounts"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <!-- three base outputs plus one per quantification mode -->
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634"/>
            <output name="reads_per_gene" file="tophat_test_reads_per_gene.txt"/>
            <output name="transcriptome_mapped_reads" file="rnastar_test_transcriptome_mapped_reads.bam" compare="sim_size" delta="634"/>
        </test>
        <test expect_num_outputs="3">
            <!-- full parameter set: output filters, a score threshold and a seed search setting -->
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <!-- attribute list additionally requests XS -->
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch,XS"/>
            </section>
            <section name="filter">
                <param name="basic_filters" value="exclude_unmapped,--outFilterIntronMotifs RemoveNoncanonical"/>
                <conditional name="output_params2">
                    <param name="output_select2" value="yes"/>
                    <param name="outFilterScoreMinOverLread" value="0.9"/>
                </conditional>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="full"/>
                    <section name="seed">
                        <param name="seed_select" value="yes"/>
                        <param name="seedSearchStartLmax" value="25"/>
                    </section>
                </conditional>
            </section>
            <!-- dedicated test2 fixtures; BAM size tolerance is tighter here (200) -->
            <output name="output_log" file="rnastar_test2.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test2_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test2_mapped_reads.bam" compare="sim_size" delta="200"/>
        </test>
        <test expect_num_outputs="4">
            <!-- star_fusion settings with chimeric junction output, uncompressed FASTQ input -->
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="test3.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="test3.ref.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <!-- requesting Junctions enables the chimeric_junctions output -->
            <param name="chimOutType" value="Junctions"/>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch,XS"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="star_fusion"/>
                </conditional>
            </section>
            <!-- only the junction table is compared; up to two differing lines are tolerated -->
            <output name="chimeric_junctions" file="test3.chimjunc.tabular" compare="diff" lines_diff="2"/>
        </test>
        <test expect_num_outputs="4">
            <!-- tests fastqsanger.gz: same star_fusion scenario, but with gzipped FASTQ input -->
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="test3.fastqsanger.gz" ftype="fastqsanger.gz"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="test3.ref.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <param name="chimOutType" value="Junctions"/>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch,XS"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="star_fusion"/>
                </conditional>
            </section>
            <!-- expected junction table is the same fixture as for the uncompressed input -->
            <output name="chimeric_junctions" file="test3.chimjunc.tabular" compare="diff" lines_diff="2"/>
        </test>
        <test expect_num_outputs="3">
            <!-- full settings with only the non-canonical intron motif filter selected -->
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="filter">
                <param name="basic_filters" value="--outFilterIntronMotifs RemoveNoncanonical"/>
                <conditional name="output_params2">
                    <param name="output_select2" value="yes"/>
                </conditional>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="full"/>
                </conditional>
            </section>
            <!-- compared against the same fixtures as the default-settings tests -->
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- twopass mode tests -->
        <!-- test Basic twopass -->
        <test expect_num_outputs="3">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- genome comes from a history FASTA -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <conditional name="twopass">
                <param name="twopassMode" value="Basic"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="filter">
                <param name="basic_filters" value="exclude_unmapped,--outFilterIntronMotifs RemoveNoncanonical"/>
                <conditional name="output_params2">
                    <param name="output_select2" value="yes"/>
                </conditional>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="full"/>
                </conditional>
            </section>
            <!-- twopass-specific fixtures -->
            <output name="output_log" file="rnastar_test_twopass.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions_twopass.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_twopass.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test Basic twopass without a gtf file option -->
        <test expect_num_outputs="3">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- cached index entry 000 via the without-gtf branch -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="indexed"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="without-gtf"/>
                    <param name="genomeDir" value="000"/>
                </conditional>
            </conditional>
            <conditional name="twopass">
                <param name="twopassMode" value="Basic"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="filter">
                <param name="basic_filters" value="exclude_unmapped,--outFilterIntronMotifs RemoveNoncanonical"/>
                <conditional name="output_params2">
                    <param name="output_select2" value="yes"/>
                </conditional>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="full"/>
                </conditional>
            </section>
            <!-- same twopass fixtures as the history-genome variant -->
            <output name="output_log" file="rnastar_test_twopass.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions_twopass.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_twopass.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test Basic twopass with a built-in gtf index option -->
        <test expect_num_outputs="3">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- cached index entry 001 via the with-gtf branch -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="indexed"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf"/>
                    <param name="genomeDir" value="001"/>
                </conditional>
            </conditional>
            <conditional name="twopass">
                <param name="twopassMode" value="Basic"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="filter">
                <param name="basic_filters" value="exclude_unmapped,--outFilterIntronMotifs RemoveNoncanonical"/>
                <conditional name="output_params2">
                    <param name="output_select2" value="yes"/>
                </conditional>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="full"/>
                </conditional>
            </section>
            <!-- same twopass fixtures as the other Basic twopass tests -->
            <output name="output_log" file="rnastar_test_twopass.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions_twopass.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_twopass.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test "multisample" twopass -->
        <test expect_num_outputs="3">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
            </conditional>
            <!-- precalculated junctions: the twopass fixture is fed back in as input -->
            <conditional name="twopass">
                <param name="twopassMode" value="None --sjdbFileChrStartEnd"/>
                <param name="sj_precalculated" value="rnastar_test_splicejunctions_twopass.bed"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <!-- three basic filters selected at once -->
            <section name="filter">
                <param name="basic_filters" value="exclude_unmapped,--outFilterIntronMotifs RemoveNoncanonicalUnannotated,--outFilterIntronMotifs RemoveNoncanonical"/>
                <conditional name="output_params2">
                    <param name="output_select2" value="yes"/>
                </conditional>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="full"/>
                </conditional>
            </section>
            <output name="output_log" file="rnastar_test_twopass.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions_twopass.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_twopass.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test "genomeSAindexNbases" parameter -->
        <test expect_num_outputs="3">
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <!-- index built with genomeSAindexNbases set to 14 instead of the 5 used by most tests -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="14"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <!-- dedicated fixtures for this index setting -->
            <output name="output_log" file="rnastar_test_genomeSAindexNbases.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_genomeSAindexNbases.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_genomeSAindexNbases.bam" compare="sim_size" delta="634"/>
        </test>
        <test expect_num_outputs="3">
            <!-- second genomeSAindexNbases check, this time with the value 10 -->
            <conditional name="singlePaired">
                <param name="sPaired" value="single"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="10"/>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <!-- fixtures carry the _02 suffix -->
            <output name="output_log" file="rnastar_test_genomeSAindexNbases_02.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_genomeSAindexNbases_02.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_genomeSAindexNbases_02.bam" compare="sim_size" delta="634"/>
        </test>
        <!-- test paired-end input and outWig -->
        <test expect_num_outputs="6">
            <!-- paired-end reads given as two separate datasets -->
            <conditional name="singlePaired">
                <param name="sPaired" value="paired"/>
                <param name="input1" value="tophat_in2.fastqsanger" ftype="fastqsanger"/>
                <param name="input2" value="tophat_in3.fastqsanger" ftype="fastqsanger"/>
            </conditional>
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history"/>
                <param name="genomeFastaFiles" value="tophat_test.fa"/>
                <param name="genomeSAindexNbases" value="5"/>
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf"/>
                    <param name="sjdbOverhang" value="75"/>
                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>
                    <conditional name="quantmode_output">
                        <param name="quantMode" value="GeneCounts"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch"/>
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default"/>
                </conditional>
            </section>
            <!-- wiggle signal from read2 only, strands not separated -->
            <conditional name="outWig">
                <param name="outWigType" value="wiggle"/>
                <param name="outWigTypeSecondWord" value="read2"/>
                <param name="outWigStrand" value="false"/>
            </conditional>
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline"/>
            <output name="splice_junctions" file="rnastar_test_splicejunctions_PE.bed"/>
            <output name="mapped_reads" file="rnastar_test_mapped_reads_PE.bam" compare="sim_size" delta="634"/>
            <output name="reads_per_gene" file="tophat_test_reads_per_gene_PE.txt"/>
            <!-- Each signal output is checked against its own fixture. Previously the
                 unique+multiple track was compared with the Unique file as well, so that
                 output was never verified against unique+multiple data.
                 NOTE(review): confirm the UniqueMultiple fixture is present in test-data. -->
            <output name="signal_unique_str1" file="tophat_Signal.Unique.both.read2.out.wig" ftype="wig"/>
            <output name="signal_uniquemultiple_str1" file="tophat_Signal.UniqueMultiple.both.read2.out.wig" ftype="wig"/>
        </test>
        <!--
            Test: paired-end input given as a single paired collection, with
            gene counting and stranded bedGraph signal output.
            Eight outputs are expected: the log, splice junctions, the BAM,
            reads per gene, and four signal tracks (str1 and str2, each as
            unique and unique+multiple).
        -->
        <test expect_num_outputs="8">
            <!--
                Both mates arrive as the forward/reverse elements of one paired
                collection. The elements are declared with ftype "fastq" even
                though the test file names end in .fastqsanger.
            -->
            <conditional name="singlePaired">
                <param name="sPaired" value="paired_collection" />
                <param name="input" >
                    <collection type="paired">
                        <element name="forward" value="tophat_revlib_R1.fastqsanger" ftype="fastq"/>
                        <element name="reverse" value="tophat_revlib_R2.fastqsanger" ftype="fastq"/>
                    </collection>
                </param>
            </conditional>
            <!-- Same reference setup as the preceding paired-end test: history FASTA plus GTF. -->
            <conditional name="refGenomeSource">
                <param name="geneSource" value="history" />
                <param name="genomeFastaFiles" value="tophat_test.fa" />
                <param name="genomeSAindexNbases" value="5" />
                <conditional name="GTFconditional">
                    <param name="GTFselect" value="with-gtf" />
                    <param name="sjdbOverhang" value="75"/>
                    <param name="sjdbGTFfile" value="test1.gtf" ftype="gtf"/>
                    <!-- GeneCounts is what makes the reads_per_gene output asserted below appear. -->
                    <conditional name="quantmode_output">
                        <param name="quantMode" value="GeneCounts"/>
                    </conditional>
                </conditional>
            </conditional>
            <section name="oformat">
                <param name="outSAMattributes" value="NH,HI,AS,nM,NM,MD,jM,jI,MC,ch" />
            </section>
            <section name="algo">
                <conditional name="params">
                    <param name="settingsType" value="default" />
                </conditional>
            </section>
            <!--
                Only the signal type is set here; the remaining outWig options
                are left at the tool defaults.
                NOTE(review): the str2 outputs asserted below imply the default
                is stranded output; confirm against the outWig parameter
                definitions.
            -->
            <conditional name="outWig">
                <param name="outWigType" value="bedGraph" />
            </conditional>

            <!-- The log is matched as a multi-line regular expression, not byte for byte. -->
            <output name="output_log" file="rnastar_test.log" compare="re_match_multiline" />
            <!-- Splice junctions are checked by content: exactly two lines, each matching one pattern. -->
            <output name="splice_junctions">
                <assert_contents>
                    <has_n_lines n="2"/>
                    <has_line_matching expression="test_chromosome\s+251\s+350\s+1\s+1\s+0\s+24\s+0\s+33"/>
                    <has_line_matching expression="test_chromosome\s+401\s+500\s+1\s+1\s+0\s+25\s+0\s+36"/>
                </assert_contents>
            </output>
            <!-- The BAM is checked by size only: 4711 with a tolerance of 800. -->
            <output name="mapped_reads" >
                <assert_contents>
                    <has_size value="4711" delta="800"/>
                </assert_contents>
            </output>
            <output name="reads_per_gene" file="tophat_test_reads_per_gene_PE_rev.txt" />
            <!--
                NOTE(review): for each strand, the unique and unique+multiple
                signal outputs are compared against the same reference file;
                presumably the two tracks are identical for this dataset.
                Confirm.
            -->
            <output name="signal_unique_str1" file="tophat_rev_Signal.Unique.str1.out.bg" ftype="bedgraph"/>
            <output name="signal_uniquemultiple_str1" file="tophat_rev_Signal.Unique.str1.out.bg" ftype="bedgraph"/>
            <output name="signal_unique_str2" file="tophat_rev_Signal.Unique.str2.out.bg" ftype="bedgraph" />
            <output name="signal_uniquemultiple_str2" file="tophat_rev_Signal.Unique.str2.out.bg" ftype="bedgraph" />
        </test>
      </tests>
    <help><![CDATA[
**What it does**

STAR_ is an ultrafast universal RNA-seq aligner.

**Compatibility Notes**

STAR has a huge number of options to filter alignments and to configure the
exact format of its output.

Some tools you may plan to use in your downstream analysis of the results are known to be sensitive to these settings or combinations of them.

*STAR-Fusion*

STAR-Fusion_ can use the chimeric junctions output of STAR as input, but you
need to enable **chimeric alignment detection** by STAR for that dataset to be
generated. Hence, be sure to select:

**Report chimeric alignments?**: `As separate tabular "Junctions" output (Junctions)`.

In addition, for best results it is recommended_ that you

- use **2-pass mapping** for more sensitive novel splice junction discovery

- under *BAM output format specification*,
  **Read alignment tags to include in the BAM output**: select `XS` as an
  additional tag to generate (this is the equivalent of using
  `--outSAMstrandField intronMotif` on the command line)

- under *Algorithmic settings*, **Configure seed, alignment and limits options**:
  `use parameters suggested for STAR-Fusion`.

*Arriba*

Arriba_ can use as input either the BAM output of STAR with the chimeric
alignments included, or the BAM and chimeric junctions outputs as separate
files, but you need to enable **chimeric alignment detection** by STAR for
those datasets to be generated. Hence, be sure to select either:

**Report chimeric alignments?**: `As separate tabular "Junctions" output (Junctions)` or **Report chimeric alignments?**: `Within the BAM output (together with regular alignments; WithinBAM)`.

In addition, the following parameters_ related to chimeric alignment are recommended for improved sensitivity:

- under *Output filter criteria*,
  **Would you like to set additional output filters?**: select `Yes` to set
  **Maximum number of alignments to output a read's alignment results, plus 1** to 50

- under *Algorithmic settings*, **Configure seed, alignment and limits options**:
  `use parameters suggested for Arriba`.

*Cufflinks*

.. class:: infomark

   Cufflinks is no longer considered the best tool for use downstream of
   STAR. Consider using *StringTie* instead, which should also pose no
   compatibility issues.

To avoid compatibility issues with Cufflinks you should:

- select **XS** as a *Read alignment tag to include in the BAM output* if (and
  only if) your sequenced reads come from an unstranded library prep
- *not* select the *jM* and *jI* tags for inclusion
- keep the **HI** tag selected and
- select *HI tag values should be* **zero-based**
- exclude **All alignments across non-canonical junctions** under *Output
  filter criteria -> Exclude the following records from the BAM output*

-----

**Attribution**

Minor tweaks to output names to suit downstream purposes, toolshed automated
dependencies and odds and ends of other code and documentation comprising
this tool were originally written by Ross Lazarus and have been licensed under
the Creative Commons
`BY-NC-ND 3.0 license <http://creativecommons.org/licenses/by-nc-nd/3.0/>`__.

.. _STAR: https://github.com/alexdobin/STAR
.. _STAR-Fusion: https://github.com/STAR-Fusion/STAR-Fusion
.. _Arriba: https://github.com/suhrig/arriba
.. _recommended: https://github.com/STAR-Fusion/STAR-Fusion/wiki#alternatively-kickstart-mode-running-star-yourself-and-then-running-star-fusion-using-the-existing-outputs
.. _parameters: https://arriba.readthedocs.io/en/latest/workflow/
    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Fri, 01 Sep 2023 13:17:29 +0000
parents	4df95e2d7f61
children	3e94726bfa9d