view gmap/gsnap.xml @ 2:52da588232b0

Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
author Jim Johnson <jj@umn.edu>
date Fri, 21 Oct 2011 11:38:55 -0500
parents d58d272914e7
children f49f5a460c74
line wrap: on
line source

<tool id="gsnap" name="GSNAP" version="2.0.0">
  <description>Genomic Short-read Nucleotide Alignment Program</description>
  <requirements>
      <!-- Executable invoked by the command template of this tool -->
      <requirement type="binary">gsnap</requirement>
      <!-- proposed tag for added datatype dependencies -->
      <!-- These names match the format attributes of the history-dataset inputs declared in the inputs section -->
      <requirement type="datatype">gmapdb</requirement>
      <requirement type="datatype">gmapsnpindex</requirement>
      <requirement type="datatype">splicesites.iit</requirement>
      <requirement type="datatype">introns.iit</requirement>
  </requirements>
  <version_string>gsnap --version</version_string>
  <command>
    ## Cheetah template that assembles the gsnap command line from the tool parameters.
    ## Optional values are appended only when their rendered string is non-empty;
    ## otherwise the flag is omitted entirely.
    #import os.path, re
    gsnap
    --nthreads="4" --ordered
    ## Reference genome: a gmapdb dataset from the history, or the path of a built-in index.
    #if $refGenomeSource.genomeSource == "gmapdb":
      --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
    #else:
      --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
    #end if
    ## Only a two-character rendering is accepted as a kmer, which rejects both '' and 'None'.
    #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
      --kmer=$refGenomeSource.kmer
    #end if
    ## Known splice sites / introns
    #if $refGenomeSource.use_splicing.src == 'gmapdb':
      #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
        -s $refGenomeSource.use_splicing.splicemap.value
      #end if
    #elif $refGenomeSource.use_splicing.src == 'history':
      #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
        ## Render the dataset to its string form first: os.path functions need a string, not the parameter object.
        -S $os.path.dirname($refGenomeSource.use_splicing.splicemap.__str__) -s $os.path.basename($refGenomeSource.use_splicing.splicemap.__str__)
      #end if
    #end if
    ## Known SNPs
    #if $refGenomeSource.use_snps.src == 'gmapdb':
       #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
        -v $refGenomeSource.use_snps.snpindex.value
       #end if
    #elif $refGenomeSource.use_snps.src == 'history':
       #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
         -V $refGenomeSource.use_snps.snpindex.extra_files_path -v $refGenomeSource.use_snps.snpindex.metadata.snps_name
       #end if
    #end if
    #if $mode.__str__ != '':
      --mode=$mode
    #end if
    #if $computation.options == "advanced":
      #if $computation.max_mismatches.__str__ != '':
        --max-mismatches=$computation.max_mismatches
      #end if
      ## Boolean params render directly as their flag text (or as nothing).
      $computation.query_unk_mismatch
      $computation.genome_unk_mismatch
      #if $computation.terminal_threshold.__str__ != '':
        --terminal-threshold=$computation.terminal_threshold
      #end if
      #if $computation.indel_penalty.__str__ != '':
        --indel-penalty=$computation.indel_penalty
      #end if
      #if $computation.indel_endlength.__str__ != '':
        --indel-endlength=$computation.indel_endlength
      #end if
      #if $computation.max_middle_insertions.__str__ != '':
        --max-middle-insertions=$computation.max_middle_insertions
      #end if
      #if $computation.max_middle_deletions.__str__ != '':
        --max-middle-deletions=$computation.max_middle_deletions
      #end if
      #if $computation.max_end_insertions.__str__ != '':
        --max-end-insertions=$computation.max_end_insertions
      #end if
      #if $computation.max_end_deletions.__str__ != '':
        --max-end-deletions=$computation.max_end_deletions
      #end if
      #if $computation.suboptimal_levels.__str__ != '':
        --suboptimal-levels=$computation.suboptimal_levels
      #end if
      #if $computation.adapter_strip.__str__ != '':
        --adapter-strip=$computation.adapter_strip
      #end if
      ## trim_mismatch_score is declared in the inputs; forward it instead of silently dropping it.
      #if $computation.trim_mismatch_score.__str__ != '':
        --trim-mismatch-score=$computation.trim_mismatch_score
      #end if
      ## gmap options
      #if $computation.gmap_mode.__str__ != '' and  $computation.gmap_mode.__str__ != 'None':
        --gmap-mode='$computation.gmap_mode'
      #end if
      #if $computation.trigger_score_for_gmap.__str__ != '':
        --trigger-score-for-gmap=$computation.trigger_score_for_gmap
      #end if
      ## The per-mode limits are only passed when that mode was selected.
      ## re.search needs a string, so the selection is rendered before matching.
      #if $computation.max_gmap_pairsearch.__str__ != '' and $re.search("pairsearch",$computation.gmap_mode.__str__):
        --max-gmap-pairsearch=$computation.max_gmap_pairsearch
      #end if
      #if $computation.max_gmap_terminal.__str__ != '' and $re.search("terminal",$computation.gmap_mode.__str__):
        --max-gmap-terminal=$computation.max_gmap_terminal
      #end if
      #if $computation.max_gmap_improvement.__str__ != '' and $re.search("improv",$computation.gmap_mode.__str__):
        --max-gmap-improvement=$computation.max_gmap_improvement
      #end if
      #if $computation.microexon_spliceprob.__str__ != '':
        --microexon-spliceprob=$computation.microexon_spliceprob
      #end if
    #end if
    #if $splicing.options == "advanced":
      $splicing.novelsplicing
      #if $splicing.localsplicedist.__str__ != '':
        --localsplicedist=$splicing.localsplicedist
      #end if
      #if $splicing.local_splice_penalty.__str__ != '':
        --local-splice-penalty=$splicing.local_splice_penalty
      #end if
      #if $splicing.distant_splice_penalty.__str__ != '':
        --distant-splice-penalty=$splicing.distant_splice_penalty
      #end if
      #if $splicing.local_splice_endlength.__str__ != '':
        --local-splice-endlength=$splicing.local_splice_endlength
      #end if
      #if $splicing.distant_splice_endlength.__str__ != '':
        --distant-splice-endlength=$splicing.distant_splice_endlength
      #end if
      ## shortend_splice_endlength is declared in the inputs; forward it instead of silently dropping it.
      #if $splicing.shortend_splice_endlength.__str__ != '':
        --shortend-splice-endlength=$splicing.shortend_splice_endlength
      #end if
      #if $splicing.distant_splice_identity.__str__ != '':
        --distant-splice-identity=$splicing.distant_splice_identity
      #end if
    #end if
    #if $output.options == "advanced":
      #if $output.npath.__str__ != '':
        --npath=$output.npath
      #end if
      $output.quiet_if_excessive
      $output.show_refdiff
      $output.clip_overlap
    #end if
    #if $result.format == "sam":
      --format=sam
      $result.no_sam_headers
      #if $result.read_group_id.__str__.strip != '':
         --read-group-id='$result.read_group_id'
      #end if
      #if $result.read_group_name.__str__ != '':
         --read-group-name='$result.read_group_name'
      #end if
      #if $result.read_group_library.__str__ != '':
         --read-group-library='$result.read_group_library'
      #end if
      #if $result.read_group_platform.__str__ != '':
         --read-group-platform='$result.read_group_platform'
      #end if
      #if $result.quality_shift.__str__ != '':
        --quality-shift=$result.quality_shift
      #end if
    #elif $result.format == "goby":
      #if $result.goby_output.__str__ != '':
        --goby-output='$result.goby_output'
      #end if
      #if $result.creads_window_start.__str__ != '':
        --creads-window-start=$result.creads_window_start
      #end if
      #if $result.creads_window_end.__str__ != '':
        --creads-window-end=$result.creads_window_end
      #end if
      $result.creads_complement
    #end if
    ## Renders as --split-output=gsnap_out when checked and as nothing otherwise.
    ## The split output datasets are collected from gsnap_out.* in the working directory,
    ## so the flag has to reach gsnap for those files to exist.
    $split_output
    ## TODO - do we need these options (Is it tally XOR runlength?):
    ## --tallydir=  --use-tally=tally
    ## --runlengthdir  --use-runlength=runlength
    ## Input reads come last, after all options.
    #if $seq.format == "gsnap_fasta":
      $seq.circularinput $seq.gsnap
    #elif $seq.format == "fastq":
      #if $seq.barcode_length.__str__ != '':
        --barcode-length=$seq.barcode_length
      #end if
      #if $seq.fastq_id_start.__str__ != '':
        --fastq-id-start=$seq.fastq_id_start
      #end if
      #if $seq.fastq_id_end.__str__ != '':
        --fastq-id-end=$seq.fastq_id_end
      #end if
      #if $seq.filter_chastity.__str__ != 'off':
        --filter-chastity=$seq.filter_chastity
      #end if
      #if $seq.paired.ispaired.__str__ == "yes":
        ## orientation is declared in the inputs; forward the selection (FR, RF or FF).
        #if $seq.paired.orientation.__str__ != '':
          --orientation=$seq.paired.orientation
        #end if
        #if $seq.paired.pairmax_dna.__str__ != '':
          --pairmax-dna=$seq.paired.pairmax_dna
        #end if
        #if $seq.paired.pairmax_rna.__str__ != '':
          --pairmax-rna=$seq.paired.pairmax_rna
        #end if
        $seq.fastq $seq.paired.fastq
      #else
        $seq.fastq
      #end if
    #end if
    ## With split output the alignments go to the gsnap_out.* files, so only stderr is captured;
    ## otherwise stdout becomes the single results dataset.
    ## The rendered string is tested, like every other check in this template.
    #if $split_output.__str__ != ''
      2> $gsnap_stderr
    #else
      2> $gsnap_stderr > $results
    #end if

  </command>
  <inputs>
    <!-- Input data -->
    <!-- Read input: either FASTQ (optionally paired) or GSNAP-style FASTA -->
    <conditional name="seq">
      <param name="format" type="select" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select the input format" help="">
        <option value="fastq">Fastq</option>
        <option value="gsnap_fasta">GSNAP fasta</option>
      </param>
      <when value="fastq">
        <param name="fastq" type="data" format="fastq" label="Select a fastq dataset" />
        <!-- The boolean renders as "yes"/"no", which selects the matching <when> below -->
        <conditional name="paired">
          <param name="ispaired" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Paired Reads"/>
          <when value="no"/>
          <when value="yes">
            <param name="fastq" type="data" format="fastq" label="Select the paired reads reverse dataset" />
            <param name="orientation" type="select" label="Orientation of paired-end reads" help="">
              <option value="FR">fwd-rev, typical Illumina default</option>
              <option value="RF">rev-fwd, for circularized inserts</option>
              <option value="FF">fwd-fwd, same strand</option>
            </param>
            <param name="pairmax_dna"  type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
            <param name="pairmax_rna"  type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used if novelsplicing is specified or a splice file is provided.  Should probably match the value for localsplicedist."/>
          </when>
        </conditional>
        <param name="barcode_length" type="integer" value="" optional="true"  label="Amount of barcode to remove from start of read (default 0)" />
        <param name="fastq_id_start" type="integer" value="" optional="true"  label="Starting field  of identifier in FASTQ header, space-delimited, starting from 1" />
        <param name="fastq_id_end" type="integer" value="" optional="true"  label="Ending field  of identifier in FASTQ header, space-delimited, starting from 1"
             help="Examples:
                  &lt;br&gt;@HWUSI-EAS100R:6:73:941:1973#0/1
                  &lt;br&gt; . start=1, end=1 (default)  => identifier is HWUSI-EAS100R:6:73:941:1973#0/1
                  &lt;br&gt;@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=36
                  &lt;br&gt; . start=1, end=1  => identifier is SRR001666.1
                  &lt;br&gt; . start=2, end=2  => identifier is 071112_SLXA-EAS1_s_7:5:1:817:345
                  &lt;br&gt; . start=1, end=2  => identifier is SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345"
        />
        <!-- "off" is the first option, so it is the default selection -->
        <param name="filter_chastity" type="select" label="Skip reads marked by the Illumina chastity program"
               help="String after the accession having a  'Y'  after the first colon, like this:
                    &lt;br&gt;@accession 1:Y:0:CTTGTA
                    &lt;br&gt;where the  'Y'  signifies filtering by chastity.
                    &lt;br&gt; For 'either', a  'Y'  on either end of a paired-end read will be filtered.
                    &lt;br&gt;  For 'both', a  'Y'  is required on both ends of a paired-end read (or on the only end of a single-end read)"
          >
          <option value="off">off - no filtering</option>
          <option value="either">either - a 'Y' on either end of a paired-end read</option>
          <option value="both">both - a 'Y' is required on both ends of a paired-end read or the only end of a single-end read</option>
        </param>
      </when>
      <when value="gsnap_fasta">
        <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/>
        <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
      </when>
    </conditional>

    <!-- Alignment mode; the empty value ("standard") selects no special mode -->
    <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap database.">
        <option value="">standard</option>
        <option value="cmet-stranded">cmet-stranded   for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
        <option value="cmet-nonstranded">cmet-nonstranded   for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
        <option value="atoi-stranded">atoi-stranded   for RNA-editing tolerance (A-to-G changes)</option>
        <option value="atoi-nonstranded">atoi-nonstranded   for RNA-editing tolerance (A-to-G changes)</option>
    </param>

    <!-- GMAPDB for alignment -->
    <!--
      Reference selection. Both branches expose the same nested parameters
      (kmer, use_splicing, use_snps). The "indexed" branch fills its option lists
      from columns of gmap_indices.loc; the "gmapdb" branch fills them from the
      metadata of the selected history dataset.
    -->
    <conditional name="refGenomeSource">
     <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Align To&lt;/H2&gt;Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
        <option value="indexed">Use a built-in index</option>
        <option value="gmapdb">Use a gmapdb from your history</option>
      </param>
      <when value="indexed">
        <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
          <options from_file="gmap_indices.loc">
            <column name="uid" index="0" />
            <column name="dbkey" index="1" />
            <column name="name" index="2" />
            <column name="kmers" index="3" />
            <column name="maps" index="4" />
            <column name="snps" index="5" />
            <column name="value" index="6" />
          </options>
        </param>

        <!-- Column 3 of the .loc row matching the selected index is split on commas to list the kmer sizes -->
        <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
          <options from_file="gmap_indices.loc">
            <column name="name" index="3"/>
            <column name="value" index="3"/>
            <filter type="param_value" ref="gmapindex" column="6"/>
            <filter type="multiple_splitter" column="3" separator=","/>
            <filter type="add_value" name="" value=""/>
            <filter type="sort_by" column="3"/>
          </options>
        </param>

        <conditional name="use_splicing">
          <param name="src" type="select" label="Known Splicesite and Introns"
                 help="Look for splicing involving known sites or known introns at short or long distances
                  See README instructions for the distinction between known sites and known introns">
            <option value="none" selected="true">None</option>
            <option value="gmapdb">From the GMAP Database</option>
            <option value="history">A Map in your history</option>
          </param>
          <when value="none"/>
          <when value="history">
            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
              help="built with GMAP IIT"/>
          </when>
          <when value="gmapdb">
            <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
              <options from_file="gmap_indices.loc">
                <column name="name" index="4"/>
                <column name="value" index="4"/>
                <filter type="param_value" ref="gmapindex" column="6"/>
                <filter type="multiple_splitter" column="4" separator=","/>
                <filter type="add_value" name="" value=""/>
                <filter type="sort_by" column="4"/>
              </options>
            </param>
          </when>
        </conditional>

        <conditional name="use_snps">
          <param name="src" type="select" label="Known SNPs" help="for SNP tolerant alignments">
            <option value="none" selected="true">None</option>
            <option value="gmapdb">From the GMAP Database</option>
            <option value="history">A SNP Index in your history</option>
          </param>
          <when value="none"/>
          <when value="history">
            <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
              help="built with GMAP SNP Index"/>
          </when>
          <when value="gmapdb">
            <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
              <options from_file="gmap_indices.loc">
                <column name="name" index="5"/>
                <column name="value" index="5"/>
                <filter type="param_value" ref="gmapindex" column="6"/>
                <filter type="multiple_splitter" column="5" separator=","/>
                <filter type="add_value" name="" value=""/>
                <filter type="sort_by" column="5"/>
              </options>
            </param>
          </when>
        </conditional>

      </when>
      <when value="gmapdb">
        <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
              help="A GMAP database built with GMAP Build"/>
        <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
          <options>
            <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
          </options>
        </param>

        <conditional name="use_splicing">
          <param name="src" type="select" label="Known Splicesite and Introns"
                 help="Look for splicing involving known sites or known introns at short or long distances
                  See README instructions for the distinction between known sites and known introns">
            <option value="none" selected="true">None</option>
            <option value="gmapdb">From the GMAP Database</option>
            <option value="history">A Map in your history</option>
          </param>
          <when value="none"/>
          <when value="history">
            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
              help="built with GMAP IIT"/>
          </when>
          <when value="gmapdb">
            <param name="splicemap" type="select"  data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
              <options>
                <!-- separator spelled out so this filter reads the same as the kmers and snps filters -->
                <filter type="data_meta" ref="gmapdb" key="maps" multiple="True" separator=","/>
              </options>
            </param>
          </when>
        </conditional>

        <conditional name="use_snps">
          <param name="src" type="select" label="Known SNPs" help="for SNP tolerant alignments">
            <option value="none" selected="true">None</option>
            <option value="gmapdb">From the GMAP Database</option>
            <option value="history">A SNP Index in your history</option>
          </param>
          <when value="none"/>
          <when value="history">
            <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
              help="built with GMAP SNP Index"/>
          </when>
          <when value="gmapdb">
            <param name="snpindex" type="select"  data_ref="gmapdb" label="Use database containing known SNPs" help="">
              <options>
                <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
              </options>
            </param>
          </when>
        </conditional>

      </when>
    </conditional>

    <!-- Computation options -->
    <!-- Optional tuning parameters; every numeric field may be left blank -->
    <conditional name="computation">
      <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
        <option value="default">Use default settings</option>
        <option value="advanced">Set Computation Options</option>
      </param>
      <when value="default"/>
      <when value="advanced">
         <!-- The validator rejects negative values, so the default is requested by leaving the field blank -->
         <param name="max_mismatches" type="float" value="" optional="true" label="Maximum number of mismatches allowed (uses default when left blank)"
              help="Defaults to the ultrafast level of ((readlength+2)/12 - 2)).
                    If specified between 0.0 and 1.0, then treated as a fraction
                    of each read length.  Otherwise, treated as an integral number
                    of mismatches (including indel and splicing penalties)
                    For RNA-Seq, you may need to increase this value slightly
                    to align reads extending past the ends of an exon.">
            <validator type="in_range" message="The mismatches must be >= 0." min="0."/>
         </param>
         <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/>
         <!-- Inverted flag: checked (the default) emits nothing, unchecked emits the option that turns counting off -->
         <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/>
         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 3)"
                help="(from one end of the read to the best possible position at the other end).  To turn off terminal alignments, set this to a high value." />
         <param name="indel_penalty"  type="integer" value="" optional="true" label="Penalty for an indel (default 2)"
                help="Counts against mismatches allowed.  To find indels, make indel-penalty less than or equal to max-mismatches.  A value &lt; 2 can lead to false positives at read ends" />
         <param name="indel_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
         <param name="max_middle_insertions"  type="integer" value="" optional="true" label="Maximum number of middle insertions allowed (default 9)" />
         <param name="max_middle_deletions"  type="integer" value="" optional="true" label="Maximum number of middle deletions allowed (default 30)" />
         <param name="max_end_insertions"  type="integer" value="" optional="true" label="Maximum number of end insertions allowed (default 3)" />
         <param name="max_end_deletions"  type="integer" value="" optional="true" label="Maximum number of end deletions allowed (default 6)" />
         <param name="suboptimal_levels"  type="integer" value="" optional="true" label="Report suboptimal hits beyond best hit (default 0)"
                help="All hits with best score plus suboptimal-levels are reported" />
         <param name="adapter_strip"  type="select" label="Method for removing adapters from reads"
                help="paired removes adapters from paired-end reads if a concordant or paired alignment cannot be found from the original read">
           <option value="paired" selected="true">paired</option>
           <option value="off">off</option>
         </param>
         <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
                help="to turn off trimming, specify 0"/>

         <!-- Options for GMAP alignment within GSNAP -->
          <param name="gmap_mode" type="select" multiple="true" optional="true" label="Cases to use GMAP for complex alignments containing multiple splices or indels" help="">
            <option value="pairsearch">pairsearch</option>
            <option value="terminal">terminal</option>
            <option value="improve">improve</option>
          </param>
          <param name="trigger_score_for_gmap" type="integer" value="" optional="true" label="GMAP pairsearch threshold (default 5)"
                 help="Try GMAP pairsearch on nearby genomic regions if best score (the total of both ends if paired-end) exceeds this value (default 5)" />
          <!-- Label reworded so it no longer reads the same as the trigger score above -->
          <param name="max_gmap_pairsearch" type="integer" value="" optional="true" label="Maximum candidate ends for GMAP pairsearch (default 3)"
                 help="Perform GMAP pairsearch on nearby genomic regions up to this many candidate ends (default 3)." />
          <param name="max_gmap_terminal" type="integer" value="" optional="true" label="GMAP terminal threshold (default 3)"
                 help="Perform GMAP terminal on nearby genomic regions up to this many candidate ends (default 3)." />
          <param name="max_gmap_improvement" type="integer" value="" optional="true" label="GMAP improvement threshold (default 3)"
                 help="Perform GMAP improvement on nearby genomic regions up to this many candidate ends (default 3)." />
          <param name="microexon_spliceprob"  type="float" value="" optional="true" label="GMAP microexons threshold (default .90)"
                 help="Allow microexons only if one of the splice site probabilities is greater than this value." >
            <validator type="in_range" message="The microexons  probability must be between 0. and 1." min="0." max="1."/>
          </param>
      </when>
    </conditional>

    <!-- Optional splicing parameters for RNA-Seq; every numeric field may be left blank -->
    <conditional name="splicing">
      <param name="options" type="select" label="&lt;HR&gt;Splicing options for RNA-Seq" help="">
        <option value="default">Use default settings</option>
        <option value="advanced">Set Splicing Options</option>
      </param>
      <when value="default"/>
      <when value="advanced">
         <!-- Splicing options for RNA-Seq -->
         <!-- use-splices This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splices -->
         <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
         <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
         <param name="localsplicedist"  type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
         <param name="local_splice_penalty"  type="integer" value="" optional="true" label="Penalty for a local splice (default 0).  Counts against mismatches allowed"/>
         <param name="distant_splice_penalty"  type="integer" value="" optional="true" label="Penalty for a distant splice (default 3).  Counts against mismatches allowed"/>
         <param name="local_splice_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for local spliced alignments (default 15, min is 14)"/>
         <param name="distant_splice_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for distant spliced alignments (default 16, min is 14)"/>
         <!-- Label previously duplicated the distant-splice text; the default is left out because this file does not state it -->
         <param name="shortend_splice_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for short-end spliced alignments"/>
         <param name="distant_splice_identity"  type="float" value="" optional="true" label="Minimum identity at end required for distant spliced alignments (default 0.95)"/>
      </when>
    </conditional>

    <!-- Output data -->
    <!-- Optional reporting parameters; the three booleans render as their command-line flag when checked and as nothing otherwise -->
    <conditional name="output">
      <param name="options" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Output options for RNA-Seq" help="">
        <option value="default">Use default settings</option>
        <option value="advanced">Set Output Options</option>
      </param>
      <when value="default"/>
      <when value="advanced">
        <param name="npath"  type="integer" value="" optional="true" label="Maximum number of paths to print (default 100)"/>
        <param name="quiet_if_excessive" type="boolean" checked="false" truevalue="--quiet-if-excessive" falsevalue="" label="Quiet if Excessive" 
               help="If more than maximum number of paths are found, then nothing is printed."/>
        <param name="show_refdiff" type="boolean" checked="false" truevalue="--show-refdiff" falsevalue="" label="Show SNP-tolerant alignment" 
               help="For GSNAP output in SNP-tolerant alignment, shows all differences relative to the reference genome as lower case (otherwise, it shows all differences relative to both the reference and alternate genome)"/>
        <param name="clip_overlap" type="boolean" checked="false" truevalue="--clip-overlap" falsevalue="" label="Clip Overlap" 
               help="For paired-end reads whose alignments overlap, clip the overlapping region."/>
      </when>
    </conditional>
    <!-- Result format; the selected value is also referenced by the output labels and their change_format rules -->
    <conditional name="result">
      <param name="format" type="select" label="Select the output format" help="">
        <option value="sam">SAM</option>
        <option value="goby">Goby</option>
        <option value="gsnap">GSNAP default output</option>
      </param>
      <!-- The native GSNAP format takes no extra parameters -->
      <when value="gsnap"/>
      <when value="sam">
        <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/>
        <param name="read_group_id" type="text" value="" optional="true" label="Value to put into read-group id (RG-ID) field"/>
        <param name="read_group_name" type="text" value="" optional="true" label="Value to put into read-group name (RG-SM) field"/>
        <param name="read_group_library" type="text" value="" optional="true" label="Value to put into read-group library (RG-LB) field"/>
        <param name="read_group_platform" type="text" value="" optional="true" label="Value to put into read-group library platform (RG-PL) field"/>
        <param name="quality_shift"  type="integer" value="" optional="true" label="Shift FASTQ quality scores by this amount in SAM output (default -31)"/>
      </when>
      <when value="goby">
        <param name="goby_output" type="text" value="" label="Basename for Goby output files"/>
        <param name="creads_window_start"  type="integer" value="" optional="true" label="Compact reads window start (default: 0=start of file)"/>
        <param name="creads_window_end"  type="integer" value="" optional="true" label="Compact reads window end (default: 0=end of file)"/>
        <param name="creads_complement" type="boolean" truevalue="--creads-complement" falsevalue="" checked="false" label="Complement read sequences (without reversing)"/>
      </when>
    </conditional>
    <!-- Toggle between a single results dataset and one dataset per alignment category; the output filters test this value. The "gsnap_out" prefix in truevalue matches the from_work_dir names of the split outputs. -->
    <param name="split_output" type="boolean" truevalue="--split-output=gsnap_out" falsevalue="" checked="false" label="Separate outputs" 
       help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results"/> 
  </inputs>
  <outputs>
    <!-- Receives the stderr redirect from the command template; created for every run (no filter) -->
    <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: stderr"/>
    <!-- Single combined result, created only when split output is off; datatype switches to sam when SAM format is selected -->
    <data format="txt" name="results" label="${tool.name} on ${on_string} ${result.format}" >
      <filter>(split_output == False)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <!-- nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, concordant_mult -->
    <!--
      Per-category outputs, created only when split output is on. Each dataset is
      collected from the matching gsnap_out.* file in the working directory, and
      its label names that category so the history items can be told apart.
    -->
    <data format="txt" name="concordant_mult" label="${tool.name} on ${on_string} concordant_mult.${result.format}"  from_work_dir="gsnap_out.concordant_mult">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="concordant_uniq" label="${tool.name} on ${on_string} concordant_uniq.${result.format}"  from_work_dir="gsnap_out.concordant_uniq">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="paired_mult" label="${tool.name} on ${on_string} paired_mult.${result.format}"  from_work_dir="gsnap_out.paired_mult">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="paired_uniq" label="${tool.name} on ${on_string} paired_uniq.${result.format}"  from_work_dir="gsnap_out.paired_uniq">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="unpaired_mult" label="${tool.name} on ${on_string} unpaired_mult.${result.format}"  from_work_dir="gsnap_out.unpaired_mult">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="unpaired_uniq" label="${tool.name} on ${on_string} unpaired_uniq.${result.format}"  from_work_dir="gsnap_out.unpaired_uniq">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="halfmapping_mult" label="${tool.name} on ${on_string} uniq.${result.format}"  from_work_dir="gsnap_out.halfmapping_mult">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="halfmapping_uniq" label="${tool.name} on ${on_string} uniq.${result.format}"  from_work_dir="gsnap_out.halfmapping_uniq">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>
    <data format="txt" name="nomapping" label="${tool.name} on ${on_string} uniq.${result.format}"  from_work_dir="gsnap_out.nomapping">
      <filter>(split_output == True)</filter>
      <change_format>
        <when input="result['format']" value="sam" format="sam"/>
      </change_format>
    </data>

  </outputs>
  <!-- No functional tests are defined for this tool; the element is intentionally empty. -->
  <tests>
  </tests> 

  <help>

**What it does**

GSNAP_ (Genomic Short-read Nucleotide Alignment Program) is a short read aligner which can align both single- and paired-end reads as short as 14nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state. It is developed by Thomas D. Wu of Genentech, Inc.  
Publication_ citation: Thomas D. Wu, Serban Nacu. "Fast and SNP-tolerant detection of complex variants and splicing in short reads." Bioinformatics. 2010 Apr 1;26(7):873-81. Epub 2010 Feb 10. (Also available from PMC_.)

.. _GSNAP: http://research-pub.gene.com/gmap/
.. _Publication: http://bioinformatics.oupjournals.org/cgi/content/full/26/7/873
.. _PMC: http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2844994/?tool=pubmed

------

**Know what you are doing**

.. class:: warningmark

You will want to read the README_.

.. _README: http://research-pub.gene.com/gmap/src/README

------

**Input formats**

Input to GSNAP should be either in FASTQ or FASTA format.  

The FASTQ input may include quality scores, which will then be included in SAM
output, if that output format is selected. 

For FASTA format, you should include one line per read (or end of a
paired-end read).  The same FASTA file can have a mixture of
single-end and paired-end reads of varying lengths, if desired.

Single-end reads:

Each FASTA entry should contain one short read per line, like this::

  >Header information
  AAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTA

Each short read can have a different length.  However, the entire read
needs to be on a single line, and may not wrap around multiple lines.
If it extends to a second line, GSNAP will think that the read is
paired-end.


Paired-end reads:

Each FASTA entry should contain two short reads, one per line, like
this::

  >Header information
  AAAACATTCTCCTCCGCATAAGCCTAGTAGATTA
  GGCGTAGGTAGAAGTAGAGGTTAAGGCGCGTCAG

By default, the program assumes that the second end is in the reverse
complement direction compared with the first end.  If they are in the
same direction, you may need to use the --circular-input (or -c) flag.

(The Galaxy tool "FASTA Width formatter" can be used to reformat FASTA files so that each sequence is on a single line.)

------

**Output formats in GSNAP**

SAM output format

Default GSNAP format
  See the README_




  </help>
</tool>