changeset 8:a89fec682254

gmap/gsnap updated to version 2011-11-30
author Jim Johnson <jj@umn.edu>
date Thu, 08 Dec 2011 11:00:46 -0600
parents 561503a442f0
children 7f032685214b
files gmap.xml gmap_build.xml gsnap.xml iit_store.xml snpindex.xml
diffstat 5 files changed, 112 insertions(+), 57 deletions(-) [+]
line wrap: on
line diff
--- a/gmap.xml	Tue Nov 08 13:26:41 2011 -0600
+++ b/gmap.xml	Thu Dec 08 11:00:46 2011 -0600
@@ -1,13 +1,7 @@
-<tool id="gmap" name="GMAP" version="2.0.0">
+<tool id="gmap" name="GMAP" version="2.0.1">
   <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
   <requirements>
     <requirement type="binary">gmap</requirement>
-    <!-- proposed tag for added datatype dependencies -->
-    <requirement type="datatype">gmapdb</requirement>
-    <requirement type="datatype">gmap_annotation</requirement>
-    <requirement type="datatype">gmap_splicesites</requirement>
-    <requirement type="datatype">gmap_introns</requirement>
-    <requirement type="datatype">gmap_snps</requirement>
   </requirements>
   <version_string>gmap --version</version_string>
   <command>
@@ -49,9 +43,11 @@
     #elif $result.format == "sam":
       --format=$result.sam_paired_read
       $result.no_sam_headers 
+      #* Removed in gmap version 2011-11-30
       #if len($result.noncanonical_splices.__str__) > 0
          --noncanonical-splices=$result.noncanonical_splices
       #end if
+      *#
       #if len($result.read_group_id.__str__) > 0
          --read-group-id=$result.read_group_id
       #end if
@@ -70,35 +66,50 @@
     #if $computation.options == "advanced":
       $computation.nosplicing
       $computation.cross_species
-      --min-intronlength=$computation.min_intronlength
-      --intronlength=$computation.intronlength
-      --localsplicedist=$computation.localsplicedist
-      --totallength=$computation.totallength
-      --trimendexons=$computation.trimendexons
+      #if len($computation.min_intronlength.__str__) > 0
+        --min-intronlength=$computation.min_intronlength
+      #end if
+      #if len($computation.intronlength.__str__) > 0
+        --intronlength=$computation.intronlength
+      #end if
+      #if len($computation.localsplicedist.__str__) > 0
+        --localsplicedist=$computation.localsplicedist
+      #end if
+      #if len($computation.totallength.__str__) > 0
+        --totallength=$computation.totallength
+      #end if
+      #if len($computation.trimendexons.__str__) > 0
+        --trimendexons=$computation.trimendexons
+      #end if
       --direction=$computation.direction
       --canonical-mode=$computation.canonical
       --prunelevel=$computation.prunelevel
       --allow-close-indels=$computation.allow_close_indels
-      --microexon-spliceprob=$computation.microexon_spliceprob
-      #if int($computation.chimera_margin) >= 0:
+      #if len($computation.microexon_spliceprob.__str__) > 0:
+        --microexon-spliceprob=$computation.microexon_spliceprob
+      #end if
+      #if len($computation.chimera_margin.__str__) > 0:
         --chimera-margin=$computation.chimera_margin
       #end if
     #end if
     #if $advanced.options == "used":
-      #if int($advanced.npaths) >= 0:
+      #if len($advanced.npaths.__str__) > 0:
         --npaths=$advanced.npaths
       #end if
-      #if int($advanced.chimera_overlap) > 0:
+      #if len($advanced.suboptimal_score.__str__) > 0:
+        --suboptimal-score=$advanced.suboptimal_score
+      #end if
+      #if len($advanced.chimera_overlap.__str__) > 0:
-        --chimera_overlap=$advanced.chimera_overlap
+        --chimera-overlap=$advanced.chimera_overlap
       #end if
       $advanced.protein
       $advanced.tolerant
       $advanced.nolengths
       $advanced.invertmode
-      #if int($advanced.introngap) > 0:
+      #if len($advanced.introngap.__str__) > 0:
         --introngap=$advanced.introngap
       #end if
-      #if int($advanced.wraplength) > 0:
+      #if len($advanced.wraplength.__str__) > 0:
         --wraplength=$advanced.wraplength
       #end if
     #end if
@@ -200,11 +211,22 @@
       <when value="default"/>
       <when value="advanced">
        <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
-       <param name="min_intronlength" type="integer" value="9" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." />	
-       <param name="intronlength" type="integer" value="1000000" label="Max length for one intron (default 1000000)" />	
-       <param name="localsplicedist" type="integer" value="200000" label="Max length for known splice sites at ends of sequence (default 200000)" />	
-       <param name="totallength"  type="integer" value="2400000" label="Max total intron length (default 2400000)" />	
-       <param name="chimera_margin" type="integer" value="40" label="Amount of unaligned sequence that triggers search for a chimera (default is 40, 0 is off)" />	
+       <param name="min_intronlength" type="integer" value="" optional="true" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." >
+         <validator type="in_range" message="min_intronlength must not be negative" min="0" />
+       </param>
+       <param name="intronlength" type="integer" value="" optional="true" label="Max length for one intron (default 1000000)" >
+         <validator type="in_range" message="intronlength must not be negative" min="0" />
+       </param>
+       <param name="localsplicedist" type="integer" value="" optional="true" label="Max length for known splice sites at ends of sequence (default 200000)" >
+         <validator type="in_range" message="localsplicedist must not be negative" min="0" />
+       </param>
+       <param name="totallength"  type="integer" value="" optional="true" label="Max total intron length (default 2400000)" >
+         <validator type="in_range" message="totallength must not be negative" min="0" />
+       </param>
+       <param name="chimera_margin" type="integer" value="" optional="true" label="Amount of unaligned sequence that triggers search for a chimera"
+              help="Default is 40. To turn off, set to a large value (greater than the query length)" >
+         <validator type="in_range" message="chimera_margin must not be negative" min="0" />
+       </param>
        <param name="direction"  type="select" label="cDNA direction">	
          <option value="auto">auto</option>
          <option value="sense_force">sense_force</option>
@@ -212,7 +234,9 @@
          <option value="sense_filter">sense_filter</option>
          <option value="antisense_filter">antisense_filter</option>
        </param>
-       <param name="trimendexons"  type="integer" value="12" label="Trim end exons with fewer than given number of matches (in nt, default 12)" />	
+       <param name="trimendexons"  type="integer" value="" optional="true" label="Trim end exons with fewer than given number of matches (in nt, default 12)" >	
+         <validator type="in_range" message="trimendexons must be positive" min="1" />
+       </param>
        <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
        
        <param name="canonical"  type="select" label="Reward for canonical and semi-canonical introns">	
@@ -225,7 +249,8 @@
          <option value="0">no</option>
          <option value="2">only for high-quality alignments</option>
        </param>
-       <param name="microexon_spliceprob" type="float" value="0.90" label="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >	
+       <param name="microexon_spliceprob" type="float" value="" optional="true" label="Micro Exon splice probability threshold"
+              help="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
-         <validator type="in_range" message="slice probability between 0.00 and 1.00" min="0" max="1"/> 
+         <validator type="in_range" message="splice probability between 0.00 and 1.00" min="0" max="1"/>
        </param>
        <param name="prunelevel"  type="select" label="Pruning level">	
@@ -255,11 +280,24 @@
         <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
         <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
        </param>
-       <param name="introngap" type="integer" value="3" label="Nucleotides to show on each end of intron (default=3)" />	
-       <param name="wraplength" type="integer" value="50" label="Line Wrap length for alignment (default=50)" />	
-       <param name="npaths" type="integer" value="-1" optional="true"
-              label="Maximum number of paths to show.  Ignored if negative.  If 0, prints two paths if chimera detected, else one." />	
-       <param name="chimera_overlap" type="integer" value="0" label="Overlap to show, if any, at chimera breakpoint" />	
+       <param name="introngap" type="integer" value="" optional="true" label="Nucleotides to show on each end of intron (default=3)">
+         <validator type="in_range" message="introngap must not be negative" min="0" />
+       </param>
+       <param name="wraplength" type="integer" value="" optional="true" label="Line Wrap length for alignment (default=50)">
+         <validator type="in_range" message="wraplength must be positive" min="1" />
+       </param>
+       <param name="npaths" type="integer" value="" optional="true"
+              label="Maximum number of paths to show.  If 0, prints two paths if chimera detected, else one." >
+         <validator type="in_range" message="npaths must not be negative" min="0" />
+       </param>
+       <param name="suboptimal_score" type="integer" value="" optional="true"
+              label="Report only paths whose score is within this value of the best path"
+              help="By default the program prints all paths found." >
+         <validator type="in_range" message="suboptimal_score must not be negative" min="0" />
+       </param>
+       <param name="chimera_overlap" type="integer" value="" optional="true" label="Overlap to show, if any, at chimera breakpoint (default 0)" >
+         <validator type="in_range" message="chimera_overlap must not be negative" min="0" />
+       </param>
        <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue="" 
               label="Translates cDNA with corrections for frameshifts"/>
        <param name="protein" type="select" label="Protein alignment" help="">
@@ -294,10 +332,10 @@
       <option value="coords">coords in table format</option>
       <option value="sam" selected="true">SAM format</option>
     </param>
-      <when value="gmap"/>
+      <when value="gmap">
+      </when>
       <when value="summary"/>
       <when value="align">
-
       </when>
       <when value="continuous">
       </when>
@@ -320,11 +358,13 @@
       <when value="sam">
         <param name="sam_paired_read" type="boolean" truevalue="sampe" falsevalue="samse" checked="false" label="SAM paired reads"/>
         <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/>
+        <!--  Removed in gmap version 2011-11-30
         <param name="noncanonical_splices" type="select" label="Print non-canonical genomic gaps greater than 20 nt in CIGAR string as STRING.">
           <option value="">Use default</option>
           <option value="N">N</option>
           <option value="D">D</option>
         </param>
+        -->
         <param name="read_group_id" type="text" value="" label="Value to put into read-group id (RG-ID) field"/>
         <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/>
         <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
--- a/gmap_build.xml	Tue Nov 08 13:26:41 2011 -0600
+++ b/gmap_build.xml	Thu Dec 08 11:00:46 2011 -0600
@@ -2,9 +2,6 @@
   <description>a database genome index for GMAP and GSNAP</description>
   <requirements>
       <requirement type="binary">gmap_build</requirement>
-      <!-- proposed tag for added datatype dependencies -->
-      <requirement type="datatype">gmapdb</requirement>
-      <requirement type="datatype">gmap_snps</requirement>
   </requirements>
   <version_string>gmap --version</version_string>
   <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>
--- a/gsnap.xml	Tue Nov 08 13:26:41 2011 -0600
+++ b/gsnap.xml	Thu Dec 08 11:00:46 2011 -0600
@@ -1,12 +1,7 @@
-<tool id="gsnap" name="GSNAP" version="2.0.0">
+<tool id="gsnap" name="GSNAP" version="2.0.1">
   <description>Genomic Short-read Nucleotide Alignment Program</description>
   <requirements>
       <requirement type="binary">gsnap</requirement>
-      <!-- proposed tag for added datatype dependencies -->
-      <requirement type="datatype">gmapdb</requirement>
-      <requirement type="datatype">gmapsnpindex</requirement>
-      <requirement type="datatype">splicesites.iit</requirement>
-      <requirement type="datatype">introns.iit</requirement>
   </requirements>
   <version_string>gsnap --version</version_string>
   <command>
@@ -25,10 +20,16 @@
     #if $refGenomeSource.use_splicing.src == 'gmapdb':
       #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
         -s $refGenomeSource.use_splicing.splicemap.value
+        #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0':
+          $refGenomeSource.use_splicing.ambig_splice_noclip
+        #end if
       #end if
     #elif $refGenomeSource.use_splicing.src == 'history':
       #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
         -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap)
+        #if $computation.options == "advanced" and $computation.trim_mismatch_score.__str__ == '0':
+          $refGenomeSource.use_splicing.ambig_splice_noclip
+        #end if
       #end if
     #end if
     #if $refGenomeSource.use_snps.src == 'gmapdb':
@@ -43,9 +44,11 @@
     #if $refGenomeSource.mode.__str__ != '':
       --mode=$refGenomeSource.mode
     #end if
+    #* ## No longer in options as of version 2011-11-30
     #if $mapq_unique_score.__str__ != '':
       --mapq-unique-score=$mapq_unique_score
     #end if
+    *#
     #if $computation.options == "advanced":
       #if $computation.max_mismatches.__str__ != '':
         --max-mismatches=$computation.max_mismatches
@@ -82,6 +85,9 @@
       #if $computation.trim_mismatch_score.__str__ != '':
         --trim-mismatch-score=$computation.trim_mismatch_score
       #end if
+      #if $computation.trim_indel_score.__str__ != '':
+        --trim-indel-score=$computation.trim_indel_score
+      #end if
       ## TODO - do we need these options (Is it tally XOR runlength?):
       ## --tallydir=  --use-tally=tally
       ## --runlengthdir  --use-runlength=runlength
@@ -206,6 +212,12 @@
         #if $seq.paired.pairmax_rna.__str__ != '':
           --pairmax-rna=$seq.paired.pairmax_rna
         #end if
+        #if $seq.paired.pairexpect.__str__ != '':
+          --pairexpect=$seq.paired.pairexpect
+        #end if
+        #if $seq.paired.pairdev.__str__ != '':
+          --pairdev=$seq.paired.pairdev
+        #end if
         $seq.fastq $seq.paired.fastq
       #else
         $seq.fastq
@@ -245,7 +257,11 @@
               <option value="FF">fwd-fwd, same strand</option>
             </param>
             <param name="pairmax_dna"  type="integer" value="" optional="true" label="Max total genomic length for DNA-Seq paired reads, or other reads without splicing (default 1000)." help="Used if no splice file is provided and novelsplicing is off."/>
-            <param name="pairmax_rna"  type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used novelspliceing is specified or a splice file is provided.  Should probably match the value for localsplicedist."/>
+            <param name="pairmax_rna"  type="integer" value="" optional="true" label="Max total genomic length for RNA-Seq paired reads, or other reads that could have a splice (default 200000)." help="Used when novel splicing is specified or a splice file is provided.  Should probably match the value for localsplicedist."/>
+            <param name="pairexpect"  type="integer" value="" optional="true" label="Expected paired-end length" 
+                   help="Used for calling splices in medial part of paired-end reads (default 200)"/>
+            <param name="pairdev"  type="integer" value="" optional="true" label="Allowable deviation from expected paired-end length" 
+                   help="Used for calling splices in medial part of paired-end reads (default 25)"/>
           </when>
         </conditional>
         <param name="barcode_length" type="integer" value="" optional="true"  label="Amount of barcode to remove from start of read (default 0)" />
@@ -281,9 +297,11 @@
       </when>
       
     </conditional>
+    <!-- No longer in options as of version 2011-11-30
     <param name="mapq_unique_score"  type="integer" value="" optional="true" label="MAPQ score threshold" 
                 help="For multiple results, consider as a unique result if only one of the results has a MAPQ score equal or greater than this
                       (if not selected, then reports all multiple results, up to npaths)" />
+    -->
 
     <!-- GMAPDB for alignment -->
     <conditional name="refGenomeSource">
@@ -405,6 +423,9 @@
           <when value="history">
             <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 
               help="built with GMAP IIT"/>
+            <param name="ambig_splice_noclip"  type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
+              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.  
+                    This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
           </when>
           <when value="gmapdb">
             <param name="splicemap" type="select"  data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
@@ -412,6 +433,9 @@
                 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
               </options>
             </param>
+            <param name="ambig_splice_noclip"  type="boolean" checked="false" truevalue="--ambig-splice-noclip" falsevalue="" label="Do not clip at ambiguous splice sites"
+              help="For ambiguous known splicing at ends of the read, do not clip at the splice site, but extend instead into the intron.  
+                    This flag makes sense only if you are trying to eliminate all soft clipping with --trim-mismatch-score=0"/>
           </when>
         </conditional>
 
@@ -457,8 +481,12 @@
          </param>
          <param name="query_unk_mismatch" type="boolean" checked="false" truevalue="--query-unk-mismatch=1" falsevalue="" label="Count unknown (N) characters in the query as a mismatch"/>
          <param name="genome_unk_mismatch" type="boolean" checked="true" truevalue="" falsevalue="--genome-unk-mismatch=0" label="Count unknown (N) characters in the genome as a mismatch"/>
-         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 3)" 
-                help="(from one end of the read to the best possible position at the other end).  To turn off terminal alignments, set this to a high value." />
+         <param name="terminal_threshold"  type="integer" value="" optional="true" label="Threshold for searching for a terminal alignment (default 2)" 
+                help="(from one end of the read to the best possible position at the other end).   For example, if this value is 2, then if GSNAP finds an exact or
+                                   1-mismatch alignment, it will not try to find a terminal alignment.
+                                   Note that this default value may not be low enough if you want to
+                                   obtain terminal alignments for very short reads, although such reads
+                                   probably don't have enough specificity for terminal alignments anyway." />
          <param name="indel_penalty"  type="integer" value="" optional="true" label="Penalty for an indel (default 2)" 
                 help="Counts against mismatches allowed.  To find indels, make indel-penalty less than or equal to max-mismatches.  A value &lt; 2 can lead to false positives at read ends" />
          <param name="indel_endlength"  type="integer" value="" optional="true" label="Minimum length at end required for indel alignments (default 4)" />
@@ -474,7 +502,9 @@
            <option value="off">off</option>
          </param>
          <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" 
-                help="to turn off trimming, specify 0"/>
+                help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive mismatches at the ends of reads)"/>
+         <param name="trim_indel_score" type="integer" value="" optional="true" label="Score to use for indels when trimming at ends (default is -4)" 
+                help="to turn off trimming, specify 0 (Warning: turning trimming off will give false positive indels at the ends of reads)"/>
          <param name="use_tally" type="data" format="tally.iit" optional="true" metadata_name="dbkey" label="Select a tally IIT file to resolve concordant multiple results" 
               help="generated by gsnap_tally and iit_store"/>
 
--- a/iit_store.xml	Tue Nov 08 13:26:41 2011 -0600
+++ b/iit_store.xml	Thu Dec 08 11:00:46 2011 -0600
@@ -2,13 +2,6 @@
   <description>Create a map store for known genes or SNPs</description>
   <requirements>
       <requirement type="binary">iit_store</requirement>
-      <!-- proposed tag for added datatype dependencies -->
-      <requirement type="datatype">gmap_annotation</requirement>
-      <requirement type="datatype">gmap_snps</requirement>
-      <requirement type="datatype">iit</requirement>
-      <requirement type="datatype">splicesites.iit</requirement>
-      <requirement type="datatype">introns.iit</requirement>
-      <requirement type="datatype">snps.iit</requirement>
   </requirements>
   <version_string>iit_store --version</version_string>
   <command interpreter="command"> /bin/bash $shscript 2> $log </command>
--- a/snpindex.xml	Tue Nov 08 13:26:41 2011 -0600
+++ b/snpindex.xml	Thu Dec 08 11:00:46 2011 -0600
@@ -2,11 +2,6 @@
   <description>build index files for known SNPs</description>
   <requirements>
       <requirement type="binary">snpindex</requirement>
-      <!-- proposed tag for added datatype dependencies -->
-      <requirement type="datatype">gmapsnpindex</requirement>
-      <requirement type="datatype">gmapdb</requirement>
-      <requirement type="datatype">gmap_snps</requirement>
-      <requirement type="datatype">snps.iit</requirement>
   </requirements>
   <version_string>snpindex --version</version_string>
   <command interpreter="command"> /bin/bash $shscript 2>1 1> $output </command>