diff gmap/gsnap.xml @ 2:52da588232b0

Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
author Jim Johnson <jj@umn.edu>
date Fri, 21 Oct 2011 11:38:55 -0500
parents d58d272914e7
children f49f5a460c74
line wrap: on
line diff
--- a/gmap/gsnap.xml	Tue Oct 18 11:51:15 2011 -0500
+++ b/gmap/gsnap.xml	Fri Oct 21 11:38:55 2011 -0500
@@ -2,31 +2,43 @@
   <description>Genomic Short-read Nucleotide Alignment Program</description>
   <requirements>
       <requirement type="binary">gsnap</requirement>
+      <!-- proposed tag for added datatype dependencies: each type="datatype" entry names a datatype used as a format by this tool's data inputs -->
+      <requirement type="datatype">gmapdb</requirement>
+      <requirement type="datatype">gmapsnpindex</requirement>
+      <requirement type="datatype">splicesites.iit</requirement>
+      <requirement type="datatype">introns.iit</requirement>
   </requirements>
   <version_string>gsnap --version</version_string>
   <command>
     #import os.path, re
     gsnap
     --nthreads="4" --ordered
-    #if $refGenomeSource.genomeSource == "history":
-      --gseg=$refGenomeSource.ownFile
-    #elif $refGenomeSource.genomeSource == "gmapdb":
+    #if $refGenomeSource.genomeSource == "gmapdb":
       #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
-      --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
-      #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
-        --kmer=$refGenomeSource.kmer
-      #end if
-      #if $refGenomeSource.splicemap != None and len($refGenomeSource.splicemap.__str__) == 2:
-        --use-splices=$refGenomeSource.splicemap
-      #end if
-      #if $refGenomeSource.snpindex != None and len($refGenomeSource.snpindex.__str__) == 2:
-        --use-snps=$refGenomeSource.snpindex
-      #end if
+      --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
     #else:
       --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
-      #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
-        --kmer=$refGenomeSource.kmer
+    #end if
+    #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
+      --kmer=$refGenomeSource.kmer
+    #end if
+    #if $refGenomeSource.use_splicing.src == 'gmapdb':
+      #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
+        -s $refGenomeSource.use_splicing.splicemap.value
       #end if
+    #elif $refGenomeSource.use_splicing.src == 'history':
+      #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
+        -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap)
+      #end if
+    #end if
+    #if $refGenomeSource.use_snps.src == 'gmapdb':
+       #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
+        -v $refGenomeSource.use_snps.snpindex.value
+       #end if
+    #elif $refGenomeSource.use_snps.src == 'history':
+       #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
+         -V $refGenomeSource.use_snps.snpindex.extra_files_path -v $refGenomeSource.use_snps.snpindex.metadata.snps_name
+       #end if
     #end if
     #if $mode.__str__ != '':
       --mode=$mode
@@ -181,86 +193,9 @@
 
   </command>
   <inputs>
-    <conditional name="refGenomeSource">
-     <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-        <option value="indexed">Use a built-in index</option>
-        <option value="gmapdb">Use gmapdb from the history</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
-          <options from_file="gmap_indices.loc">
-            <column name="uid" index="0" />
-            <column name="dbkey" index="1" />
-            <column name="name" index="2" />
-            <column name="kmers" index="3" />
-            <column name="maps" index="4" />
-            <column name="snps" index="5" />
-            <column name="value" index="6" />
-          </options>
-        </param>
-
-        <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
-          <options from_file="gmap_indices.loc">
-            <column name="name" index="3"/>
-            <column name="value" index="3"/>
-            <filter type="param_value" ref="gmapindex" column="6"/>
-            <filter type="multiple_splitter" column="3" separator=","/>
-            <filter type="add_value" name="" value=""/>
-            <filter type="sort_by" column="3"/>
-          </options>
-        </param>
-
-        <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
-          <options from_file="gmap_indices.loc">
-            <column name="name" index="4"/>
-            <column name="value" index="4"/>
-            <filter type="param_value" ref="gmapindex" column="6"/>
-            <filter type="multiple_splitter" column="4" separator=","/>
-            <filter type="add_value" name="" value=""/>
-            <filter type="sort_by" column="4"/>
-          </options>
-        </param>
-
-        <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
-          <options from_file="gmap_indices.loc">
-            <column name="name" index="5"/>
-            <column name="value" index="5"/>
-            <filter type="param_value" ref="gmapindex" column="6"/>
-            <filter type="multiple_splitter" column="5" separator=","/>
-            <filter type="add_value" name="" value=""/>
-            <filter type="sort_by" column="5"/>
-          </options>
-        </param>
-      </when>
-      <when value="gmapdb">
-        <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 
-              help="A GMAP database built with GMAP Build"/>
-        <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
-          <options>
-            <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
-          </options>
-        </param>
-        <param name="splicemap" type="select"  data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
-          <options>
-            <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
-          </options>
-        </param>
-        <param name="snpindex" type="select"  data_ref="gmapdb" label="Use database containing known SNPs" help="">
-          <options>
-            <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
-          </options>
-        </param>
-
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" 
-              help="Fasta containing genomic DNA sequence"/>
-      </when>
-    </conditional>
     <!-- Input data -->
     <conditional name="seq">
-      <param name="format" type="select" label="Select the input format" help="">
+      <param name="format" type="select" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select the input format" help="">
         <option value="fastq">Fastq</option>
         <option value="gsnap_fasta">GNSAP fasta</option>
       </param>
@@ -308,6 +243,7 @@
         <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
       </when>
     </conditional>
+
     <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap datatbase.">
         <option value="">standard</option>
         <option value="cmet-stranded">cmet-stranded   for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
@@ -315,9 +251,147 @@
         <option value="atoi-stranded">atoi-stranded   for RNA-editing tolerance (A-to-G changes)</option>
         <option value="atoi-nonstranded">atoi-nonstranded   for RNA-editing tolerance (A-to-G changes)</option>
     </param>
+
+    <!-- GMAPDB for alignment: reference source (built-in index or a gmapdb from the history), kmer size, and optional known-splicing map and SNP index -->
+    <conditional name="refGenomeSource">
+     <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Align To&lt;/H2&gt;Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+        <option value="indexed">Use a built-in index</option>
+        <option value="gmapdb">Use a gmapdb from your history</option>
+      </param>
+      <when value="indexed"> <!-- built-in index: every choice below is read from gmap_indices.loc -->
+        <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
+          <options from_file="gmap_indices.loc">
+            <column name="uid" index="0" />
+            <column name="dbkey" index="1" />
+            <column name="name" index="2" />
+            <column name="kmers" index="3" />
+            <column name="maps" index="4" />
+            <column name="snps" index="5" />
+            <column name="value" index="6" />
+          </options>
+        </param>
+
+        <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size"> <!-- kmers column (3) of the .loc row matching gmapindex, split on ","; an empty choice is added -->
+          <options from_file="gmap_indices.loc">
+            <column name="name" index="3"/>
+            <column name="value" index="3"/>
+            <filter type="param_value" ref="gmapindex" column="6"/>
+            <filter type="multiple_splitter" column="3" separator=","/>
+            <filter type="add_value" name="" value=""/>
+            <filter type="sort_by" column="3"/>
+          </options>
+        </param>
+
+        <conditional name="use_splicing"> <!-- known splicing: none, a map listed in the .loc maps column (4), or a splicesites/introns map dataset from the history -->
+          <param name="src" type="select" label="Known Splicesite and Introns" 
+                 help="Look for splicing involving known sites or known introns at short or long distances 
+                  See README instructions for the distinction between known sites and known introns">
+            <option value="none" selected="true">None</option>
+            <option value="gmapdb">From the GMAP Database</option>
+            <option value="history">A Map in your history</option>
+          </param>
+          <when value="none"/>
+          <when value="history">
+            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 
+              help="built with GMAP IIT"/>
+          </when>
+          <when value="gmapdb">
+            <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
+              <options from_file="gmap_indices.loc">
+                <column name="name" index="4"/>
+                <column name="value" index="4"/>
+                <filter type="param_value" ref="gmapindex" column="6"/>
+                <filter type="multiple_splitter" column="4" separator=","/>
+                <filter type="add_value" name="" value=""/>
+                <filter type="sort_by" column="4"/>
+              </options>
+            </param>
+          </when>
+        </conditional>
+
+        <conditional name="use_snps"> <!-- known SNPs: none, an index listed in the .loc snps column (5), or a gmapsnpindex dataset from the history -->
+          <param name="src" type="select" label="Known SNPs" help="for SNP tolerant alignments">
+            <option value="none" selected="true">None</option>
+            <option value="gmapdb">From the GMAP Database</option>
+            <option value="history">A SNP Index in your history</option>
+          </param>
+          <when value="none"/>
+          <when value="history">
+            <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex" 
+              help="built with GMAP SNP Index"/>
+          </when>
+          <when value="gmapdb">
+            <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
+              <options from_file="gmap_indices.loc">
+                <column name="name" index="5"/>
+                <column name="value" index="5"/>
+                <filter type="param_value" ref="gmapindex" column="6"/>
+                <filter type="multiple_splitter" column="5" separator=","/>
+                <filter type="add_value" name="" value=""/>
+                <filter type="sort_by" column="5"/>
+              </options>
+            </param>
+          </when>
+        </conditional>
+
+      </when>
+      <when value="gmapdb"> <!-- gmapdb dataset from the history: kmer, map and SNP choices are filtered from its metadata keys kmers, maps and snps -->
+        <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 
+              help="A GMAP database built with GMAP Build"/>
+        <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
+          <options>
+            <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
+          </options>
+        </param>
+
+        <conditional name="use_splicing"> <!-- same three sources as the built-in branch, but the "gmapdb" choice lists the maps metadata of the selected dataset -->
+          <param name="src" type="select" label="Known Splicesite and Introns" 
+                 help="Look for splicing involving known sites or known introns at short or long distances 
+                  See README instructions for the distinction between known sites and known introns">
+            <option value="none" selected="true">None</option>
+            <option value="gmapdb">From the GMAP Database</option>
+            <option value="history">A Map in your history</option>
+          </param>
+          <when value="none"/>
+          <when value="history">
+            <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map" 
+              help="built with GMAP IIT"/>
+          </when>
+          <when value="gmapdb">
+            <param name="splicemap" type="select"  data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
+              <options>
+                <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/> <!-- NOTE(review): no separator attribute here, unlike the kmers and snps filters; confirm the default is what is intended -->
+              </options>
+            </param>
+          </when>
+        </conditional>
+
+        <conditional name="use_snps"> <!-- same three sources as the built-in branch, but the "gmapdb" choice lists the snps metadata of the selected dataset -->
+          <param name="src" type="select" label="Known SNPs" help="for SNP tolerant alignments">
+            <option value="none" selected="true">None</option>
+            <option value="gmapdb">From the GMAP Database</option>
+            <option value="history">A SNP Index in your history</option>
+          </param>
+          <when value="none"/>
+          <when value="history">
+            <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex" 
+              help="built with GMAP SNP Index"/>
+          </when>
+          <when value="gmapdb">
+            <param name="snpindex" type="select"  data_ref="gmapdb" label="Use database containing known SNPs" help="">
+              <options>
+                <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
+              </options>
+            </param>
+          </when>
+        </conditional>
+
+      </when>
+    </conditional>
+
     <!-- Computation options -->
     <conditional name="computation">
-      <param name="options" type="select" label="Computational Settings" help="">
+      <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
         <option value="default">Use default settings</option>
         <option value="advanced">Set Computation Options</option>
       </param>
@@ -352,8 +426,7 @@
          </param>
          <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" 
                 help="to turn off trimming, specify 0"/>
-         <!-- use-snps This should be either a select list from the gmapdb maps or a data type using snpsdir and use-snps --> 
-         <param name="use_snps" type="text" value="" optional="true" label="SNP database Name for SNP tolearnce" help="Use database containing known SNPs (built previously using snpindex) for tolerance to SNPs"/>
+         
          <!-- Options for GMAP alignment within GSNAP -->
           <param name="gmap_mode" type="select" multiple="true" optional="true" label="Cases to use GMAP for complex alignments containing multiple splices or indels" help="">
             <option value="pairsearch">pairsearch</option>
@@ -376,7 +449,7 @@
     </conditional>
 
     <conditional name="splicing">
-      <param name="options" type="select" label="Splicing options for RNA-Seq" help="">
+      <param name="options" type="select" label="&lt;HR&gt;Splicing options for RNA-Seq" help="">
         <option value="default">Use default settings</option>
         <option value="advanced">Set Splicing Options</option>
       </param>
@@ -384,7 +457,7 @@
       <when value="advanced">
          <!-- Splicing options for RNA-Seq -->
          <!-- use-splices This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splices --> 
-         <param name="use_splices" type="text" value="" optional="true" label="Known splicesites or introns" help="Look for splicing involving known sites or known introns at short or long distances See README instructions for the distinction between known sites and known introns"/>
+         <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
          <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
          <param name="localsplicedist"  type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
          <param name="local_splice_penalty"  type="integer" value="" optional="true" label="Penalty for a local splice (default 0).  Counts against mismatches allowed"/>
@@ -398,7 +471,7 @@
 
     <!-- Output data -->
     <conditional name="output">
-      <param name="options" type="select" label="Output options for RNA-Seq" help="">
+      <param name="options" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Output options for RNA-Seq" help="">
         <option value="default">Use default settings</option>
         <option value="advanced">Set Output Options</option>
       </param>
@@ -439,7 +512,7 @@
        help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results"/> 
   </inputs>
   <outputs>
-    <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: log"/>
+    <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: stderr"/>
     <data format="txt" name="results" label="${tool.name} on ${on_string} ${result.format}" >
       <filter>(split_output == False)</filter>
       <change_format>