changeset 0:e97db054a88d draft

Uploaded
author rnateam
date Sat, 22 Feb 2014 06:01:16 -0500
parents
children df7c7d732d31
files segemehl.xml tool-data/segemehl_indices.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 4 files changed, 203 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/segemehl.xml	Sat Feb 22 06:01:16 2014 -0500
@@ -0,0 +1,160 @@
+<tool id="segemehl" name="segemehl" version="0.1.6.0">
+    <description>based short read aligner</description>
+    <requirements>
+        <!-- Tool Shed package that provides the segemehl.x binary used in the command section. -->
+        <requirement type="package" version="0.1.6">segemehl</requirement>
+    </requirements>
+    <command>
+        ## Resolve the reference FASTA (-d) and the matching index file (-i).
+        ## Both are kept in Cheetah variables so that no shell variable (and hence
+        ## no "$" escaping) is needed on the generated command line.
+        #if str( $refGenomeSource.genomeSource ) == "history":
+            ## No built-in index: build one from the history FASTA first.
+            ## The index is written to a relative path, i.e. the directory the job
+            ## runs in, instead of an mktemp file that would never be removed.
+            #set $reference_fasta = str( $refGenomeSource.own_reference_genome )
+            #set $index_file = "segemehl_history_index.idx"
+            segemehl.x -x "$index_file" -d "$reference_fasta" &amp;&amp;
+        #else:
+            ## Built-in index: both paths come from the segemehl_indexes data table.
+            #set $reference_fasta = str( $refGenomeSource.index.fields.db_path )
+            #set $index_file = str( $refGenomeSource.index.fields.index_path )
+        #end if
+
+        ## execute segemehl
+        segemehl.x
+
+        ## number of threads
+        -t "\${GALAXY_SLOTS:-12}"
+
+        ## reference sequence and its index
+        -d "$reference_fasta"
+        -i "$index_file"
+
+        ## check for single/pair-end
+        #if str( $library.type ) == "single":
+            ## collect all read files of the repeat
+            #set $query_list = list()
+            #for $fastq in $library.reads:
+                #silent $query_list.append( str( $fastq.input_query ) )
+            #end for
+            ## NOTE(review): files are joined with a blank inside one quoted argument,
+            ## exactly as before; confirm segemehl accepts this for more than one file.
+            -q "#echo ' '.join( $query_list )#"
+        #else:
+            ## collect first and second mates of every pair, in matching order
+            #set $mate1 = list()
+            #set $mate2 = list()
+            #for $mate_pair in $library.mate_list:
+                #silent $mate1.append( str( $mate_pair.first_strand_query ) )
+                #silent $mate2.append( str( $mate_pair.second_strand_query ) )
+            #end for
+
+            -q "#echo ','.join( $mate1 )#"
+            -p "#echo ','.join( $mate2 )#"
+
+            -I $library.maxinsertsize
+        #end if
+        -m $minsize
+        -A $accuracy
+        -H $hitstrategy
+        ## adapters are optional free-text values; only pass them when non-empty
+        #if str( $prime5 ).strip():
+            -P "$prime5"
+        #end if
+        #if str( $prime3 ).strip():
+            -Q "$prime3"
+        #end if
+        ## boolean switches expand to their flag or to an empty string
+        $polyA
+        $autoclip
+        $hardclip
+        $order
+        -s
+        -o "$segemehl_out"
+    </command>
+    <stdio>
+        <!-- Once a stdio section is declared, only the checks listed here are applied,
+             so a failing exit status has to be requested explicitly. -->
+        <exit_code range="1:" level="fatal" description="segemehl exited with a non-zero status." />
+        <!-- segemehl reports aborted runs with this message. -->
+        <regex match="Exit forced"
+           source="both"
+           level="fatal"
+           description="Execution halted." />
+    </stdio>
+    <inputs>
+
+        <!-- Reference: either a pre-built entry of the segemehl_indexes data table
+             or a FASTA dataset from the history. -->
+        <conditional name="refGenomeSource">
+          <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
+            <option value="indexed">Use a built-in index</option>
+            <option value="history">Use one from the history</option>
+          </param>
+          <when value="indexed">
+            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
+              <options from_data_table="segemehl_indexes">
+                <column name="value" index="0"/>
+                <column name="dbkey" index="1"/>
+                <column name="name" index="2"/>
+                <column name="db_path" index="3"/>
+                <column name="index_path" index="4"/>
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+              </options>
+            </param>
+          </when>  <!-- built-in -->
+          <when value="history">
+            <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
+          </when>  <!-- history -->
+        </conditional>  <!-- refGenomeSource -->
+
+
+        <!-- Read files: single-end or paired-end. -->
+        <conditional name="library">
+            <param name="type" type="select" label="Is this library paired-end?">
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <!-- min="1": without at least one file the command would receive an empty -q argument -->
+                <repeat name="reads" title="FASTQ/FASTA files" min="1">
+                    <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
+                </repeat>
+            </when>
+            <when value="paired">
+                <repeat name="mate_list" title="Paired End Pairs" min="1">
+                    <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
+                    <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
+                </repeat>
+                <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
+            </when>
+        </conditional>
+
+
+        <!-- Alignment options; the flag named in each help text is the one the command section passes. -->
+        <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
+            <validator type="in_range" min="1" max="100"/>
+        </param>
+        <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
+            <option value="1">report only best scoring hits</option>
+            <option value="0">report all scoring hits</option>
+        </param>
+        <!-- The command passes prime5 with -P and prime3 with -Q; the help texts previously had the two flags swapped. -->
+        <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" />
+        <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/>
+        <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
+        <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
+        <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/>
+        <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromsome and position" help="(-O)"/>
+    </inputs>
+
+    <outputs>
+        <!-- Alignment file in SAM format; its path is handed to segemehl as $segemehl_out. -->
+        <data name="segemehl_out"
+              format="sam"
+              label="Read alignments on ${on_string}" />
+    </outputs>
+    <help>
+
+.. class:: infomark
+
+**What it does** 
+
+Segemehl_ is a short read mapper with gaps.
+
+Segemehl_ is a software to map short sequencer reads to reference genomes. 
+Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. 
+Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly. 
+segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
+
+.. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
+
+**References**
+
+Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502
+
+**Release notes for segemehl 0.1.6** (taken from the segemehl website, where the latest version and the manual can be downloaded)
+
+- New: faster multiple split read mapping.
+- Bug fixes: increased sensitivity for strand switches.
+- Changes: the default accuracy is now 90%; older segemehl indices are still usable.
+- Known issues: untraceable errors with the gcc-4.5 compiler; zlib linker problems with some Ubuntu versions.
+- Complaint department: steve at bioinf dot uni-leipzig dot de
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/segemehl_indices.loc.sample	Sat Feb 22 06:01:16 2014 -0500
@@ -0,0 +1,29 @@
+#This is a sample file that enables the segemehl mapper to find genome indices
+#and database files. You will need to create these data files and then create 
+#a segemehl_indices.loc file similar to this one (store it in this directory) 
+#that points to the directories in which those files are stored. 
+#The segemehl_indices.loc file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <fasta_file_path>   <index_file_path>
+#
+#So, for example, if you had the hg18 reference and its segemehl index stored in 
+#/data/0/galaxy/segemehl/hg18/, 
+#then the segemehl_indices.loc entry would look like this:
+#
+#hg18   hg18   hg18   /data/0/galaxy/segemehl/hg18/chromosomes.fa   /data/0/galaxy/segemehl/hg18/chromosomes.idx
+#
+#and your /data/0/galaxy/segemehl/hg18/ directory
+#would contain the reference FASTA file and the segemehl index built from it:
+#
+#-rw-r--r--  1 bag    bag 539833 2013-10-13 10:12 chromosomes.idx
+#-rw-r--r--  1 bag    bag 342562 2013-10-13 10:12 chromosomes.fa
+#...etc...
+#
+#Your segemehl_indices.loc file should include an entry per line for each
+#index set you have stored. Both paths are passed to segemehl as they are
+#(the FASTA file with -d, the index file with -i), so they must point to
+#existing files. For example:
+#
+#hg18canon          hg18   hg18 Canonical   /data/0/galaxy/segemehl/hg18/chromosomes_can.fa   /data/0/galaxy/segemehl/hg18/chromosomes_can.idx
+#hg18full           hg18   hg18 Full        /data/0/galaxy/segemehl/hg18/chromosomes_full.fa   /data/0/galaxy/segemehl/hg18/chromosomes_full.idx
+#...etc...
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sat Feb 22 06:01:16 2014 -0500
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of the reference FASTA files and index files used by the segemehl mapper.
+         The column names below are the ones the tool's index selector refers to. -->
+    <table name="segemehl_indexes" comment_char="#">
+        <columns>value, dbkey, name, db_path, index_path</columns>
+        <file path="tool-data/segemehl_indices.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sat Feb 22 06:01:16 2014 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Resolves the "segemehl" 0.1.6 package requirement of the tool through the
+     package repository named below, pinned to a fixed changeset revision. -->
+<tool_dependency>
+    <package name="segemehl" version="0.1.6">
+        <repository changeset_revision="7078b80ffc12" name="package_segemehl_0_1_6" owner="rnateam" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>