comparison segemehl.xml @ 0:e97db054a88d draft

Uploaded
author rnateam
date Sat, 22 Feb 2014 06:01:16 -0500
parents
children df7c7d732d31
comparison
equal deleted inserted replaced
-1:000000000000 0:e97db054a88d
1 <tool id="segemehl" name="segemehl" version="0.1.6.0">
2 <description>based short read aligner</description>
3 <requirements>
4 <requirement type="package" version="0.1.6">segemehl</requirement>
5 </requirements>
6 <command>
7 ## history reference: build a temporary index first; built-in reference: reuse the pre-built index
8 #if $refGenomeSource.genomeSource == "history":
9 #set $db_path = $refGenomeSource.own_reference_genome
10 temp_index=`mktemp`; segemehl.x -x "\$temp_index" -d "$db_path";
11 #else:
12 #set $db_path = $refGenomeSource.index.fields.db_path
13 temp_index="${refGenomeSource.index.fields.index_path}";
14 #end if
15
16 ## execute segemehl
17 segemehl.x
18
19 ## number of threads
20 -t "\${GALAXY_SLOTS:-12}"
21
22 ## db file path (history dataset or built-in reference, selected above)
23 -d "$db_path"
24 ## the index path lives in a shell variable, so its dollar sign is escaped from Cheetah
25 -i "\$temp_index"
26
27 ## check for single/pair-end
28 #if str( $library.type ) == "single":
29 #set $query_list = list()
30 ## prepare inputs
31 #for $fastq in $library.reads:
32 #silent $query_list.append('%s' %($fastq.input_query))
33 #end for
34 -q "#echo ' '.join( $query_list )#"
35 #else
36 ## prepare inputs
37
38 #set $mate1 = list()
39 #set $mate2 = list()
40 #for $mate_pair in $library.mate_list:
41 #silent $mate1.append( str($mate_pair.first_strand_query) )
42 #silent $mate2.append( str($mate_pair.second_strand_query) )
43 #end for
44
45 -q #echo ','.join($mate1)
46 -p #echo ','.join($mate2)
47
48 -I $library.maxinsertsize
49 #end if
50 -m $minsize
51 -A $accuracy
52 -H $hitstrategy
53 #if str( $prime5 ).strip():
54 -P $prime5
55 #end if
56 #if str( $prime3 ).strip():
57 -Q $prime3
58 #end if
59 $polyA
60 $autoclip
61 $hardclip
62 $order
63 -s
64 -o $segemehl_out
65 </command>
66 <stdio> <!-- fatal-level rule: the text "Exit forced" with source="both" marks the job as failed -->
67 <regex match="Exit forced"
68 source="both"
69 level="fatal"
70 description="Execution halted." />
71 </stdio>
72 <inputs>
73
74 <conditional name="refGenomeSource"> <!-- choose between a built-in index and a FASTA dataset from the history -->
75 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
76 <option value="indexed">Use a built-in index</option>
77 <option value="history">Use one from the history</option>
78 </param>
79 <when value="indexed">
80 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
81 <options from_data_table="segemehl_indexes">
82 <column name="value" index="0"/>
83 <column name="dbkey" index="1"/>
84 <column name="name" index="2"/>
85 <column name="db_path" index="3"/>
86 <column name="index_path" index="4"/>
87 <filter type="sort_by" column="2"/>
88 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
89 </options>
90 </param>
91 </when> <!-- built-in index -->
92 <when value="history">
93 <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
94 </when> <!-- history -->
95 </conditional> <!-- refGenomeSource -->
96
97
98 <conditional name="library"> <!-- single-end read files, or paired-end mate files plus a maximum insert size -->
99 <param name="type" type="select" label="Is this library paired-end?">
100 <option value="single">Single-end</option>
101 <option value="paired">Paired-end</option>
102 </param>
103 <when value="single">
104 <repeat name="reads" title="FASTQ/FASTA files">
105 <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
106 </repeat>
107 </when> <!-- single -->
108 <when value="paired">
109 <repeat name="mate_list" title="Paired End Pairs" min="1">
110 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
111 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
112 </repeat>
113 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
114 </when> <!-- paired -->
115 </conditional> <!-- library -->
116
117
118 <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
119 <validator type="in_range" min="1"/>
120 </param>
121 <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
122 <validator type="in_range" min="1" max="100"/>
123 </param>
124 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
125 <option value="1">report only best scoring hits</option>
126 <option value="0">report all scoring hits</option>
127 </param>
128 <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" /> <!-- passed to the command line as -P -->
129 <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/> <!-- passed to the command line as -Q -->
130 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
131 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
132 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
133 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
134 </inputs>
135
136 <outputs> <!-- the SAM dataset below is passed to the command line as -o $segemehl_out -->
137 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
138 </outputs>
139 <help>
140
141 .. class:: infomark
142
143 **What it does**
144
145 Segemehl_ is a short read mapper with gaps.
146
147 Segemehl_ is a software tool for mapping short sequencer reads to reference genomes.
148 Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
149 Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
150 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ also supports bisulfite sequencing mapping and split read mapping.
151
152 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
153
154 **References**
155
156 Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502
157 Download latest version: 0.1.6 (manual: download here). New stuff: faster multiple split read mapping. Bug fixes: increased sensitivity for strand switches. Changes: default accuracy now 90%; older segemehl indices are still usable. Issues: untraceable errors with gcc compiler gcc-4.5; zlib linker problems with some ubuntu versions. Complaint department: steve bioinf uni leipzig de
158
159 </help>
160 </tool>