<tool id="segemehl" name="segemehl" version="0.1.6.0">
    <!--
        Galaxy wrapper for the segemehl short read mapper.

        The command either uses a prebuilt index from the "segemehl_indexes"
        data table or builds a temporary index from a FASTA dataset in the
        history, then maps single-end or paired-end reads and writes SAM.
    -->
    <description>based short read aligner</description>
    <requirements>
        <requirement type="package" version="0.1.6">segemehl</requirement>
    </requirements>
    <command><![CDATA[
        ## Select the reference FASTA (-d) and the index file (-i) for the chosen
        ## genome source. temp_index is a shell variable, not a Cheetah variable,
        ## so every use below is escaped with a backslash.
        #if str( $refGenomeSource.genomeSource ) == "history":
            ## No prebuilt index is available: build one from the history FASTA.
            ## NOTE(review): the file created by mktemp is not removed afterwards.
            #set $reference_fasta = str( $refGenomeSource.own_reference_genome )
            temp_index=`mktemp` &&
            segemehl.x -x "\$temp_index" -d "$reference_fasta" &&
        #else:
            #set $reference_fasta = str( $refGenomeSource.index.fields.db_path )
            temp_index="${refGenomeSource.index.fields.index_path}" &&
        #end if

        ## execute segemehl
        segemehl.x

        ## number of threads
        -t "\${GALAXY_SLOTS:-12}"

        ## reference sequence matching the index
        -d "$reference_fasta"

        -i "\$temp_index"

        ## check for single/pair-end
        #if str( $library.type ) == "single":
            #set $query_list = list()
            #for $fastq in $library.reads:
                #silent $query_list.append( str( $fastq.input_query ) )
            #end for
            ## NOTE(review): several files end up space-separated inside a single
            ## quoted argument, as before; confirm the multi-file syntax segemehl expects.
            -q "#echo ' '.join( $query_list )#"
        #else:
            #set $mate1 = list()
            #set $mate2 = list()
            #for $mate_pair in $library.mate_list:
                #silent $mate1.append( str( $mate_pair.first_strand_query ) )
                #silent $mate2.append( str( $mate_pair.second_strand_query ) )
            #end for

            -q "#echo ','.join( $mate1 )#"
            -p "#echo ','.join( $mate2 )#"

            -I $library.maxinsertsize
        #end if
        -m $minsize
        -A $accuracy
        -H $hitstrategy
        ## adapters are only passed when the text fields are non-empty
        #if str( $prime5 ).strip():
            -P "$prime5"
        #end if
        #if str( $prime3 ).strip():
            -Q "$prime3"
        #end if
        $polyA
        $autoclip
        $hardclip
        $order
        -s
        -o "$segemehl_out"
    ]]></command>
    <stdio>
        <!-- Treat any "Exit forced" message on stdout or stderr as a fatal error. -->
        <regex match="Exit forced"
               source="both"
               level="fatal"
               description="Execution halted." />
    </stdio>
    <inputs>

        <conditional name="refGenomeSource">
            <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
                <option value="indexed">Use a built-in index</option>
                <option value="history">Use one from the history</option>
            </param>
            <when value="indexed">
                <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
                    <options from_data_table="segemehl_indexes">
                        <column name="value" index="0"/>
                        <column name="dbkey" index="1"/>
                        <column name="name" index="2"/>
                        <column name="db_path" index="3"/>
                        <column name="index_path" index="4"/>
                        <filter type="sort_by" column="2"/>
                        <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                    </options>
                </param>
            </when> <!-- built-in -->
            <when value="history">
                <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
            </when> <!-- history -->
        </conditional> <!-- refGenomeSource -->


        <conditional name="library">
            <param name="type" type="select" label="Is this library paired-end?">
                <option value="single">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <!-- min="1" mirrors mate_list below so that -q is never empty -->
                <repeat name="reads" title="FASTQ/FASTA files" min="1">
                    <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
                </repeat>
            </when>
            <when value="paired">
                <repeat name="mate_list" title="Paired End Pairs" min="1">
                    <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
                    <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
                </repeat>
                <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
            </when>
        </conditional>


        <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
            <validator type="in_range" min="1"/>
        </param>
        <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
            <validator type="in_range" min="1" max="100"/>
        </param>
        <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
            <option value="1">report only best scoring hits</option>
            <option value="0">report all scoring hits</option>
        </param>
        <!-- help flags match the command section: prime5 is passed as -P, prime3 as -Q -->
        <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" />
        <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/>
        <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
        <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
        <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="(-C)"/>
        <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
    </inputs>

    <outputs>
        <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
    </outputs>
    <help>

.. class:: infomark

**What it does**

Segemehl_ is a short read mapper with gaps.

Segemehl_ is a software to map short sequencer reads to reference genomes.
Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation contaminated reads correctly.
segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.

.. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/

**References**

Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502

**Release notes**

Latest version: 0.1.6. The download and the manual are available from the Segemehl_ website.

- new stuff: faster multiple split read mapping
- bug fixes: increased sensitivity for strand switches
- changes: default accuracy now 90%; older segemehl indices are still usable
- issues: untraceable errors with gcc compiler gcc-4.5; zlib linker problems with some ubuntu versions
- complaint department: steve bioinf uni leipzig de

    </help>
</tool>