annotate segemehl.xml @ 0:e97db054a88d draft

Uploaded
author rnateam
date Sat, 22 Feb 2014 06:01:16 -0500
parents
children df7c7d732d31
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
1 <tool id="segemehl" name="segemehl" version="0.1.6.0">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
2 <description>based short read aligner</description>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
3 <requirements>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
4 <requirement type="package" version="0.1.6">segemehl</requirement>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
5 </requirements>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
6 <command>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
7 ## prepare segemehl index if no reference genome is supplied
e97db054a88d Uploaded
rnateam
parents:
diff changeset
8 ## ref_db and temp_index are shell variables; they are referenced below with a backslash-escaped dollar sign so Cheetah passes them through to the shell
e97db054a88d Uploaded
rnateam
parents:
diff changeset
9 #if $refGenomeSource.genomeSource == "history":
e97db054a88d Uploaded
rnateam
parents:
diff changeset
10 ref_db="$refGenomeSource.own_reference_genome"; temp_index=`mktemp`; segemehl.x -x "\$temp_index" -d "\$ref_db";
e97db054a88d Uploaded
rnateam
parents:
diff changeset
11 #else:
e97db054a88d Uploaded
rnateam
parents:
diff changeset
12 ref_db="${refGenomeSource.index.fields.db_path}"; temp_index="${refGenomeSource.index.fields.index_path}";
e97db054a88d Uploaded
rnateam
parents:
diff changeset
13 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
14
e97db054a88d Uploaded
rnateam
parents:
diff changeset
15
e97db054a88d Uploaded
rnateam
parents:
diff changeset
16 ## execute segemehl
e97db054a88d Uploaded
rnateam
parents:
diff changeset
17 segemehl.x
e97db054a88d Uploaded
rnateam
parents:
diff changeset
18
e97db054a88d Uploaded
rnateam
parents:
diff changeset
19 ## number of threads
e97db054a88d Uploaded
rnateam
parents:
diff changeset
20 -t "\${GALAXY_SLOTS:-12}"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
21
e97db054a88d Uploaded
rnateam
parents:
diff changeset
22 ## db file path (history FASTA or built-in db_path, whichever branch set ref_db above)
e97db054a88d Uploaded
rnateam
parents:
diff changeset
23 -d "\$ref_db"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
24
e97db054a88d Uploaded
rnateam
parents:
diff changeset
25 -i "\$temp_index"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
26
e97db054a88d Uploaded
rnateam
parents:
diff changeset
27 ## check for single/pair-end
e97db054a88d Uploaded
rnateam
parents:
diff changeset
28 #if str( $library.type ) == "single":
e97db054a88d Uploaded
rnateam
parents:
diff changeset
29 #set $query_list = list()
e97db054a88d Uploaded
rnateam
parents:
diff changeset
30 ## prepare inputs
e97db054a88d Uploaded
rnateam
parents:
diff changeset
31 #for $fastq in $library.reads:
e97db054a88d Uploaded
rnateam
parents:
diff changeset
32 $query_list.append('%s' %($fastq.input_query))
e97db054a88d Uploaded
rnateam
parents:
diff changeset
33 #end for
e97db054a88d Uploaded
rnateam
parents:
diff changeset
34 -q "#echo ' '.join( $query_list )#"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
35 #else
e97db054a88d Uploaded
rnateam
parents:
diff changeset
36 ## prepare inputs
e97db054a88d Uploaded
rnateam
parents:
diff changeset
37
e97db054a88d Uploaded
rnateam
parents:
diff changeset
38 #set $mate1 = list()
e97db054a88d Uploaded
rnateam
parents:
diff changeset
39 #set $mate2 = list()
e97db054a88d Uploaded
rnateam
parents:
diff changeset
40 #for $mate_pair in $library.mate_list:
e97db054a88d Uploaded
rnateam
parents:
diff changeset
41 $mate1.append( str($mate_pair.first_strand_query) )
e97db054a88d Uploaded
rnateam
parents:
diff changeset
42 $mate2.append( str($mate_pair.second_strand_query) )
e97db054a88d Uploaded
rnateam
parents:
diff changeset
43 #end for
e97db054a88d Uploaded
rnateam
parents:
diff changeset
44
e97db054a88d Uploaded
rnateam
parents:
diff changeset
45 -q #echo ','.join($mate1)
e97db054a88d Uploaded
rnateam
parents:
diff changeset
46 -p #echo ','.join($mate2)
e97db054a88d Uploaded
rnateam
parents:
diff changeset
47
e97db054a88d Uploaded
rnateam
parents:
diff changeset
48 -I $library.maxinsertsize
e97db054a88d Uploaded
rnateam
parents:
diff changeset
49 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
50 -m $minsize
e97db054a88d Uploaded
rnateam
parents:
diff changeset
51 -A $accuracy
e97db054a88d Uploaded
rnateam
parents:
diff changeset
52 -H $hitstrategy
e97db054a88d Uploaded
rnateam
parents:
diff changeset
53 #if str( $prime5 ).strip():
e97db054a88d Uploaded
rnateam
parents:
diff changeset
54 -P $prime5
e97db054a88d Uploaded
rnateam
parents:
diff changeset
55 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
56 #if str( $prime3 ).strip():
e97db054a88d Uploaded
rnateam
parents:
diff changeset
57 -Q $prime3
e97db054a88d Uploaded
rnateam
parents:
diff changeset
58 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
59 $polyA
e97db054a88d Uploaded
rnateam
parents:
diff changeset
60 $autoclip
e97db054a88d Uploaded
rnateam
parents:
diff changeset
61 $hardclip
e97db054a88d Uploaded
rnateam
parents:
diff changeset
62 $order
e97db054a88d Uploaded
rnateam
parents:
diff changeset
63 -s
e97db054a88d Uploaded
rnateam
parents:
diff changeset
64 -o $segemehl_out
e97db054a88d Uploaded
rnateam
parents:
diff changeset
65 </command>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
66 <stdio> <!-- a match of "Exit forced" on either output stream (source="both") is reported at level "fatal" with the description given below -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
67 <regex match="Exit forced"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
68 source="both"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
69 level="fatal"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
70 description="Execution halted." />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
71 </stdio>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
72 <inputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
73
e97db054a88d Uploaded
rnateam
parents:
diff changeset
74 <conditional name="refGenomeSource">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
75 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
76 <option value="indexed">Use a built-in index</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
77 <option value="history">Use one from the history</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
78 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
79 <when value="indexed">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
80 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
81 <options from_data_table="segemehl_indexes">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
82 <column name="value" index="0"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
83 <column name="dbkey" index="1"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
84 <column name="name" index="2"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
85 <column name="db_path" index="3"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
86 <column name="index_path" index="4"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
87 <filter type="sort_by" column="2"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
88 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
89 </options>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
90 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
91 </when> <!-- built-in -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
92 <when value="history">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
93 <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
94 </when> <!-- history -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
95 </conditional> <!-- refGenomeSource -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
96
e97db054a88d Uploaded
rnateam
parents:
diff changeset
97
e97db054a88d Uploaded
rnateam
parents:
diff changeset
98 <conditional name="library">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
99 <param name="type" type="select" label="Is this library paired-end?">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
100 <option value="single">Single-end</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
101 <option value="paired">Paired-end</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
102 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
103 <when value="single">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
104 <repeat name="reads" title="FASTQ/FASTA files">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
105 <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
106 </repeat>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
107 </when>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
108 <when value="paired">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
109 <repeat name="mate_list" title="Paired End Pairs" min="1">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
110 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
111 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
112 </repeat>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
113 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
114 </when>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
115 </conditional>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
116
e97db054a88d Uploaded
rnateam
parents:
diff changeset
117
e97db054a88d Uploaded
rnateam
parents:
diff changeset
118 <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
119 <validator type="in_range" min="1"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
120 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
121 <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
e97db054a88d Uploaded
rnateam
parents:
diff changeset
122 <validator type="in_range" min="1" max="100"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
123 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
124 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
125 <option value="1">report only best scoring hits</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
126 <option value="0">report all scoring hits</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
127 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
128 <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" /> <!-- flag letters in help match the command template: -P $prime5, -Q $prime3 -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
129 <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
130 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
131 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
132 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
133 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
134 </inputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
135
e97db054a88d Uploaded
rnateam
parents:
diff changeset
136 <outputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
137 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
138 </outputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
139 <help>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
140
e97db054a88d Uploaded
rnateam
parents:
diff changeset
141 .. class:: infomark
e97db054a88d Uploaded
rnateam
parents:
diff changeset
142
e97db054a88d Uploaded
rnateam
parents:
diff changeset
143 **What it does**
e97db054a88d Uploaded
rnateam
parents:
diff changeset
144
e97db054a88d Uploaded
rnateam
parents:
diff changeset
145 Segemehl_ is a short read mapper with gaps.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
146
e97db054a88d Uploaded
rnateam
parents:
diff changeset
147 Segemehl_ is a software to map short sequencer reads to reference genomes.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
148 Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
149 Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
150 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
151
e97db054a88d Uploaded
rnateam
parents:
diff changeset
152 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
e97db054a88d Uploaded
rnateam
parents:
diff changeset
153
e97db054a88d Uploaded
rnateam
parents:
diff changeset
154 **References**
e97db054a88d Uploaded
rnateam
parents:
diff changeset
155
e97db054a88d Uploaded
rnateam
parents:
diff changeset
156 Hoffmann S, Otto C, Kurtz S, Sharma CM, Khaitovich P, Vogel J, Stadler PF, Hackermueller J: "Fast mapping of short sequences with mismatches, insertions and deletions using index structures", PLoS Comput Biol (2009) vol. 5 (9) pp. e1000502
e97db054a88d Uploaded
rnateam
parents:
diff changeset
157 Latest segemehl version: 0.1.6; the download and the manual are linked from the Segemehl_ website. New: faster multiple split read mapping. Bug fixes: increased sensitivity for strand switches. Changes: default accuracy is now 90%; older segemehl indices are still usable. Known issues: untraceable errors with the gcc-4.5 compiler; zlib linker problems with some Ubuntu versions. Complaint department: steve (at) bioinf.uni-leipzig.de
e97db054a88d Uploaded
rnateam
parents:
diff changeset
158
e97db054a88d Uploaded
rnateam
parents:
diff changeset
159 </help>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
160 </tool>