0
|
1 <tool id="segemehl" name="segemehl" version="0.1.6.0">
|
|
2 <description>based short read aligner</description>
|
|
<!-- Declares the single package dependency of this tool: segemehl, version 0.1.6. -->
3 <requirements>
|
|
4 <requirement type="package" version="0.1.6">segemehl</requirement>
|
|
5 </requirements>
|
|
<command>
<![CDATA[
    ## Resolve the reference FASTA (-d) and the segemehl index (-i) up front.
    ## Both are Cheetah variables, so the aligner call below is the same for
    ## either reference source.
    #if str( $refGenomeSource.genomeSource ) == "history":
        ## No pre-built index is available: build one from the history FASTA
        ## into the job working directory before aligning. The build is chained
        ## with '&&' so that a failed index build stops the job instead of
        ## running the aligner against a missing index.
        #set $db_path = $refGenomeSource.own_reference_genome
        #set $temp_index = 'segemehl_temp_index.idx'
        segemehl.x -x $temp_index -d $db_path &&
    #else:
        ## Built-in reference: FASTA and index paths come from the data table.
        #set $db_path = $refGenomeSource.index.fields.db_path
        #set $temp_index = $refGenomeSource.index.fields.index_path
    #end if

    ## execute segemehl
    segemehl.x

    ## number of threads
    -t "\${GALAXY_SLOTS:-12}"

    ## db file path
    -d $db_path

    ## index file path
    -i $temp_index

    ## check for single/pair-end
    #if str( $library.type ) == "single":
        #set $query_list = list()
        ## prepare inputs
        #for $fastq in $library.reads:
            ## '#silent' keeps the return value of append() out of the command line
            #silent $query_list.append('%s' %($fastq.input_query))
        #end for
        -q "#echo ' '.join( $query_list )#"
    #else:
        ## prepare inputs
        #set $mate1 = list()
        #set $mate2 = list()
        #for $mate_pair in $library.mate_list:
            #silent $mate1.append( str($mate_pair.first_strand_query) )
            #silent $mate2.append( str($mate_pair.second_strand_query) )
        #end for

        ## The closing '#' ends the directive inline so the line break after
        ## each option is kept and the options cannot run together.
        -q #echo ','.join($mate1)#
        -p #echo ','.join($mate2)#

        -I $library.maxinsertsize
    #end if
    -m $minsize
    -A $accuracy
    -H $hitstrategy
    ## adapters are optional: only pass them when the text field is non-empty
    #if str( $prime5 ).strip():
        -P $prime5
    #end if
    #if str( $prime3 ).strip():
        -Q $prime3
    #end if
    $polyA
    $autoclip
    $hardclip
    $order
    -s
    -o $segemehl_out
]]>
</command>
|
|
<!-- Error detection: the text "Exit forced" on either output stream (source="both")
     is reported at level "fatal" with the description "Execution halted." -->
68 <stdio>
|
1
|
69 <regex match="Exit forced"
|
|
70 source="both"
|
|
71 level="fatal"
|
0
|
72 description="Execution halted." />
|
|
73 </stdio>
|
|
<inputs>

    <!-- Reference selection: a pre-built index from the segemehl_indexes data
         table, or a FASTA dataset from the history. -->
    <conditional name="refGenomeSource">
        <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
            <option value="indexed">Use a built-in index</option>
            <option value="history">Use one from the history</option>
        </param>
        <when value="indexed">
            <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
                <options from_data_table="segemehl_indexes">
                    <column name="value" index="0"/>
                    <column name="dbkey" index="1"/>
                    <column name="name" index="2"/>
                    <column name="db_path" index="3"/>
                    <column name="index_path" index="4"/>
                    <filter type="sort_by" column="2"/>
                    <validator type="no_options" message="No indexes are available for the selected input dataset"/>
                </options>
            </param>
        </when> <!-- built-in -->
        <when value="history">
            <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
        </when> <!-- history -->
    </conditional> <!-- refGenomeSource -->


    <!-- Read input: one or more single-end files, or one or more mate pairs
         plus the maximum insert size. -->
    <conditional name="library">
        <param name="type" type="select" label="Is this library paired-end?">
            <option value="single">Single-end</option>
            <option value="paired">Paired-end</option>
        </param>
        <when value="single">
            <!-- min="1" matches the paired-end repeat and prevents an empty -q argument -->
            <repeat name="reads" title="FASTQ/FASTA files" min="1">
                <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
            </repeat>
        </when>
        <when value="paired">
            <repeat name="mate_list" title="Paired End Pairs" min="1">
                <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
                <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
            </repeat>
            <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
        </when>
    </conditional>


    <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
        <validator type="in_range" min="1"/>
    </param>
    <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
        <validator type="in_range" min="1" max="100"/>
    </param>
    <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
        <option value="1">report only best scoring hits</option>
        <option value="0">report all scoring hits</option>
    </param>
    <!-- The help flags below mirror the command template: prime5 is passed as -P, prime3 as -Q. -->
    <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" />
    <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/>
    <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
    <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
    <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/>
    <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sorts the output by chromosome and position" help="(-O)"/>
</inputs>
|
|
137
|
|
<!-- Single output dataset in SAM format; the command template writes it via "-o $segemehl_out". -->
138 <outputs>
|
|
139 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
|
|
140 </outputs>
|
|
141 <help>
|
1
|
142 <![CDATA[
|
0
|
143
|
|
144 .. class:: infomark
|
|
145
|
1
|
146 **What it does**
|
0
|
147
|
|
148 Segemehl_ is a short read mapper with gaps.
|
|
149
|
1
|
150 Segemehl_ is software for mapping short sequencer reads to reference genomes.
|
|
151 Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
|
|
152 Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
|
0
|
153 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
|
|
154
|
|
155 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
|
|
156
|
|
157
|
1
|
158 ]]>
|
0
|
159 </help>
|
1
|
<!-- Citation for this tool, given as a DOI. -->
160 <citations>
|
|
161 <citation type="doi">10.1371/journal.pcbi.1000502</citation>
|
|
162 </citations>
|
0
|
163 </tool>
|