annotate segemehl.xml @ 2:0da425524259 draft

Uploaded
author bgruening
date Thu, 05 Feb 2015 08:25:13 -0500
parents df7c7d732d31
children 039547ad8fb8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
1 <tool id="segemehl" name="segemehl" version="0.1.6.0">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
2 <description>based short read aligner</description>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
3 <requirements>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
4 <requirement type="package" version="0.1.6">segemehl</requirement>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
5 </requirements>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
6 <command>
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
7 <![CDATA[
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
8 ## prepare segemehl index if no reference genome is supplied
e97db054a88d Uploaded
rnateam
parents:
diff changeset
9 temp_index = `mktemp`;
e97db054a88d Uploaded
rnateam
parents:
diff changeset
10 #if $refGenomeSource.genomeSource == "history":
e97db054a88d Uploaded
rnateam
parents:
diff changeset
11 segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome;
e97db054a88d Uploaded
rnateam
parents:
diff changeset
12 #else:
2
0da425524259 Uploaded
bgruening
parents: 1
diff changeset
13 #set $temp_index = $refGenomeSource.index.fields.index_path
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
14 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
15
e97db054a88d Uploaded
rnateam
parents:
diff changeset
16
e97db054a88d Uploaded
rnateam
parents:
diff changeset
17 ## execute segemehl
e97db054a88d Uploaded
rnateam
parents:
diff changeset
18 segemehl.x
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
19
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
20 ## number of threads
e97db054a88d Uploaded
rnateam
parents:
diff changeset
21 -t "\${GALAXY_SLOTS:-12}"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
22
e97db054a88d Uploaded
rnateam
parents:
diff changeset
23 ## db file path
e97db054a88d Uploaded
rnateam
parents:
diff changeset
24 -d ${refGenomeSource.index.fields.db_path}
e97db054a88d Uploaded
rnateam
parents:
diff changeset
25
e97db054a88d Uploaded
rnateam
parents:
diff changeset
26 -i $temp_index
e97db054a88d Uploaded
rnateam
parents:
diff changeset
27
e97db054a88d Uploaded
rnateam
parents:
diff changeset
28 ## check for single/pair-end
e97db054a88d Uploaded
rnateam
parents:
diff changeset
29 #if str( $library.type ) == "single":
e97db054a88d Uploaded
rnateam
parents:
diff changeset
30 #set $query_list = list()
e97db054a88d Uploaded
rnateam
parents:
diff changeset
31 ## prepare inputs
e97db054a88d Uploaded
rnateam
parents:
diff changeset
32 #for $fastq in $library.reads:
e97db054a88d Uploaded
rnateam
parents:
diff changeset
33 $query_list.append('%s' %($fastq.input_query))
e97db054a88d Uploaded
rnateam
parents:
diff changeset
34 #end for
e97db054a88d Uploaded
rnateam
parents:
diff changeset
35 -q "#echo ' '.join( $query_list )#"
e97db054a88d Uploaded
rnateam
parents:
diff changeset
36 #else
e97db054a88d Uploaded
rnateam
parents:
diff changeset
37 ## prepare inputs
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
38
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
39 #set $mate1 = list()
e97db054a88d Uploaded
rnateam
parents:
diff changeset
40 #set $mate2 = list()
e97db054a88d Uploaded
rnateam
parents:
diff changeset
41 #for $mate_pair in $library.mate_list:
e97db054a88d Uploaded
rnateam
parents:
diff changeset
42 $mate1.append( str($mate_pair.first_strand_query) )
e97db054a88d Uploaded
rnateam
parents:
diff changeset
43 $mate2.append( str($mate_pair.second_strand_query) )
e97db054a88d Uploaded
rnateam
parents:
diff changeset
44 #end for
e97db054a88d Uploaded
rnateam
parents:
diff changeset
45
e97db054a88d Uploaded
rnateam
parents:
diff changeset
46 -q #echo ','.join($mate1)
e97db054a88d Uploaded
rnateam
parents:
diff changeset
47 -p #echo ','.join($mate2)
e97db054a88d Uploaded
rnateam
parents:
diff changeset
48
e97db054a88d Uploaded
rnateam
parents:
diff changeset
49 -I $library.maxinsertsize
e97db054a88d Uploaded
rnateam
parents:
diff changeset
50 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
51 -m $minsize
e97db054a88d Uploaded
rnateam
parents:
diff changeset
52 -A $accuracy
e97db054a88d Uploaded
rnateam
parents:
diff changeset
53 -H $hitstrategy
e97db054a88d Uploaded
rnateam
parents:
diff changeset
54 #if str( $prime5 ).strip():
2
0da425524259 Uploaded
bgruening
parents: 1
diff changeset
55 -P "$prime5"
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
56 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
57 #if str( $prime3 ).strip():
2
0da425524259 Uploaded
bgruening
parents: 1
diff changeset
58 -Q "$prime3"
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
59 #end if
e97db054a88d Uploaded
rnateam
parents:
diff changeset
60 $polyA
e97db054a88d Uploaded
rnateam
parents:
diff changeset
61 $autoclip
e97db054a88d Uploaded
rnateam
parents:
diff changeset
62 $hardclip
e97db054a88d Uploaded
rnateam
parents:
diff changeset
63 $order
e97db054a88d Uploaded
rnateam
parents:
diff changeset
64 -s
e97db054a88d Uploaded
rnateam
parents:
diff changeset
65 -o $segemehl_out
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
66 ]]>
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
67 </command>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
68 <stdio>
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
69 <regex match="Exit forced"
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
70 source="both"
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
71 level="fatal"
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
72 description="Execution halted." />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
73 </stdio>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
74 <inputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
75
e97db054a88d Uploaded
rnateam
parents:
diff changeset
76 <conditional name="refGenomeSource">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
77 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
78 <option value="indexed">Use a built-in index</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
79 <option value="history">Use one from the history</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
80 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
81 <when value="indexed">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
82 <param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed, contact your Galaxy admin">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
83 <options from_data_table="segemehl_indexes">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
84 <column name="value" index="0"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
85 <column name="dbkey" index="1"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
86 <column name="name" index="2"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
87 <column name="db_path" index="3"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
88 <column name="index_path" index="4"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
89 <filter type="sort_by" column="2"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
90 <validator type="no_options" message="No indexes are available for the selected input dataset"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
91 </options>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
92 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
93 </when> <!-- built-in -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
94 <when value="history">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
95 <param name="own_reference_genome" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
96 </when> <!-- history -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
97 </conditional> <!-- refGenomeSource -->
e97db054a88d Uploaded
rnateam
parents:
diff changeset
98
e97db054a88d Uploaded
rnateam
parents:
diff changeset
99
e97db054a88d Uploaded
rnateam
parents:
diff changeset
100 <conditional name="library">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
101 <param name="type" type="select" label="Is this library paired-end?">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
102 <option value="single">Single-end</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
103 <option value="paired">Paired-end</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
104 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
105 <when value="single">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
106 <repeat name="reads" title="FASTQ/FASTA files">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
107 <param name="input_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads fasta/fastq file" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
108 </repeat>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
109 </when>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
110 <when value="paired">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
111 <repeat name="mate_list" title="Paired End Pairs" min="1">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
112 <param name="first_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from first strand" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
113 <param name="second_strand_query" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Reads from second strand" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
114 </repeat>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
115 <param name="maxinsertsize" type="integer" value="5000" label="Maximum size of the inserts (paired end)" help="default: 5000 (-I)" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
116 </when>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
117 </conditional>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
118
e97db054a88d Uploaded
rnateam
parents:
diff changeset
119
e97db054a88d Uploaded
rnateam
parents:
diff changeset
120 <param name="minsize" type="integer" value="12" size="5" label="Minimum size of queries" help="default: 12 (-m)">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
121 <validator type="in_range" min="1"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
122 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
123 <param name="accuracy" type="integer" value="85" size="5" label="Min percentage of matches per read in semi-global alignment" help="default: 85 (-A)" >
e97db054a88d Uploaded
rnateam
parents:
diff changeset
124 <validator type="in_range" min="1" max="100"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
125 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
126 <param name="hitstrategy" type="select" label="Hits to report?" help="(-H)">
e97db054a88d Uploaded
rnateam
parents:
diff changeset
127 <option value="1">report only best scoring hits</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
128 <option value="0">report all scoring hits</option>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
129 </param>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
130 <param name="prime5" type="text" size="80" label="add 5' adapter" help="default: none (-P)" />
e97db054a88d Uploaded
rnateam
parents:
diff changeset
131 <param name="prime3" type="text" size="80" label="add 3' adapter" help="default: none (-Q)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
132 <param name="polyA" type="boolean" truevalue="--polyA" falsevalue="" checked="false" label="Clip polyA tail" help="(-T)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
133 <param name="autoclip" type="boolean" truevalue="--autoclip" falsevalue="" checked="false" label="Autoclip unknown 3prime adapter" help="(-Y)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
134 <param name="hardclip" type="boolean" truevalue="--hardclip" falsevalue="" checked="false" label="Enable hard clipping" help="-C"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
135 <param name="order" type="boolean" truevalue="--order" falsevalue="" checked="false" label="Sort the output by chromosome and position" help="(-O)"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
136 </inputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
137
e97db054a88d Uploaded
rnateam
parents:
diff changeset
138 <outputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
139 <data format="sam" name="segemehl_out" label="Read alignments on ${on_string}"/>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
140 </outputs>
e97db054a88d Uploaded
rnateam
parents:
diff changeset
141 <help>
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
142 <![CDATA[
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
143
e97db054a88d Uploaded
rnateam
parents:
diff changeset
144 .. class:: infomark
e97db054a88d Uploaded
rnateam
parents:
diff changeset
145
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
146 **What it does**
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
147
e97db054a88d Uploaded
rnateam
parents:
diff changeset
148 Segemehl_ is a short read mapper with gaps.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
149
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
150 Segemehl_ is a program for mapping short sequencer reads to reference genomes.
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
151 Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions.
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
152 Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly.
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
153 segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl_ allows bisulfite sequencing mapping and split read mapping.
e97db054a88d Uploaded
rnateam
parents:
diff changeset
154
e97db054a88d Uploaded
rnateam
parents:
diff changeset
155 .. _Segemehl: http://www.bioinf.uni-leipzig.de/Software/segemehl/
e97db054a88d Uploaded
rnateam
parents:
diff changeset
156
e97db054a88d Uploaded
rnateam
parents:
diff changeset
157
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
158 ]]>
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
159 </help>
1
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
160 <citations>
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
161 <citation type="doi">10.1371/journal.pcbi.1000502</citation>
df7c7d732d31 Uploaded
rnateam
parents: 0
diff changeset
162 </citations>
0
e97db054a88d Uploaded
rnateam
parents:
diff changeset
163 </tool>